From f79d158bbf783144543ec7e288119d398a4ffb6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=B1=80?=
Date: Mon, 4 Sep 2023 21:46:19 +0800
Subject: [PATCH 1/5] PretrainedModel: add generation_config

---
 .../{transformers => }/generation/__init__.py |  0
 .../generation/configuration_utils.py         | 10 ++++----
 .../generation/logits_process.py              |  0
 .../generation/stopping_criteria.py           |  0
 .../generation/streamers.py                   |  0
 .../{transformers => }/generation/utils.py    | 12 +++++++++-
 paddlenlp/transformers/generation_utils.py    | 10 ++++----
 paddlenlp/transformers/model_utils.py         | 24 +++++++++++++++----
 .../{transformers => }/generation/__init__.py |  0
 .../generation/test_logits_process.py         |  2 +-
 .../generation/test_stopping_criteria.py      |  2 +-
 .../generation/test_streamers.py              |  3 +--
 12 files changed, 44 insertions(+), 19 deletions(-)
 rename paddlenlp/{transformers => }/generation/__init__.py (100%)
 rename paddlenlp/{transformers => }/generation/configuration_utils.py (99%)
 rename paddlenlp/{transformers => }/generation/logits_process.py (100%)
 rename paddlenlp/{transformers => }/generation/stopping_criteria.py (100%)
 rename paddlenlp/{transformers => }/generation/streamers.py (100%)
 rename paddlenlp/{transformers => }/generation/utils.py (99%)
 rename tests/{transformers => }/generation/__init__.py (100%)
 rename tests/{transformers => }/generation/test_logits_process.py (99%)
 rename tests/{transformers => }/generation/test_stopping_criteria.py (97%)
 rename tests/{transformers => }/generation/test_streamers.py (98%)

diff --git a/paddlenlp/transformers/generation/__init__.py b/paddlenlp/generation/__init__.py
similarity index 100%
rename from paddlenlp/transformers/generation/__init__.py
rename to paddlenlp/generation/__init__.py
diff --git a/paddlenlp/transformers/generation/configuration_utils.py b/paddlenlp/generation/configuration_utils.py
similarity index 99%
rename from paddlenlp/transformers/generation/configuration_utils.py
rename to paddlenlp/generation/configuration_utils.py
index 3222ca9203bb..7201dce7f7ed 100644
--- a/paddlenlp/transformers/generation/configuration_utils.py
+++ b/paddlenlp/generation/configuration_utils.py
@@ -23,18 +23,18 @@
 from paddle.common_ops_import import convert_dtype
 
 from paddlenlp import __version__
+from paddlenlp.transformers.configuration_utils import PretrainedConfig
+from paddlenlp.transformers.utils import resolve_cache_dir
 from paddlenlp.utils.log import logger
-from ...utils import GENERATION_CONFIG_NAME
-from ...utils.downloader import (
+from ..utils import GENERATION_CONFIG_NAME
+from ..utils.downloader import (
     COMMUNITY_MODEL_PREFIX,
     get_path_from_url_with_filelock,
     hf_file_exists,
     is_url,
     url_file_exists,
 )
-from ..configuration_utils import PretrainedConfig
-from ..utils import resolve_cache_dir
 
 
 def resolve_hf_generation_config_path(repo_id: str, cache_dir: str, subfolder=None) -> str:
@@ -202,7 +202,7 @@ def __eq__(self, other):
         self_dict = self.__dict__.copy()
         other_dict = other.__dict__.copy()
         # ignore metadata
-        for metadata_field in "paddlenlp_version":
+        for metadata_field in ["_from_model_config", "paddlenlp_version"]:
             self_dict.pop(metadata_field, None)
             other_dict.pop(metadata_field, None)
         return self_dict == other_dict
diff --git a/paddlenlp/transformers/generation/logits_process.py b/paddlenlp/generation/logits_process.py
similarity index 100%
rename from paddlenlp/transformers/generation/logits_process.py
rename to paddlenlp/generation/logits_process.py
diff --git a/paddlenlp/transformers/generation/stopping_criteria.py b/paddlenlp/generation/stopping_criteria.py
similarity index 100%
rename from paddlenlp/transformers/generation/stopping_criteria.py
rename to paddlenlp/generation/stopping_criteria.py
diff --git a/paddlenlp/transformers/generation/streamers.py b/paddlenlp/generation/streamers.py
similarity index 100%
rename from paddlenlp/transformers/generation/streamers.py
rename to paddlenlp/generation/streamers.py
diff --git a/paddlenlp/transformers/generation/utils.py b/paddlenlp/generation/utils.py
similarity index 99%
rename from paddlenlp/transformers/generation/utils.py
rename to paddlenlp/generation/utils.py
index 9429eea6af22..c26cd238d317 100644
--- a/paddlenlp/transformers/generation/utils.py
+++ b/paddlenlp/generation/utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 from __future__ import annotations
 
+import copy
 from typing import Union
 
 import paddle
@@ -737,8 +738,17 @@ def generate(
         # ['是的', '嗯嗯']
         """
         if generation_config is None:
-            generation_config = GenerationConfig.from_model_config(self.config)
+            if self.generation_config._from_model_config:
+                new_generation_config = GenerationConfig.from_model_config(self.config)
+                if new_generation_config != self.generation_config:
+                    logger.warning(
+                        "model.generation_config is in conflict with model.config; model.config is used."
+                    )
+                    self.generation_config = new_generation_config
+            generation_config = self.generation_config
 
+        # work on a deep copy so that model.generation_config itself is not updated
+        generation_config = copy.deepcopy(generation_config)
         model_kwargs = generation_config.update(**kwargs)
 
         assert generation_config.decode_strategy in [
diff --git a/paddlenlp/transformers/generation_utils.py b/paddlenlp/transformers/generation_utils.py
index aeb91637e3fd..7ee629a80f08 100644
--- a/paddlenlp/transformers/generation_utils.py
+++ b/paddlenlp/transformers/generation_utils.py
@@ -12,8 +12,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddlenlp.transformers.generation.configuration_utils import * # noqa: F401, F403 -from paddlenlp.transformers.generation.logits_process import * # noqa: F401, F403 -from paddlenlp.transformers.generation.stopping_criteria import * # noqa: F401, F403 -from paddlenlp.transformers.generation.streamers import * # noqa: F401, F403 -from paddlenlp.transformers.generation.utils import * # noqa: F401, F403 +from paddlenlp.generation.configuration_utils import * # noqa: F401, F403 +from paddlenlp.generation.logits_process import * # noqa: F401, F403 +from paddlenlp.generation.stopping_criteria import * # noqa: F401, F403 +from paddlenlp.generation.streamers import * # noqa: F401, F403 +from paddlenlp.generation.utils import * # noqa: F401, F403 diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index 0adaddfefc84..74795e1f73ce 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -61,7 +61,7 @@ from ..utils import device_guard from .configuration_utils import PretrainedConfig from .conversion_utils import ConversionMixin -from .generation_utils import GenerationMixin +from .generation_utils import GenerationConfig, GenerationMixin from .utils import ( # convert_ndarray_dtype, ContextManagers, InitTrackerMeta, @@ -863,6 +863,7 @@ def __init__(self, *args, **kwargs): if config is not None: self.config: PretrainedConfig = config self.model_config_file = CONFIG_NAME + self.generation_config = GenerationConfig.from_model_config(self.config) if self.can_generate() else None return # extract config from kwargs @@ -876,6 +877,7 @@ def __init__(self, *args, **kwargs): raise TypeError("config parameter should be the instance of PretrainedConfig") self.config: PretrainedConfig = kwargs["config"] + self.generation_config = GenerationConfig.from_model_config(self.config) if self.can_generate() else None self.model_config_file = CONFIG_NAME self.warnings_issued = {} @@ -1993,6 +1995,22 @@ def from_pretrained( keep_in_fp32_modules=keep_in_fp32_modules, ) + # load generation_config.json + if model.can_generate() and pretrained_model_name_or_path is not None: + try: + model.generation_config = GenerationConfig.from_pretrained( + pretrained_model_name_or_path, + cache_dir=cache_dir, + force_download=force_download, + subfolder=subfolder, + **kwargs, + ) + except OSError: + logger.info( + "Generation config file not found, using a generation config created from the model config." 
+ ) + pass + if paddle.in_dynamic_mode(): return model @@ -2086,9 +2104,7 @@ def save_pretrained( if is_main_process: config_to_save.save_pretrained(save_directory) if self.can_generate(): - # to do support generation_config - pass - # model_to_save.generation_config.save_pretrained(save_directory) + model_to_save.generation_config.save_pretrained(save_directory) # Handle the case where some state_dict keys shouldn't be saved if self._keys_to_ignore_on_save is not None: diff --git a/tests/transformers/generation/__init__.py b/tests/generation/__init__.py similarity index 100% rename from tests/transformers/generation/__init__.py rename to tests/generation/__init__.py diff --git a/tests/transformers/generation/test_logits_process.py b/tests/generation/test_logits_process.py similarity index 99% rename from tests/transformers/generation/test_logits_process.py rename to tests/generation/test_logits_process.py index 0d070c7109cf..8c411181ffbb 100644 --- a/tests/transformers/generation/test_logits_process.py +++ b/tests/generation/test_logits_process.py @@ -35,7 +35,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None): return paddle.to_tensor(data=values).reshape(shape) -from paddlenlp.transformers.generation.logits_process import ( +from paddlenlp.generation.logits_process import ( ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, HammingDiversityLogitsProcessor, diff --git a/tests/transformers/generation/test_stopping_criteria.py b/tests/generation/test_stopping_criteria.py similarity index 97% rename from tests/transformers/generation/test_stopping_criteria.py rename to tests/generation/test_stopping_criteria.py index c3334081cfe8..f557dbaba279 100644 --- a/tests/transformers/generation/test_stopping_criteria.py +++ b/tests/generation/test_stopping_criteria.py @@ -18,7 +18,7 @@ import paddle -from paddlenlp.transformers.generation.stopping_criteria import ( +from paddlenlp.generation.stopping_criteria import ( MaxLengthCriteria, MaxTimeCriteria, StoppingCriteriaList, diff --git a/tests/transformers/generation/test_streamers.py b/tests/generation/test_streamers.py similarity index 98% rename from tests/transformers/generation/test_streamers.py rename to tests/generation/test_streamers.py index c71b21273f62..a8c8294c7438 100644 --- a/tests/transformers/generation/test_streamers.py +++ b/tests/generation/test_streamers.py @@ -22,8 +22,7 @@ from paddlenlp.transformers.generation_utils import TextIteratorStreamer, TextStreamer from paddlenlp.transformers.utils import CaptureStd from tests.testing_utils import slow - -from ..test_modeling_common import ids_tensor +from tests.transformers.test_modeling_common import ids_tensor class StreamerTester(unittest.TestCase): From 2e06d9fdb887a5dcc555d5d0c894b4e584176459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=B1=80?= Date: Tue, 5 Sep 2023 14:50:26 +0800 Subject: [PATCH 2/5] max/min_new_token, bug fix --- llm/predictor.py | 21 ++++++++++++--------- llm/utils.py | 19 +++++++++++-------- paddlenlp/generation/configuration_utils.py | 19 ++++++++++++------- paddlenlp/generation/utils.py | 14 +++++++++++--- tests/transformers/test_generation_utils.py | 8 ++++---- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/llm/predictor.py b/llm/predictor.py index cd443a063a8f..9141449706be 100644 --- a/llm/predictor.py +++ b/llm/predictor.py @@ -41,6 +41,7 @@ PretrainedModel, PretrainedTokenizer, ) +from paddlenlp.transformers.generation_utils import GenerationConfig from paddlenlp.utils.import_utils import import_module, 
is_paddlenlp_ops_available @@ -199,15 +200,17 @@ def _infer(self, inputs: dict[str, paddle.Tensor]): max_length = max(self.config.max_length - inputs["input_ids"].shape[-1], 1) result = self.model.generate( **inputs, - max_length=max_length, - bos_token_id=self.tokenizer.bos_token_id, - eos_token_id=self.tokenizer.eos_token_id, - pad_token_id=self.tokenizer.pad_token_id, - decode_strategy=self.config.decode_strategy, - temperature=self.config.temperature, - top_k=self.config.top_k, - top_p=self.config.top_p, - repetition_penalty=self.config.repetition_penalty, + generation_config=GenerationConfig( + max_new_token=max_length, + bos_token_id=self.tokenizer.bos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.pad_token_id, + decode_strategy=self.config.decode_strategy, + temperature=self.config.temperature, + top_k=self.config.top_k, + top_p=self.config.top_p, + repetition_penalty=self.config.repetition_penalty, + ), ) result = result[0] return result diff --git a/llm/utils.py b/llm/utils.py index 1f5ffc8e5698..50c0ad9a9708 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -28,6 +28,7 @@ from paddlenlp.datasets import InTokensIterableDataset from paddlenlp.trainer import Trainer, TrainerCallback from paddlenlp.trainer.trainer_utils import IterableDatasetShard, has_length +from paddlenlp.transformers.generation_utils import GenerationConfig from paddlenlp.utils.log import logger @@ -200,14 +201,16 @@ def prediction_step( input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"] if "attention_mask" in inputs else None, position_ids=inputs["position_ids"] if "position_ids" in inputs else None, - max_length=self.data_args.tgt_length, - decode_strategy="sampling", - top_k=self.gen_args.top_k, - top_p=self.gen_args.top_p, - bos_token_id=self.tokenizer.bos_token_id, - eos_token_id=self.tokenizer.eos_token_id, - pad_token_id=self.tokenizer.pad_token_id, - use_cache=True, + generation_config=GenerationConfig( + max_new_token=self.data_args.tgt_length, + decode_strategy="sampling", + top_k=self.gen_args.top_k, + top_p=self.gen_args.top_p, + bos_token_id=self.tokenizer.bos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.pad_token_id, + use_cache=True, + ), )[0] all_preds = [] for pred_tokens in generated_tokens: diff --git a/paddlenlp/generation/configuration_utils.py b/paddlenlp/generation/configuration_utils.py index 7201dce7f7ed..3f6726753668 100644 --- a/paddlenlp/generation/configuration_utils.py +++ b/paddlenlp/generation/configuration_utils.py @@ -36,6 +36,8 @@ url_file_exists, ) +DEFAULT_MAX_NEW_TOKEN = 20 + def resolve_hf_generation_config_path(repo_id: str, cache_dir: str, subfolder=None) -> str: """resolve config file from hf hub @@ -143,7 +145,9 @@ def _get_generation_mode(self): def __init__(self, **kwargs): # Parameters that control the length of the output - self.max_length = kwargs.pop("max_length", 20) + self.max_new_token = kwargs.get("max_new_token", DEFAULT_MAX_NEW_TOKEN) + self.min_new_token = kwargs.pop("min_new_token", 0) + self.max_length = kwargs.pop("max_length", 0) self.min_length = kwargs.pop("min_length", 0) self.early_stopping = kwargs.pop("early_stopping", False) @@ -176,11 +180,6 @@ def __init__(self, **kwargs): self._from_model_config = kwargs.pop("_from_model_config", False) self.paddlenlp_version = kwargs.pop("paddlenlp_version", __version__) - # Parameters that control the generation strategy used - self.decode_strategy = kwargs.pop("decode_strategy", None) - if self.decode_strategy is 
None: - self.decode_strategy = self._get_generation_mode() - # Additional attributes without default values if not self._from_model_config: # we don't want to copy values from the model config if we're initializing a `GenerationConfig` from a @@ -192,6 +191,12 @@ def __init__(self, **kwargs): logger.error(f"Can't set {key} with value {value} for {self}") raise err + # Parameters that control the generation strategy used + if "decode_strategy" in kwargs: + self.decode_strategy = kwargs.pop("decode_strategy") + else: + self.decode_strategy = self._get_generation_mode() + # Validate the values of the attributes self.validate(is_init=True) @@ -432,7 +437,7 @@ def from_pretrained( community_url = "/".join([COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, GENERATION_CONFIG_NAME]) if url_file_exists(community_url): resolved_config_file = get_path_from_url_with_filelock( - pretrained_model_name_or_path, cache_dir, check_exist=not force_download + community_url, cache_dir, check_exist=not force_download ) else: raise FileNotFoundError(f"configuration file<{GENERATION_CONFIG_NAME}> not found") diff --git a/paddlenlp/generation/utils.py b/paddlenlp/generation/utils.py index c26cd238d317..89dab1d2014f 100644 --- a/paddlenlp/generation/utils.py +++ b/paddlenlp/generation/utils.py @@ -38,7 +38,7 @@ from paddlenlp.transformers.utils import get_scale_by_dtype from paddlenlp.utils.log import logger -from .configuration_utils import GenerationConfig +from .configuration_utils import DEFAULT_MAX_NEW_TOKEN, GenerationConfig from .logits_process import ( ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, @@ -902,8 +902,16 @@ def generate( print("Setting `pad_token_id` to `eos_token_id`:{} for " "open-end generation.".format(eos_token_id)) pad_token_id = eos_token_id - max_length = generation_config.max_length - min_length = generation_config.min_length + if generation_config.max_length != 0 and generation_config.max_new_token == DEFAULT_MAX_NEW_TOKEN: + logger.warning("`max_length` will be deprecated in future, use" " `max_new_token` instead.") + generation_config.max_new_token = generation_config.max_length + + if generation_config.min_length != 0 and generation_config.min_new_token == 0: + logger.warning("`min_length` will be deprecated in future, use" " `min_new_token` instead.") + generation_config.min_new_token = generation_config.min_length + + max_length = generation_config.max_new_token + min_length = generation_config.min_new_token if is_tracing and not paddle.is_tensor(max_length): if hasattr(paddle.framework, "_no_check_dy2st_diff"): # TODO(daisiming): _no_check_dy2st_diff is used to turn off the checking of behavior diff --git a/tests/transformers/test_generation_utils.py b/tests/transformers/test_generation_utils.py index afbb817a44a7..1312f9cf10cc 100644 --- a/tests/transformers/test_generation_utils.py +++ b/tests/transformers/test_generation_utils.py @@ -235,7 +235,7 @@ def _greedy_generate( input_ids, attention_mask=attention_mask, generation_config=GenerationConfig( - max_length=max_length, + max_new_token=max_length, decode_strategy="greedy_search", **logits_process_kwargs, ), @@ -277,7 +277,7 @@ def _sample_generate( input_ids, attention_mask=attention_mask, generation_config=GenerationConfig( - max_length=max_length, + max_new_token=max_length, decode_strategy="sampling", num_return_sequences=num_return_sequences, top_k=1, @@ -332,7 +332,7 @@ def _beam_search_generate( attention_mask=attention_mask, generation_config=GenerationConfig( decode_strategy="beam_search", - 
max_length=max_length, + max_new_token=max_length, **beam_kwargs, **logits_process_kwargs, ), @@ -390,7 +390,7 @@ def _group_beam_search_generate( attention_mask=attention_mask, generation_config=GenerationConfig( decode_strategy="beam_search", - max_length=max_length, + max_new_token=max_length, **beam_kwargs, **logits_process_kwargs, ), From c53550bda837ed2416ac5e5b10ff67f04eb8dd1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=B1=80?= Date: Tue, 5 Sep 2023 16:37:10 +0800 Subject: [PATCH 3/5] deprecate generation_utils --- llm/predictor.py | 2 +- llm/utils.py | 2 +- .../transformers/generation_utils.py | 2 +- paddlenlp/generation/__init__.py | 20 +++++++++++++++++++ paddlenlp/generation/streamers.py | 4 ++-- paddlenlp/transformers/dallebart/modeling.py | 2 +- paddlenlp/transformers/generation_utils.py | 19 ------------------ paddlenlp/transformers/model_utils.py | 2 +- tests/generation/test_streamers.py | 2 +- tests/transformers/test_generation_utils.py | 20 +++++++++---------- 10 files changed, 38 insertions(+), 37 deletions(-) delete mode 100644 paddlenlp/transformers/generation_utils.py diff --git a/llm/predictor.py b/llm/predictor.py index 9141449706be..5b499860bacf 100644 --- a/llm/predictor.py +++ b/llm/predictor.py @@ -30,6 +30,7 @@ load_real_time_tokens, ) +from paddlenlp.generation import GenerationConfig from paddlenlp.peft import LoRAConfig, LoRAModel, PrefixConfig, PrefixModelForCausalLM from paddlenlp.taskflow.utils import static_mode_guard from paddlenlp.trainer import PdArgumentParser @@ -41,7 +42,6 @@ PretrainedModel, PretrainedTokenizer, ) -from paddlenlp.transformers.generation_utils import GenerationConfig from paddlenlp.utils.import_utils import import_module, is_paddlenlp_ops_available diff --git a/llm/utils.py b/llm/utils.py index 50c0ad9a9708..2f4d3e9ff3e6 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -26,9 +26,9 @@ from sklearn.metrics import accuracy_score from paddlenlp.datasets import InTokensIterableDataset +from paddlenlp.generation import GenerationConfig from paddlenlp.trainer import Trainer, TrainerCallback from paddlenlp.trainer.trainer_utils import IterableDatasetShard, has_length -from paddlenlp.transformers.generation_utils import GenerationConfig from paddlenlp.utils.log import logger diff --git a/paddlenlp/experimental/transformers/generation_utils.py b/paddlenlp/experimental/transformers/generation_utils.py index 699b5f14f560..5ae02afc7359 100644 --- a/paddlenlp/experimental/transformers/generation_utils.py +++ b/paddlenlp/experimental/transformers/generation_utils.py @@ -28,7 +28,7 @@ except: from paddlenlp_ops import top_p_sampling -from paddlenlp.transformers.generation_utils import GenerationMixin +from paddlenlp.generation import GenerationMixin __all__ = ["GenerationInferenceModel"] diff --git a/paddlenlp/generation/__init__.py b/paddlenlp/generation/__init__.py index 595add0aed9e..a68f37574efe 100644 --- a/paddlenlp/generation/__init__.py +++ b/paddlenlp/generation/__init__.py @@ -11,3 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from .configuration_utils import GenerationConfig +from .logits_process import ( + ForcedBOSTokenLogitsProcessor, + ForcedEOSTokenLogitsProcessor, + HammingDiversityLogitsProcessor, + LogitsProcessorList, + MinLengthLogitsProcessor, + RepetitionPenaltyLogitsProcessor, + TopKProcess, + TopPProcess, +) +from .stopping_criteria import ( + MaxLengthCriteria, + MaxTimeCriteria, + StoppingCriteria, + StoppingCriteriaList, + validate_stopping_criteria, +) +from .streamers import BaseStreamer, TextIteratorStreamer, TextStreamer +from .utils import BeamSearchScorer, GenerationMixin, get_unfinished_flag diff --git a/paddlenlp/generation/streamers.py b/paddlenlp/generation/streamers.py index c0b04f8207a0..67b97b0cf0f9 100644 --- a/paddlenlp/generation/streamers.py +++ b/paddlenlp/generation/streamers.py @@ -46,7 +46,7 @@ class TextStreamer(BaseStreamer): ```python >>> from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer - >>> from paddlenlp.transformers.generation_utils import TextStreamer + >>> from paddlenlp.generation import TextStreamer >>> tok = AutoTokenizer.from_pretrained("gpt2") >>> model = AutoModelForCausalLM.from_pretrained("gpt2") @@ -167,7 +167,7 @@ class TextIteratorStreamer(TextStreamer): ```python >>> from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer - >>> from paddlenlp.transformers.generation_utils import TextIteratorStreamer + >>> from paddlenlp.generation import TextIteratorStreamer >>> from threading import Thread >>> tok = AutoTokenizer.from_pretrained("gpt2") diff --git a/paddlenlp/transformers/dallebart/modeling.py b/paddlenlp/transformers/dallebart/modeling.py index 93e9f54c53de..1e5d50009363 100644 --- a/paddlenlp/transformers/dallebart/modeling.py +++ b/paddlenlp/transformers/dallebart/modeling.py @@ -22,10 +22,10 @@ import paddle.nn.functional as F from paddle.common_ops_import import convert_dtype +from ...generation import BeamSearchScorer from ...transformers import PretrainedModel, register_base_model from ...utils.env import CONFIG_NAME from ...utils.log import logger -from ..generation_utils import BeamSearchScorer from .configuration import ( DALLEBART_PRETRAINED_INIT_CONFIGURATION, DALLEBART_PRETRAINED_RESOURCE_FILES_MAP, diff --git a/paddlenlp/transformers/generation_utils.py b/paddlenlp/transformers/generation_utils.py deleted file mode 100644 index 7ee629a80f08..000000000000 --- a/paddlenlp/transformers/generation_utils.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from paddlenlp.generation.configuration_utils import * # noqa: F401, F403 -from paddlenlp.generation.logits_process import * # noqa: F401, F403 -from paddlenlp.generation.stopping_criteria import * # noqa: F401, F403 -from paddlenlp.generation.streamers import * # noqa: F401, F403 -from paddlenlp.generation.utils import * # noqa: F401, F403 diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index 74795e1f73ce..bbf10e408294 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -46,6 +46,7 @@ from paddle.utils.download import is_url as is_remote_url from tqdm.auto import tqdm +from paddlenlp.generation import GenerationConfig, GenerationMixin from paddlenlp.utils.downloader import get_path_from_url_with_filelock, hf_file_exists from paddlenlp.utils.env import ( CONFIG_NAME, @@ -61,7 +62,6 @@ from ..utils import device_guard from .configuration_utils import PretrainedConfig from .conversion_utils import ConversionMixin -from .generation_utils import GenerationConfig, GenerationMixin from .utils import ( # convert_ndarray_dtype, ContextManagers, InitTrackerMeta, diff --git a/tests/generation/test_streamers.py b/tests/generation/test_streamers.py index a8c8294c7438..9182cda9b88a 100644 --- a/tests/generation/test_streamers.py +++ b/tests/generation/test_streamers.py @@ -18,8 +18,8 @@ import paddle +from paddlenlp.generation import TextIteratorStreamer, TextStreamer from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer -from paddlenlp.transformers.generation_utils import TextIteratorStreamer, TextStreamer from paddlenlp.transformers.utils import CaptureStd from tests.testing_utils import slow from tests.transformers.test_modeling_common import ids_tensor diff --git a/tests/transformers/test_generation_utils.py b/tests/transformers/test_generation_utils.py index 1312f9cf10cc..51df60f76b29 100644 --- a/tests/transformers/test_generation_utils.py +++ b/tests/transformers/test_generation_utils.py @@ -19,16 +19,7 @@ import numpy as np import paddle -from paddlenlp.transformers import ( # import gpt model - AutoModelForCausalLM, - AutoTokenizer, - BartForConditionalGeneration, - BartTokenizer, - GPTLMHeadModel, - PretrainedConfig, - PretrainedTokenizer, -) -from paddlenlp.transformers.generation_utils import ( +from paddlenlp.generation import ( BeamSearchScorer, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, @@ -41,6 +32,15 @@ TopPProcess, get_unfinished_flag, ) +from paddlenlp.transformers import ( # import gpt model + AutoModelForCausalLM, + AutoTokenizer, + BartForConditionalGeneration, + BartTokenizer, + GPTLMHeadModel, + PretrainedConfig, + PretrainedTokenizer, +) from tests.testing_utils import slow From d541ae97739be614cf66385372843b94f0e4cc52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=B1=80?= Date: Tue, 5 Sep 2023 20:11:15 +0800 Subject: [PATCH 4/5] code refinement --- llm/predictor.py | 21 ++++++++----------- llm/utils.py | 19 +++++++---------- .../transformers/generation_utils.py | 2 +- paddlenlp/generation/configuration_utils.py | 2 +- paddlenlp/generation/utils.py | 4 ++-- paddlenlp/transformers/model_utils.py | 2 +- 6 files changed, 22 insertions(+), 28 deletions(-) diff --git a/llm/predictor.py b/llm/predictor.py index 5b499860bacf..ac3896095dee 100644 --- a/llm/predictor.py +++ b/llm/predictor.py @@ -30,7 +30,6 @@ load_real_time_tokens, ) -from paddlenlp.generation import GenerationConfig from paddlenlp.peft import LoRAConfig, LoRAModel, PrefixConfig, 
PrefixModelForCausalLM from paddlenlp.taskflow.utils import static_mode_guard from paddlenlp.trainer import PdArgumentParser @@ -200,17 +199,15 @@ def _infer(self, inputs: dict[str, paddle.Tensor]): max_length = max(self.config.max_length - inputs["input_ids"].shape[-1], 1) result = self.model.generate( **inputs, - generation_config=GenerationConfig( - max_new_token=max_length, - bos_token_id=self.tokenizer.bos_token_id, - eos_token_id=self.tokenizer.eos_token_id, - pad_token_id=self.tokenizer.pad_token_id, - decode_strategy=self.config.decode_strategy, - temperature=self.config.temperature, - top_k=self.config.top_k, - top_p=self.config.top_p, - repetition_penalty=self.config.repetition_penalty, - ), + max_new_token=max_length, + bos_token_id=self.tokenizer.bos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.pad_token_id, + decode_strategy=self.config.decode_strategy, + temperature=self.config.temperature, + top_k=self.config.top_k, + top_p=self.config.top_p, + repetition_penalty=self.config.repetition_penalty, ) result = result[0] return result diff --git a/llm/utils.py b/llm/utils.py index 2f4d3e9ff3e6..ecbfc99bb89f 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -26,7 +26,6 @@ from sklearn.metrics import accuracy_score from paddlenlp.datasets import InTokensIterableDataset -from paddlenlp.generation import GenerationConfig from paddlenlp.trainer import Trainer, TrainerCallback from paddlenlp.trainer.trainer_utils import IterableDatasetShard, has_length from paddlenlp.utils.log import logger @@ -201,16 +200,14 @@ def prediction_step( input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"] if "attention_mask" in inputs else None, position_ids=inputs["position_ids"] if "position_ids" in inputs else None, - generation_config=GenerationConfig( - max_new_token=self.data_args.tgt_length, - decode_strategy="sampling", - top_k=self.gen_args.top_k, - top_p=self.gen_args.top_p, - bos_token_id=self.tokenizer.bos_token_id, - eos_token_id=self.tokenizer.eos_token_id, - pad_token_id=self.tokenizer.pad_token_id, - use_cache=True, - ), + max_new_token=self.data_args.tgt_length, + decode_strategy="sampling", + top_k=self.gen_args.top_k, + top_p=self.gen_args.top_p, + bos_token_id=self.tokenizer.bos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.pad_token_id, + use_cache=True, )[0] all_preds = [] for pred_tokens in generated_tokens: diff --git a/paddlenlp/experimental/transformers/generation_utils.py b/paddlenlp/experimental/transformers/generation_utils.py index 5ae02afc7359..2deeec4c1100 100644 --- a/paddlenlp/experimental/transformers/generation_utils.py +++ b/paddlenlp/experimental/transformers/generation_utils.py @@ -28,7 +28,7 @@ except: from paddlenlp_ops import top_p_sampling -from paddlenlp.generation import GenerationMixin +from ...generation import GenerationMixin __all__ = ["GenerationInferenceModel"] diff --git a/paddlenlp/generation/configuration_utils.py b/paddlenlp/generation/configuration_utils.py index 3f6726753668..c32c3b9e911d 100644 --- a/paddlenlp/generation/configuration_utils.py +++ b/paddlenlp/generation/configuration_utils.py @@ -488,7 +488,7 @@ def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "GenerationConfig": config = cls(**{**config_dict, **kwargs}) unused_kwargs = config.update(**kwargs) - logger.info(f"Generate config {config}") + # logger.info(f"Generate config {config}") if return_unused_kwargs: return config, unused_kwargs else: diff --git a/paddlenlp/generation/utils.py 
b/paddlenlp/generation/utils.py index 89dab1d2014f..e7e4e84e4383 100644 --- a/paddlenlp/generation/utils.py +++ b/paddlenlp/generation/utils.py @@ -903,11 +903,11 @@ def generate( pad_token_id = eos_token_id if generation_config.max_length != 0 and generation_config.max_new_token == DEFAULT_MAX_NEW_TOKEN: - logger.warning("`max_length` will be deprecated in future, use" " `max_new_token` instead.") + logger.warning("`max_length` will be deprecated in future releases, use `max_new_token` instead.") generation_config.max_new_token = generation_config.max_length if generation_config.min_length != 0 and generation_config.min_new_token == 0: - logger.warning("`min_length` will be deprecated in future, use" " `min_new_token` instead.") + logger.warning("`min_length` will be deprecated in future releases, use `min_new_token` instead.") generation_config.min_new_token = generation_config.min_length max_length = generation_config.max_new_token diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py index bbf10e408294..4972049b9b70 100644 --- a/paddlenlp/transformers/model_utils.py +++ b/paddlenlp/transformers/model_utils.py @@ -46,7 +46,6 @@ from paddle.utils.download import is_url as is_remote_url from tqdm.auto import tqdm -from paddlenlp.generation import GenerationConfig, GenerationMixin from paddlenlp.utils.downloader import get_path_from_url_with_filelock, hf_file_exists from paddlenlp.utils.env import ( CONFIG_NAME, @@ -59,6 +58,7 @@ ) from paddlenlp.utils.log import logger +from ..generation import GenerationConfig, GenerationMixin from ..utils import device_guard from .configuration_utils import PretrainedConfig from .conversion_utils import ConversionMixin From 6a605ba4dcd4f256c19eb4d5e22d17961ccf7e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=B1=80?= Date: Tue, 5 Sep 2023 20:13:01 +0800 Subject: [PATCH 5/5] code refinement --- llm/predictor.py | 2 +- llm/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llm/predictor.py b/llm/predictor.py index ac3896095dee..cd443a063a8f 100644 --- a/llm/predictor.py +++ b/llm/predictor.py @@ -199,7 +199,7 @@ def _infer(self, inputs: dict[str, paddle.Tensor]): max_length = max(self.config.max_length - inputs["input_ids"].shape[-1], 1) result = self.model.generate( **inputs, - max_new_token=max_length, + max_length=max_length, bos_token_id=self.tokenizer.bos_token_id, eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.pad_token_id, diff --git a/llm/utils.py b/llm/utils.py index ecbfc99bb89f..1f5ffc8e5698 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -200,7 +200,7 @@ def prediction_step( input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"] if "attention_mask" in inputs else None, position_ids=inputs["position_ids"] if "position_ids" in inputs else None, - max_new_token=self.data_args.tgt_length, + max_length=self.data_args.tgt_length, decode_strategy="sampling", top_k=self.gen_args.top_k, top_p=self.gen_args.top_p,
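
A few notes on the end state of this series. PATCH 3/5 deletes the `paddlenlp/transformers/generation_utils.py` shim outright rather than deprecating it, so downstream imports have to move to the new top-level package. A minimal sketch of the canonical paths afterwards; the names picked here are a subset, the full export list being the `paddlenlp/generation/__init__.py` added in PATCH 3/5:

```python
# Canonical imports once the transformers.generation_utils shim is removed.
from paddlenlp.generation import (
    BeamSearchScorer,
    GenerationConfig,
    GenerationMixin,
    TextIteratorStreamer,
    TextStreamer,
)
```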
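PATCH 2/5 changes how `generate()` resolves its configuration: an explicit `GenerationConfig` and raw keyword arguments are both accepted, raw kwargs are folded into a deep copy of `model.generation_config`, and a non-default `max_length`/`min_length` is mapped onto `max_new_token`/`min_new_token` with a deprecation warning. A sketch of both calling conventions, reusing the "gpt2" checkpoint from the streamer docstrings; the prompt and sampling values are illustrative, not fixed by the patches:

```python
from paddlenlp.generation import GenerationConfig
from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
input_ids = tok("Hello, world", return_tensors="pd")["input_ids"]

# New style: an explicit GenerationConfig built around max_new_token.
ids, scores = model.generate(
    input_ids=input_ids,
    generation_config=GenerationConfig(
        max_new_token=20,
        decode_strategy="sampling",
        top_k=4,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.pad_token_id,
    ),
)

# Old style: raw kwargs still work; a non-default max_length triggers the
# deprecation shim and is copied onto max_new_token before decoding starts.
ids, scores = model.generate(input_ids=input_ids, max_length=20, decode_strategy="greedy_search")
```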
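PATCH 1/5 likewise completes the persistence path that `save_pretrained` previously left as a "to do": saving now writes `generation_config.json`, and `from_pretrained` reads it back, falling back to a config derived from `model.config` when the file is absent. A hedged round-trip sketch; the `./ckpt` directory is hypothetical:

```python
from paddlenlp.generation import GenerationConfig
from paddlenlp.transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")
model.generation_config = GenerationConfig(
    max_new_token=64, decode_strategy="sampling", top_p=0.9
)

model.save_pretrained("./ckpt")  # also writes ./ckpt/generation_config.json

# On reload the saved file wins; without it, the OSError is caught and a
# config is rebuilt from model.config (the _from_model_config path).
reloaded = AutoModelForCausalLM.from_pretrained("./ckpt")

# __eq__ ignores the _from_model_config / paddlenlp_version metadata fields,
# so a round-tripped config compares equal on substance.
assert reloaded.generation_config == model.generation_config
```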