diff --git a/paddlenlp/experimental/transformers/generation_utils.py b/paddlenlp/experimental/transformers/generation_utils.py
index 699b5f14f560..2deeec4c1100 100644
--- a/paddlenlp/experimental/transformers/generation_utils.py
+++ b/paddlenlp/experimental/transformers/generation_utils.py
@@ -28,7 +28,7 @@ except:
     from paddlenlp_ops import top_p_sampling

-from paddlenlp.transformers.generation_utils import GenerationMixin
+from ...generation import GenerationMixin

 __all__ = ["GenerationInferenceModel"]
diff --git a/paddlenlp/generation/__init__.py b/paddlenlp/generation/__init__.py
new file mode 100644
index 000000000000..a68f37574efe
--- /dev/null
+++ b/paddlenlp/generation/__init__.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .configuration_utils import GenerationConfig
+from .logits_process import (
+    ForcedBOSTokenLogitsProcessor,
+    ForcedEOSTokenLogitsProcessor,
+    HammingDiversityLogitsProcessor,
+    LogitsProcessorList,
+    MinLengthLogitsProcessor,
+    RepetitionPenaltyLogitsProcessor,
+    TopKProcess,
+    TopPProcess,
+)
+from .stopping_criteria import (
+    MaxLengthCriteria,
+    MaxTimeCriteria,
+    StoppingCriteria,
+    StoppingCriteriaList,
+    validate_stopping_criteria,
+)
+from .streamers import BaseStreamer, TextIteratorStreamer, TextStreamer
+from .utils import BeamSearchScorer, GenerationMixin, get_unfinished_flag
diff --git a/paddlenlp/transformers/generation/configuration_utils.py b/paddlenlp/generation/configuration_utils.py
similarity index 97%
rename from paddlenlp/transformers/generation/configuration_utils.py
rename to paddlenlp/generation/configuration_utils.py
index 3222ca9203bb..c32c3b9e911d 100644
--- a/paddlenlp/transformers/generation/configuration_utils.py
+++ b/paddlenlp/generation/configuration_utils.py
@@ -23,18 +23,20 @@
 from paddle.common_ops_import import convert_dtype

 from paddlenlp import __version__
+from paddlenlp.transformers.configuration_utils import PretrainedConfig
+from paddlenlp.transformers.utils import resolve_cache_dir
 from paddlenlp.utils.log import logger

-from ...utils import GENERATION_CONFIG_NAME
-from ...utils.downloader import (
+from ..utils import GENERATION_CONFIG_NAME
+from ..utils.downloader import (
     COMMUNITY_MODEL_PREFIX,
     get_path_from_url_with_filelock,
     hf_file_exists,
     is_url,
     url_file_exists,
 )
-from ..configuration_utils import PretrainedConfig
-from ..utils import resolve_cache_dir
+
+DEFAULT_MAX_NEW_TOKEN = 20


 def resolve_hf_generation_config_path(repo_id: str, cache_dir: str, subfolder=None) -> str:
@@ -143,7 +145,9 @@ def _get_generation_mode(self):

     def __init__(self, **kwargs):
         # Parameters that control the length of the output
-        self.max_length = kwargs.pop("max_length", 20)
+        self.max_new_token = kwargs.get("max_new_token", DEFAULT_MAX_NEW_TOKEN)
+        self.min_new_token = kwargs.pop("min_new_token", 0)
+        self.max_length = kwargs.pop("max_length", 0)
         self.min_length = kwargs.pop("min_length", 0)
         self.early_stopping = kwargs.pop("early_stopping", False)
@@ -176,11 +180,6 @@ def __init__(self, **kwargs):
         self._from_model_config = kwargs.pop("_from_model_config", False)
         self.paddlenlp_version = kwargs.pop("paddlenlp_version", __version__)

-        # Parameters that control the generation strategy used
-        self.decode_strategy = kwargs.pop("decode_strategy", None)
-        if self.decode_strategy is None:
-            self.decode_strategy = self._get_generation_mode()
-
         # Additional attributes without default values
         if not self._from_model_config:
             # we don't want to copy values from the model config if we're initializing a `GenerationConfig` from a
@@ -192,6 +191,12 @@ def __init__(self, **kwargs):
                 logger.error(f"Can't set {key} with value {value} for {self}")
                 raise err

+        # Parameters that control the generation strategy used
+        if "decode_strategy" in kwargs:
+            self.decode_strategy = kwargs.pop("decode_strategy")
+        else:
+            self.decode_strategy = self._get_generation_mode()
+
         # Validate the values of the attributes
         self.validate(is_init=True)
@@ -202,7 +207,7 @@ def __eq__(self, other):
         self_dict = self.__dict__.copy()
         other_dict = other.__dict__.copy()
         # ignore metadata
-        for metadata_field in "paddlenlp_version":
+        for metadata_field in ["_from_model_config", "paddlenlp_version"]:
             self_dict.pop(metadata_field, None)
             other_dict.pop(metadata_field, None)
         return self_dict == other_dict
@@ -432,7 +437,7 @@ def from_pretrained(
         community_url = "/".join([COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path, GENERATION_CONFIG_NAME])
         if url_file_exists(community_url):
             resolved_config_file = get_path_from_url_with_filelock(
-                pretrained_model_name_or_path, cache_dir, check_exist=not force_download
+                community_url, cache_dir, check_exist=not force_download
             )
         else:
             raise FileNotFoundError(f"configuration file<{GENERATION_CONFIG_NAME}> not found")
@@ -483,7 +488,7 @@ def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "GenerationConfig":
         config = cls(**{**config_dict, **kwargs})
         unused_kwargs = config.update(**kwargs)

-        logger.info(f"Generate config {config}")
+        # logger.info(f"Generate config {config}")
         if return_unused_kwargs:
             return config, unused_kwargs
         else:
diff --git a/paddlenlp/transformers/generation/logits_process.py b/paddlenlp/generation/logits_process.py
similarity index 100%
rename from paddlenlp/transformers/generation/logits_process.py
rename to paddlenlp/generation/logits_process.py
diff --git a/paddlenlp/transformers/generation/stopping_criteria.py b/paddlenlp/generation/stopping_criteria.py
similarity index 100%
rename from paddlenlp/transformers/generation/stopping_criteria.py
rename to paddlenlp/generation/stopping_criteria.py
diff --git a/paddlenlp/transformers/generation/streamers.py b/paddlenlp/generation/streamers.py
similarity index 98%
rename from paddlenlp/transformers/generation/streamers.py
rename to paddlenlp/generation/streamers.py
index c0b04f8207a0..67b97b0cf0f9 100644
--- a/paddlenlp/transformers/generation/streamers.py
+++ b/paddlenlp/generation/streamers.py
@@ -46,7 +46,7 @@ class TextStreamer(BaseStreamer):
         ```python
         >>> from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer
-        >>> from paddlenlp.transformers.generation_utils import TextStreamer
+        >>> from paddlenlp.generation import TextStreamer

         >>> tok = AutoTokenizer.from_pretrained("gpt2")
         >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
@@ -167,7 +167,7 @@ class TextIteratorStreamer(TextStreamer):
         ```python
         >>> from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer
-        >>> from paddlenlp.transformers.generation_utils import TextIteratorStreamer
+        >>> from paddlenlp.generation import TextIteratorStreamer
         >>> from threading import Thread

         >>> tok = AutoTokenizer.from_pretrained("gpt2")
diff --git a/paddlenlp/transformers/generation/utils.py b/paddlenlp/generation/utils.py
similarity index 98%
rename from paddlenlp/transformers/generation/utils.py
rename to paddlenlp/generation/utils.py
index 9429eea6af22..e7e4e84e4383 100644
--- a/paddlenlp/transformers/generation/utils.py
+++ b/paddlenlp/generation/utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 from __future__ import annotations

+import copy
 from typing import Union

 import paddle
@@ -37,7 +38,7 @@
 from paddlenlp.transformers.utils import get_scale_by_dtype
 from paddlenlp.utils.log import logger

-from .configuration_utils import GenerationConfig
+from .configuration_utils import DEFAULT_MAX_NEW_TOKEN, GenerationConfig
 from .logits_process import (
     ForcedBOSTokenLogitsProcessor,
     ForcedEOSTokenLogitsProcessor,
@@ -737,8 +738,17 @@ def generate(
                 # ['是的', '嗯嗯']
         """
         if generation_config is None:
-            generation_config = GenerationConfig.from_model_config(self.config)
+            if self.generation_config._from_model_config:
+                new_generation_config = GenerationConfig.from_model_config(self.config)
+                if new_generation_config != self.generation_config:
+                    logger.warning(
+                        "model.generation_config is in conflict with model.config, " "model.config is used."
+                    )
+                    self.generation_config = new_generation_config
+            generation_config = self.generation_config

+        # without update model.generation_config
+        generation_config = copy.deepcopy(generation_config)
         model_kwargs = generation_config.update(**kwargs)

         assert generation_config.decode_strategy in [
@@ -892,8 +902,16 @@
             print("Setting `pad_token_id` to `eos_token_id`:{} for " "open-end generation.".format(eos_token_id))
             pad_token_id = eos_token_id

-        max_length = generation_config.max_length
-        min_length = generation_config.min_length
+        if generation_config.max_length != 0 and generation_config.max_new_token == DEFAULT_MAX_NEW_TOKEN:
+            logger.warning("`max_length` will be deprecated in future releases, use `max_new_token` instead.")
+            generation_config.max_new_token = generation_config.max_length
+
+        if generation_config.min_length != 0 and generation_config.min_new_token == 0:
+            logger.warning("`min_length` will be deprecated in future releases, use `min_new_token` instead.")
+            generation_config.min_new_token = generation_config.min_length
+
+        max_length = generation_config.max_new_token
+        min_length = generation_config.min_new_token
         if is_tracing and not paddle.is_tensor(max_length):
             if hasattr(paddle.framework, "_no_check_dy2st_diff"):
                 # TODO(daisiming): _no_check_dy2st_diff is used to turn off the checking of behavior
diff --git a/paddlenlp/transformers/dallebart/modeling.py b/paddlenlp/transformers/dallebart/modeling.py
index 93e9f54c53de..1e5d50009363 100644
--- a/paddlenlp/transformers/dallebart/modeling.py
+++ b/paddlenlp/transformers/dallebart/modeling.py
@@ -22,10 +22,10 @@
 import paddle.nn.functional as F
 from paddle.common_ops_import import convert_dtype

+from ...generation import BeamSearchScorer
 from ...transformers import PretrainedModel, register_base_model
 from ...utils.env import CONFIG_NAME
 from ...utils.log import logger
-from ..generation_utils import BeamSearchScorer
 from .configuration import (
     DALLEBART_PRETRAINED_INIT_CONFIGURATION,
     DALLEBART_PRETRAINED_RESOURCE_FILES_MAP,
diff --git a/paddlenlp/transformers/generation_utils.py b/paddlenlp/transformers/generation_utils.py
deleted file mode 100644
index aeb91637e3fd..000000000000
--- a/paddlenlp/transformers/generation_utils.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddlenlp.transformers.generation.configuration_utils import *  # noqa: F401, F403
-from paddlenlp.transformers.generation.logits_process import *  # noqa: F401, F403
-from paddlenlp.transformers.generation.stopping_criteria import *  # noqa: F401, F403
-from paddlenlp.transformers.generation.streamers import *  # noqa: F401, F403
-from paddlenlp.transformers.generation.utils import *  # noqa: F401, F403
diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py
index 0adaddfefc84..4972049b9b70 100644
--- a/paddlenlp/transformers/model_utils.py
+++ b/paddlenlp/transformers/model_utils.py
@@ -58,10 +58,10 @@
 )
 from paddlenlp.utils.log import logger

+from ..generation import GenerationConfig, GenerationMixin
 from ..utils import device_guard
 from .configuration_utils import PretrainedConfig
 from .conversion_utils import ConversionMixin
-from .generation_utils import GenerationMixin
 from .utils import (  # convert_ndarray_dtype,
     ContextManagers,
     InitTrackerMeta,
@@ -863,6 +863,7 @@ def __init__(self, *args, **kwargs):
         if config is not None:
             self.config: PretrainedConfig = config
             self.model_config_file = CONFIG_NAME
+            self.generation_config = GenerationConfig.from_model_config(self.config) if self.can_generate() else None
             return

         # extract config from kwargs
@@ -876,6 +877,7 @@ def __init__(self, *args, **kwargs):
             raise TypeError("config parameter should be the instance of PretrainedConfig")

         self.config: PretrainedConfig = kwargs["config"]
+        self.generation_config = GenerationConfig.from_model_config(self.config) if self.can_generate() else None
         self.model_config_file = CONFIG_NAME
         self.warnings_issued = {}

@@ -1993,6 +1995,22 @@ def from_pretrained(
             keep_in_fp32_modules=keep_in_fp32_modules,
         )

+        # load generation_config.json
+        if model.can_generate() and pretrained_model_name_or_path is not None:
+            try:
+                model.generation_config = GenerationConfig.from_pretrained(
+                    pretrained_model_name_or_path,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    subfolder=subfolder,
+                    **kwargs,
+                )
+            except OSError:
+                logger.info(
+                    "Generation config file not found, using a generation config created from the model config."
+                )
+                pass
+
         if paddle.in_dynamic_mode():
             return model

@@ -2086,9 +2104,7 @@ def save_pretrained(
         if is_main_process:
             config_to_save.save_pretrained(save_directory)
             if self.can_generate():
-                # to do support generation_config
-                pass
-                # model_to_save.generation_config.save_pretrained(save_directory)
+                model_to_save.generation_config.save_pretrained(save_directory)

         # Handle the case where some state_dict keys shouldn't be saved
         if self._keys_to_ignore_on_save is not None:
diff --git a/paddlenlp/transformers/generation/__init__.py b/tests/generation/__init__.py
similarity index 100%
rename from paddlenlp/transformers/generation/__init__.py
rename to tests/generation/__init__.py
diff --git a/tests/transformers/generation/test_logits_process.py b/tests/generation/test_logits_process.py
similarity index 99%
rename from tests/transformers/generation/test_logits_process.py
rename to tests/generation/test_logits_process.py
index 0d070c7109cf..8c411181ffbb 100644
--- a/tests/transformers/generation/test_logits_process.py
+++ b/tests/generation/test_logits_process.py
@@ -35,7 +35,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None):
     return paddle.to_tensor(data=values).reshape(shape)


-from paddlenlp.transformers.generation.logits_process import (
+from paddlenlp.generation.logits_process import (
     ForcedBOSTokenLogitsProcessor,
     ForcedEOSTokenLogitsProcessor,
     HammingDiversityLogitsProcessor,
diff --git a/tests/transformers/generation/test_stopping_criteria.py b/tests/generation/test_stopping_criteria.py
similarity index 97%
rename from tests/transformers/generation/test_stopping_criteria.py
rename to tests/generation/test_stopping_criteria.py
index c3334081cfe8..f557dbaba279 100644
--- a/tests/transformers/generation/test_stopping_criteria.py
+++ b/tests/generation/test_stopping_criteria.py
@@ -18,7 +18,7 @@

 import paddle

-from paddlenlp.transformers.generation.stopping_criteria import (
+from paddlenlp.generation.stopping_criteria import (
     MaxLengthCriteria,
     MaxTimeCriteria,
     StoppingCriteriaList,
diff --git a/tests/transformers/generation/test_streamers.py b/tests/generation/test_streamers.py
similarity index 97%
rename from tests/transformers/generation/test_streamers.py
rename to tests/generation/test_streamers.py
index c71b21273f62..9182cda9b88a 100644
--- a/tests/transformers/generation/test_streamers.py
+++ b/tests/generation/test_streamers.py
@@ -18,12 +18,11 @@

 import paddle

+from paddlenlp.generation import TextIteratorStreamer, TextStreamer
 from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer
-from paddlenlp.transformers.generation_utils import TextIteratorStreamer, TextStreamer
 from paddlenlp.transformers.utils import CaptureStd
 from tests.testing_utils import slow
-
-from ..test_modeling_common import ids_tensor
+from tests.transformers.test_modeling_common import ids_tensor


 class StreamerTester(unittest.TestCase):
diff --git a/tests/transformers/generation/__init__.py b/tests/transformers/generation/__init__.py
deleted file mode 100644
index 595add0aed9e..000000000000
--- a/tests/transformers/generation/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tests/transformers/test_generation_utils.py b/tests/transformers/test_generation_utils.py
index afbb817a44a7..51df60f76b29 100644
--- a/tests/transformers/test_generation_utils.py
+++ b/tests/transformers/test_generation_utils.py
@@ -19,16 +19,7 @@
 import numpy as np
 import paddle

-from paddlenlp.transformers import (  # import gpt model
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    BartForConditionalGeneration,
-    BartTokenizer,
-    GPTLMHeadModel,
-    PretrainedConfig,
-    PretrainedTokenizer,
-)
-from paddlenlp.transformers.generation_utils import (
+from paddlenlp.generation import (
     BeamSearchScorer,
     ForcedBOSTokenLogitsProcessor,
     ForcedEOSTokenLogitsProcessor,
@@ -41,6 +32,15 @@
     TopPProcess,
     get_unfinished_flag,
 )
+from paddlenlp.transformers import (  # import gpt model
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BartForConditionalGeneration,
+    BartTokenizer,
+    GPTLMHeadModel,
+    PretrainedConfig,
+    PretrainedTokenizer,
+)
 from tests.testing_utils import slow
@@ -235,7 +235,7 @@ def _greedy_generate(
             input_ids,
             attention_mask=attention_mask,
             generation_config=GenerationConfig(
-                max_length=max_length,
+                max_new_token=max_length,
                 decode_strategy="greedy_search",
                 **logits_process_kwargs,
             ),
@@ -277,7 +277,7 @@ def _sample_generate(
             input_ids,
             attention_mask=attention_mask,
             generation_config=GenerationConfig(
-                max_length=max_length,
+                max_new_token=max_length,
                 decode_strategy="sampling",
                 num_return_sequences=num_return_sequences,
                 top_k=1,
@@ -332,7 +332,7 @@ def _beam_search_generate(
             attention_mask=attention_mask,
             generation_config=GenerationConfig(
                 decode_strategy="beam_search",
-                max_length=max_length,
+                max_new_token=max_length,
                 **beam_kwargs,
                 **logits_process_kwargs,
             ),
@@ -390,7 +390,7 @@ def _group_beam_search_generate(
             attention_mask=attention_mask,
             generation_config=GenerationConfig(
                 decode_strategy="beam_search",
-                max_length=max_length,
+                max_new_token=max_length,
                 **beam_kwargs,
                 **logits_process_kwargs,
             ),
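Taken together, these hunks move the generation stack from `paddlenlp.transformers.generation_utils` to a top-level `paddlenlp.generation` package and introduce `max_new_token`/`min_new_token` as the length controls, with `max_length`/`min_length` kept as deprecated aliases. A minimal usage sketch follows (not part of the diff): it assumes only the exports listed in `paddlenlp/generation/__init__.py` above and reuses the `"gpt2"` checkpoint from the streamers docstring examples.

```python
# Sketch only: exercises the new import path and length controls introduced
# by this diff. The "gpt2" checkpoint mirrors the docstring examples above.
from paddlenlp.generation import GenerationConfig
from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("An increasing sequence: one,", return_tensors="pd")

# `max_new_token` is the new length parameter; per the generate() hunk above,
# passing a non-default `max_length` still works but logs a deprecation warning.
config = GenerationConfig(max_new_token=20, decode_strategy="sampling", top_k=1)

# PaddleNLP's generate() returns (ids, scores); generation_config is copied
# internally (copy.deepcopy), so model.generation_config is left untouched.
ids, scores = model.generate(inputs["input_ids"], generation_config=config)
print(tok.batch_decode(ids, skip_special_tokens=True))
```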
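The `model_utils.py` hunks also wire `generation_config.json` into the standard save/load cycle. Another hedged sketch of that round trip, assuming a writable local directory `./my-model` (a hypothetical path):

```python
# Sketch only: the generation-config persistence added in model_utils.py.
from paddlenlp.transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")
model.generation_config.max_new_token = 64  # customize before saving

# save_pretrained() now also writes generation_config.json when the model
# can generate (see the save_pretrained hunk above).
model.save_pretrained("./my-model")

# from_pretrained() reads it back; if the file is missing, the except-OSError
# branch falls back to a config built from model.config.
reloaded = AutoModelForCausalLM.from_pretrained("./my-model")
assert reloaded.generation_config.max_new_token == 64
```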