From b04d991547e2da4530dacf785e903c0c38e282b4 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Mon, 18 Sep 2023 05:26:55 +0000 Subject: [PATCH 1/7] add chatglm finetune --- tests/fixtures/llm/finetune.yaml | 51 ++++++++++++++++++ tests/fixtures/llm/pretrain.yaml | 12 +++-- tests/llm/test_finetune.py | 90 ++++++++++++++++++++++++++++++++ tests/llm/test_pretrain.py | 15 ++++-- tests/testing_utils.py | 22 ++++++-- 5 files changed, 177 insertions(+), 13 deletions(-) create mode 100644 tests/fixtures/llm/finetune.yaml create mode 100644 tests/llm/test_finetune.py diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml new file mode 100644 index 000000000000..30d19ee31b9b --- /dev/null +++ b/tests/fixtures/llm/finetune.yaml @@ -0,0 +1,51 @@ +finetune: + base: + dataset_name_or_path: "./data" + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-05 + warmup_steps: 30 + logging_steps: 1 + evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + +inference-to-static: + default: + dtype: float16 + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false \ No newline at end of file diff --git a/tests/fixtures/llm/pretrain.yaml b/tests/fixtures/llm/pretrain.yaml index 3e8c4fa55d17..cfebf521af92 100644 --- a/tests/fixtures/llm/pretrain.yaml +++ b/tests/fixtures/llm/pretrain.yaml @@ -1,7 +1,5 @@ pretrain: - default: - model_type: llama - model_name_or_path: __internal_testing__/tiny-random-llama + base: weight_decay: 0.01 max_steps: 2 save_steps: 2 @@ -20,7 +18,13 @@ pretrain: use_flash_attention: 0 use_fused_rms_norm: 0 continue_training: 1 - + default: + llama: + model_type: llama + model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_type: chatglm + model_name_or_path: __internal_testing__/tiny-random-chatglm inference-predict: default: diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py new file mode 100644 index 000000000000..79f41e80e545 --- /dev/null +++ b/tests/llm/test_finetune.py @@ -0,0 +1,90 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + ], +) +class FinetuneTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/finetune.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + sys.path.insert(0, self.model_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + + def test_pretrain(self): + finetune_config = load_test_config(self.config_path, "finetune", self.model_dir) + + finetune_config["dataset_name_or_path"] = self.data_dir + finetune_config["output_dir"] = self.output_dir + + with argv_context_guard(finetune_config): + from finetune_generation import main + + main() + + self._test_inference_predictor() + self._test_predictor() + + def _test_inference_predictor(self): + self.run_predictor({"inference_model": "true"}) + + def _test_predictor(self): + self.run_predictor({"inference_model": "false"}) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index cf14cc82d7b2..a379b69297f7 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -13,6 +13,7 @@ # limitations under the License. 
from __future__ import annotations +import os import shutil import sys import tempfile @@ -30,6 +31,7 @@ ["model_dir"], [ ["llama"], + ["chatglm"], ], ) class PretrainTest(LLMTest, unittest.TestCase): @@ -37,16 +39,18 @@ class PretrainTest(LLMTest, unittest.TestCase): model_dir: str = None def setUp(self) -> None: - self.model_dir = "./llm/llama" + LLMTest.setUp(self) + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_dir) - LLMTest.setUp(self) def tearDown(self) -> None: - sys.path.remove(self.model_dir) - shutil.rmtree(self.data_dir) LLMTest.tearDown(self) + sys.path.remove(self.model_codes_dir) + shutil.rmtree(self.data_dir) + def test_pretrain(self): # Run pretrain URL = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy" @@ -54,7 +58,8 @@ def test_pretrain(self): get_path_from_url(URL, root_dir=self.data_dir) get_path_from_url(URL2, root_dir=self.data_dir) - pretrain_config = load_test_config(self.config_path, "pretrain") + pretrain_config = load_test_config(self.config_path, "pretrain", self.model_dir) + pretrain_config["input_dir"] = self.data_dir pretrain_config["output_dir"] = self.output_dir diff --git a/tests/testing_utils.py b/tests/testing_utils.py index 96798fe52f31..6ea39bec0197 100644 --- a/tests/testing_utils.py +++ b/tests/testing_utils.py @@ -300,7 +300,7 @@ def is_slow_test() -> bool: return os.getenv("RUN_SLOW_TEST") is not None -def load_test_config(config_file: str, key: str) -> dict | None: +def load_test_config(config_file: str, key: str, sub_key: str = None) -> dict | None: """parse config file to argv Args: @@ -314,12 +314,26 @@ def load_test_config(config_file: str, key: str) -> dict | None: assert key in config, f"<{key}> should be the top key in configuration file" config = config[key] - sub_key = "slow" if is_slow_test() else "default" + mode_key = "slow" if is_slow_test() else "default" - if sub_key not in config: + if mode_key not in config: return None - config = config[sub_key] + # 2. load base common config + base_config = config.get("base", {}) + + config = config.get(mode_key, {}) + config.update(base_config) + + # 3. 
load sub key config + sub_config = config.get(sub_key, {}) + config.update(sub_config) + + # remove dict value + for key in list(config.keys()): + if isinstance(config[key], dict): + config.pop(key) + return config From b2fa9be09361b0f57c93a84a4830bbc61744bbb0 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Mon, 18 Sep 2023 13:20:47 +0000 Subject: [PATCH 2/7] add llm ci scripts --- llm/chatglm/sft_argument.json | 5 +- .../transformers/chatglm/modeling.py | 2 +- tests/fixtures/llm/finetune.yaml | 6 + tests/fixtures/llm/lora.yaml | 59 ++++++++++ tests/fixtures/llm/prefix_tuning.yaml | 55 +++++++++ tests/llm/test_finetune.py | 7 ++ tests/llm/test_lora.py | 107 ++++++++++++++++++ tests/llm/test_prefix_tuning.py | 99 ++++++++++++++++ tests/llm/test_pretrain.py | 3 +- tests/llm/testing_utils.py | 25 +++- 10 files changed, 357 insertions(+), 11 deletions(-) create mode 100644 tests/fixtures/llm/lora.yaml create mode 100644 tests/fixtures/llm/prefix_tuning.yaml create mode 100644 tests/llm/test_lora.py create mode 100644 tests/llm/test_prefix_tuning.py diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 5884b2f99882..0602a60b2441 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,5 +1,5 @@ { - "model_name_or_path": "THUDM/chatglm-6b", + "model_name_or_path": "__internal_testing__/tiny-fused-chatglm", "dataset_name_or_path": "./data", "output_dir": "./checkpoints/chatglm_sft_ckpts", "per_device_train_batch_size": 4, @@ -10,6 +10,7 @@ "learning_rate": 3e-05, "warmup_steps": 30, "logging_steps": 1, + "max_steps": 10, "evaluation_strategy": "epoch", "save_strategy": "epoch", "src_length": 1024, @@ -24,6 +25,6 @@ "metric_for_best_model": "accuracy", "recompute": true, "save_total_limit": 1, - "tensor_parallel_degree": 4, + "tensor_parallel_degree": 1, "pipeline_parallel_degree": 1 } \ No newline at end of file diff --git a/paddlenlp/experimental/transformers/chatglm/modeling.py b/paddlenlp/experimental/transformers/chatglm/modeling.py index 9d21adae386b..0ca1caec8d2d 100644 --- a/paddlenlp/experimental/transformers/chatglm/modeling.py +++ b/paddlenlp/experimental/transformers/chatglm/modeling.py @@ -332,7 +332,7 @@ def set_state_dict(self, state_dict, use_structured_name=True): continue elif k.startswith("lm_head.weight"): continue - elif k.endswith("rotary_emb.inv_freq"): + elif k.endswith("rotary_emb.inv_freq") or k.endswith("rotary_embeddings.inv_freq"): continue idx = int(k.split(".")[2]) if k.endswith("input_layernorm.weight"): diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml index 30d19ee31b9b..aa651f1c075f 100644 --- a/tests/fixtures/llm/finetune.yaml +++ b/tests/fixtures/llm/finetune.yaml @@ -28,6 +28,12 @@ finetune: default: llama: model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-random-bloom inference-predict: default: diff --git a/tests/fixtures/llm/lora.yaml b/tests/fixtures/llm/lora.yaml new file mode 100644 index 000000000000..b4038eb78fb6 --- /dev/null +++ b/tests/fixtures/llm/lora.yaml @@ -0,0 +1,59 @@ +lora: + base: + dataset_name_or_path: "./data" + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-04 + warmup_steps: 30 + logging_steps: 1 + 
evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + lora: true + + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-fused-bloom + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + +inference-to-static: + default: + dtype: float16 + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false \ No newline at end of file diff --git a/tests/fixtures/llm/prefix_tuning.yaml b/tests/fixtures/llm/prefix_tuning.yaml new file mode 100644 index 000000000000..e46a0d11d34a --- /dev/null +++ b/tests/fixtures/llm/prefix_tuning.yaml @@ -0,0 +1,55 @@ +prefix_tuning: + base: + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-02 + warmup_steps: 30 + logging_steps: 1 + evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + prefix_tuning: true + + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + export_precache: true + +inference-to-static: + default: + dtype: float16 + export_precache: true + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false + export_precache: true \ No newline at end of file diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index 79f41e80e545..e6ee001c80dc 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -31,6 +31,9 @@ ["model_dir"], [ ["llama"], + # ["chatglm"], + # ["chatglm2"], + ["bloom"], ], ) class FinetuneTest(LLMTest, unittest.TestCase): @@ -84,6 +87,10 @@ def test_pretrain(self): self._test_predictor() def _test_inference_predictor(self): + # TODO(wj-Mcat): OPTModel do not support inference model + if self.model_dir == "opt": + return + self.run_predictor({"inference_model": "true"}) def _test_predictor(self): diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py new file mode 100644 index 000000000000..b17684e9b710 --- /dev/null +++ b/tests/llm/test_lora.py @@ -0,0 +1,107 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +import paddle +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + # TODO(wj-Mcat): to enable chatglm/chatglm2 unit test + # ["chatglm"], + # ["chatglm2"], + ["bloom"], + ], +) +class LoraTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/lora.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) + sys.path.insert(0, self.model_codes_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + sys.path.remove(self.model_codes_dir) + + def test_lora(self): + self.disable_static() + paddle.set_default_dtype("float32") + + lora_config = load_test_config(self.config_path, "lora", self.model_dir) + + lora_config["dataset_name_or_path"] = self.data_dir + lora_config["output_dir"] = self.output_dir + + with argv_context_guard(lora_config): + from finetune_generation import main + + main() + + # merge weights + merge_lora_weights_config = { + "model_name_or_path": lora_config["model_name_or_path"], + "lora_path": lora_config["output_dir"], + "merge_model_path": lora_config["output_dir"], + } + with argv_context_guard(merge_lora_weights_config): + from merge_lora_params import merge + + merge() + + if self.model_dir not in ["chatglm2"]: + self.run_predictor({"inference_model": "true"}) + + self.run_predictor({"inference_model": "false"}) diff --git a/tests/llm/test_prefix_tuning.py b/tests/llm/test_prefix_tuning.py new file mode 100644 index 000000000000..1fad2647a84d --- /dev/null +++ b/tests/llm/test_prefix_tuning.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + ], +) +class LoraTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/prefix_tuning.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) + sys.path.insert(0, self.model_codes_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + + sys.path.remove(self.model_codes_dir) + + def test_pretrain(self): + prefix_tuning_config = load_test_config(self.config_path, "prefix_tuning", self.model_dir) + + prefix_tuning_config["dataset_name_or_path"] = self.data_dir + prefix_tuning_config["output_dir"] = self.output_dir + + with argv_context_guard(prefix_tuning_config): + from finetune_generation import main + + main() + + self.run_predictor( + { + "inference_model": True, + "prefix_path": self.output_dir, + "model_name_or_path": prefix_tuning_config["model_name_or_path"], + } + ) + self.run_predictor( + { + "inference_model": False, + "prefix_path": self.output_dir, + "model_name_or_path": prefix_tuning_config["model_name_or_path"], + } + ) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index a379b69297f7..7c74c15e5907 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -31,7 +31,6 @@ ["model_dir"], [ ["llama"], - ["chatglm"], ], ) class PretrainTest(LLMTest, unittest.TestCase): @@ -43,7 +42,7 @@ def setUp(self) -> None: self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) - sys.path.insert(0, self.model_dir) + sys.path.insert(0, self.model_codes_dir) def tearDown(self) -> None: LLMTest.tearDown(self) diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index 5e61587ec4e1..d0d867c818a9 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -31,29 +31,39 @@ def setUp(self) -> None: self.output_dir = tempfile.mkdtemp() 
self.inference_output_dir = tempfile.mkdtemp() sys.path.insert(0, self.root_path) + self.disable_static() + paddle.set_default_dtype("float32") def tearDown(self) -> None: sys.path.remove(self.root_path) shutil.rmtree(self.output_dir) shutil.rmtree(self.inference_output_dir) + self.disable_static() + + def disable_static(self): + paddle.utils.unique_name.switch() + paddle.disable_static() def run_predictor(self, config_params=None): config_params = config_params or {} # to avoid the same parameter - paddle.utils.unique_name.switch() + self.disable_static() predict_config = load_test_config(self.config_path, "inference-predict") predict_config["output_file"] = os.path.join(self.output_dir, "predict.json") - predict_config.update(config_params) predict_config["model_name_or_path"] = self.output_dir + predict_config.update(config_params) with argv_context_guard(predict_config): from predictor import predict predict() + # prefix_tuning dynamic graph do not support to_static + if not predict_config["inference_model"]: + return + # to static - paddle.disable_static() - paddle.utils.unique_name.switch() + self.disable_static() config = load_test_config(self.config_path, "inference-to-static") config["output_path"] = self.inference_output_dir config["model_name_or_path"] = self.output_dir @@ -64,17 +74,20 @@ def run_predictor(self, config_params=None): main() # inference - paddle.disable_static() + self.disable_static() config = load_test_config(self.config_path, "inference-infer") config["model_name_or_path"] = self.inference_output_dir config["output_file"] = os.path.join(self.inference_output_dir, "infer.json") - enable_compare = config.pop("enable_compare", False) + + config_params.pop("model_name_or_path", None) config.update(config_params) + enable_compare = config.pop("enable_compare", False) with argv_context_guard(config): from predictor import predict predict() + self.disable_static() # compare result if enable_compare: predict_result = self._read_result(predict_config["output_file"]) From 93ac1ec0fd7e24a1603750fc175ff32b03ee174c Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 07:54:21 +0000 Subject: [PATCH 3/7] add llama llm testing --- tests/fixtures/llm/finetune.yaml | 5 ++--- tests/fixtures/llm/lora.yaml | 3 +-- tests/fixtures/llm/prefix_tuning.yaml | 7 ++++++- tests/fixtures/llm/pretrain.yaml | 5 ++--- tests/llm/test_finetune.py | 20 ++++++------------- tests/llm/test_lora.py | 8 ++++---- tests/llm/test_pretrain.py | 10 ++-------- tests/llm/testing_utils.py | 28 +++++++++++++++++++-------- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml index aa651f1c075f..ed47f2610799 100644 --- a/tests/fixtures/llm/finetune.yaml +++ b/tests/fixtures/llm/finetune.yaml @@ -33,7 +33,7 @@ finetune: chatglm2: model_name_or_path: __internal_testing__/tiny-random-chatglm2 bloom: - model_name_or_path: __internal_testing__/tiny-random-bloom + model_name_or_path: __internal_testing__/tiny-fused-bloom inference-predict: default: @@ -53,5 +53,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/fixtures/llm/lora.yaml b/tests/fixtures/llm/lora.yaml index b4038eb78fb6..e38459c04551 100644 --- a/tests/fixtures/llm/lora.yaml +++ b/tests/fixtures/llm/lora.yaml @@ -55,5 +55,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: 
greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/fixtures/llm/prefix_tuning.yaml b/tests/fixtures/llm/prefix_tuning.yaml index e46a0d11d34a..9a9580f96bd5 100644 --- a/tests/fixtures/llm/prefix_tuning.yaml +++ b/tests/fixtures/llm/prefix_tuning.yaml @@ -29,6 +29,12 @@ prefix_tuning: default: llama: model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-fused-bloom inference-predict: default: @@ -51,5 +57,4 @@ inference-infer: batch_size: 2 decode_strategy: greedy_search max_length: 20 - enable_compare: false export_precache: true \ No newline at end of file diff --git a/tests/fixtures/llm/pretrain.yaml b/tests/fixtures/llm/pretrain.yaml index cfebf521af92..4ca4fa3e6b46 100644 --- a/tests/fixtures/llm/pretrain.yaml +++ b/tests/fixtures/llm/pretrain.yaml @@ -24,7 +24,7 @@ pretrain: model_name_or_path: __internal_testing__/tiny-random-llama chatglm: model_type: chatglm - model_name_or_path: __internal_testing__/tiny-random-chatglm + model_name_or_path: __internal_testing__/tiny-fused-chatglm inference-predict: default: @@ -44,5 +44,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index e6ee001c80dc..543149d41c5f 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -28,12 +28,12 @@ @parameterized_class( - ["model_dir"], + ["model_dir", "enable_compare"], [ - ["llama"], + ["llama", False], # ["chatglm"], # ["chatglm2"], - ["bloom"], + # ["bloom"], ], ) class FinetuneTest(LLMTest, unittest.TestCase): @@ -83,15 +83,7 @@ def test_pretrain(self): main() - self._test_inference_predictor() - self._test_predictor() + if self.model_dir != "opt": + self.run_predictor({"inference_model": True}) - def _test_inference_predictor(self): - # TODO(wj-Mcat): OPTModel do not support inference model - if self.model_dir == "opt": - return - - self.run_predictor({"inference_model": "true"}) - - def _test_predictor(self): - self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py index b17684e9b710..8f12fffb5b5f 100644 --- a/tests/llm/test_lora.py +++ b/tests/llm/test_lora.py @@ -29,9 +29,9 @@ @parameterized_class( - ["model_dir"], + ["model_dir", "enable_compare"], [ - ["llama"], + ["llama", False], # TODO(wj-Mcat): to enable chatglm/chatglm2 unit test # ["chatglm"], # ["chatglm2"], @@ -102,6 +102,6 @@ def test_lora(self): merge() if self.model_dir not in ["chatglm2"]: - self.run_predictor({"inference_model": "true"}) + self.run_predictor({"inference_model": True}) - self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index 7c74c15e5907..e65803dbc386 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -67,11 +67,5 @@ def test_pretrain(self): main() - self._test_inference_predictor() - self._test_predictor() - - def _test_inference_predictor(self): - self.run_predictor({"inference_model": "true"}) - - def _test_predictor(self): - 
self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": True}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index d0d867c818a9..efde13c4d623 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import json import os import shutil import sys @@ -25,6 +26,7 @@ class LLMTest: config_path: str = None + enable_compare: bool = True def setUp(self) -> None: self.root_path = "./llm" @@ -44,6 +46,15 @@ def disable_static(self): paddle.utils.unique_name.switch() paddle.disable_static() + def _read_result(self, file): + result = [] + # read output field from json file + with open(file, "r", encoding="utf-8") as f: + for line in f: + data = json.loads(line) + result.append(data["output"]) + return result + def run_predictor(self, config_params=None): config_params = config_params or {} # to avoid the same parameter @@ -81,17 +92,18 @@ def run_predictor(self, config_params=None): config_params.pop("model_name_or_path", None) config.update(config_params) - enable_compare = config.pop("enable_compare", False) with argv_context_guard(config): from predictor import predict predict() self.disable_static() - # compare result - if enable_compare: - predict_result = self._read_result(predict_config["output_file"]) - infer_result = self._read_result(config["output_file"]) - assert len(predict_result) == len(infer_result) - for predict_item, infer_item in zip(predict_result, infer_result): - self.assertEqual(predict_item, infer_item) + + if not config_params["inference_model"]: + return + + predict_result = self._read_result(predict_config["output_file"]) + infer_result = self._read_result(config["output_file"]) + assert len(predict_result) == len(infer_result) + for predict_item, infer_item in zip(predict_result, infer_result): + self.assertEqual(predict_item, infer_item) From 68ff4643b8e70a9dc52a6def4848e7beaa03e315 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 07:57:40 +0000 Subject: [PATCH 4/7] update llm testing --- scripts/regression/ci_case.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/regression/ci_case.sh b/scripts/regression/ci_case.sh index 5d33529ca31c..2cf56fef5edd 100644 --- a/scripts/regression/ci_case.sh +++ b/scripts/regression/ci_case.sh @@ -1083,7 +1083,7 @@ python setup_cuda.py install echo ' Testing all LLMs ' cd ${nlp_dir} -python -m pytest tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1 +python -m pytest -v -s tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1 print_info $? 
llm } fast_generation(){ From ff6ac92abdb5dc5787c49518c8fd686ded62fd84 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 12:10:31 +0000 Subject: [PATCH 5/7] revert chatglm sft-arguments --- llm/chatglm/sft_argument.json | 57 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 0602a60b2441..54960bf8a379 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,30 +1,29 @@ { - "model_name_or_path": "__internal_testing__/tiny-fused-chatglm", - "dataset_name_or_path": "./data", - "output_dir": "./checkpoints/chatglm_sft_ckpts", - "per_device_train_batch_size": 4, - "gradient_accumulation_steps": 4, - "per_device_eval_batch_size": 8, - "eval_accumulation_steps":16, - "num_train_epochs": 3, - "learning_rate": 3e-05, - "warmup_steps": 30, - "logging_steps": 1, - "max_steps": 10, - "evaluation_strategy": "epoch", - "save_strategy": "epoch", - "src_length": 1024, - "max_length": 2048, - "fp16": true, - "fp16_opt_level": "O2", - "do_train": true, - "do_eval": true, - "disable_tqdm": true, - "load_best_model_at_end": true, - "eval_with_do_generation": false, - "metric_for_best_model": "accuracy", - "recompute": true, - "save_total_limit": 1, - "tensor_parallel_degree": 1, - "pipeline_parallel_degree": 1 - } \ No newline at end of file + "model_name_or_path": "THUDM/chatglm-6b", + "dataset_name_or_path": "./data", + "output_dir": "./checkpoints/chatglm_sft_ckpts", + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "per_device_eval_batch_size": 8, + "eval_accumulation_steps":16, + "num_train_epochs": 3, + "learning_rate": 3e-05, + "warmup_steps": 30, + "logging_steps": 1, + "evaluation_strategy": "epoch", + "save_strategy": "epoch", + "src_length": 1024, + "max_length": 2048, + "fp16": true, + "fp16_opt_level": "O2", + "do_train": true, + "do_eval": true, + "disable_tqdm": true, + "load_best_model_at_end": true, + "eval_with_do_generation": false, + "metric_for_best_model": "accuracy", + "recompute": true, + "save_total_limit": 1, + "tensor_parallel_degree": 4, + "pipeline_parallel_degree": 1 +} \ No newline at end of file From 7311f097d05a9c2148d36a5f9c12058a27319feb Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 12:11:33 +0000 Subject: [PATCH 6/7] revert chatglm sft-arguments --- llm/chatglm/sft_argument.json | 54 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 54960bf8a379..02656828e141 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,29 +1,29 @@ { - "model_name_or_path": "THUDM/chatglm-6b", - "dataset_name_or_path": "./data", - "output_dir": "./checkpoints/chatglm_sft_ckpts", - "per_device_train_batch_size": 4, - "gradient_accumulation_steps": 4, - "per_device_eval_batch_size": 8, - "eval_accumulation_steps":16, - "num_train_epochs": 3, - "learning_rate": 3e-05, - "warmup_steps": 30, - "logging_steps": 1, - "evaluation_strategy": "epoch", - "save_strategy": "epoch", - "src_length": 1024, - "max_length": 2048, - "fp16": true, - "fp16_opt_level": "O2", - "do_train": true, - "do_eval": true, - "disable_tqdm": true, - "load_best_model_at_end": true, - "eval_with_do_generation": false, - "metric_for_best_model": "accuracy", - "recompute": true, - "save_total_limit": 1, - "tensor_parallel_degree": 4, - 
"pipeline_parallel_degree": 1 + "model_name_or_path": "THUDM/chatglm-6b", + "dataset_name_or_path": "./data", + "output_dir": "./checkpoints/chatglm_sft_ckpts", + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "per_device_eval_batch_size": 8, + "eval_accumulation_steps":16, + "num_train_epochs": 3, + "learning_rate": 3e-05, + "warmup_steps": 30, + "logging_steps": 1, + "evaluation_strategy": "epoch", + "save_strategy": "epoch", + "src_length": 1024, + "max_length": 2048, + "fp16": true, + "fp16_opt_level": "O2", + "do_train": true, + "do_eval": true, + "disable_tqdm": true, + "load_best_model_at_end": true, + "eval_with_do_generation": false, + "metric_for_best_model": "accuracy", + "recompute": true, + "save_total_limit": 1, + "tensor_parallel_degree": 4, + "pipeline_parallel_degree": 1 } \ No newline at end of file From 5e2c23a0ae3ac7d3b65f0618f5eb380374d5700f Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Wed, 20 Sep 2023 08:58:17 +0000 Subject: [PATCH 7/7] update llm testing --- tests/llm/test_finetune.py | 30 +----------------------------- tests/llm/test_lora.py | 27 --------------------------- tests/llm/test_prefix_tuning.py | 30 +----------------------------- tests/llm/test_pretrain.py | 10 +++++----- tests/llm/testing_utils.py | 1 + 5 files changed, 8 insertions(+), 90 deletions(-) diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index 543149d41c5f..3449f953cd6b 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -13,15 +13,11 @@ # limitations under the License. from __future__ import annotations -import os -import shutil import sys -import tempfile import unittest from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -43,36 +39,12 @@ class FinetuneTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() sys.path.insert(0, self.model_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) - def test_pretrain(self): + def test_finetune(self): finetune_config = load_test_config(self.config_path, "finetune", self.model_dir) finetune_config["dataset_name_or_path"] = self.data_dir diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py index 8f12fffb5b5f..0112730bdf85 100644 --- a/tests/llm/test_lora.py +++ b/tests/llm/test_lora.py @@ -14,15 +14,12 @@ from __future__ import annotations import os -import shutil import sys -import tempfile import unittest import paddle from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import 
argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -45,35 +42,11 @@ class LoraTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_codes_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) sys.path.remove(self.model_codes_dir) def test_lora(self): diff --git a/tests/llm/test_prefix_tuning.py b/tests/llm/test_prefix_tuning.py index 1fad2647a84d..60b78edd7015 100644 --- a/tests/llm/test_prefix_tuning.py +++ b/tests/llm/test_prefix_tuning.py @@ -14,14 +14,11 @@ from __future__ import annotations import os -import shutil import sys -import tempfile import unittest from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -40,39 +37,14 @@ class LoraTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_codes_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) - sys.path.remove(self.model_codes_dir) - def test_pretrain(self): + def test_prefix_tuning(self): prefix_tuning_config = load_test_config(self.config_path, "prefix_tuning", self.model_dir) prefix_tuning_config["dataset_name_or_path"] = self.data_dir diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index e65803dbc386..aca07a0690ad 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -40,7 +40,7 @@ class PretrainTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() + self.dataset_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) 
sys.path.insert(0, self.model_codes_dir) @@ -48,18 +48,18 @@ def tearDown(self) -> None: LLMTest.tearDown(self) sys.path.remove(self.model_codes_dir) - shutil.rmtree(self.data_dir) + shutil.rmtree(self.dataset_dir) def test_pretrain(self): # Run pretrain URL = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy" URL2 = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_idx.npz" - get_path_from_url(URL, root_dir=self.data_dir) - get_path_from_url(URL2, root_dir=self.data_dir) + get_path_from_url(URL, root_dir=self.dataset_dir) + get_path_from_url(URL2, root_dir=self.dataset_dir) pretrain_config = load_test_config(self.config_path, "pretrain", self.model_dir) - pretrain_config["input_dir"] = self.data_dir + pretrain_config["input_dir"] = self.dataset_dir pretrain_config["output_dir"] = self.output_dir with argv_context_guard(pretrain_config): diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index efde13c4d623..0a1ded580a9e 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -27,6 +27,7 @@ class LLMTest: config_path: str = None enable_compare: bool = True + data_dir = "./tests/fixtures/llm/data/" def setUp(self) -> None: self.root_path = "./llm"
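
Note on the configuration merge introduced in PATCH 1/7: as written in tests/testing_utils.py, load_test_config now layers three blocks — the mode block ("slow" or "default"), then "base" (whose values take precedence over the mode block), then the per-model sub-key (e.g. "llama") — and finally drops any values that are still dicts. The standalone sketch below reproduces that merge order outside the test suite for reference only; the names demo_config and resolve are illustrative and are not part of the patch.

from __future__ import annotations

# Sketch of the merge order added to load_test_config: mode block first,
# then "base" applied on top of it, then the model-specific sub-key, and
# finally any remaining nested dicts are dropped so only flat options survive.
demo_config = {
    "finetune": {
        "base": {"learning_rate": 3e-05, "fp16": True},
        "default": {
            "learning_rate": 1e-05,  # overridden by "base" under this merge order
            "llama": {"model_name_or_path": "__internal_testing__/tiny-random-llama"},
        },
    }
}

def resolve(config: dict, key: str, sub_key: str, slow: bool = False) -> dict | None:
    section = config[key]
    mode_key = "slow" if slow else "default"
    if mode_key not in section:
        return None
    merged = dict(section.get(mode_key, {}))
    merged.update(section.get("base", {}))   # "base" wins over the mode block
    merged.update(merged.get(sub_key, {}))   # model-specific values win last
    return {k: v for k, v in merged.items() if not isinstance(v, dict)}

print(resolve(demo_config, "finetune", "llama"))
# {'learning_rate': 3e-05, 'fp16': True, 'model_name_or_path': '__internal_testing__/tiny-random-llama'}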