[New Features] add llm pretrain & lora & sft & prefix_tuning testing scripts (#7056)

* add chatglm finetune

* add llm ci scripts

* add llama llm testing

* update llm testing

* revert chatglm sft-arguments

* revert chatglm sft-arguments

* update llm testing
wj-Mcat authored Sep 20, 2023
1 parent d2524ab commit a150627
Showing 12 changed files with 469 additions and 42 deletions.
2 changes: 1 addition & 1 deletion llm/chatglm/sft_argument.json
@@ -26,4 +26,4 @@
"save_total_limit": 1,
"tensor_parallel_degree": 4,
"pipeline_parallel_degree": 1
}
}
2 changes: 1 addition & 1 deletion scripts/regression/ci_case.sh
@@ -1083,7 +1083,7 @@ python setup_cuda.py install

echo ' Testing all LLMs '
cd ${nlp_dir}
python -m pytest tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1
python -m pytest -v -s tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1
print_info $? llm
}
fast_generation(){
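The only change to ci_case.sh adds `-v -s` to the pytest invocation so per-test names and captured output land in the CI log. As a minimal sketch (not part of this commit), the same test selection can also be run locally through pytest's Python entry point; the file glob and exit-code handling below simply mirror what the shell function does.

```python
# Local-debugging sketch only, not part of ci_case.sh: run the same LLM test
# selection through pytest's Python API. pytest.main returns an exit code,
# analogous to the `print_info $? llm` check in the CI script.
import glob
import sys

import pytest

if __name__ == "__main__":
    exit_code = pytest.main(["-v", "-s", *glob.glob("tests/llm/test_*.py")])
    sys.exit(int(exit_code))
```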
56 changes: 56 additions & 0 deletions tests/fixtures/llm/finetune.yaml
@@ -0,0 +1,56 @@
finetune:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-05
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
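The fixture keeps the hyperparameters shared by every model under `base` and the per-model checkpoint under `default`. Below is a minimal sketch of how a test could resolve one model's config from this layout, assuming a plain dict merge; the real resolution is done by `load_test_config` in tests/testing_utils.py, which this diff does not show.

```python
# Sketch only: assumes the fixture is resolved by merging the shared `base`
# section with one entry from `default`. The actual behaviour lives in
# load_test_config (tests/testing_utils.py) and may differ in details.
import yaml


def resolve_config(path: str, section: str, model_dir: str) -> dict:
    with open(path, "r", encoding="utf-8") as f:
        fixture = yaml.safe_load(f)
    group = fixture[section]                    # e.g. "finetune"
    config = dict(group.get("base", {}))        # shared hyperparameters
    config.update(group["default"][model_dir])  # per-model overrides, e.g. "llama"
    return config


config = resolve_config("tests/fixtures/llm/finetune.yaml", "finetune", "llama")
print(config["model_name_or_path"])  # __internal_testing__/tiny-random-llama
```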
58 changes: 58 additions & 0 deletions tests/fixtures/llm/lora.yaml
@@ -0,0 +1,58 @@
lora:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-04
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    lora: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
60 changes: 60 additions & 0 deletions tests/fixtures/llm/prefix_tuning.yaml
@@ -0,0 +1,60 @@
prefix_tuning:
  base:
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-02
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    prefix_tuning: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16
    export_precache: true

inference-to-static:
  default:
    dtype: float16
    export_precache: true

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
    export_precache: true
15 changes: 9 additions & 6 deletions tests/fixtures/llm/pretrain.yaml
@@ -1,7 +1,5 @@
pretrain:
  default:
    model_type: llama
    model_name_or_path: __internal_testing__/tiny-random-llama
  base:
    weight_decay: 0.01
    max_steps: 2
    save_steps: 2
@@ -20,7 +18,13 @@ pretrain:
    use_flash_attention: 0
    use_fused_rms_norm: 0
    continue_training: 1

  default:
    llama:
      model_type: llama
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_type: chatglm
      model_name_or_path: __internal_testing__/tiny-fused-chatglm

inference-predict:
  default:
@@ -40,5 +44,4 @@ inference-infer:
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
    enable_compare: false
    max_length: 20
61 changes: 61 additions & 0 deletions tests/llm/test_finetune.py
@@ -0,0 +1,61 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import sys
import unittest

from parameterized import parameterized_class

from tests.testing_utils import argv_context_guard, load_test_config

from .testing_utils import LLMTest


@parameterized_class(
    ["model_dir", "enable_compare"],
    [
        ["llama", False],
        # ["chatglm"],
        # ["chatglm2"],
        # ["bloom"],
    ],
)
class FinetuneTest(LLMTest, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/finetune.yaml"
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)

        sys.path.insert(0, self.model_dir)

    def tearDown(self) -> None:
        LLMTest.tearDown(self)

    def test_finetune(self):
        finetune_config = load_test_config(self.config_path, "finetune", self.model_dir)

        finetune_config["dataset_name_or_path"] = self.data_dir
        finetune_config["output_dir"] = self.output_dir

        with argv_context_guard(finetune_config):
            from finetune_generation import main

            main()

        if self.model_dir != "opt":
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})
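The test feeds the merged config to `finetune_generation.main()` through `argv_context_guard`. A hypothetical stand-in for that helper is sketched below to show the assumed mechanics; the real implementation lives in tests/testing_utils.py and may differ.

```python
# Hypothetical stand-in for argv_context_guard, for illustration only: it is
# assumed to rewrite sys.argv from a config dict as "--key value" pairs so the
# training entry point parses them as CLI arguments, restoring sys.argv on exit.
import sys
from contextlib import contextmanager


@contextmanager
def argv_context_guard(config: dict):
    original_argv = sys.argv
    sys.argv = ["finetune_generation.py"]
    for key, value in config.items():
        sys.argv += [f"--{key}", str(value)]
    try:
        yield
    finally:
        sys.argv = original_argv
```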
80 changes: 80 additions & 0 deletions tests/llm/test_lora.py
@@ -0,0 +1,80 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
import sys
import unittest

import paddle
from parameterized import parameterized_class

from tests.testing_utils import argv_context_guard, load_test_config

from .testing_utils import LLMTest


@parameterized_class(
    ["model_dir", "enable_compare"],
    [
        ["llama", False],
        # TODO(wj-Mcat): to enable chatglm/chatglm2 unit test
        # ["chatglm"],
        # ["chatglm2"],
        ["bloom"],
    ],
)
class LoraTest(LLMTest, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/lora.yaml"
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)

        self.model_codes_dir = os.path.join(self.root_path, self.model_dir)
        sys.path.insert(0, self.model_codes_dir)

    def tearDown(self) -> None:
        LLMTest.tearDown(self)
        sys.path.remove(self.model_codes_dir)

    def test_lora(self):
        self.disable_static()
        paddle.set_default_dtype("float32")

        lora_config = load_test_config(self.config_path, "lora", self.model_dir)

        lora_config["dataset_name_or_path"] = self.data_dir
        lora_config["output_dir"] = self.output_dir

        with argv_context_guard(lora_config):
            from finetune_generation import main

            main()

        # merge weights
        merge_lora_weights_config = {
            "model_name_or_path": lora_config["model_name_or_path"],
            "lora_path": lora_config["output_dir"],
            "merge_model_path": lora_config["output_dir"],
        }
        with argv_context_guard(merge_lora_weights_config):
            from merge_lora_params import merge

            merge()

        if self.model_dir not in ["chatglm2"]:
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})