[New Features] add llm pretrain & lora & sft & prefix_tuning testing scripts #7056
@@ -1,29 +1,29 @@
{
    "model_name_or_path": "THUDM/chatglm-6b",
    "dataset_name_or_path": "./data",
    "output_dir": "./checkpoints/chatglm_sft_ckpts",
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "per_device_eval_batch_size": 8,
    "eval_accumulation_steps": 16,
    "num_train_epochs": 3,
    "learning_rate": 3e-05,
    "warmup_steps": 30,
    "logging_steps": 1,
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "src_length": 1024,
    "max_length": 2048,
    "fp16": true,
    "fp16_opt_level": "O2",
    "do_train": true,
    "do_eval": true,
    "disable_tqdm": true,
    "load_best_model_at_end": true,
    "eval_with_do_generation": false,
    "metric_for_best_model": "accuracy",
    "recompute": true,
    "save_total_limit": 1,
    "tensor_parallel_degree": 4,
    "pipeline_parallel_degree": 1
}
"model_name_or_path": "THUDM/chatglm-6b", | ||
"dataset_name_or_path": "./data", | ||
"output_dir": "./checkpoints/chatglm_sft_ckpts", | ||
"per_device_train_batch_size": 4, | ||
"gradient_accumulation_steps": 4, | ||
"per_device_eval_batch_size": 8, | ||
"eval_accumulation_steps":16, | ||
"num_train_epochs": 3, | ||
"learning_rate": 3e-05, | ||
"warmup_steps": 30, | ||
"logging_steps": 1, | ||
"evaluation_strategy": "epoch", | ||
"save_strategy": "epoch", | ||
"src_length": 1024, | ||
"max_length": 2048, | ||
"fp16": true, | ||
"fp16_opt_level": "O2", | ||
"do_train": true, | ||
"do_eval": true, | ||
"disable_tqdm": true, | ||
"load_best_model_at_end": true, | ||
"eval_with_do_generation": false, | ||
"metric_for_best_model": "accuracy", | ||
"recompute": true, | ||
"save_total_limit": 1, | ||
"tensor_parallel_degree": 4, | ||
"pipeline_parallel_degree": 1 | ||
} | ||
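As a side note on this config: tensor_parallel_degree times pipeline_parallel_degree determines how many devices the SFT run expects (4 x 1 = 4 here). A minimal sketch of that arithmetic, assuming only the JSON above; the file path in the sketch is hypothetical, since the diff does not show the file's location:

```python
import json

# Hypothetical path; the diff does not show where this JSON lives.
with open("chatglm_sft_args.json", "r", encoding="utf8") as f:
    cfg = json.load(f)

# The distributed launch must provide tp * pp devices for this config.
world_size = cfg["tensor_parallel_degree"] * cfg["pipeline_parallel_degree"]
print(f"expected number of devices: {world_size}")  # 4 * 1 = 4

# Examples consumed per optimizer step by each model replica, assuming no extra
# data parallelism on top of tp/pp (an assumption, not stated in the diff).
per_replica_batch = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
print(f"examples per optimizer step per replica: {per_replica_batch}")  # 4 * 4 = 16
```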
@@ -0,0 +1,56 @@
finetune:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-05
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
Review comment: What is the difference between the random and fused models here?
Reply: The head_dim of a fused model must be one of 10, 26, 32, 64, 128, ..., so to avoid affecting the existing unit tests, a new tiny-random model was created specifically for the non-fused vs. fused unit tests (see the sketch after this file diff).
Comment on lines +29 to +30
Review comment: Add a baichuan model too, the kind with alibi.
Reply: Let's leave that one to @wtmlon to add.
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
Review comment: What does "fused" refer to in tiny-fused-chatglm?
Reply: Because the head_dim of a fused model must be one of 10, 26, 32, 64, 128, ..., a new tiny-random model was created specifically for the non-fused vs. fused unit tests so that the existing unit tests are not affected.
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom
inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
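To illustrate the head_dim constraint discussed in the review comments above, here is a minimal sketch; it is not PaddleNLP code, the supported set is taken from the reviewer reply, and the example sizes are made up for illustration:

```python
# Head dims reported as supported by the fused kernels in the review reply above
# (the full list is longer; treat this set as illustrative only).
FUSED_SUPPORTED_HEAD_DIMS = {10, 26, 32, 64, 128}


def can_use_fused_attention(hidden_size: int, num_attention_heads: int) -> bool:
    """Return True if the per-head dimension is one the fused kernels accept."""
    head_dim = hidden_size // num_attention_heads
    return head_dim in FUSED_SUPPORTED_HEAD_DIMS


# A tiny-random style config may pick an arbitrary head_dim (e.g. 24 // 4 = 6),
# which fused kernels reject, hence the separate tiny-random / tiny-fused models.
print(can_use_fused_attention(hidden_size=24, num_attention_heads=4))   # False
print(can_use_fused_attention(hidden_size=128, num_attention_heads=4))  # True (head_dim = 32)
```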
@@ -0,0 +1,58 @@
lora:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-04
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    lora: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
@@ -0,0 +1,60 @@
prefix_tuning:
  base:
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-02
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    prefix_tuning: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16
    export_precache: true

inference-to-static:
  default:
    dtype: float16
    export_precache: true

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
    export_precache: true
@@ -1,7 +1,5 @@
 pretrain:
-  default:
-    model_type: llama
-    model_name_or_path: __internal_testing__/tiny-random-llama
+  base:
     weight_decay: 0.01
     max_steps: 2
     save_steps: 2

@@ -20,7 +18,13 @@ pretrain:
     use_flash_attention: 0
     use_fused_rms_norm: 0
     continue_training: 1

+  default:
+    llama:
+      model_type: llama
+      model_name_or_path: __internal_testing__/tiny-random-llama
+    chatglm:
+      model_type: chatglm
Review comment: Does chatglm have a pretraining pipeline?
Reply: No, it doesn't here, and chatglm isn't configured for fine-tune here either, so I can just remove it for now.
+      model_name_or_path: __internal_testing__/tiny-fused-chatglm

 inference-predict:
   default:

@@ -40,5 +44,4 @@ inference-infer:
     dtype: float16
     batch_size: 2
     decode_strategy: greedy_search
-    max_length: 20
-    enable_compare: false
+    max_length: 20
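These fixtures all follow the same pattern: a shared base section plus per-model entries under default, which the test file below flattens into one argument dict via load_test_config(config_path, section, model_dir). The real helper lives in tests/testing_utils.py and is not shown in this diff; the sketch below is only an assumption about how such a merge might look:

```python
from typing import Any, Dict

import yaml  # PyYAML


def load_fixture_config(config_path: str, section: str, model_dir: str) -> Dict[str, Any]:
    """Hypothetical stand-in for load_test_config: merge the `base` block with the
    per-model entry under `default` for one section of a fixture YAML."""
    with open(config_path, "r", encoding="utf8") as f:
        fixture = yaml.safe_load(f)

    section_cfg = fixture[section]
    config = dict(section_cfg.get("base", {}))        # shared training arguments
    config.update(section_cfg["default"][model_dir])  # e.g. model_name_or_path
    return config


# Usage mirroring the test below (paths as used in FinetuneTest).
config = load_fixture_config("./tests/fixtures/llm/pretrain.yaml", "pretrain", "llama")
print(config["model_name_or_path"])  # __internal_testing__/tiny-random-llama
```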
@@ -0,0 +1,89 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
import shutil
import sys
import tempfile
import unittest

from parameterized import parameterized_class

from paddlenlp.utils.downloader import get_path_from_url
from tests.testing_utils import argv_context_guard, load_test_config

from .testing_utils import LLMTest


@parameterized_class(
    ["model_dir", "enable_compare"],
    [
        ["llama", False],
        # ["chatglm"],
        # ["chatglm2"],
        # ["bloom"],
    ],
)
class FinetuneTest(LLMTest, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/finetune.yaml"
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)

        self.data_dir = tempfile.mkdtemp()
        sys.path.insert(0, self.model_dir)

        # Run pretrain
Review comment: The comment here looks a bit off.
Reply: I'll fix it.
URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" | ||
get_path_from_url(URL, root_dir=self.data_dir) | ||
self.data_dir = os.path.join(self.data_dir, "data") | ||
self.use_small_datasets() | ||
|
||
def use_small_datasets(self): | ||
# use 20 examples | ||
def use_few_examples(file): | ||
with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: | ||
lines = [line.strip() for line in f.readlines()] | ||
with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: | ||
f.write("\n".join(lines[:20])) | ||
|
||
shutil.copyfile( | ||
os.path.join(self.data_dir, "dev.json"), | ||
os.path.join(self.data_dir, "validation.json"), | ||
) | ||
use_few_examples("train.json") | ||
use_few_examples("dev.json") | ||
use_few_examples("validation.json") | ||
|
||
def tearDown(self) -> None: | ||
Review comment: Besides deleting the data files, do the model files need to be deleted as well?
Reply: Since they are loaded with from_pretrained, they are cached under the .paddlenlp/models directory, so there should be no need to delete them.
        LLMTest.tearDown(self)
        shutil.rmtree(self.data_dir)

    def test_pretrain(self):
Review comment: This function looks like it actually runs finetune.
Reply: It was copied over and hasn't been changed yet; I'll adjust it.
        finetune_config = load_test_config(self.config_path, "finetune", self.model_dir)

        finetune_config["dataset_name_or_path"] = self.data_dir
        finetune_config["output_dir"] = self.output_dir

        with argv_context_guard(finetune_config):
            from finetune_generation import main

            main()

        if self.model_dir != "opt":
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})
Review comment: This part was formatted by pre-commit.
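FinetuneTest above drives finetune_generation.main() by exposing the merged config dict as command-line arguments through argv_context_guard from tests/testing_utils. That helper's implementation is not part of this diff; the sketch below is only a plausible version of the pattern, assuming the helper simply patches sys.argv:

```python
import sys
from contextlib import contextmanager
from typing import Any, Dict, Iterator


@contextmanager
def argv_context_guard_sketch(config: Dict[str, Any]) -> Iterator[None]:
    """Assumed behaviour: expose a config dict as `--key value` CLI arguments
    for the duration of the block, then restore the original sys.argv."""
    original_argv = sys.argv
    argv = [original_argv[0]]
    for key, value in config.items():
        argv.extend([f"--{key}", str(value)])
    sys.argv = argv
    try:
        yield
    finally:
        sys.argv = original_argv


# Usage mirroring the test: the script's argument parser then reads sys.argv.
if __name__ == "__main__":
    with argv_context_guard_sketch({"output_dir": "./checkpoints", "do_train": True}):
        print(sys.argv[1:])  # ['--output_dir', './checkpoints', '--do_train', 'True']
```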