From b04d991547e2da4530dacf785e903c0c38e282b4 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Mon, 18 Sep 2023 05:26:55 +0000 Subject: [PATCH 1/7] add chatglm finetune --- tests/fixtures/llm/finetune.yaml | 51 ++++++++++++++++++ tests/fixtures/llm/pretrain.yaml | 12 +++-- tests/llm/test_finetune.py | 90 ++++++++++++++++++++++++++++++++ tests/llm/test_pretrain.py | 15 ++++-- tests/testing_utils.py | 22 ++++++-- 5 files changed, 177 insertions(+), 13 deletions(-) create mode 100644 tests/fixtures/llm/finetune.yaml create mode 100644 tests/llm/test_finetune.py diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml new file mode 100644 index 000000000000..30d19ee31b9b --- /dev/null +++ b/tests/fixtures/llm/finetune.yaml @@ -0,0 +1,51 @@ +finetune: + base: + dataset_name_or_path: "./data" + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-05 + warmup_steps: 30 + logging_steps: 1 + evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + +inference-to-static: + default: + dtype: float16 + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false \ No newline at end of file diff --git a/tests/fixtures/llm/pretrain.yaml b/tests/fixtures/llm/pretrain.yaml index 3e8c4fa55d17..cfebf521af92 100644 --- a/tests/fixtures/llm/pretrain.yaml +++ b/tests/fixtures/llm/pretrain.yaml @@ -1,7 +1,5 @@ pretrain: - default: - model_type: llama - model_name_or_path: __internal_testing__/tiny-random-llama + base: weight_decay: 0.01 max_steps: 2 save_steps: 2 @@ -20,7 +18,13 @@ pretrain: use_flash_attention: 0 use_fused_rms_norm: 0 continue_training: 1 - + default: + llama: + model_type: llama + model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_type: chatglm + model_name_or_path: __internal_testing__/tiny-random-chatglm inference-predict: default: diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py new file mode 100644 index 000000000000..79f41e80e545 --- /dev/null +++ b/tests/llm/test_finetune.py @@ -0,0 +1,90 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + ], +) +class FinetuneTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/finetune.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + sys.path.insert(0, self.model_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + + def test_pretrain(self): + finetune_config = load_test_config(self.config_path, "finetune", self.model_dir) + + finetune_config["dataset_name_or_path"] = self.data_dir + finetune_config["output_dir"] = self.output_dir + + with argv_context_guard(finetune_config): + from finetune_generation import main + + main() + + self._test_inference_predictor() + self._test_predictor() + + def _test_inference_predictor(self): + self.run_predictor({"inference_model": "true"}) + + def _test_predictor(self): + self.run_predictor({"inference_model": "false"}) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index cf14cc82d7b2..a379b69297f7 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -13,6 +13,7 @@ # limitations under the License. 
from __future__ import annotations +import os import shutil import sys import tempfile @@ -30,6 +31,7 @@ ["model_dir"], [ ["llama"], + ["chatglm"], ], ) class PretrainTest(LLMTest, unittest.TestCase): @@ -37,16 +39,18 @@ class PretrainTest(LLMTest, unittest.TestCase): model_dir: str = None def setUp(self) -> None: - self.model_dir = "./llm/llama" + LLMTest.setUp(self) + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_dir) - LLMTest.setUp(self) def tearDown(self) -> None: - sys.path.remove(self.model_dir) - shutil.rmtree(self.data_dir) LLMTest.tearDown(self) + sys.path.remove(self.model_codes_dir) + shutil.rmtree(self.data_dir) + def test_pretrain(self): # Run pretrain URL = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy" @@ -54,7 +58,8 @@ def test_pretrain(self): get_path_from_url(URL, root_dir=self.data_dir) get_path_from_url(URL2, root_dir=self.data_dir) - pretrain_config = load_test_config(self.config_path, "pretrain") + pretrain_config = load_test_config(self.config_path, "pretrain", self.model_dir) + pretrain_config["input_dir"] = self.data_dir pretrain_config["output_dir"] = self.output_dir diff --git a/tests/testing_utils.py b/tests/testing_utils.py index 96798fe52f31..6ea39bec0197 100644 --- a/tests/testing_utils.py +++ b/tests/testing_utils.py @@ -300,7 +300,7 @@ def is_slow_test() -> bool: return os.getenv("RUN_SLOW_TEST") is not None -def load_test_config(config_file: str, key: str) -> dict | None: +def load_test_config(config_file: str, key: str, sub_key: str = None) -> dict | None: """parse config file to argv Args: @@ -314,12 +314,26 @@ def load_test_config(config_file: str, key: str) -> dict | None: assert key in config, f"<{key}> should be the top key in configuration file" config = config[key] - sub_key = "slow" if is_slow_test() else "default" + mode_key = "slow" if is_slow_test() else "default" - if sub_key not in config: + if mode_key not in config: return None - config = config[sub_key] + # 2. load base common config + base_config = config.get("base", {}) + + config = config.get(mode_key, {}) + config.update(base_config) + + # 3. 
load sub key config + sub_config = config.get(sub_key, {}) + config.update(sub_config) + + # remove dict value + for key in list(config.keys()): + if isinstance(config[key], dict): + config.pop(key) + return config From b2fa9be09361b0f57c93a84a4830bbc61744bbb0 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Mon, 18 Sep 2023 13:20:47 +0000 Subject: [PATCH 2/7] add llm ci scripts --- llm/chatglm/sft_argument.json | 5 +- .../transformers/chatglm/modeling.py | 2 +- tests/fixtures/llm/finetune.yaml | 6 + tests/fixtures/llm/lora.yaml | 59 ++++++++++ tests/fixtures/llm/prefix_tuning.yaml | 55 +++++++++ tests/llm/test_finetune.py | 7 ++ tests/llm/test_lora.py | 107 ++++++++++++++++++ tests/llm/test_prefix_tuning.py | 99 ++++++++++++++++ tests/llm/test_pretrain.py | 3 +- tests/llm/testing_utils.py | 25 +++- 10 files changed, 357 insertions(+), 11 deletions(-) create mode 100644 tests/fixtures/llm/lora.yaml create mode 100644 tests/fixtures/llm/prefix_tuning.yaml create mode 100644 tests/llm/test_lora.py create mode 100644 tests/llm/test_prefix_tuning.py diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 5884b2f99882..0602a60b2441 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,5 +1,5 @@ { - "model_name_or_path": "THUDM/chatglm-6b", + "model_name_or_path": "__internal_testing__/tiny-fused-chatglm", "dataset_name_or_path": "./data", "output_dir": "./checkpoints/chatglm_sft_ckpts", "per_device_train_batch_size": 4, @@ -10,6 +10,7 @@ "learning_rate": 3e-05, "warmup_steps": 30, "logging_steps": 1, + "max_steps": 10, "evaluation_strategy": "epoch", "save_strategy": "epoch", "src_length": 1024, @@ -24,6 +25,6 @@ "metric_for_best_model": "accuracy", "recompute": true, "save_total_limit": 1, - "tensor_parallel_degree": 4, + "tensor_parallel_degree": 1, "pipeline_parallel_degree": 1 } \ No newline at end of file diff --git a/paddlenlp/experimental/transformers/chatglm/modeling.py b/paddlenlp/experimental/transformers/chatglm/modeling.py index 9d21adae386b..0ca1caec8d2d 100644 --- a/paddlenlp/experimental/transformers/chatglm/modeling.py +++ b/paddlenlp/experimental/transformers/chatglm/modeling.py @@ -332,7 +332,7 @@ def set_state_dict(self, state_dict, use_structured_name=True): continue elif k.startswith("lm_head.weight"): continue - elif k.endswith("rotary_emb.inv_freq"): + elif k.endswith("rotary_emb.inv_freq") or k.endswith("rotary_embeddings.inv_freq"): continue idx = int(k.split(".")[2]) if k.endswith("input_layernorm.weight"): diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml index 30d19ee31b9b..aa651f1c075f 100644 --- a/tests/fixtures/llm/finetune.yaml +++ b/tests/fixtures/llm/finetune.yaml @@ -28,6 +28,12 @@ finetune: default: llama: model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-random-bloom inference-predict: default: diff --git a/tests/fixtures/llm/lora.yaml b/tests/fixtures/llm/lora.yaml new file mode 100644 index 000000000000..b4038eb78fb6 --- /dev/null +++ b/tests/fixtures/llm/lora.yaml @@ -0,0 +1,59 @@ +lora: + base: + dataset_name_or_path: "./data" + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-04 + warmup_steps: 30 + logging_steps: 1 + 
evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + lora: true + + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-fused-bloom + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + +inference-to-static: + default: + dtype: float16 + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false \ No newline at end of file diff --git a/tests/fixtures/llm/prefix_tuning.yaml b/tests/fixtures/llm/prefix_tuning.yaml new file mode 100644 index 000000000000..e46a0d11d34a --- /dev/null +++ b/tests/fixtures/llm/prefix_tuning.yaml @@ -0,0 +1,55 @@ +prefix_tuning: + base: + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-02 + warmup_steps: 30 + logging_steps: 1 + evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 1024 + max_length: 2048 + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + prefix_tuning: true + + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + export_precache: true + +inference-to-static: + default: + dtype: float16 + export_precache: true + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 + enable_compare: false + export_precache: true \ No newline at end of file diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index 79f41e80e545..e6ee001c80dc 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -31,6 +31,9 @@ ["model_dir"], [ ["llama"], + # ["chatglm"], + # ["chatglm2"], + ["bloom"], ], ) class FinetuneTest(LLMTest, unittest.TestCase): @@ -84,6 +87,10 @@ def test_pretrain(self): self._test_predictor() def _test_inference_predictor(self): + # TODO(wj-Mcat): OPTModel do not support inference model + if self.model_dir == "opt": + return + self.run_predictor({"inference_model": "true"}) def _test_predictor(self): diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py new file mode 100644 index 000000000000..b17684e9b710 --- /dev/null +++ b/tests/llm/test_lora.py @@ -0,0 +1,107 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +import paddle +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + # TODO(wj-Mcat): to enable chatglm/chatglm2 unit test + # ["chatglm"], + # ["chatglm2"], + ["bloom"], + ], +) +class LoraTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/lora.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) + sys.path.insert(0, self.model_codes_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + sys.path.remove(self.model_codes_dir) + + def test_lora(self): + self.disable_static() + paddle.set_default_dtype("float32") + + lora_config = load_test_config(self.config_path, "lora", self.model_dir) + + lora_config["dataset_name_or_path"] = self.data_dir + lora_config["output_dir"] = self.output_dir + + with argv_context_guard(lora_config): + from finetune_generation import main + + main() + + # merge weights + merge_lora_weights_config = { + "model_name_or_path": lora_config["model_name_or_path"], + "lora_path": lora_config["output_dir"], + "merge_model_path": lora_config["output_dir"], + } + with argv_context_guard(merge_lora_weights_config): + from merge_lora_params import merge + + merge() + + if self.model_dir not in ["chatglm2"]: + self.run_predictor({"inference_model": "true"}) + + self.run_predictor({"inference_model": "false"}) diff --git a/tests/llm/test_prefix_tuning.py b/tests/llm/test_prefix_tuning.py new file mode 100644 index 000000000000..1fad2647a84d --- /dev/null +++ b/tests/llm/test_prefix_tuning.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import os +import shutil +import sys +import tempfile +import unittest + +from parameterized import parameterized_class + +from paddlenlp.utils.downloader import get_path_from_url +from tests.testing_utils import argv_context_guard, load_test_config + +from .testing_utils import LLMTest + + +@parameterized_class( + ["model_dir"], + [ + ["llama"], + ], +) +class LoraTest(LLMTest, unittest.TestCase): + config_path: str = "./tests/fixtures/llm/prefix_tuning.yaml" + model_dir: str = None + + def setUp(self) -> None: + LLMTest.setUp(self) + + self.data_dir = tempfile.mkdtemp() + self.model_codes_dir = os.path.join(self.root_path, self.model_dir) + sys.path.insert(0, self.model_codes_dir) + + # Run pretrain + URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" + get_path_from_url(URL, root_dir=self.data_dir) + self.data_dir = os.path.join(self.data_dir, "data") + self.use_small_datasets() + + def use_small_datasets(self): + # use 20 examples + def use_few_examples(file): + with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: + lines = [line.strip() for line in f.readlines()] + with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: + f.write("\n".join(lines[:20])) + + shutil.copyfile( + os.path.join(self.data_dir, "dev.json"), + os.path.join(self.data_dir, "validation.json"), + ) + use_few_examples("train.json") + use_few_examples("dev.json") + use_few_examples("validation.json") + + def tearDown(self) -> None: + LLMTest.tearDown(self) + shutil.rmtree(self.data_dir) + + sys.path.remove(self.model_codes_dir) + + def test_pretrain(self): + prefix_tuning_config = load_test_config(self.config_path, "prefix_tuning", self.model_dir) + + prefix_tuning_config["dataset_name_or_path"] = self.data_dir + prefix_tuning_config["output_dir"] = self.output_dir + + with argv_context_guard(prefix_tuning_config): + from finetune_generation import main + + main() + + self.run_predictor( + { + "inference_model": True, + "prefix_path": self.output_dir, + "model_name_or_path": prefix_tuning_config["model_name_or_path"], + } + ) + self.run_predictor( + { + "inference_model": False, + "prefix_path": self.output_dir, + "model_name_or_path": prefix_tuning_config["model_name_or_path"], + } + ) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index a379b69297f7..7c74c15e5907 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -31,7 +31,6 @@ ["model_dir"], [ ["llama"], - ["chatglm"], ], ) class PretrainTest(LLMTest, unittest.TestCase): @@ -43,7 +42,7 @@ def setUp(self) -> None: self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) - sys.path.insert(0, self.model_dir) + sys.path.insert(0, self.model_codes_dir) def tearDown(self) -> None: LLMTest.tearDown(self) diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index 5e61587ec4e1..d0d867c818a9 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -31,29 +31,39 @@ def setUp(self) -> None: self.output_dir = tempfile.mkdtemp() 
self.inference_output_dir = tempfile.mkdtemp() sys.path.insert(0, self.root_path) + self.disable_static() + paddle.set_default_dtype("float32") def tearDown(self) -> None: sys.path.remove(self.root_path) shutil.rmtree(self.output_dir) shutil.rmtree(self.inference_output_dir) + self.disable_static() + + def disable_static(self): + paddle.utils.unique_name.switch() + paddle.disable_static() def run_predictor(self, config_params=None): config_params = config_params or {} # to avoid the same parameter - paddle.utils.unique_name.switch() + self.disable_static() predict_config = load_test_config(self.config_path, "inference-predict") predict_config["output_file"] = os.path.join(self.output_dir, "predict.json") - predict_config.update(config_params) predict_config["model_name_or_path"] = self.output_dir + predict_config.update(config_params) with argv_context_guard(predict_config): from predictor import predict predict() + # prefix_tuning dynamic graph do not support to_static + if not predict_config["inference_model"]: + return + # to static - paddle.disable_static() - paddle.utils.unique_name.switch() + self.disable_static() config = load_test_config(self.config_path, "inference-to-static") config["output_path"] = self.inference_output_dir config["model_name_or_path"] = self.output_dir @@ -64,17 +74,20 @@ def run_predictor(self, config_params=None): main() # inference - paddle.disable_static() + self.disable_static() config = load_test_config(self.config_path, "inference-infer") config["model_name_or_path"] = self.inference_output_dir config["output_file"] = os.path.join(self.inference_output_dir, "infer.json") - enable_compare = config.pop("enable_compare", False) + + config_params.pop("model_name_or_path", None) config.update(config_params) + enable_compare = config.pop("enable_compare", False) with argv_context_guard(config): from predictor import predict predict() + self.disable_static() # compare result if enable_compare: predict_result = self._read_result(predict_config["output_file"]) From 93ac1ec0fd7e24a1603750fc175ff32b03ee174c Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 07:54:21 +0000 Subject: [PATCH 3/7] add llama llm testing --- tests/fixtures/llm/finetune.yaml | 5 ++--- tests/fixtures/llm/lora.yaml | 3 +-- tests/fixtures/llm/prefix_tuning.yaml | 7 ++++++- tests/fixtures/llm/pretrain.yaml | 5 ++--- tests/llm/test_finetune.py | 20 ++++++------------- tests/llm/test_lora.py | 8 ++++---- tests/llm/test_pretrain.py | 10 ++-------- tests/llm/testing_utils.py | 28 +++++++++++++++++++-------- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/fixtures/llm/finetune.yaml b/tests/fixtures/llm/finetune.yaml index aa651f1c075f..ed47f2610799 100644 --- a/tests/fixtures/llm/finetune.yaml +++ b/tests/fixtures/llm/finetune.yaml @@ -33,7 +33,7 @@ finetune: chatglm2: model_name_or_path: __internal_testing__/tiny-random-chatglm2 bloom: - model_name_or_path: __internal_testing__/tiny-random-bloom + model_name_or_path: __internal_testing__/tiny-fused-bloom inference-predict: default: @@ -53,5 +53,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/fixtures/llm/lora.yaml b/tests/fixtures/llm/lora.yaml index b4038eb78fb6..e38459c04551 100644 --- a/tests/fixtures/llm/lora.yaml +++ b/tests/fixtures/llm/lora.yaml @@ -55,5 +55,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: 
greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/fixtures/llm/prefix_tuning.yaml b/tests/fixtures/llm/prefix_tuning.yaml index e46a0d11d34a..9a9580f96bd5 100644 --- a/tests/fixtures/llm/prefix_tuning.yaml +++ b/tests/fixtures/llm/prefix_tuning.yaml @@ -29,6 +29,12 @@ prefix_tuning: default: llama: model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-random-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-fused-bloom inference-predict: default: @@ -51,5 +57,4 @@ inference-infer: batch_size: 2 decode_strategy: greedy_search max_length: 20 - enable_compare: false export_precache: true \ No newline at end of file diff --git a/tests/fixtures/llm/pretrain.yaml b/tests/fixtures/llm/pretrain.yaml index cfebf521af92..4ca4fa3e6b46 100644 --- a/tests/fixtures/llm/pretrain.yaml +++ b/tests/fixtures/llm/pretrain.yaml @@ -24,7 +24,7 @@ pretrain: model_name_or_path: __internal_testing__/tiny-random-llama chatglm: model_type: chatglm - model_name_or_path: __internal_testing__/tiny-random-chatglm + model_name_or_path: __internal_testing__/tiny-fused-chatglm inference-predict: default: @@ -44,5 +44,4 @@ inference-infer: dtype: float16 batch_size: 2 decode_strategy: greedy_search - max_length: 20 - enable_compare: false \ No newline at end of file + max_length: 20 \ No newline at end of file diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index e6ee001c80dc..543149d41c5f 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -28,12 +28,12 @@ @parameterized_class( - ["model_dir"], + ["model_dir", "enable_compare"], [ - ["llama"], + ["llama", False], # ["chatglm"], # ["chatglm2"], - ["bloom"], + # ["bloom"], ], ) class FinetuneTest(LLMTest, unittest.TestCase): @@ -83,15 +83,7 @@ def test_pretrain(self): main() - self._test_inference_predictor() - self._test_predictor() + if self.model_dir != "opt": + self.run_predictor({"inference_model": True}) - def _test_inference_predictor(self): - # TODO(wj-Mcat): OPTModel do not support inference model - if self.model_dir == "opt": - return - - self.run_predictor({"inference_model": "true"}) - - def _test_predictor(self): - self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py index b17684e9b710..8f12fffb5b5f 100644 --- a/tests/llm/test_lora.py +++ b/tests/llm/test_lora.py @@ -29,9 +29,9 @@ @parameterized_class( - ["model_dir"], + ["model_dir", "enable_compare"], [ - ["llama"], + ["llama", False], # TODO(wj-Mcat): to enable chatglm/chatglm2 unit test # ["chatglm"], # ["chatglm2"], @@ -102,6 +102,6 @@ def test_lora(self): merge() if self.model_dir not in ["chatglm2"]: - self.run_predictor({"inference_model": "true"}) + self.run_predictor({"inference_model": True}) - self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index 7c74c15e5907..e65803dbc386 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -67,11 +67,5 @@ def test_pretrain(self): main() - self._test_inference_predictor() - self._test_predictor() - - def _test_inference_predictor(self): - self.run_predictor({"inference_model": "true"}) - - def _test_predictor(self): - 
self.run_predictor({"inference_model": "false"}) + self.run_predictor({"inference_model": True}) + self.run_predictor({"inference_model": False}) diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index d0d867c818a9..efde13c4d623 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import json import os import shutil import sys @@ -25,6 +26,7 @@ class LLMTest: config_path: str = None + enable_compare: bool = True def setUp(self) -> None: self.root_path = "./llm" @@ -44,6 +46,15 @@ def disable_static(self): paddle.utils.unique_name.switch() paddle.disable_static() + def _read_result(self, file): + result = [] + # read output field from json file + with open(file, "r", encoding="utf-8") as f: + for line in f: + data = json.loads(line) + result.append(data["output"]) + return result + def run_predictor(self, config_params=None): config_params = config_params or {} # to avoid the same parameter @@ -81,17 +92,18 @@ def run_predictor(self, config_params=None): config_params.pop("model_name_or_path", None) config.update(config_params) - enable_compare = config.pop("enable_compare", False) with argv_context_guard(config): from predictor import predict predict() self.disable_static() - # compare result - if enable_compare: - predict_result = self._read_result(predict_config["output_file"]) - infer_result = self._read_result(config["output_file"]) - assert len(predict_result) == len(infer_result) - for predict_item, infer_item in zip(predict_result, infer_result): - self.assertEqual(predict_item, infer_item) + + if not config_params["inference_model"]: + return + + predict_result = self._read_result(predict_config["output_file"]) + infer_result = self._read_result(config["output_file"]) + assert len(predict_result) == len(infer_result) + for predict_item, infer_item in zip(predict_result, infer_result): + self.assertEqual(predict_item, infer_item) From 68ff4643b8e70a9dc52a6def4848e7beaa03e315 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 07:57:40 +0000 Subject: [PATCH 4/7] update llm testing --- scripts/regression/ci_case.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/regression/ci_case.sh b/scripts/regression/ci_case.sh index 5d33529ca31c..2cf56fef5edd 100644 --- a/scripts/regression/ci_case.sh +++ b/scripts/regression/ci_case.sh @@ -1083,7 +1083,7 @@ python setup_cuda.py install echo ' Testing all LLMs ' cd ${nlp_dir} -python -m pytest tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1 +python -m pytest -v -s tests/llm/test_*.py >${log_path}/llm >>${log_path}/llm 2>&1 print_info $? 
llm } fast_generation(){ From ff6ac92abdb5dc5787c49518c8fd686ded62fd84 Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 12:10:31 +0000 Subject: [PATCH 5/7] revert chatglm sft-arguments --- llm/chatglm/sft_argument.json | 57 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 0602a60b2441..54960bf8a379 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,30 +1,29 @@ { - "model_name_or_path": "__internal_testing__/tiny-fused-chatglm", - "dataset_name_or_path": "./data", - "output_dir": "./checkpoints/chatglm_sft_ckpts", - "per_device_train_batch_size": 4, - "gradient_accumulation_steps": 4, - "per_device_eval_batch_size": 8, - "eval_accumulation_steps":16, - "num_train_epochs": 3, - "learning_rate": 3e-05, - "warmup_steps": 30, - "logging_steps": 1, - "max_steps": 10, - "evaluation_strategy": "epoch", - "save_strategy": "epoch", - "src_length": 1024, - "max_length": 2048, - "fp16": true, - "fp16_opt_level": "O2", - "do_train": true, - "do_eval": true, - "disable_tqdm": true, - "load_best_model_at_end": true, - "eval_with_do_generation": false, - "metric_for_best_model": "accuracy", - "recompute": true, - "save_total_limit": 1, - "tensor_parallel_degree": 1, - "pipeline_parallel_degree": 1 - } \ No newline at end of file + "model_name_or_path": "THUDM/chatglm-6b", + "dataset_name_or_path": "./data", + "output_dir": "./checkpoints/chatglm_sft_ckpts", + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "per_device_eval_batch_size": 8, + "eval_accumulation_steps":16, + "num_train_epochs": 3, + "learning_rate": 3e-05, + "warmup_steps": 30, + "logging_steps": 1, + "evaluation_strategy": "epoch", + "save_strategy": "epoch", + "src_length": 1024, + "max_length": 2048, + "fp16": true, + "fp16_opt_level": "O2", + "do_train": true, + "do_eval": true, + "disable_tqdm": true, + "load_best_model_at_end": true, + "eval_with_do_generation": false, + "metric_for_best_model": "accuracy", + "recompute": true, + "save_total_limit": 1, + "tensor_parallel_degree": 4, + "pipeline_parallel_degree": 1 +} \ No newline at end of file From 7311f097d05a9c2148d36a5f9c12058a27319feb Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Tue, 19 Sep 2023 12:11:33 +0000 Subject: [PATCH 6/7] revert chatglm sft-arguments --- llm/chatglm/sft_argument.json | 54 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/llm/chatglm/sft_argument.json b/llm/chatglm/sft_argument.json index 54960bf8a379..02656828e141 100644 --- a/llm/chatglm/sft_argument.json +++ b/llm/chatglm/sft_argument.json @@ -1,29 +1,29 @@ { - "model_name_or_path": "THUDM/chatglm-6b", - "dataset_name_or_path": "./data", - "output_dir": "./checkpoints/chatglm_sft_ckpts", - "per_device_train_batch_size": 4, - "gradient_accumulation_steps": 4, - "per_device_eval_batch_size": 8, - "eval_accumulation_steps":16, - "num_train_epochs": 3, - "learning_rate": 3e-05, - "warmup_steps": 30, - "logging_steps": 1, - "evaluation_strategy": "epoch", - "save_strategy": "epoch", - "src_length": 1024, - "max_length": 2048, - "fp16": true, - "fp16_opt_level": "O2", - "do_train": true, - "do_eval": true, - "disable_tqdm": true, - "load_best_model_at_end": true, - "eval_with_do_generation": false, - "metric_for_best_model": "accuracy", - "recompute": true, - "save_total_limit": 1, - "tensor_parallel_degree": 4, - 
"pipeline_parallel_degree": 1 + "model_name_or_path": "THUDM/chatglm-6b", + "dataset_name_or_path": "./data", + "output_dir": "./checkpoints/chatglm_sft_ckpts", + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "per_device_eval_batch_size": 8, + "eval_accumulation_steps":16, + "num_train_epochs": 3, + "learning_rate": 3e-05, + "warmup_steps": 30, + "logging_steps": 1, + "evaluation_strategy": "epoch", + "save_strategy": "epoch", + "src_length": 1024, + "max_length": 2048, + "fp16": true, + "fp16_opt_level": "O2", + "do_train": true, + "do_eval": true, + "disable_tqdm": true, + "load_best_model_at_end": true, + "eval_with_do_generation": false, + "metric_for_best_model": "accuracy", + "recompute": true, + "save_total_limit": 1, + "tensor_parallel_degree": 4, + "pipeline_parallel_degree": 1 } \ No newline at end of file From 5e2c23a0ae3ac7d3b65f0618f5eb380374d5700f Mon Sep 17 00:00:00 2001 From: wj-Mcat <1435130236@qq.com> Date: Wed, 20 Sep 2023 08:58:17 +0000 Subject: [PATCH 7/7] update llm testing --- tests/llm/test_finetune.py | 30 +----------------------------- tests/llm/test_lora.py | 27 --------------------------- tests/llm/test_prefix_tuning.py | 30 +----------------------------- tests/llm/test_pretrain.py | 10 +++++----- tests/llm/testing_utils.py | 1 + 5 files changed, 8 insertions(+), 90 deletions(-) diff --git a/tests/llm/test_finetune.py b/tests/llm/test_finetune.py index 543149d41c5f..3449f953cd6b 100644 --- a/tests/llm/test_finetune.py +++ b/tests/llm/test_finetune.py @@ -13,15 +13,11 @@ # limitations under the License. from __future__ import annotations -import os -import shutil import sys -import tempfile import unittest from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -43,36 +39,12 @@ class FinetuneTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() sys.path.insert(0, self.model_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) - def test_pretrain(self): + def test_finetune(self): finetune_config = load_test_config(self.config_path, "finetune", self.model_dir) finetune_config["dataset_name_or_path"] = self.data_dir diff --git a/tests/llm/test_lora.py b/tests/llm/test_lora.py index 8f12fffb5b5f..0112730bdf85 100644 --- a/tests/llm/test_lora.py +++ b/tests/llm/test_lora.py @@ -14,15 +14,12 @@ from __future__ import annotations import os -import shutil import sys -import tempfile import unittest import paddle from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import 
argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -45,35 +42,11 @@ class LoraTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_codes_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) sys.path.remove(self.model_codes_dir) def test_lora(self): diff --git a/tests/llm/test_prefix_tuning.py b/tests/llm/test_prefix_tuning.py index 1fad2647a84d..60b78edd7015 100644 --- a/tests/llm/test_prefix_tuning.py +++ b/tests/llm/test_prefix_tuning.py @@ -14,14 +14,11 @@ from __future__ import annotations import os -import shutil import sys -import tempfile import unittest from parameterized import parameterized_class -from paddlenlp.utils.downloader import get_path_from_url from tests.testing_utils import argv_context_guard, load_test_config from .testing_utils import LLMTest @@ -40,39 +37,14 @@ class LoraTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) sys.path.insert(0, self.model_codes_dir) - # Run pretrain - URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" - get_path_from_url(URL, root_dir=self.data_dir) - self.data_dir = os.path.join(self.data_dir, "data") - self.use_small_datasets() - - def use_small_datasets(self): - # use 20 examples - def use_few_examples(file): - with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: - lines = [line.strip() for line in f.readlines()] - with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: - f.write("\n".join(lines[:20])) - - shutil.copyfile( - os.path.join(self.data_dir, "dev.json"), - os.path.join(self.data_dir, "validation.json"), - ) - use_few_examples("train.json") - use_few_examples("dev.json") - use_few_examples("validation.json") - def tearDown(self) -> None: LLMTest.tearDown(self) - shutil.rmtree(self.data_dir) - sys.path.remove(self.model_codes_dir) - def test_pretrain(self): + def test_prefix_tuning(self): prefix_tuning_config = load_test_config(self.config_path, "prefix_tuning", self.model_dir) prefix_tuning_config["dataset_name_or_path"] = self.data_dir diff --git a/tests/llm/test_pretrain.py b/tests/llm/test_pretrain.py index e65803dbc386..aca07a0690ad 100644 --- a/tests/llm/test_pretrain.py +++ b/tests/llm/test_pretrain.py @@ -40,7 +40,7 @@ class PretrainTest(LLMTest, unittest.TestCase): def setUp(self) -> None: LLMTest.setUp(self) - self.data_dir = tempfile.mkdtemp() + self.dataset_dir = tempfile.mkdtemp() self.model_codes_dir = os.path.join(self.root_path, self.model_dir) 
sys.path.insert(0, self.model_codes_dir) @@ -48,18 +48,18 @@ def tearDown(self) -> None: LLMTest.tearDown(self) sys.path.remove(self.model_codes_dir) - shutil.rmtree(self.data_dir) + shutil.rmtree(self.dataset_dir) def test_pretrain(self): # Run pretrain URL = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy" URL2 = "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_idx.npz" - get_path_from_url(URL, root_dir=self.data_dir) - get_path_from_url(URL2, root_dir=self.data_dir) + get_path_from_url(URL, root_dir=self.dataset_dir) + get_path_from_url(URL2, root_dir=self.dataset_dir) pretrain_config = load_test_config(self.config_path, "pretrain", self.model_dir) - pretrain_config["input_dir"] = self.data_dir + pretrain_config["input_dir"] = self.dataset_dir pretrain_config["output_dir"] = self.output_dir with argv_context_guard(pretrain_config): diff --git a/tests/llm/testing_utils.py b/tests/llm/testing_utils.py index efde13c4d623..0a1ded580a9e 100644 --- a/tests/llm/testing_utils.py +++ b/tests/llm/testing_utils.py @@ -27,6 +27,7 @@ class LLMTest: config_path: str = None enable_compare: bool = True + data_dir = "./tests/fixtures/llm/data/" def setUp(self) -> None: self.root_path = "./llm"
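
Note on the configuration merge introduced in PATCH 1/7: as written in tests/testing_utils.py, load_test_config now layers three blocks — the mode block ("slow" or "default"), then "base" (whose values take precedence over the mode block), then the per-model sub-key (e.g. "llama") — and finally drops any values that are still dicts. The standalone sketch below reproduces that merge order outside the test suite for reference only; the names demo_config and resolve are illustrative and are not part of the patch.

from __future__ import annotations

# Sketch of the merge order added to load_test_config: mode block first,
# then "base" applied on top of it, then the model-specific sub-key, and
# finally any remaining nested dicts are dropped so only flat options survive.
demo_config = {
    "finetune": {
        "base": {"learning_rate": 3e-05, "fp16": True},
        "default": {
            "learning_rate": 1e-05,  # overridden by "base" under this merge order
            "llama": {"model_name_or_path": "__internal_testing__/tiny-random-llama"},
        },
    }
}

def resolve(config: dict, key: str, sub_key: str, slow: bool = False) -> dict | None:
    section = config[key]
    mode_key = "slow" if slow else "default"
    if mode_key not in section:
        return None
    merged = dict(section.get(mode_key, {}))
    merged.update(section.get("base", {}))   # "base" wins over the mode block
    merged.update(merged.get(sub_key, {}))   # model-specific values win last
    return {k: v for k, v in merged.items() if not isinstance(v, dict)}

print(resolve(demo_config, "finetune", "llama"))
# {'learning_rate': 3e-05, 'fp16': True, 'model_name_or_path': '__internal_testing__/tiny-random-llama'}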