[New Features] add llm pretrain & lora & sft & prefix_tuning testing scripts #7056
@@ -1,29 +1,29 @@
{
    "model_name_or_path": "THUDM/chatglm-6b",
    "dataset_name_or_path": "./data",
    "output_dir": "./checkpoints/chatglm_sft_ckpts",
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "per_device_eval_batch_size": 8,
    "eval_accumulation_steps": 16,
    "num_train_epochs": 3,
    "learning_rate": 3e-05,
    "warmup_steps": 30,
    "logging_steps": 1,
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "src_length": 1024,
    "max_length": 2048,
    "fp16": true,
    "fp16_opt_level": "O2",
    "do_train": true,
    "do_eval": true,
    "disable_tqdm": true,
    "load_best_model_at_end": true,
    "eval_with_do_generation": false,
    "metric_for_best_model": "accuracy",
    "recompute": true,
    "save_total_limit": 1,
    "tensor_parallel_degree": 4,
    "pipeline_parallel_degree": 1
}
"model_name_or_path": "THUDM/chatglm-6b", | ||
"dataset_name_or_path": "./data", | ||
"output_dir": "./checkpoints/chatglm_sft_ckpts", | ||
"per_device_train_batch_size": 4, | ||
"gradient_accumulation_steps": 4, | ||
"per_device_eval_batch_size": 8, | ||
"eval_accumulation_steps":16, | ||
"num_train_epochs": 3, | ||
"learning_rate": 3e-05, | ||
"warmup_steps": 30, | ||
"logging_steps": 1, | ||
"evaluation_strategy": "epoch", | ||
"save_strategy": "epoch", | ||
"src_length": 1024, | ||
"max_length": 2048, | ||
"fp16": true, | ||
"fp16_opt_level": "O2", | ||
"do_train": true, | ||
"do_eval": true, | ||
"disable_tqdm": true, | ||
"load_best_model_at_end": true, | ||
"eval_with_do_generation": false, | ||
"metric_for_best_model": "accuracy", | ||
"recompute": true, | ||
"save_total_limit": 1, | ||
"tensor_parallel_degree": 4, | ||
"pipeline_parallel_degree": 1 | ||
} | ||
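As a side note on this config: tensor_parallel_degree times pipeline_parallel_degree determines how many devices the SFT run expects (4 x 1 = 4 here). A minimal sketch of that arithmetic, assuming only the JSON above; the file path in the sketch is hypothetical, since the diff does not show the file's location:

```python
import json

# Hypothetical path; the diff does not show where this JSON lives.
with open("chatglm_sft_args.json", "r", encoding="utf8") as f:
    cfg = json.load(f)

# The distributed launch must provide tp * pp devices for this config.
world_size = cfg["tensor_parallel_degree"] * cfg["pipeline_parallel_degree"]
print(f"expected number of devices: {world_size}")  # 4 * 1 = 4

# Examples consumed per optimizer step by each model replica, assuming no extra
# data parallelism on top of tp/pp (an assumption, not stated in the diff).
per_replica_batch = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
print(f"examples per optimizer step per replica: {per_replica_batch}")  # 4 * 4 = 16
```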
@@ -0,0 +1,56 @@
finetune:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-05
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
Review comment: What is the difference between the random and fused models here?
Reply: The head_dim of a fused model must be one of 10, 26, 32, 64, 128, ..., so to avoid affecting the existing unit tests, a new tiny-random model was created specifically for the non-fused vs. fused unit tests (see the sketch after this file diff).
Comment on lines +29 to +30
Review comment: Add a baichuan model too, the kind with alibi.
Reply: Let's leave that one to @wtmlon to add.
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
Review comment: What does "fused" refer to in tiny-fused-chatglm?
Reply: Because the head_dim of a fused model must be one of 10, 26, 32, 64, 128, ..., a new tiny-random model was created specifically for the non-fused vs. fused unit tests so that the existing unit tests are not affected.
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom
inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
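To illustrate the head_dim constraint discussed in the review comments above, here is a minimal sketch; it is not PaddleNLP code, the supported set is taken from the reviewer reply, and the example sizes are made up for illustration:

```python
# Head dims reported as supported by the fused kernels in the review reply above
# (the full list is longer; treat this set as illustrative only).
FUSED_SUPPORTED_HEAD_DIMS = {10, 26, 32, 64, 128}


def can_use_fused_attention(hidden_size: int, num_attention_heads: int) -> bool:
    """Return True if the per-head dimension is one the fused kernels accept."""
    head_dim = hidden_size // num_attention_heads
    return head_dim in FUSED_SUPPORTED_HEAD_DIMS


# A tiny-random style config may pick an arbitrary head_dim (e.g. 24 // 4 = 6),
# which fused kernels reject, hence the separate tiny-random / tiny-fused models.
print(can_use_fused_attention(hidden_size=24, num_attention_heads=4))   # False
print(can_use_fused_attention(hidden_size=128, num_attention_heads=4))  # True (head_dim = 32)
```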
@@ -0,0 +1,58 @@
lora:
  base:
    dataset_name_or_path: "./data"
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-04
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    lora: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16

inference-to-static:
  default:
    dtype: float16

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
@@ -0,0 +1,60 @@
prefix_tuning:
  base:
    per_device_train_batch_size: 4
    gradient_accumulation_steps: 4
    per_device_eval_batch_size: 8
    eval_accumulation_steps: 16
    num_train_epochs: 3
    learning_rate: 3e-02
    warmup_steps: 30
    logging_steps: 1
    evaluation_strategy: "epoch"
    save_strategy: "epoch"
    src_length: 1024
    max_length: 2048
    fp16: true
    fp16_opt_level: "O2"
    do_train: true
    do_eval: true
    disable_tqdm: true
    load_best_model_at_end: true
    eval_with_do_generation: false
    metric_for_best_model: "accuracy"
    recompute: true
    save_total_limit: 1
    tensor_parallel_degree: 1
    pipeline_parallel_degree: 1
    prefix_tuning: true

  default:
    llama:
      model_name_or_path: __internal_testing__/tiny-random-llama
    chatglm:
      model_name_or_path: __internal_testing__/tiny-fused-chatglm
    chatglm2:
      model_name_or_path: __internal_testing__/tiny-random-chatglm2
    bloom:
      model_name_or_path: __internal_testing__/tiny-fused-bloom

inference-predict:
  default:
    mode: dynamic
    max_length: 20
    batch_size: 2
    decode_strategy: greedy_search
    dtype: float16
    export_precache: true

inference-to-static:
  default:
    dtype: float16
    export_precache: true

inference-infer:
  default:
    mode: static
    dtype: float16
    batch_size: 2
    decode_strategy: greedy_search
    max_length: 20
    export_precache: true
@@ -1,7 +1,5 @@
 pretrain:
-  default:
-    model_type: llama
-    model_name_or_path: __internal_testing__/tiny-random-llama
+  base:
     weight_decay: 0.01
     max_steps: 2
     save_steps: 2

@@ -20,7 +18,13 @@ pretrain:
     use_flash_attention: 0
     use_fused_rms_norm: 0
     continue_training: 1

+  default:
+    llama:
+      model_type: llama
+      model_name_or_path: __internal_testing__/tiny-random-llama
+    chatglm:
+      model_type: chatglm
Review comment: Does chatglm have a pretraining pipeline?
Reply: No, it doesn't here, and chatglm isn't configured for fine-tune here either, so I can just remove it for now.
+      model_name_or_path: __internal_testing__/tiny-fused-chatglm

 inference-predict:
   default:

@@ -40,5 +44,4 @@ inference-infer:
     dtype: float16
     batch_size: 2
     decode_strategy: greedy_search
-    max_length: 20
-    enable_compare: false
+    max_length: 20
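These fixtures all follow the same pattern: a shared base section plus per-model entries under default, which the test file below flattens into one argument dict via load_test_config(config_path, section, model_dir). The real helper lives in tests/testing_utils.py and is not shown in this diff; the sketch below is only an assumption about how such a merge might look:

```python
from typing import Any, Dict

import yaml  # PyYAML


def load_fixture_config(config_path: str, section: str, model_dir: str) -> Dict[str, Any]:
    """Hypothetical stand-in for load_test_config: merge the `base` block with the
    per-model entry under `default` for one section of a fixture YAML."""
    with open(config_path, "r", encoding="utf8") as f:
        fixture = yaml.safe_load(f)

    section_cfg = fixture[section]
    config = dict(section_cfg.get("base", {}))        # shared training arguments
    config.update(section_cfg["default"][model_dir])  # e.g. model_name_or_path
    return config


# Usage mirroring the test below (paths as used in FinetuneTest).
config = load_fixture_config("./tests/fixtures/llm/pretrain.yaml", "pretrain", "llama")
print(config["model_name_or_path"])  # __internal_testing__/tiny-random-llama
```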
@@ -0,0 +1,89 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
import shutil
import sys
import tempfile
import unittest

from parameterized import parameterized_class

from paddlenlp.utils.downloader import get_path_from_url
from tests.testing_utils import argv_context_guard, load_test_config

from .testing_utils import LLMTest


@parameterized_class(
    ["model_dir", "enable_compare"],
    [
        ["llama", False],
        # ["chatglm"],
        # ["chatglm2"],
        # ["bloom"],
    ],
)
class FinetuneTest(LLMTest, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/finetune.yaml"
    model_dir: str = None

    def setUp(self) -> None:
        LLMTest.setUp(self)

        self.data_dir = tempfile.mkdtemp()
        sys.path.insert(0, self.model_dir)

        # Run pretrain
Review comment: The comment here looks a bit off.
Reply: I'll fix it.
URL = "https://bj.bcebos.com/paddlenlp/datasets/examples/AdvertiseGen.tar.gz" | ||
get_path_from_url(URL, root_dir=self.data_dir) | ||
self.data_dir = os.path.join(self.data_dir, "data") | ||
self.use_small_datasets() | ||
|
||
def use_small_datasets(self): | ||
# use 20 examples | ||
def use_few_examples(file): | ||
with open(os.path.join(self.data_dir, file), "r", encoding="utf8") as f: | ||
lines = [line.strip() for line in f.readlines()] | ||
with open(os.path.join(self.data_dir, file), "w+", encoding="utf8") as f: | ||
f.write("\n".join(lines[:20])) | ||
|
||
shutil.copyfile( | ||
os.path.join(self.data_dir, "dev.json"), | ||
os.path.join(self.data_dir, "validation.json"), | ||
) | ||
use_few_examples("train.json") | ||
use_few_examples("dev.json") | ||
use_few_examples("validation.json") | ||
|
||
def tearDown(self) -> None: | ||
Review comment: Besides deleting the data files, do the model files need to be deleted as well?
Reply: Since they are loaded with from_pretrained, they are cached under the .paddlenlp/models directory, so there should be no need to delete them.
        LLMTest.tearDown(self)
        shutil.rmtree(self.data_dir)

    def test_pretrain(self):
Review comment: This function looks like it actually runs finetune.
Reply: It was copied over and hasn't been changed yet; I'll adjust it.
        finetune_config = load_test_config(self.config_path, "finetune", self.model_dir)

        finetune_config["dataset_name_or_path"] = self.data_dir
        finetune_config["output_dir"] = self.output_dir

        with argv_context_guard(finetune_config):
            from finetune_generation import main

            main()

        if self.model_dir != "opt":
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})
Review comment: This part was formatted by pre-commit.
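FinetuneTest above drives finetune_generation.main() by exposing the merged config dict as command-line arguments through argv_context_guard from tests/testing_utils. That helper's implementation is not part of this diff; the sketch below is only a plausible version of the pattern, assuming the helper simply patches sys.argv:

```python
import sys
from contextlib import contextmanager
from typing import Any, Dict, Iterator


@contextmanager
def argv_context_guard_sketch(config: Dict[str, Any]) -> Iterator[None]:
    """Assumed behaviour: expose a config dict as `--key value` CLI arguments
    for the duration of the block, then restore the original sys.argv."""
    original_argv = sys.argv
    argv = [original_argv[0]]
    for key, value in config.items():
        argv.extend([f"--{key}", str(value)])
    sys.argv = argv
    try:
        yield
    finally:
        sys.argv = original_argv


# Usage mirroring the test: the script's argument parser then reads sys.argv.
if __name__ == "__main__":
    with argv_context_guard_sketch({"output_dir": "./checkpoints", "do_train": True}):
        print(sys.argv[1:])  # ['--output_dir', './checkpoints', '--do_train', 'True']
```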