
Commit

Merge branch 'PaddlePaddle:develop' into LoRA-GA
greycooker authored Dec 17, 2024
2 parents 86c5b33 + 2231feb commit 44f633e
Showing 38 changed files with 1,740 additions and 228 deletions.
62 changes: 34 additions & 28 deletions README.md

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions csrc/sdaa/README.md
@@ -0,0 +1,15 @@
# PaddleNLP custom ops

This document describes how to build and install the PaddleNLP SDAA custom ops.

# 1. Install PaddleCustomDevice

Follow the [PaddleCustomDevice SDAA installation guide](https://github.com/PaddlePaddle/PaddleCustomDevice/blob/develop/backends/sdaa/README_cn.md).

# 2. Install paddlenlp_ops
```shell
python setup_sdaa.py build bdist_wheel

pip install dist/paddlenlp_ops*.whl
```
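A quick way to verify the install (a hedged check, not part of this diff; it assumes the PaddleCustomDevice SDAA backend from step 1 is importable as `paddle_sdaa`, which the package's `__init__.py` relies on):

```shell
# Succeeds only if both the SDAA backend (paddle_sdaa) and the freshly built
# paddlenlp_ops wheel are installed in the current environment.
python -c "import paddlenlp_ops; print('paddlenlp_ops imported successfully')"
```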
15 changes: 15 additions & 0 deletions csrc/sdaa/python/paddlenlp_ops/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Re-export the SDAA extension ops so they are available under the paddlenlp_ops namespace.
from paddle_sdaa.sdaa_ext import *
59 changes: 59 additions & 0 deletions csrc/sdaa/setup_sdaa.py
@@ -0,0 +1,59 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from setuptools import Distribution, setup

packages = []
package_data = {}


class BinaryDistribution(Distribution):
    def has_ext_modules(self):
        return True


def main():
    setup(
        name="paddlenlp_ops",
        version="0.0.0",
        description="PaddleNLP SDAA CustomOps",
        long_description="",
        long_description_content_type="text/markdown",
        author_email="Paddle-better@baidu.com",
        maintainer="PaddlePaddle",
        maintainer_email="Paddle-better@baidu.com",
        project_urls={},
        license="Apache Software License",
        packages=[
            "paddlenlp_ops",
        ],
        include_package_data=True,
        package_data={
            "": ["*.py"],
        },
        package_dir={
            "": "python",
        },
        zip_safe=False,
        distclass=BinaryDistribution,
        entry_points={"console_scripts": []},
        classifiers=[],
        keywords="PaddleNLP SDAA CustomOps",
    )


if __name__ == "__main__":
    main()
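For reference, the script above is driven exactly as documented in `csrc/sdaa/README.md` earlier in this diff; a minimal sketch of the build-and-install flow (the `cd` path follows `llm/docs/predict/installation.md` below):

```shell
# Build a platform-specific wheel (BinaryDistribution marks the package as
# containing binary extensions), then install it.
cd PaddleNLP/csrc/sdaa
python setup_sdaa.py build bdist_wheel
pip install dist/paddlenlp_ops*.whl
```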
1 change: 1 addition & 0 deletions docs/llm/sdaa/llama/README.md
3 changes: 3 additions & 0 deletions docs/trainer.md
@@ -691,6 +691,9 @@ Trainer is a simple yet fully featured Paddle training and evaluation module
--optim
Optimizer name, defaults to adamw. (`str`, optional, defaults to `adamw`)
The optimizer to use. (default: adamw)
Possible values:
- `"adamw"`
- `"adamw_mini"`
--report_to
Logging/visualization backend; VisualDL is used by default. (optional, defaults to None, which enables all available backends)
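For context on the new `adamw_mini` option above, it is selected like any other trainer flag; a hedged sketch (the entry script and the other flags are illustrative assumptions, not part of this diff):

```shell
# Hypothetical fine-tuning launch; only --optim adamw_mini reflects the new
# option documented above, everything else is illustrative.
python -u -m paddle.distributed.launch --gpus "0" run_finetune.py \
    --model_name_or_path Qwen/Qwen2-0.5B \
    --dataset_name_or_path ./data \
    --output_dir ./checkpoints/sft_ckpts \
    --optim adamw_mini
```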
36 changes: 36 additions & 0 deletions llm/config/qwen/emb_argument.json
@@ -0,0 +1,36 @@
{
    "model_name_or_path": "Qwen/Qwen2-0.5B",
    "dataset_name_or_path": "./dureader_data",
    "output_dir": "./checkpoints/sft_ckpts",
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "per_device_eval_batch_size": 1,
    "eval_accumulation_steps": 1,
    "max_steps": 2000,
    "learning_rate": 3e-5,
    "warmup_steps": 30,
    "logging_steps": 1,
    "evaluation_strategy": "no",
    "save_strategy": "epoch",
    "max_query_len": 512,
    "max_passage_len": 512,
    "group_size": 4,
    "bf16": true,
    "fp16_opt_level": "O2",
    "do_train": true,
    "do_eval": false,
    "disable_tqdm": true,
    "load_best_model_at_end": false,
    "eval_with_do_generation": false,
    "metric_for_best_model": "accuracy",
    "recompute": true,
    "save_total_limit": 1,
    "tensor_parallel_degree": 1,
    "pipeline_parallel_degree": 1,
    "sharding": "stage1",
    "zero_padding": false,
    "unified_checkpoint": true,
    "use_flash_attention": true,
    "amp_custom_black_list": "elementwise_div",
    "release_grads": true
}
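The config above is presumably consumed by an embedding-training entry point under `llm/`; a hedged sketch of a typical launch (the script name `run_embedding.py` is an assumption, only the config path comes from this diff):

```shell
# Hypothetical launch command; verify the actual entry script in llm/ before use.
python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" \
    run_embedding.py ./config/qwen/emb_argument.json
```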
1 change: 1 addition & 0 deletions llm/docs/dpo.md
@@ -119,6 +119,7 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" ./alignment/dpo
- `unified_checkpoint`: whether to use the unified checkpoint format, defaults to `True`.
- `autotuner_benchmark`: whether to enable the autotuner benchmark, defaults to `False`.
- `benchmark`: whether to enable benchmarking, defaults to `False`.
- `optim`: defaults to `adamw`; supported values are `adamw` and `adamw_mini`.
### DPO arguments (DPOArguments)
- `beta`: the beta parameter of the DPO loss, defaults to 0.1.
- `simpo_gamma`: the gamma parameter of the SimPO loss, defaults to 0.5.
3 changes: 2 additions & 1 deletion llm/docs/finetune.md
@@ -36,7 +36,7 @@
### 3.1 Environment setup

- PaddlePaddle 3.0-beta
- PaddleNLP 3.0.0b2
- PaddleNLP 3.0.0b3
- PaddleSlim develop

git clone the repository to your machine to get started.
@@ -184,6 +184,7 @@ python merge_lora_params.py \
- `pipeline_parallel_degree`: size of the pipeline-parallel partition (e.g. with a value of 4 and a 12-layer model, each pp stage holds 3 layers). Defaults to -1, which disables pipeline parallelism.
- `sharding_parallel_degree`: data-parallel degree for grouped parameter sharding. Defaults to 1, which disables sharded data parallelism.
- `sharding`: whether to use Paddle's sharded data parallelism; user-specified. Supports `stage1`, `stage2`, or `stage3`; `stage2` and `stage3` can be combined with `offload`.
- `optim`: defaults to `adamw`; supported values are `adamw` and `adamw_mini`.
</div>


15 changes: 8 additions & 7 deletions llm/docs/predict/inference.md
@@ -39,13 +39,13 @@ High-performance inference implementations have been added to PaddleNLP; the validated models

PaddleNLP supports multiple hardware platforms and precisions, including:

| Precision | Hopper| Ada | Ampere | Turing | Volta | 昆仑XPU | 昇腾NPU | 海光K100 | 燧原GCU | x86 CPU |
|:--------------:|:-----:|:---:|:------:|:------:|:-----:|:------:|:-------:|:-------:|:------:|:-------:|
| FP32 |||||| | | |||
| FP16 |||||| | | |||
| BF16 |||||| | | |||
| INT8 |||||| | | |||
| FP8 | 🚧 ||||| | | |||
| Precision | Hopper| Ada | Ampere | Turing | Volta | 昆仑XPU | 昇腾NPU | 海光K100 | 燧原GCU | 太初SDAA| x86 CPU |
|:--------------:|:-----:|:---:|:------:|:------:|:-----:|:------:|:-------:|:-------:|:------:|:------:|:-------:|
| FP32 ||||||||| | ||
| FP16 ||||||||| | ||
| BF16 ||||||||| | ||
| INT8 ||||||||| | ||
| FP8 | 🚧 |||||||| | ||


## 3. 推理参数
@@ -196,6 +196,7 @@ python ./predict/predictor.py --model_name_or_path meta-llama/Llama-2-7b-chat --
- [昇腾NPU](../../npu/llama/README.md)
- [海光K100](../dcu_install.md)
- [燧原GCU](../../gcu/llama/README.md)
- [太初SDAA](../../sdaa/llama/README.md)
- [X86 CPU](../cpu_install.md)
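Once the SDAA custom ops are installed (see the 太初SDAA link above), inference is presumably launched the same way as on other backends; a hedged sketch in which `--device sdaa` is an assumed flag value, not one documented in this diff:

```shell
# Hypothetical invocation mirroring the predictor command shown earlier;
# --device sdaa and --dtype float16 are assumptions for illustration.
python ./predict/predictor.py \
    --model_name_or_path meta-llama/Llama-2-7b-chat \
    --dtype float16 \
    --device sdaa
```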

## Acknowledgements
4 changes: 3 additions & 1 deletion llm/docs/predict/installation.md
@@ -16,6 +16,8 @@ cd PaddleNLP/csrc && python setup_cuda.py install
cd PaddleNLP/csrc/xpu/src && sh cmake_build.sh
# Install custom ops for DCU devices
cd PaddleNLP/csrc && python setup_hip.py install
# Install custom ops for SDAA devices
cd PaddleNLP/csrc/sdaa && python setup_sdaa.py install
```

Change to the run directory to get started:
@@ -32,4 +34,4 @@ cd PaddleNLP/llm

For best inference performance, see:

- [Best practices](./best_practices.md)