From c621362575da7a70539296004e0c322dd49ba54c Mon Sep 17 00:00:00 2001
From: vivienfang
Date: Thu, 6 Apr 2023 14:18:07 +0800
Subject: [PATCH 1/4] add_gpt_dev

---
 .../gpt-3/ppfleetx/core/engine/auto_engine.py |  5 +++-
 .../ppfleetx/core/engine/eager_engine.py      | 12 +++++++---
 .../gpt/dygraph/hybrid_model.py               | 24 +++++++++++++++----
 model_zoo/gpt-3/ppfleetx/utils/device.py      |  3 ++-
 .../projects/gpt/auto_export_gpt_345M_mp2.sh  |  2 +-
 .../gpt/auto_export_gpt_345M_single_card.sh   |  6 ++---
 model_zoo/gpt-3/run_mp8.sh                    |  8 +++++++
 7 files changed, 46 insertions(+), 14 deletions(-)
 create mode 100644 model_zoo/gpt-3/run_mp8.sh

diff --git a/model_zoo/gpt-3/ppfleetx/core/engine/auto_engine.py b/model_zoo/gpt-3/ppfleetx/core/engine/auto_engine.py
index 0dcfa3d6c5dc..6ae71fc9f7c1 100644
--- a/model_zoo/gpt-3/ppfleetx/core/engine/auto_engine.py
+++ b/model_zoo/gpt-3/ppfleetx/core/engine/auto_engine.py
@@ -20,7 +20,10 @@
 from paddle.distributed.fleet import auto
 from ppfleetx.core.engine import BasicEngine
 from ppfleetx.core.module import BasicModule
-from ppfleetx.optims import build_lr_scheduler, build_optimizer
+try:
+    from ppfleetx.optims import build_lr_scheduler, build_optimizer
+except Exception:
+    pass
 from ppfleetx.utils.log import logger
 from ppfleetx.utils.version import version_check

diff --git a/model_zoo/gpt-3/ppfleetx/core/engine/eager_engine.py b/model_zoo/gpt-3/ppfleetx/core/engine/eager_engine.py
index 384286398856..24124a2650a6 100644
--- a/model_zoo/gpt-3/ppfleetx/core/engine/eager_engine.py
+++ b/model_zoo/gpt-3/ppfleetx/core/engine/eager_engine.py
@@ -22,15 +22,21 @@
 from paddle.distributed.fleet.utils.hybrid_parallel_util import (
     fused_allreduce_gradients,
 )
-from paddle.distributed.parallel import sync_params_buffers
+try:
+    from paddle.distributed.parallel import sync_params_buffers
+except Exception:
+    pass
 from paddle.distributed.sharding import group_sharded_parallel
 from paddle.incubate.distributed.utils.io import save_for_auto_inference
 from paddle.profiler import SummaryView
 from ppfleetx.core.engine import BasicEngine, InferenceEngine, TensorRTConfig
 from ppfleetx.core.module import BasicModule
 from ppfleetx.distributed.apis import amp, env
-from ppfleetx.optims import build_lr_scheduler, build_optimizer
-from ppfleetx.utils.compression_helper import prune_model, quant_model
+try:
+    from ppfleetx.optims import build_lr_scheduler, build_optimizer
+    from ppfleetx.utils.compression_helper import prune_model, quant_model
+except Exception:
+    pass
 from ppfleetx.utils.device import synchronize as device_synchronize
 from ppfleetx.utils.export import export_inference_model
 from ppfleetx.utils.log import convert_timestamp_to_data, get_timestamp, logger
diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
index 9110951a1273..4c1709814a67 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
@@ -59,7 +59,11 @@
     from paddle.nn.functional.flash_attention import flash_attention
 except:
     flash_attention = None
-from paddle.incubate.nn.layer.fused_dropout_add import FusedDropoutAdd
+try:
+    from paddle.incubate.nn.layer.fused_dropout_add import FusedDropoutAdd
+except:
+    FusedDropoutAdd = None
+FusedDropoutAdd = None


 def get_attr(layer, name):
@@ -570,8 +574,12 @@ def __init__(
             mark_as_sequence_parallel_parameter(self.norm1.bias)
             mark_as_sequence_parallel_parameter(self.norm2.weight)
             mark_as_sequence_parallel_parameter(self.norm2.bias)
-        self.fused_dropout_add1 = FusedDropoutAdd(dropout, mode="upscale_in_train")
-        self.fused_dropout_add2 = FusedDropoutAdd(act_dropout, mode="upscale_in_train")
+        if not FusedDropoutAdd:
+            self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
+            self.dropout2 = nn.Dropout(act_dropout, mode="upscale_in_train")
+        else:
+            self.fused_dropout_add1 = FusedDropoutAdd(dropout, mode="upscale_in_train")
+            self.fused_dropout_add2 = FusedDropoutAdd(act_dropout, mode="upscale_in_train")

         self.activation = getattr(F, activation)

@@ -595,7 +603,10 @@ def forward(self, tgt, memory=None, tgt_mask=None, use_cache=False, cache=None):
         else:
             current_seed = "global_seed"
         with get_rng_state_tracker().rng_state(current_seed):
-            tgt = self.fused_dropout_add1(tgt, residual)
+            if not FusedDropoutAdd:
+                tgt = residual + self.dropout1(tgt)
+            else:
+                tgt = self.fused_dropout_add1(tgt, residual)

         if not self.normalize_before:
             tgt = self.norm1(tgt)
@@ -605,7 +616,10 @@ def forward(self, tgt, memory=None, tgt_mask=None, use_cache=False, cache=None):
             tgt = self.norm2(tgt)

         with get_rng_state_tracker().rng_state(current_seed):
-            tgt = self.fused_dropout_add2(self.linear2(F.gelu(self.linear1(tgt), approximate=True)), residual)
+            if not FusedDropoutAdd:
+                tgt = residual + self.linear2(F.gelu(self.linear1(tgt), approximate=True))
+            else:
+                tgt = self.fused_dropout_add2(self.linear2(F.gelu(self.linear1(tgt), approximate=True)), residual)

         if not self.normalize_before:
             tgt = self.norm2(tgt)
diff --git a/model_zoo/gpt-3/ppfleetx/utils/device.py b/model_zoo/gpt-3/ppfleetx/utils/device.py
index 5593d82e034c..18b911ec27bc 100644
--- a/model_zoo/gpt-3/ppfleetx/utils/device.py
+++ b/model_zoo/gpt-3/ppfleetx/utils/device.py
@@ -25,7 +25,8 @@ def get_device_and_mapping():
         "gpu": paddle.is_compiled_with_cuda(),
         "xpu": paddle.is_compiled_with_xpu(),
         "rocm": paddle.is_compiled_with_rocm(),
-        "npu": paddle.is_compiled_with_npu() or paddle.is_compiled_with_custom_device("npu"),
+        # "npu": paddle.is_compiled_with_npu() or paddle.is_compiled_with_custom_device("npu"),
+        "npu": paddle.is_compiled_with_npu(),
         "cpu": True,
     }
     for d, v in suppoted_device_map.items():
diff --git a/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_mp2.sh b/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_mp2.sh
index 932f6525e220..0110e8fba8db 100644
--- a/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_mp2.sh
+++ b/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_mp2.sh
@@ -19,4 +19,4 @@ rm -rf $log_dir

 python -m paddle.distributed.launch --log_dir $log_dir --devices "0,1" \
     ./tools/auto_export.py \
-    -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_mp2.yaml \
+    -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_mp2.yaml
diff --git a/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_single_card.sh b/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_single_card.sh
index 59505bb7a6a5..b0f057fb5d8b 100644
--- a/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_single_card.sh
+++ b/model_zoo/gpt-3/projects/gpt/auto_export_gpt_345M_single_card.sh
@@ -20,11 +20,11 @@ rm -rf $log_dir
 DIRECTORY=./auto_infer
-d "$DIRECTORY" ]; then echo "start download ckpt" - wget -O https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GPT_auto_345M.tar.gz - tar -zxvf GPT_auto_345M.tar.gz + wget https://paddlefleetx.bj.bcebos.com/model/nlp/gpt/GPT_345M_FP16.tar.gz + tar -zxvf GPT_345M_FP16.tar.gz fi python -m paddle.distributed.launch --log_dir $log_dir --devices "1" \ ./tools/auto_export.py \ -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_345M_single_card.yaml \ - -o Engine.save_load.ckpt_dir=./auto_infer/auto + -o Engine.save_load.ckpt_dir=./pretrained/auto diff --git a/model_zoo/gpt-3/run_mp8.sh b/model_zoo/gpt-3/run_mp8.sh new file mode 100644 index 000000000000..444597fe7019 --- /dev/null +++ b/model_zoo/gpt-3/run_mp8.sh @@ -0,0 +1,8 @@ +# cd external_ops && python setup.py install && cd - + +export USE_FAST_LN=1 +export USE_LINEAR_WITH_GRAD_ADD=1 + +python -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./tools/auto_export.py -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_175B_mp8.yaml + +python -m paddle.distributed.launch projects/gpt/inference.py --mp_degree 8 --model_dir output \ No newline at end of file From 813bc48f57a69466b0ac3e6e2f8981a722f8a157 Mon Sep 17 00:00:00 2001 From: vivienfang Date: Fri, 7 Apr 2023 13:47:52 +0800 Subject: [PATCH 2/4] add sys path --- model_zoo/gpt-3/projects/gpt/inference.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/model_zoo/gpt-3/projects/gpt/inference.py b/model_zoo/gpt-3/projects/gpt/inference.py index 3178002b4449..6e76f55500da 100644 --- a/model_zoo/gpt-3/projects/gpt/inference.py +++ b/model_zoo/gpt-3/projects/gpt/inference.py @@ -17,11 +17,15 @@ import argparse import os import time - +import sys import numpy as np import paddle import paddle.distributed.fleet as fleet import ppfleetx_ops + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.abspath(os.path.join(__dir__, "../"))) + from ppfleetx.core.engine.inference_engine import InferenceEngine from ppfleetx.data import build_dataloader, tokenizers From 7722a4fcd8a13186db6ce369ba56cbdbe9d561bb Mon Sep 17 00:00:00 2001 From: vivienfang Date: Fri, 7 Apr 2023 14:32:23 +0800 Subject: [PATCH 3/4] add --- model_zoo/gpt-3/projects/gpt/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model_zoo/gpt-3/projects/gpt/inference.py b/model_zoo/gpt-3/projects/gpt/inference.py index 40134f0664cd..66c55075ef16 100644 --- a/model_zoo/gpt-3/projects/gpt/inference.py +++ b/model_zoo/gpt-3/projects/gpt/inference.py @@ -21,7 +21,7 @@ import numpy as np import paddle import paddle.distributed.fleet as fleet -import ppfleetx_ops +from ppfleetx_ops import topp_sampling __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.abspath(os.path.join(__dir__, "../"))) From b12b9a4c174da9f5d6f00fed88e2dbc877a3bebc Mon Sep 17 00:00:00 2001 From: vivienfang Date: Fri, 7 Apr 2023 14:40:21 +0800 Subject: [PATCH 4/4] add --- model_zoo/gpt-3/ppfleetx/core/engine/inference_engine.py | 4 ++++ model_zoo/gpt-3/projects/gpt/benchmark.py | 2 +- model_zoo/gpt-3/projects/gpt/inference.py | 1 - model_zoo/gpt-3/run_mp8.sh | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/model_zoo/gpt-3/ppfleetx/core/engine/inference_engine.py b/model_zoo/gpt-3/ppfleetx/core/engine/inference_engine.py index 988dedb3ad82..b8d8747525b1 100644 --- a/model_zoo/gpt-3/ppfleetx/core/engine/inference_engine.py +++ b/model_zoo/gpt-3/ppfleetx/core/engine/inference_engine.py @@ -18,6 +18,10 @@ import numpy as np import paddle import 
 import paddle.distributed.fleet as fleet
+try:
+    from ppfleetx_ops import topp_sampling
+except Exception as e:
+    pass

 # TensorRT precisions
 TRT_PRECISIONS = {
diff --git a/model_zoo/gpt-3/projects/gpt/benchmark.py b/model_zoo/gpt-3/projects/gpt/benchmark.py
index 9d13d781dacf..ea5e973d6045 100644
--- a/model_zoo/gpt-3/projects/gpt/benchmark.py
+++ b/model_zoo/gpt-3/projects/gpt/benchmark.py
@@ -46,7 +46,7 @@ def predict(engine, data, args):
     for _ in range(args.iter):
         engine.predictor.run()
     end = time.perf_counter()
-    print(f"batch {args.iter} run time: {1000 * (end - start) / args.iter}ms")
+    print(f"batch {data.shape} run time: {1000 * (end - start) / args.iter}ms")

     return {name: engine.predictor.get_output_handle(name).copy_to_cpu() for name in engine.output_names()}

diff --git a/model_zoo/gpt-3/projects/gpt/inference.py b/model_zoo/gpt-3/projects/gpt/inference.py
index 66c55075ef16..662b1fa89170 100644
--- a/model_zoo/gpt-3/projects/gpt/inference.py
+++ b/model_zoo/gpt-3/projects/gpt/inference.py
@@ -21,7 +21,6 @@
 import numpy as np
 import paddle
 import paddle.distributed.fleet as fleet
-from ppfleetx_ops import topp_sampling

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.abspath(os.path.join(__dir__, "../")))
diff --git a/model_zoo/gpt-3/run_mp8.sh b/model_zoo/gpt-3/run_mp8.sh
index 444597fe7019..81e2d4536e19 100644
--- a/model_zoo/gpt-3/run_mp8.sh
+++ b/model_zoo/gpt-3/run_mp8.sh
@@ -5,4 +5,6 @@ export USE_LINEAR_WITH_GRAD_ADD=1

 python -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./tools/auto_export.py -c ./ppfleetx/configs/nlp/gpt/auto/generation_gpt_175B_mp8.yaml

-python -m paddle.distributed.launch projects/gpt/inference.py --mp_degree 8 --model_dir output
\ No newline at end of file
+python -m paddle.distributed.launch projects/gpt/inference.py --mp_degree 8 --model_dir output
+
+python -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" projects/gpt/benchmark.py --seq_len 128 --iter 10 --mp_degree 8 --model_dir ./output
\ No newline at end of file