[model_zoo/gpt-3] Fix bugs from PR-61236 which cleared paddle.jit.dy2static.utils_helper #7989

Merged: 2 commits, Feb 20, 2024
First changed file:

@@ -49,6 +49,11 @@
 except:
     flash_attention = None

+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape
+
 def shard_op_for_sequence_parallel_linear(tgt, mesh):
     # FIXME Hack to shard op for module (linear)
     # we only shard the second to the last op (matmul) leave the last op (elementwise_add) un-touched

@@ -1206,7 +1211,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f

         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         max_length = paddle.to_tensor(max_length)
         while cur_len < max_length:
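All three files get the same two-part change: import set_dynamic_shape from its new location, paddle.jit.api, falling back to the old paddle.jit.dy2static.utils_helper path on Paddle builds that predate PR-61236, and then call the imported name directly instead of the removed fully qualified path. The snippet below is a minimal, self-contained sketch of that pattern together with the call site it feeds; the tensor name and shape are invented for illustration, and except ImportError is used where the PR keeps a bare except.

import paddle

# Prefer the new location; fall back for older Paddle wheels that still ship
# paddle.jit.dy2static.utils_helper (the PR itself uses a bare `except:`).
try:
    from paddle.jit.api import set_dynamic_shape
except ImportError:
    from paddle.jit.dy2static.utils_helper import set_dynamic_shape

# Hypothetical attention mask; the [batch, heads, q_len, kv_len] shape is
# illustrative and not taken from the PR.
attention_mask = paddle.ones([1, 1, 8, 8], dtype="float32")

# Same call as the patched decode loop: mark every dim as dynamic (-1) so a
# dy2static-exported graph does not hard-code the trace-time mask shape.
# In eager mode this is expected to be a no-op.
set_dynamic_shape(attention_mask, [-1, -1, -1, -1])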
Second changed file:

@@ -62,11 +62,17 @@
     from paddle.nn.functional.flash_attention import flash_attention
 except:
     flash_attention = None

 try:
     from paddle.incubate.nn.layer.fused_dropout_add import FusedDropoutAdd
 except:
     FusedDropoutAdd = None
+
+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape
+
 def get_attr(layer, name):
     if getattr(layer, name, None) is not None:
         return getattr(layer, name, None)

@@ -1501,7 +1507,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f

         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         while cur_len < max_length:
             # Note(GuoxiaWang): Remove outputs = _forward_(**model_kwargs)
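Besides the set_dynamic_shape fix, this file already guards an optional fused kernel the same way: FusedDropoutAdd falls back to None when the incubate layer is missing. The sketch below shows one common way such an optional layer is consumed, reverting to plain dropout plus residual add when the fused layer or a CUDA build is unavailable; it is an illustration under those assumptions, not code from this PR, and the ResidualDropout class name is made up.

import paddle
import paddle.nn as nn

try:
    from paddle.incubate.nn.layer.fused_dropout_add import FusedDropoutAdd
except ImportError:
    FusedDropoutAdd = None


class ResidualDropout(nn.Layer):
    """Hypothetical wrapper computing dropout(x) + residual, fused when available."""

    def __init__(self, p=0.1):
        super().__init__()
        # Assumption: the fused layer targets CUDA builds, so only enable it
        # when both the import and the build support it.
        use_fused = FusedDropoutAdd is not None and paddle.is_compiled_with_cuda()
        self.fused = FusedDropoutAdd(p) if use_fused else None
        self.dropout = nn.Dropout(p)

    def forward(self, x, residual):
        if self.fused is not None:
            return self.fused(x, residual)
        return residual + self.dropout(x)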
Third changed file:

@@ -43,6 +43,10 @@
 except:
     flash_attention = None

+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape

 def get_attr(layer, name):
     if getattr(layer, name, None) is not None:

@@ -1077,7 +1081,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f

         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         if hasattr(paddle.framework, "_no_check_dy2st_diff"):
             # TODO(wanghuancoder): _no_check_dy2st_diff is used to turn off the checking of behavior