Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NewIR] Update send recv infermeta and add unittest #56794

Merged
merged 18 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion paddle/fluid/ir/dialect/op_generator/python_c_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,12 @@ def _gen_cast_attrs(self, op_info, op_name, with_mutable):

def _gen_one_impl(self, op_info, op_name):
input_name_list = op_info.input_name_list
output_name_list = op_info.output_name_list
attr_name_list = op_info.attribute_name_list
mutable_attr_name_list = op_info.mutable_attribute_name_list
no_mutable_attr_name_list = op_info.non_mutable_attribute_name_list

if op_name == "send_v2":
if len(output_name_list) == 0:
ret = NO_OUTPUT_API_IMPL_TEMPLATE.format(
api_name=op_name,
inputs=self._gen_inputs(op_info, op_name),
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/ir/dialect/paddle_dialect/ir/pd_ops.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
output : Tensor(out)
infer_meta:
func: RecvV2InferMeta
param: [peer, dtype, out_shape]
param: [ring_id, dynamic_shape, peer, out_shape, dtype]
kernel :
func : recv_v2
param : [ring_id, dynamic_shape, peer, out_shape, dtype, use_calc_stream]
Expand Down
42 changes: 42 additions & 0 deletions paddle/phi/infermeta/nullary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,48 @@ void PRecvArrayInferMeta(int peer,
out->set_dtype(dtype);
}

// Infers the output meta for the recv_v2 op.
// Validates `peer` and `ring_id` (both must be non-negative) and `out_shape`
// (must be non-empty). When `dynamic_shape` is false, every extent of
// `out_shape` must be a concrete positive value and is propagated to the
// output dims; the dtype is always propagated.
void RecvV2InferMeta(const int ring_id,
                     const bool dynamic_shape,
                     const int peer,
                     const std::vector<int>& out_shape,
                     DataType dtype,
                     MetaTensor* out) {
  PADDLE_ENFORCE_GE(
      peer,
      0,
      errors::InvalidArgument(
          "The peer (%d) for recv_v2 op must be non-negative.", peer));

  PADDLE_ENFORCE_GE(
      ring_id,
      0,
      errors::InvalidArgument(
          "The ring_id (%d) for recv_v2 op must be non-negative.", ring_id));

  PADDLE_ENFORCE_GE(out_shape.size(),
                    1,
                    errors::InvalidArgument(
                        "The size of the output shape must be greater than 0 "
                        "but the value given is %d.",
                        out_shape.size()));

  if (!dynamic_shape) {
    // Static-shape mode: all extents must be known up front so the output
    // dims can be fixed at compile (infer-meta) time.
    for (size_t i = 0; i < out_shape.size(); ++i) {
      PADDLE_ENFORCE_GE(out_shape[i],
                        1,
                        errors::InvalidArgument(
                            "The shape attribute for recv_v2 must be set "
                            "explicitly, but the %dth element is %d which "
                            "is less than 1. Or dynamic_shape should be "
                            "set to True for both send_v2 and recv_v2.",
                            i,
                            out_shape[i]));
    }
    out->set_dims(phi::make_ddim(out_shape));
  }
  // NOTE(review): when dynamic_shape is true the dims are intentionally left
  // unset here; presumably they are resolved at run time by the kernel --
  // confirm against the recv_v2 kernel implementation.
  out->set_dtype(dtype);
}

void TruncatedGaussianRandomInferMeta(const std::vector<int>& shape,
float mean,
float std,
Expand Down
7 changes: 7 additions & 0 deletions paddle/phi/infermeta/nullary.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ void PRecvArrayInferMeta(int peer,
const std::vector<int>& out_shape,
MetaTensor* out);

// Infers the output meta (dims/dtype) for the recv_v2 op; validates that
// `peer`/`ring_id` are non-negative and, when `dynamic_shape` is false, that
// every element of `out_shape` is a concrete positive extent.
void RecvV2InferMeta(const int ring_id,
                     const bool dynamic_shape,
                     const int peer,
                     const std::vector<int>& out_shape,
                     DataType dtype,
                     MetaTensor* out);

void TruncatedGaussianRandomInferMeta(const std::vector<int>& shape,
float mean,
float std,
Expand Down
57 changes: 13 additions & 44 deletions paddle/phi/infermeta/unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -405,50 +405,6 @@ void CConcatInferMeta(const MetaTensor& x, int nranks, MetaTensor* out) {
out->set_dtype(x.dtype());
}

// Validates the attributes of the send_v2 op: both `peer` and `ring_id`
// must be non-negative. send_v2 produces no output tensor, so there is no
// output meta to set -- this function only enforces the argument checks.
void SendV2InferMeta(const int peer, const int ring_id) {
  PADDLE_ENFORCE_GE(
      peer,
      0,
      errors::InvalidArgument(
          "The peer (%d) for send_v2 op must be non-negative.", peer));
  PADDLE_ENFORCE_GE(
      ring_id,
      0,
      errors::InvalidArgument(
          "The ring_id (%d) for send_v2 op must be non-negative.", ring_id));
}

void RecvV2InferMeta(int peer,
DataType dtype,
const std::vector<int>& out_shape,
MetaTensor* out) {
PADDLE_ENFORCE_GE(
peer,
0,
errors::InvalidArgument(
"The peer (%d) for p_recv op must be non-negative.", peer));

PADDLE_ENFORCE_GE(out_shape.size(),
1,
errors::InvalidArgument(
"The size of the output shape must be greater than 0 "
"but the value given is %d.",
out_shape.size()));

for (size_t i = 0; i < out_shape.size(); ++i) {
PADDLE_ENFORCE_GE(
out_shape[i],
1,
errors::InvalidArgument("The shape attribute for recv must be set "
"explicitly, but the %dth element is %d which "
"is less than 1. Or dynamic_shape should be "
"set to True for both send_v2 and recv_v2.",
i,
out_shape[i]));
}
out->set_dtype(dtype);
}

void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out) {
auto dims = x.dims();
auto rank = dims.size();
Expand Down Expand Up @@ -3045,6 +3001,19 @@ void PSendArrayInferMeta(const MetaTensor& x, int peer) {
"The peer (%d) for p_send op must be non-negative.", peer));
}

// Validates the attributes of the send_v2 op: both `peer` and `ring_id`
// must be non-negative. The op has no output, so no output meta is written;
// this function performs argument checks only.
void SendV2InferMeta(const int peer, const int ring_id) {
  PADDLE_ENFORCE_GE(
      peer,
      0,
      errors::InvalidArgument(
          "The peer (%d) for send_v2 op must be non-negative.", peer));
  PADDLE_ENFORCE_GE(
      ring_id,
      0,
      errors::InvalidArgument(
          "The ring_id (%d) for send_v2 op must be non-negative.", ring_id));
}

void PoolInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
Expand Down
9 changes: 2 additions & 7 deletions paddle/phi/infermeta/unary.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,6 @@ void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out);

void CConcatInferMeta(const MetaTensor& x, int nranks, MetaTensor* out);

void SendV2InferMeta(const int peer, const int ring_id);

void RecvV2InferMeta(int peer,
DataType dtype,
const std::vector<int>& out_shape,
MetaTensor* out);

void ChannelShuffleInferMeta(const MetaTensor& x,
int groups,
const std::string& data_format,
Expand Down Expand Up @@ -448,6 +441,8 @@ void PSendInferMeta(const MetaTensor& x, int peer);

void PSendArrayInferMeta(const MetaTensor& x, int peer);

void SendV2InferMeta(const int peer, const int ring_id);

void QrInferMeta(const MetaTensor& x,
const std::string& mode,
MetaTensor* q,
Expand Down
13 changes: 13 additions & 0 deletions python/paddle/distributed/auto_parallel/static/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,14 @@ def _initialize(self, mode):
dist_main_program, self._place, dist_context
)

# NOTE(zhaoyinglia): Skip startup program when use new ir temporarily.
use_new_ir = False
if auto_utils.use_new_ir():
use_new_ir = True
paddle.framework.set_flags(
{"FLAGS_enable_new_ir_in_executor": False}
)

if self._executor is None:
self._executor = paddle.static.Executor(self._place)
uninitialized = []
Expand Down Expand Up @@ -860,6 +868,11 @@ def _initialize(self, mode):
]
self._executor.run(dist_startup_prog)

if use_new_ir:
paddle.framework.set_flags(
{"FLAGS_enable_new_ir_in_executor": True}
)

def fit(
self,
train_data,
Expand Down
13 changes: 13 additions & 0 deletions python/paddle/distributed/auto_parallel/static/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2423,6 +2423,19 @@ def use_new_executor():
]


def use_new_ir():
    """Return True iff the ``FLAGS_enable_new_ir_in_executor`` environment
    variable is set to a truthy string ('1', 'True', or 'true').

    ``os.environ.get`` always returns ``str`` (or the ``None`` default when
    the variable is unset), so the non-string candidates ``1`` and ``True``
    in the original membership list were unreachable and have been dropped;
    behavior is unchanged.
    """
    enable_new_ir_in_executor = os.environ.get(
        'FLAGS_enable_new_ir_in_executor', None
    )
    return enable_new_ir_in_executor in ['1', 'True', 'true']


def get_pp_stage(dist_context, rank):
pp_idx = None
for idx, process_mesh in enumerate(dist_context.process_meshes):
Expand Down
13 changes: 8 additions & 5 deletions test/auto_parallel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,23 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_pass_quantization MODULES test_pass_quantization)
set_tests_properties(test_pass_quantization
PROPERTIES LABELS "RUN_TYPE=EXECLUSIVE" TIMEOUT 60)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 60)
py_test_modules(test_reshard_s_to_r MODULES test_reshard_s_to_r)
set_tests_properties(test_reshard_s_to_r
PROPERTIES LABELS "RUN_TYPE=EXECLUSIVE" TIMEOUT 100)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_reshard_r_to_s MODULES test_reshard_r_to_s)
set_tests_properties(test_reshard_r_to_s
PROPERTIES LABELS "RUN_TYPE=EXECLUSIVE" TIMEOUT 100)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_reshard_r_to_p MODULES test_reshard_r_to_p)
set_tests_properties(test_reshard_r_to_p
PROPERTIES LABELS "RUN_TYPE=EXECLUSIVE" TIMEOUT 100)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_semi_auto_parallel_basic MODULES
test_semi_auto_parallel_basic)
set_tests_properties(test_semi_auto_parallel_basic
PROPERTIES LABELS "RUN_TYPE=EXECLUSIVE" TIMEOUT 100)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_gpt_with_newir MODULES test_gpt_with_newir)
set_tests_properties(test_gpt_with_newir
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
# End of unittests WITH multi cards and timeout

# NOTE(zyl): unittests WITH multi cards and WITHOUT timeout
Expand Down
Loading