From f93f0ea9bf37cd747610891ff0f91e4b1c8576c6 Mon Sep 17 00:00:00 2001
From: Ghost Screaming
Date: Wed, 11 Oct 2023 08:07:09 -0500
Subject: [PATCH] [NewComm] Set new communication library as default. (#57768)

* [NewComm] Set FLAGS_dynamic_static_unified_comm to `True` by default. The new communication library is now used by default.

* Polish code.

* Fix problems of distributed testcases using new comm lib.

* Fix problems of testcases using new comm lib by default.

* Fix failed testcase.

* Fix failed testcases.
---
 paddle/phi/core/flags.cc                                      | 4 ++--
 test/collective/fleet/c_comm_init_op.py                       | 5 -----
 test/collective/fleet/test_fused_attention_pass_with_mp.sh    | 1 +
 test/distributed_passes/auto_parallel_pass_test_base.py       | 1 +
 test/distributed_passes/dist_pass_test_base.py                | 1 +
 test/legacy_test/test_collective_api_base.py                  | 1 +
 test/legacy_test/test_collective_base.py                      | 2 +-
 test/legacy_test/test_dist_base.py                            | 1 +
 test/legacy_test/test_dist_hapi_model.py                      | 1 +
 test/legacy_test/test_distributed_fused_lamb_op_with_clip.py  | 1 +
 test/legacy_test/test_parallel_dygraph_dataparallel.py        | 1 +
 test/legacy_test/test_run.py                                  | 1 +
 tools/test_runner.py                                          | 1 +
 13 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc
index 19e707c40bc551..384470273a9546 100644
--- a/paddle/phi/core/flags.cc
+++ b/paddle/phi/core/flags.cc
@@ -1338,13 +1338,13 @@ PHI_DEFINE_EXPORTED_int32(
  * Communication library related FLAG
  * Name: FLAGS_dynamic_static_unified_comm
  * Since Version: 2.5
- * Value Range: bool, default=false
+ * Value Range: bool, default=true
  * Example:
  * Note: Whether to use new communication library in auto parallel and static
  * mode. If true, it will use unified CommContextManager for communication.
  */
 PHI_DEFINE_EXPORTED_bool(dynamic_static_unified_comm,
-                         false,
+                         true,
                          "Whether to use new communication library in auto "
                          "parallel and static mode.");
 #endif  // FLAGS_dynamic_static_unified_comm
diff --git a/test/collective/fleet/c_comm_init_op.py b/test/collective/fleet/c_comm_init_op.py
index 988c0fcc27954b..15230b9b71f331 100644
--- a/test/collective/fleet/c_comm_init_op.py
+++ b/test/collective/fleet/c_comm_init_op.py
@@ -17,9 +17,6 @@
 
 import paddle
 from paddle import base
-from paddle.distributed.fleet.base.private_helper_function import (
-    wait_server_ready,
-)
 
 paddle.enable_static()
 
@@ -35,8 +32,6 @@ def setUp(self):
         self.exe = base.Executor(self.place)
         self.endpoints.remove(self.current_endpoint)
         self.other_endpoints = self.endpoints
-        if self.rank == 0:
-            wait_server_ready(self.other_endpoints)
 
     def test_specifying_devices(self):
         program = base.Program()
diff --git a/test/collective/fleet/test_fused_attention_pass_with_mp.sh b/test/collective/fleet/test_fused_attention_pass_with_mp.sh
index d00f2fdbac0e1d..4b2b48cdc08df8 100644
--- a/test/collective/fleet/test_fused_attention_pass_with_mp.sh
+++ b/test/collective/fleet/test_fused_attention_pass_with_mp.sh
@@ -17,4 +17,5 @@
 set -e
 # use default values
 # FIXME: random fails on Unknown command lines -c (or -m).
+export FLAGS_dynamic_static_unified_comm=0
 CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch fused_attention_pass_with_mp.py
diff --git a/test/distributed_passes/auto_parallel_pass_test_base.py b/test/distributed_passes/auto_parallel_pass_test_base.py
index 69c2d051c7db37..90173e43de5722 100644
--- a/test/distributed_passes/auto_parallel_pass_test_base.py
+++ b/test/distributed_passes/auto_parallel_pass_test_base.py
@@ -37,6 +37,7 @@ class AutoPallelPassTestBase(DistPassTestBase):
     def setUp(self):
         paddle.enable_static()
         seed = int(os.environ.get('SEED', -1))
+        os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
         if seed <= 0:
             seed = np.random.randint(low=1, high=1000000, size=[1])[0]
         os.environ['SEED'] = str(seed)
diff --git a/test/distributed_passes/dist_pass_test_base.py b/test/distributed_passes/dist_pass_test_base.py
index 72bc7ca78d9de2..945f6f29eeb434 100644
--- a/test/distributed_passes/dist_pass_test_base.py
+++ b/test/distributed_passes/dist_pass_test_base.py
@@ -64,6 +64,7 @@ def setUp(self):
         if paddle.is_compiled_with_cuda():
             paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
 
+        os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
         seed = int(os.environ.get('SEED', -1))
         if seed <= 0:
             seed = np.random.randint(low=1, high=1000000, size=[1])[0]
diff --git a/test/legacy_test/test_collective_api_base.py b/test/legacy_test/test_collective_api_base.py
index 08de4a1be9a322..669910ee0283ab 100644
--- a/test/legacy_test/test_collective_api_base.py
+++ b/test/legacy_test/test_collective_api_base.py
@@ -359,6 +359,7 @@ def check_with_place(
             "PATH_ID": path_id,
             "DTYPE": dtype,
             "REDUCE_TYPE": str(reduce_type),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
         required_envs.update(additional_envs)
         required_envs.update(need_envs)
diff --git a/test/legacy_test/test_collective_base.py b/test/legacy_test/test_collective_base.py
index 9d3a602b8d051a..544cee3ac0e7ec 100644
--- a/test/legacy_test/test_collective_base.py
+++ b/test/legacy_test/test_collective_base.py
@@ -266,7 +266,7 @@ def check_with_place(
             "LD_PRELOAD": os.getenv("LD_PRELOAD", ""),
             "GLOG_v": "3",
             "NCCL_P2P_DISABLE": "1",
-            "Flags_dynamic_static_unified_comm": "False",
+            "FLAGS_dynamic_static_unified_comm": "0",
             "DTYPE": "float32",
         }
         required_envs.update(need_envs)
diff --git a/test/legacy_test/test_dist_base.py b/test/legacy_test/test_dist_base.py
index db7d490e3a5afe..b4d8257503d401 100755
--- a/test/legacy_test/test_dist_base.py
+++ b/test/legacy_test/test_dist_base.py
@@ -1692,6 +1692,7 @@ def _get_required_envs(self, check_error_log=False, need_envs={}):
             "NCCL_P2P_DISABLE": "1",
             "NCCL_SHM_DISABLE": "1",
             "FLAGS_new_executor_static_build": "1",
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         if check_error_log:
diff --git a/test/legacy_test/test_dist_hapi_model.py b/test/legacy_test/test_dist_hapi_model.py
index 1e5ec1d341f71f..03a92d6f3cbc91 100644
--- a/test/legacy_test/test_dist_hapi_model.py
+++ b/test/legacy_test/test_dist_hapi_model.py
@@ -75,6 +75,7 @@ def start_local_trainers(
             "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
             "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
             "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         current_env.update(proc_env)
diff --git a/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py b/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
index 32ee6fd8b39581..62a94832d1ae9e 100644
--- a/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
+++ b/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
@@ -68,6 +68,7 @@ def run_test(
     os.environ['MAX_GLOBAL_NORM'] = str(max_global_norm)
     os.environ['GRADIENT_MERGE_STEPS'] = str(gradient_merge_steps)
    os.environ['USE_MASTER_ACC_GRAD'] = str(1 if use_master_acc_grad else 0)
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
     os.environ.update(need_env)
 
     touch_file_env = 'SUCCESS_TOUCH_FILE'
diff --git a/test/legacy_test/test_parallel_dygraph_dataparallel.py b/test/legacy_test/test_parallel_dygraph_dataparallel.py
index de3160e9c6f9c9..b3cbfbf0966f89 100644
--- a/test/legacy_test/test_parallel_dygraph_dataparallel.py
+++ b/test/legacy_test/test_parallel_dygraph_dataparallel.py
@@ -121,6 +121,7 @@ def start_local_trainers(
             "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
             "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
             "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         proc_env["FLAGS_allocator_strategy"] = allocator_strategy
diff --git a/test/legacy_test/test_run.py b/test/legacy_test/test_run.py
index e0ec7c9657fb54..331d45a514a932 100644
--- a/test/legacy_test/test_run.py
+++ b/test/legacy_test/test_run.py
@@ -207,4 +207,5 @@ def test_ps_4(self):
 
 
 if __name__ == '__main__':
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
     unittest.main()
diff --git a/tools/test_runner.py b/tools/test_runner.py
index 37d132fbc1535a..49603fd9a3afa5 100644
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
@@ -40,6 +40,7 @@ def main():
     sys.path.append(os.getcwd())
 
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "false"
     if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
         if os.getenv('FLAGS_enable_gpu_memory_usage_log') is None:
             os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
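
Every test-side change above is the same one-line opt-out: with the flag now defaulting to true, suites that still exercise the legacy communication path pin FLAGS_dynamic_static_unified_comm back to 0 (or false) through the process environment before Paddle initializes. Below is a minimal standalone sketch of that pattern, not part of the patch; it assumes, as the tests above do, that setting the FLAGS_* environment variable before the paddle import is enough for the exported flag to take effect, and the get_flags read-back at the end is purely illustrative.

# Sketch: force the legacy communication library for one test process.
# Assumption: the FLAGS_* environment variable must be set before paddle is
# imported so the exported flag picks it up at startup (the pattern used by
# the tests in this patch).
import os

os.environ["FLAGS_dynamic_static_unified_comm"] = "0"

import paddle

# Illustrative only: exported flags can be read back through paddle.get_flags.
print(paddle.get_flags(["FLAGS_dynamic_static_unified_comm"]))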