From f93f0ea9bf37cd747610891ff0f91e4b1c8576c6 Mon Sep 17 00:00:00 2001
From: Ghost Screaming
Date: Wed, 11 Oct 2023 08:07:09 -0500
Subject: [PATCH] [NewComm] Set new communication library as default. (#57768)

* [NewComm] Set FLAGS_dynamic_static_unified_comm to `True` by default. The new communication library is now used by default.

* Polish code.

* Fix problems of distributed testcases using new comm lib.

* Fix problems of testcases using new comm lib by default.

* Fix failed testcase.

* Fix failed testcases.
---
 paddle/phi/core/flags.cc                                      | 4 ++--
 test/collective/fleet/c_comm_init_op.py                       | 5 -----
 test/collective/fleet/test_fused_attention_pass_with_mp.sh    | 1 +
 test/distributed_passes/auto_parallel_pass_test_base.py       | 1 +
 test/distributed_passes/dist_pass_test_base.py                | 1 +
 test/legacy_test/test_collective_api_base.py                  | 1 +
 test/legacy_test/test_collective_base.py                      | 2 +-
 test/legacy_test/test_dist_base.py                            | 1 +
 test/legacy_test/test_dist_hapi_model.py                      | 1 +
 test/legacy_test/test_distributed_fused_lamb_op_with_clip.py  | 1 +
 test/legacy_test/test_parallel_dygraph_dataparallel.py        | 1 +
 test/legacy_test/test_run.py                                  | 1 +
 tools/test_runner.py                                          | 1 +
 13 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc
index 19e707c40bc551..384470273a9546 100644
--- a/paddle/phi/core/flags.cc
+++ b/paddle/phi/core/flags.cc
@@ -1338,13 +1338,13 @@ PHI_DEFINE_EXPORTED_int32(
  * Communication library related FLAG
  * Name: FLAGS_dynamic_static_unified_comm
  * Since Version: 2.5
- * Value Range: bool, default=false
+ * Value Range: bool, default=true
  * Example:
  * Note: Whether to use new communication library in auto parallel and static
  * mode. If true, it will use unified CommContextManager for communication.
  */
 PHI_DEFINE_EXPORTED_bool(dynamic_static_unified_comm,
-                         false,
+                         true,
                          "Whether to use new communication library in auto "
                          "parallel and static mode.");
 #endif  // FLAGS_dynamic_static_unified_comm
diff --git a/test/collective/fleet/c_comm_init_op.py b/test/collective/fleet/c_comm_init_op.py
index 988c0fcc27954b..15230b9b71f331 100644
--- a/test/collective/fleet/c_comm_init_op.py
+++ b/test/collective/fleet/c_comm_init_op.py
@@ -17,9 +17,6 @@
 
 import paddle
 from paddle import base
-from paddle.distributed.fleet.base.private_helper_function import (
-    wait_server_ready,
-)
 
 paddle.enable_static()
 
@@ -35,8 +32,6 @@ def setUp(self):
         self.exe = base.Executor(self.place)
         self.endpoints.remove(self.current_endpoint)
         self.other_endpoints = self.endpoints
-        if self.rank == 0:
-            wait_server_ready(self.other_endpoints)
 
     def test_specifying_devices(self):
         program = base.Program()
diff --git a/test/collective/fleet/test_fused_attention_pass_with_mp.sh b/test/collective/fleet/test_fused_attention_pass_with_mp.sh
index d00f2fdbac0e1d..4b2b48cdc08df8 100644
--- a/test/collective/fleet/test_fused_attention_pass_with_mp.sh
+++ b/test/collective/fleet/test_fused_attention_pass_with_mp.sh
@@ -17,4 +17,5 @@
 set -e
 # use default values
 # FIXME: random fails on Unknown command lines -c (or -m).
+export FLAGS_dynamic_static_unified_comm=0
 CUDA_VISIBLE_DEVICES=0,1 python -m paddle.distributed.launch fused_attention_pass_with_mp.py
diff --git a/test/distributed_passes/auto_parallel_pass_test_base.py b/test/distributed_passes/auto_parallel_pass_test_base.py
index 69c2d051c7db37..90173e43de5722 100644
--- a/test/distributed_passes/auto_parallel_pass_test_base.py
+++ b/test/distributed_passes/auto_parallel_pass_test_base.py
@@ -37,6 +37,7 @@ class AutoPallelPassTestBase(DistPassTestBase):
     def setUp(self):
         paddle.enable_static()
         seed = int(os.environ.get('SEED', -1))
+        os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
         if seed <= 0:
             seed = np.random.randint(low=1, high=1000000, size=[1])[0]
         os.environ['SEED'] = str(seed)
diff --git a/test/distributed_passes/dist_pass_test_base.py b/test/distributed_passes/dist_pass_test_base.py
index 72bc7ca78d9de2..945f6f29eeb434 100644
--- a/test/distributed_passes/dist_pass_test_base.py
+++ b/test/distributed_passes/dist_pass_test_base.py
@@ -64,6 +64,7 @@ def setUp(self):
         if paddle.is_compiled_with_cuda():
             paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
 
+        os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
         seed = int(os.environ.get('SEED', -1))
         if seed <= 0:
             seed = np.random.randint(low=1, high=1000000, size=[1])[0]
diff --git a/test/legacy_test/test_collective_api_base.py b/test/legacy_test/test_collective_api_base.py
index 08de4a1be9a322..669910ee0283ab 100644
--- a/test/legacy_test/test_collective_api_base.py
+++ b/test/legacy_test/test_collective_api_base.py
@@ -359,6 +359,7 @@ def check_with_place(
             "PATH_ID": path_id,
             "DTYPE": dtype,
             "REDUCE_TYPE": str(reduce_type),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
         required_envs.update(additional_envs)
         required_envs.update(need_envs)
diff --git a/test/legacy_test/test_collective_base.py b/test/legacy_test/test_collective_base.py
index 9d3a602b8d051a..544cee3ac0e7ec 100644
--- a/test/legacy_test/test_collective_base.py
+++ b/test/legacy_test/test_collective_base.py
@@ -266,7 +266,7 @@ def check_with_place(
             "LD_PRELOAD": os.getenv("LD_PRELOAD", ""),
             "GLOG_v": "3",
             "NCCL_P2P_DISABLE": "1",
-            "Flags_dynamic_static_unified_comm": "False",
+            "FLAGS_dynamic_static_unified_comm": "0",
             "DTYPE": "float32",
         }
         required_envs.update(need_envs)
diff --git a/test/legacy_test/test_dist_base.py b/test/legacy_test/test_dist_base.py
index db7d490e3a5afe..b4d8257503d401 100755
--- a/test/legacy_test/test_dist_base.py
+++ b/test/legacy_test/test_dist_base.py
@@ -1692,6 +1692,7 @@ def _get_required_envs(self, check_error_log=False, need_envs={}):
             "NCCL_P2P_DISABLE": "1",
             "NCCL_SHM_DISABLE": "1",
             "FLAGS_new_executor_static_build": "1",
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         if check_error_log:
diff --git a/test/legacy_test/test_dist_hapi_model.py b/test/legacy_test/test_dist_hapi_model.py
index 1e5ec1d341f71f..03a92d6f3cbc91 100644
--- a/test/legacy_test/test_dist_hapi_model.py
+++ b/test/legacy_test/test_dist_hapi_model.py
@@ -75,6 +75,7 @@ def start_local_trainers(
             "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
             "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
             "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         current_env.update(proc_env)
diff --git a/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py b/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
index 32ee6fd8b39581..62a94832d1ae9e 100644
--- a/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
+++ b/test/legacy_test/test_distributed_fused_lamb_op_with_clip.py
@@ -68,6 +68,7 @@ def run_test(
     os.environ['MAX_GLOBAL_NORM'] = str(max_global_norm)
     os.environ['GRADIENT_MERGE_STEPS'] = str(gradient_merge_steps)
    os.environ['USE_MASTER_ACC_GRAD'] = str(1 if use_master_acc_grad else 0)
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
     os.environ.update(need_env)
 
     touch_file_env = 'SUCCESS_TOUCH_FILE'
diff --git a/test/legacy_test/test_parallel_dygraph_dataparallel.py b/test/legacy_test/test_parallel_dygraph_dataparallel.py
index de3160e9c6f9c9..b3cbfbf0966f89 100644
--- a/test/legacy_test/test_parallel_dygraph_dataparallel.py
+++ b/test/legacy_test/test_parallel_dygraph_dataparallel.py
@@ -121,6 +121,7 @@ def start_local_trainers(
             "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
             "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
             "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
+            "FLAGS_dynamic_static_unified_comm": "0",
         }
 
         proc_env["FLAGS_allocator_strategy"] = allocator_strategy
diff --git a/test/legacy_test/test_run.py b/test/legacy_test/test_run.py
index e0ec7c9657fb54..331d45a514a932 100644
--- a/test/legacy_test/test_run.py
+++ b/test/legacy_test/test_run.py
@@ -207,4 +207,5 @@ def test_ps_4(self):
 
 
 if __name__ == '__main__':
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "0"
     unittest.main()
diff --git a/tools/test_runner.py b/tools/test_runner.py
index 37d132fbc1535a..49603fd9a3afa5 100644
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
@@ -40,6 +40,7 @@ def main():
     sys.path.append(os.getcwd())
 
+    os.environ["FLAGS_dynamic_static_unified_comm"] = "false"
     if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
         if os.getenv('FLAGS_enable_gpu_memory_usage_log') is None:
             os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
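
Every test-side change above is the same one-line opt-out: with the flag now defaulting to true, suites that still exercise the legacy communication path pin FLAGS_dynamic_static_unified_comm back to 0 (or false) through the process environment before Paddle initializes. Below is a minimal standalone sketch of that pattern, not part of the patch; it assumes, as the tests above do, that setting the FLAGS_* environment variable before the paddle import is enough for the exported flag to take effect, and the get_flags read-back at the end is purely illustrative.

# Sketch: force the legacy communication library for one test process.
# Assumption: the FLAGS_* environment variable must be set before paddle is
# imported so the exported flag picks it up at startup (the pattern used by
# the tests in this patch).
import os

os.environ["FLAGS_dynamic_static_unified_comm"] = "0"

import paddle

# Illustrative only: exported flags can be read back through paddle.get_flags.
print(paddle.get_flags(["FLAGS_dynamic_static_unified_comm"]))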