Fix spece spec, etc (#63092)

* Fix
* ci
PaddlePaddle · Apr 2, 2024 · 993e06b
1 parent f04e0d2
commit 993e06b
Show file tree

Hide file tree

Showing 23 changed files with 71 additions and 71 deletions.
diff --git a/python/cinn/auto_schedule/cost_model/xgb_cost_model.py b/python/cinn/auto_schedule/cost_model/xgb_cost_model.py
@@ -94,7 +94,7 @@ def load(self, path):
             self.booster = xgb.Booster()
         self.booster.load_model(path)
         # Should we save/load config parameters? Not now because it is pre-set.
-        # But we should do that here if that's changable in the future.
+        # But we should do that here if that's changeable in the future.
 
     def update(self, samples, labels):
         # xgb doesn't support incremental training, we leave this method as TODO

diff --git a/python/paddle/distributed/auto_parallel/static/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/static/operators/dist_eltwise.py
@@ -66,7 +66,7 @@ def update_dims_mapping(dist_op):
         output_spec = get_dist_tensor_spec(dist_op, output_arg_name, False)
 
         # step2: infer spmd
-        # TODO reivse me
+        # TODO revise me
         op_type = op_desc.type()
         rule = get_phi_spmd_rule(op_type)
         fw_results = rule.infer_forward(*input_specs)

diff --git a/python/paddle/distributed/auto_parallel/static/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/static/operators/dist_matmul.py
@@ -515,7 +515,7 @@ def update_dims_mapping_matmul(dist_op):
         trans_x = False
         trans_y = False
 
-    # TODO (zhangyichen) replace dist tensor spece by dist tensor in future.
+    # TODO (zhangyichen) replace dist tensor spec by dist tensor in future.
     x_spec = get_dist_tensor_spec(dist_op, x_name)
     y_spec = get_dist_tensor_spec(dist_op, y_name)
     out_spec = get_dist_tensor_spec(dist_op, out_name, False)

diff --git a/python/paddle/distributed/auto_parallel/static/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/static/operators/dist_reduce_sum_p.py
@@ -54,7 +54,7 @@ def update_dims_mapping(dist_op):
         keep_dim = op_desc.attr('keep_dim')
         dims = op_desc.attr('dim')
 
-        # TODO (zhangyichen) replace dist tensor spece by dist tensor in future.
+        # TODO (zhangyichen) replace dist tensor spec by dist tensor in future.
         input_spec = get_dist_tensor_spec(dist_op, input_arg_name)
         output_spec = get_dist_tensor_spec(dist_op, output_arg_name, False)
         # len(dims) == 0 means reduce_all
@@ -118,18 +118,18 @@ def is_partial_reduce(axes, dims_mapping):
 register_distributed_operator_impl_container(DistributedReduceSum("reduce_sum"))
 
 
-class DistributedReduceSumPrimtive(DistributedOperatorImplContainer):
+class DistributedReduceSumPrimitive(DistributedOperatorImplContainer):
     def __init__(self, op_type):
         super().__init__(op_type)
 
 
 register_distributed_operator_impl_container(
-    DistributedReduceSumPrimtive("reduce_sum_p")
+    DistributedReduceSumPrimitive("reduce_sum_p")
 )
 
 
 # Batch Dimension ReduceSum Primitive
-class DistributedReduceSumPrimtiveImpl0(DistributedOperatorImpl):
+class DistributedReduceSumPrimitiveImpl0(DistributedOperatorImpl):
     def __init__(self, name):
         super().__init__(name)
         self._forward_implemented = True
@@ -237,5 +237,5 @@ def backward(ctx, *args, **kwargs):
 
 register_distributed_operator_impl(
     "reduce_sum_p",
-    DistributedReduceSumPrimtiveImpl0("batch_dimension_reduce_sum_p"),
+    DistributedReduceSumPrimitiveImpl0("batch_dimension_reduce_sum_p"),
 )
diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py
@@ -387,7 +387,7 @@ def allreduce_perf(
         )
         if perf_threshold_time > -1 and ret > perf_threshold_time:
             logger.warning(
-                f"[Perf Warnning] AllReduce Test Timeout! {ret} > {perf_threshold_time}"
+                f"[Perf Warning] AllReduce Test Timeout! {ret} > {perf_threshold_time}"
             )
 
     # test reduce perf
@@ -408,7 +408,7 @@ def reduce_perf(self, iteration, x, group, perf_size, perf_threshold_time):
         )
         if perf_threshold_time > -1 and ret > perf_threshold_time:
             logger.warning(
-                f"[Perf Warnning] Reduce Test Timeout! {ret} > {perf_threshold_time}"
+                f"[Perf Warning] Reduce Test Timeout! {ret} > {perf_threshold_time}"
             )
 
     # test broadcast perf
@@ -431,7 +431,7 @@ def broadcast_perf(
         )
         if perf_threshold_time > -1 and ret > perf_threshold_time:
             logger.warning(
-                f"[Perf Warnning] Broadcast Test Timeout! {ret} > {perf_threshold_time}"
+                f"[Perf Warning] Broadcast Test Timeout! {ret} > {perf_threshold_time}"
             )
 
     # test allgather perf
@@ -455,7 +455,7 @@ def allgather_perf(
         )
         if perf_threshold_time > -1 and ret > perf_threshold_time:
             logger.warning(
-                f"[Perf Warnning] Allgather Test Timeout! {ret} > {perf_threshold_time}"
+                f"[Perf Warning] Allgather Test Timeout! {ret} > {perf_threshold_time}"
             )
 
     # test reduce_scatter perf
@@ -498,7 +498,7 @@ def reduce_scatter_perf(
         )
         if perf_threshold_time > -1 and ret > perf_threshold_time:
             logger.warning(
-                f"[Perf Warnning] ReduceScatter Test Timeout! {ret} > {perf_threshold_time}"
+                f"[Perf Warning] ReduceScatter Test Timeout! {ret} > {perf_threshold_time}"
             )
 
     def _collective_perf_impl(self, round=50, context={}, hcg=None):

diff --git a/python/paddle/distributed/fleet/fleet_executor_utils.py b/python/paddle/distributed/fleet/fleet_executor_utils.py
@@ -119,7 +119,7 @@ def task_node(self):
     def set_program(self, program):
         assert (
             self.lazy_initialize
-        ), "Inside program is unchangable for immediate initialized task node. Set the lazy_initialize to be true if the inside program need to be update. Remember to do all your change before eval node.task_node()."
+        ), "Inside program is unchangeable for immediate initialized task node. Set the lazy_initialize to be true if the inside program need to be update. Remember to do all your change before eval node.task_node()."
         self.program = program
 
     def get_program(self):
@@ -423,7 +423,7 @@ def run1f1b(
 ):
     """
     Split the program to support 1f1b pipeline scheduler.
-    This funct will split the program based on the op_role.
+    This function will split the program based on the op_role.
     The program will be split into four parts: lr_sched, fwd, bwd, opt.
     And will create task nodes based on the four parts of the program.
     :param program: The origin program.

diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py
@@ -303,7 +303,7 @@ def get_cluster(
 
 
 def terminate_local_procs(procs):
-    # try to terminate process by group, this happend in multiprocess senario in user process
+    # try to terminate process by group, this happened in multiprocess scenario in user process
     if os.name != 'nt':
         for p in procs:
             if p.proc.poll() is None:

diff --git a/test/legacy_test/test_merged_momentum_op.py b/test/legacy_test/test_merged_momentum_op.py
@@ -24,7 +24,7 @@
 def run_momentum_op(
     params,
     grads,
-    velocitys,
+    velocities,
     master_params,
     learning_rate,
     place,
@@ -34,7 +34,7 @@ def run_momentum_op(
     use_merged=False,
 ):
     assert len(params) == len(grads)
-    assert len(params) == len(velocitys)
+    assert len(params) == len(velocities)
     if multi_precision:
         assert len(params) == len(master_params)
     op_type = 'merged_momentum' if use_merged else 'momentum'
@@ -61,7 +61,7 @@ def run_momentum_op(
             helper.create_variable(
                 persistable=True, shape=v.shape, dtype=v.dtype
             )
-            for v in velocitys
+            for v in velocities
         ]
         lr_var = helper.create_variable(
             persistable=True,
@@ -83,7 +83,7 @@ def run_momentum_op(
             OrderedDict(
                 [
                     (v_var.name, v_val)
-                    for v_var, v_val in zip(velocity_vars, velocitys)
+                    for v_var, v_val in zip(velocity_vars, velocities)
                 ]
             )
         )
@@ -162,7 +162,7 @@ def run_momentum_op(
 def run_momentum_op2(
     params,
     grads,
-    velocitys,
+    velocities,
     master_params,
     learning_rate,
     place,
@@ -173,7 +173,7 @@ def run_momentum_op2(
     use_nesterov=True,
 ):
     assert len(params) == len(grads)
-    assert len(params) == len(velocitys)
+    assert len(params) == len(velocities)
     if multi_precision:
         assert len(params) == len(master_params)
     op_type = 'merged_momentum' if use_merged else 'momentum'
@@ -195,7 +195,7 @@ def run_momentum_op2(
             helper.create_variable(
                 persistable=True, shape=v.shape, dtype=v.dtype
             )
-            for v in velocitys
+            for v in velocities
         ]
         lr_var = helper.create_variable(
             persistable=True,
@@ -217,7 +217,7 @@ def run_momentum_op2(
             OrderedDict(
                 [
                     (v_var.name, v_val)
-                    for v_var, v_val in zip(velocity_vars, velocitys)
+                    for v_var, v_val in zip(velocity_vars, velocities)
                 ]
             )
         )
@@ -331,19 +331,19 @@ def prepare_data(self, shapes, multi_precision, seed, place):
         )
         params = self.gen_rand_data(shapes, dtype)
         grads = self.gen_rand_data(shapes, dtype)
-        velocitys = self.gen_rand_data(shapes, mp_dtype)
+        velocities = self.gen_rand_data(shapes, mp_dtype)
         learning_rate = self.gen_rand_data([[1]], mp_dtype)[0]
         if multi_precision:
             master_params = [p.astype(mp_dtype) for p in params]
         else:
             master_params = None
-        return params, grads, velocitys, master_params, learning_rate
+        return params, grads, velocities, master_params, learning_rate
 
     def check_with_place(self, place, multi_precision):
         (
             params,
             grads,
-            velocitys,
+            velocities,
             master_params,
             learning_rate,
         ) = self.prepare_data(self.shapes, multi_precision, self.seed, place)
@@ -354,7 +354,7 @@ def run_op(use_merged):
             return run_momentum_op(
                 params,
                 grads,
-                velocitys,
+                velocities,
                 master_params,
                 learning_rate,
                 place,
@@ -403,19 +403,19 @@ def prepare_data(self, shapes, multi_precision, seed, place):
         )
         params = self.gen_rand_data(shapes, dtype)
         grads = self.gen_rand_data(shapes, dtype)
-        velocitys = self.gen_rand_data(shapes, mp_dtype)
+        velocities = self.gen_rand_data(shapes, mp_dtype)
         learning_rate = self.gen_rand_data([[1]], mp_dtype)[0]
         if multi_precision:
             master_params = [p.astype(mp_dtype) for p in params]
         else:
             master_params = None
-        return params, grads, velocitys, master_params, learning_rate
+        return params, grads, velocities, master_params, learning_rate
 
     def check_with_place(self, place, multi_precision):
         (
             params,
             grads,
-            velocitys,
+            velocities,
             master_params,
             learning_rate,
         ) = self.prepare_data(self.shapes, multi_precision, self.seed, place)
@@ -426,7 +426,7 @@ def run_op(use_nesterov, use_merged):
             return run_momentum_op2(
                 params,
                 grads,
-                velocitys,
+                velocities,
                 master_params,
                 learning_rate,
                 place,

diff --git a/test/legacy_test/test_momentum_op.py b/test/legacy_test/test_momentum_op.py
@@ -184,7 +184,7 @@ def setUp(self):
 
         params = []
         grads = []
-        velocitys = []
+        velocities = []
         learning_rates = []
         master_params = []
         param_outs = []
@@ -216,7 +216,7 @@ def setUp(self):
 
             params.append(("SubParam_" + str(i), param))
             grads.append(("SubGrad_" + str(i), grad))
-            velocitys.append(("SubVelocity_" + str(i), velocity))
+            velocities.append(("SubVelocity_" + str(i), velocity))
             learning_rates.append(("SubLearning_rate_" + str(i), learning_rate))
             velocity_outs.append(("SubVelocity_out_" + str(i), velocity_out))
             param_outs.append(("SubParam_out_" + str(i), param_out))
@@ -228,7 +228,7 @@ def setUp(self):
         self.inputs = {
             'Param': params,
             'Grad': grads,
-            'Velocity': velocitys,
+            'Velocity': velocities,
             'LearningRate': learning_rates,
             'MasterParam': master_params,
         }
@@ -268,7 +268,7 @@ def setUp(self):
 
         params = []
         grads = []
-        velocitys = []
+        velocities = []
         param_outs = []
         velocity_outs = []
         learning_rates = []
@@ -292,15 +292,15 @@ def setUp(self):
 
             params.append(("SubParam_" + str(i), param))
             grads.append(("SubGrad_" + str(i), grad))
-            velocitys.append(("SubVelocity_" + str(i), velocity))
+            velocities.append(("SubVelocity_" + str(i), velocity))
             learning_rates.append(("SubLearning_rate_" + str(i), learning_rate))
             velocity_outs.append(("SubVelocity_out_" + str(i), velocity_out))
             param_outs.append(("SubParam_out_" + str(i), param_out))
 
         self.inputs = {
             'Param': params,
             'Grad': grads,
-            'Velocity': velocitys,
+            'Velocity': velocities,
             'LearningRate': learning_rates,
         }
 

diff --git a/test/legacy_test/test_mul_op.py b/test/legacy_test/test_mul_op.py
@@ -312,7 +312,7 @@ def test_check_grad_ignore_y(self):
         )
 
 
-# TODO: verify the requirments of CUDA ARCH
+# TODO: verify the requirements of CUDA ARCH
 @unittest.skipIf(
     not core.is_compiled_with_cuda() or get_cuda_version() < 11060,
     "MatmulInt8 requires CUDA >= 11.6",

diff --git a/test/legacy_test/test_multi_label_soft_margin_loss.py b/test/legacy_test/test_multi_label_soft_margin_loss.py
@@ -26,10 +26,10 @@ def call_MultiLabelSoftMarginLoss_layer(
     weight=None,
     reduction='mean',
 ):
-    multilabel_margin_loss = paddle.nn.MultiLabelSoftMarginLoss(
+    multi_label_margin_loss = paddle.nn.MultiLabelSoftMarginLoss(
         weight=weight, reduction=reduction
     )
-    res = multilabel_margin_loss(
+    res = multi_label_margin_loss(
         input=input,
         label=label,
     )
@@ -115,7 +115,7 @@ def test_dygraph(
         return dy_result
 
 
-def calc_multilabel_margin_loss(
+def calc_multi_label_margin_loss(
     input,
     label,
     weight=None,
@@ -151,7 +151,7 @@ def test_MultiLabelSoftMarginLoss(self):
         reductions = ['sum', 'mean', 'none']
         for place in places:
             for reduction in reductions:
-                expected = calc_multilabel_margin_loss(
+                expected = calc_multi_label_margin_loss(
                     input=input, label=label, reduction=reduction
                 )
 
@@ -218,7 +218,7 @@ def test_MultiLabelSoftMarginLoss_weights(self):
         weight = np.random.randint(0, 2, size=(5, 5)).astype(np.float64)
         place = 'cpu'
         reduction = 'mean'
-        expected = calc_multilabel_margin_loss(
+        expected = calc_multi_label_margin_loss(
             input=input, label=label, weight=weight, reduction=reduction
         )
 

diff --git a/test/legacy_test/test_multinomial_op.py b/test/legacy_test/test_multinomial_op.py
@@ -393,7 +393,7 @@ def test_fixed_random_number(self):
         if not paddle.is_compiled_with_cuda():
             return
 
-        # Different GPU generatte different random value. Only test V100 here.
+        # Different GPU generate different random value. Only test V100 here.
         if "V100" not in paddle.device.cuda.get_device_name():
             return