Fix some typos (maitained, var_topoloy, etc.) #61558

Merged · 11 commits · Feb 19, 2024
@@ -137,7 +137,7 @@ def get_compatible_impls(self, dist_op):
return compatible_impls

# (NOTE) Currently, both DistributedOperatorImplContainer and DistributedOperatorImpl have update_dims_mapping method.
# But this method is supposed to be maitained by DistributedOperatorImplContainer, and we are ongoing adding method
# But this method is supposed to be maintained by DistributedOperatorImplContainer, and we are ongoing adding method
# to DistributedOperatorImplContainer and removing those in DistributedOperatorImpl.
# @abc.abstractmethod
def update_dims_mapping(self, dist_op):
@@ -369,15 +369,15 @@ def is_parameter_related(varname, block, dist_context=None):

def infer_shape(block, src_var, src_var_dist_attr, op_input_dist_attr):
var_shape = block._var_recursive(src_var.name).shape
var_topoloy = src_var_dist_attr.process_mesh.shape
var_topology = src_var_dist_attr.process_mesh.shape
var_dims_mapping = src_var_dist_attr.dims_mapping

complete_shape = []
for idx, shape in enumerate(var_shape):
if var_dims_mapping[idx] == -1:
complete_shape.append(shape)
else:
new_shape = shape * var_topoloy[var_dims_mapping[idx]]
new_shape = shape * var_topology[var_dims_mapping[idx]]
complete_shape.append(new_shape)

exact_shape = []
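
For context on the hunk above: infer_shape recovers a variable's global shape by multiplying each sharded dimension of the local shape by the size of the process-mesh axis that dimension is mapped to; a dims_mapping entry of -1 means the dimension is replicated and keeps its local size. A minimal standalone sketch of that arithmetic, with hypothetical values and no Paddle objects:

    def complete_shape(local_shape, dims_mapping, mesh_shape):
        # mapping == -1: dimension is replicated, keep the local size.
        # mapping == j : dimension is split over mesh axis j, so the
        #                global size is local size * mesh_shape[j].
        full = []
        for size, mapping in zip(local_shape, dims_mapping):
            full.append(size if mapping == -1 else size * mesh_shape[mapping])
        return full

    # A [4, 8] local shard split along axis 0 of a 2x4 mesh
    # corresponds to an [8, 8] global tensor.
    print(complete_shape([4, 8], [0, -1], [2, 4]))  # [8, 8]
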
@@ -488,7 +488,7 @@ def get_data_parallel_group(dist_ctx, op, act_grad_names, rank):

def sync_and_scale_gradients(dist_ctx, op, groups, allreduce_var_names):
"""
insert the allreudce and scale ops for gradients of model
insert the allreduce and scale ops for gradients of model
parameters for operator in data parallelism.

Args:
@@ -557,7 +557,7 @@ def sync_and_scale_gradients(dist_ctx, op, groups, allreduce_var_names):

def get_partial_groups(dist_ctx, op, out_grad_names, rank):
"""
deduce the partial comminication group for current operator output vars.
deduce the partial communication group for current operator output vars.

Args:
dist_ctx (DistributedContext): dist context.
@@ -608,7 +608,7 @@ def gradient_synchronization(
dist_ctx, op, act_grad_names, out_grad_names, rank
):
"""
conduct the allreudce and scaling for gradients of model
conduct the allreduce and scaling for gradients of model
parameters for operator in parallelism train.

Args:
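
The docstrings touched in the hunks above (sync_and_scale_gradients, gradient_synchronization) describe the usual data-parallel pattern: sum each parameter gradient across the data-parallel group with an allreduce, then scale by one over the group size so every rank holds the mean gradient. A framework-free sketch of that arithmetic, with the allreduce simulated over an in-memory list of per-rank gradients (values are hypothetical):

    import numpy as np

    def allreduce_and_scale(per_rank_grads):
        # Allreduce (sum) across the group, then scale by 1/world_size
        # so the result is the mean gradient on every rank.
        world_size = len(per_rank_grads)
        return np.sum(per_rank_grads, axis=0) / world_size

    grads = [np.array([0.2, 0.4]), np.array([0.6, 0.0])]  # gradients from 2 ranks
    print(allreduce_and_scale(grads))                     # [0.4 0.2]
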
@@ -727,12 +727,12 @@ def update_op_dims_mapping(
changed = False
assert len(input_arg_names) == len(
infered_input_dims_mappings
), "dims mapping is NOT Match, infered [{}], orignal: [{}]; dist op: [{}]".format(
), "dims mapping is NOT Match, infered [{}], original: [{}]; dist op: [{}]".format(
len(infered_input_dims_mappings), len(input_arg_names), str(dist_op)
)
assert len(output_arg_names) == len(
infered_output_dims_mappings
), "dims mapping is NOT Match, infered [{}], orignal: [{}]; dist op: [{}]".format(
), "dims mapping is NOT Match, infered [{}], original: [{}]; dist op: [{}]".format(
len(infered_output_dims_mappings), len(output_arg_names), str(dist_op)
)

4 changes: 2 additions & 2 deletions python/paddle/distributed/fleet/utils/fs.py
@@ -302,7 +302,7 @@ def is_exist(self, fs_path):
fs_path(str): The local file path.

Returns:
Bool: Wheter it's a file or directory, return true if the path exists,
Bool: Whether it's a file or directory, return true if the path exists,
otherwise return false.

Examples:
@@ -1534,7 +1534,7 @@ def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=True):
fs_src_path(str): Name of the file or directory, that's needed to be moved.
fs_dst_path(str): Name of the file or directory to which to move to.
overwrite(bool): Whether to re-write `fs_dst_path` if that exists. Default is False.
test_exists(bool): Check the existence of `fs_src_path` and `fs_dst_path` . When `test_exists` is set true, if `fs_src_path` doesn't exist or `fs_dst_path` exists, program will throw an Excetption.
test_exists(bool): Check the existence of `fs_src_path` and `fs_dst_path` . When `test_exists` is set true, if `fs_src_path` doesn't exist or `fs_dst_path` exists, program will throw an Exception.

Examples:

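A short usage sketch for the filesystem client methods whose docstrings are corrected above. It assumes LocalFS is importable from paddle.distributed.fleet.utils and exposes touch/is_exist/mv as the surrounding docstrings suggest; treat it as illustrative rather than a verified snippet:

    from paddle.distributed.fleet.utils import LocalFS

    client = LocalFS()
    client.touch("./a.txt")            # create an empty file
    print(client.is_exist("./a.txt"))  # True whether the path is a file or a directory
    client.mv("./a.txt", "./b.txt")    # move/rename; see the mv docstring above
                                       # for the overwrite/test_exists behavior
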
10 changes: 5 additions & 5 deletions python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
@@ -469,28 +469,28 @@ def parse_args():
'--src_mp',
type=int,
default=2,
help='mp degree of the origin training task that dumpped this model',
help='mp degree of the origin training task that dumped this model',
)

parser.add_argument(
'--src_pp',
type=int,
default=2,
help='pp degree of the origin training task that dumpped this model',
help='pp degree of the origin training task that dumped this model',
)

parser.add_argument(
'--src_vp',
type=int,
default=2,
help='vp degree of the origin training task that dumpped this model',
help='vp degree of the origin training task that dumped this model',
)

parser.add_argument(
'--dst_mp',
type=int,
default=None,
help='mp degree of the origin training task that dumpped this model',
help='mp degree of the origin training task that dumped this model',
)

parser.add_argument(
@@ -511,7 +511,7 @@ def parse_args():
'--sharding',
type=int,
default=1,
help=" sharding degree of both the origin training task that dumpped this model and the expected training task that would recover this model",
help=" sharding degree of both the origin training task that dumped this model and the expected training task that would recover this model",
)

parser.add_argument(
2 changes: 1 addition & 1 deletion python/paddle/static/amp/function_overload.py
@@ -88,7 +88,7 @@ def register(self, fn, key):
"""
assert isinstance(
key, FunctionType
), f"The type of key is expected to be FunctionType, but recieved {type(key)}."
), f"The type of key is expected to be FunctionType, but received {type(key)}."
func = Function(fn)
self.function_map[key] = fn
return func
6 changes: 3 additions & 3 deletions python/paddle/static/nn/sequence_lod.py
@@ -69,7 +69,7 @@ def sequence_conv(
down_pad_len = max(0, filter_size + padding_start - 1) = 1

The output of the input sequence after padding is:
data_aftet_padding = [[0, 0, 1, 1, 2, 2],
data_after_padding = [[0, 0, 1, 1, 2, 2],
[1, 1, 2, 2, 3, 3],
[2, 2, 3, 3, 0, 0],
[0, 0, 4, 4, 0, 0]]
@@ -968,7 +968,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):

Args:
x (Tensor): Input 1-level Tensor with dims ``[M, K]``. The batch \
size is described by lod infor (the number of sequences ). \
size is described by lod info (the number of sequences ). \
The data type should be float32, float64, int8, int32 or int64.
pad_value (Tensor): Padding value. It can be a scalar or a 1D tensor \
with length ``K``. If it's a scalar, it will be automatically broadcasted \
@@ -984,7 +984,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
Returns:
tuple, A Python tuple (Out, Length): the 1st is a 0 level Tensor \
``Out``, with the shape ``[batch_size, maxlen, K]``; the second is the original \
sequences length infor ``Length``, which should be a 0-level 1D Tensor. \
sequences length info ``Length``, which should be a 0-level 1D Tensor. \
The size of ``Length`` is equal to batch size, and the data type is int64.

Examples:
22 changes: 11 additions & 11 deletions python/paddle/tensor/creation.py
@@ -584,7 +584,7 @@ def _handle_np_dtype(ndarray, dtype):
data = np.array(data)
if data.dtype == np.object_:
raise ValueError(
"\n\tFaild to convert input data to a regular ndarray :\n\t - Usually "
"\n\tFailed to convert input data to a regular ndarray :\n\t - Usually "
"this means the input data contains nested lists with different lengths. "
)
elif isinstance(data, paddle.Tensor) and not in_dynamic_mode():
@@ -600,7 +600,7 @@ def _handle_np_dtype(ndarray, dtype):
elif isinstance(data, (core.LoDTensor, core.Tensor)):
# should't expose it to users, just for internal use.
# convert core.Tensor/core.LoDTensor to Tensor first
# Currenly, there is no copy when places are same
# Currently, there is no copy when places are same
if in_dynamic_mode():
data = core.eager.Tensor(data)
else:
@@ -915,7 +915,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
elif isinstance(shape, paddle.pir.Value):
pass
else:
TypeError("Shape only supports OpReslut, or list, or tuple.")
TypeError("Shape only supports OpResult, or list, or tuple.")

if out is None:
out = _C_ops.full(shape, value, dtype, place)
@@ -1321,12 +1321,12 @@ def arange(start=0, end=None, step=1, dtype=None, name=None):
If ``end`` is None, the half-open interval is [0, ``start``).
Default is None.
step(float|int|Tensor, optional): Spacing between values. For any out,
it is the istance between two adjacent values, out[i+1] - out[i].
it is the instance between two adjacent values, out[i+1] - out[i].
If ``step`` is a Tensor, it is a 0-D Tensor which represents a scalar
and data type is int32, int64, float32, float64. . Default is 1.
dtype(str|np.dtype, optional): The data type of the
output tensor. Supported data types: int32, int64, float32, float64.
If ``dytpe`` is None, the data type is float32. Default is None.
If ``dtype`` is None, the data type is float32. Default is None.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

Returns:
@@ -2070,7 +2070,7 @@ def empty(shape, dtype=None, name=None):
If ``shape`` is a list or tuple, each element of it should be integer or 0-D Tensor with shape [].
If ``shape`` is an Tensor, it should be an 1-D Tensor which represents a list.
dtype(np.dtype|str, optional): Data type of the output Tensor
which can be bool, float16, float32, float64, int32, int64, complex64, complex128 if dytpe is `None`, the data
which can be bool, float16, float32, float64, int32, int64, complex64, complex128 if dtype is `None`, the data
type of created Tensor use global default dtype (see ``get_default_dtype``
for details).
name(str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
@@ -2592,7 +2592,7 @@ def _memcpy(input, place=None, output=None):


def complex(real, imag, name=None):
"""Return a compelx tensor given the real and image component.
"""Return a complex tensor given the real and image component.

Args:
real (Tensor): The real component. The data type should be 'float32' or 'float64'.
@@ -2646,7 +2646,7 @@ def complex(real, imag, name=None):
def tril_indices(row, col, offset=0, dtype='int64'):
"""
Return the indices of the lower triangular part of the 2-D matrix
whose row and col is knowed.Indices are ordered based on row and then columns.
whose row and col is known. Indices are ordered based on row and then columns.
The lower triangular part of the matrix is defined as the elements on
and below the diagonal.

@@ -2737,7 +2737,7 @@ def triu_indices(row, col=None, offset=0, dtype='int64'):
Args:
row (int): The input x which is a int number describe the number of row of the matrix.
col (int, optional): The input x which is a int number describe the number of col of the matrix.
default value for col is None, then it will be set equal to row, indicting a square matix.
default value for col is None, then it will be set equal to row, indicting a square matrix.
offset (int, optional): The offset to consider, default value is 0.

- If offset = 0, all elements on and above the main diagonal are retained.
@@ -2807,11 +2807,11 @@ def triu_indices(row, col=None, offset=0, dtype='int64'):


def polar(abs, angle, name=None):
"""Return a Cartesian coordinates corresponding to the polar coordinates compelx tensor given the ``abs`` and ``angle`` component.
"""Return a Cartesian coordinates corresponding to the polar coordinates complex tensor given the ``abs`` and ``angle`` component.

Args:
abs (Tensor): The abs component. The data type should be 'float32' or 'float64'.
angle (Tensor): The anglee component. The data type should be the same as ``abs``.
angle (Tensor): The angle component. The data type should be the same as ``abs``.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

Returns:
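Several of the docstrings fixed in this file belong to small tensor-creation helpers. A brief, hedged example of three of them, paddle.complex (build from real/imaginary parts), paddle.polar (build from magnitude/phase), and paddle.tril_indices (indices of the lower triangle); values are illustrative only:

    import numpy as np
    import paddle

    real = paddle.to_tensor([1.0, 0.0])
    imag = paddle.to_tensor([0.0, 1.0])
    z = paddle.complex(real, imag)             # [1+0j, 0+1j]

    mag = paddle.to_tensor([1.0, 1.0])
    angle = paddle.to_tensor([0.0, np.pi / 2])
    w = paddle.polar(mag, angle)               # the same two points, from polar form

    idx = paddle.tril_indices(3, 3, offset=0)  # row/col indices of the lower triangle
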
10 changes: 5 additions & 5 deletions python/paddle/tensor/einsum.py
@@ -110,7 +110,7 @@ def validate_rhs(rhs, input_labels, n_bcast_dims):
rhs = rhs.replace('...', '')
rhs_set = set(rhs)

# Hidden assumption: availble labels don't include '.'
# Hidden assumption: available labels don't include '.'
assert '.' not in input_labels

# Verify that output labels all come from the set of input labels
@@ -195,7 +195,7 @@ def build_global_view(nop_labels, rhs, n_bcast_dims):
rhs:
The equation right hand side
n_bcast_dims:
The maxium number of broadcast dimensions
The maximum number of broadcast dimensions

Returns
-------
@@ -336,7 +336,7 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K):
plan matmul
'''
# Transpose and re-shape op1 and op2 in I, J1, K and I, J2, K
# Then apply matmul(x, y, transpose_x=False, tranpose_y=True)
# Then apply matmul(x, y, transpose_x=False, transpose_y=True)
var1, var2 = f'op{op1}', f'op{op2}'

op1_view, op2_view = (g_view[op] for op in (op1, op2))
@@ -366,7 +366,7 @@ def plan_matmul(plan, g_view, op1, op2, g_supports, g_shape, I, J1, J2, K):
step = transpose, [var2], var2, list(op2_dims)
plan.add_step(step)

# Check if conditions hold for turnning the operation into a matmul
# Check if conditions hold for turning the operation into a matmul
if (
j1 + j2 > 0
and k > 0
@@ -538,7 +538,7 @@ def plan_broadcast(plan, operands, nop_axes):
varnames = [f'op{i}' for i in range(nop)]

for i, op_axes in zip(range(nop), nop_axes):
# Re-arrange the dimesions according to the global layout
# Re-arrange the dimensions according to the global layout
perm, fill = rearrange(op_axes)
var = varnames[i]
if perm:
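
The plan_matmul comments edited above describe grouping each operand's axes into batch (I), free (J1/J2), and contracted (K) dimensions, then performing the contraction as one batched matmul with the second operand transposed. A NumPy sketch of that reduction (shapes are hypothetical):

    import numpy as np

    x = np.random.rand(2, 3, 4)                 # grouped as [I, J1, K]
    y = np.random.rand(2, 5, 4)                 # grouped as [I, J2, K]

    # matmul(x, y, transpose_y=True): contract over K, keep the batch axis I.
    out = np.matmul(x, np.swapaxes(y, -1, -2))  # shape [I, J1, J2] == (2, 3, 5)
    print(out.shape)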