Fix with_mateclass → with_metaclass, etc. #62162

Merged: 4 commits, Feb 29, 2024
6 changes: 3 additions & 3 deletions python/paddle/amp/auto_cast.py
@@ -53,7 +53,7 @@ def __init__(self):
self.model_parameters = []
self.use_master_grad = False
self.already_register_final_backward_hook = False
- self.already_classify_params_meshs = False # For dist
+ self.already_classify_params_meshes = False # For dist
self.mesh2params = {} # For dist
self.amp_dtype = 'float32'

@@ -471,7 +471,7 @@ def master_grad_hook():
# NOTE(lizhiyu): To support semi-auto of dygraph mode, we must
# classify the params of model into different calsses according to their process_mesh.
# Otherwise, fault will occur.
- if not amp_global_state().already_classify_params_meshs:
+ if not amp_global_state().already_classify_params_meshes:
for param in amp_global_state().model_parameters:
if param is not None and param.process_mesh is not None:
if (
@@ -485,7 +485,7 @@ def master_grad_hook():
amp_global_state().mesh2params[
param.process_mesh
].append(param)
- amp_global_state().already_classify_params_meshs = True
+ amp_global_state().already_classify_params_meshes = True

if len(amp_global_state().mesh2params):
for _, params in amp_global_state().mesh2params.items():
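Side note on the hunk above: `master_grad_hook` buckets model parameters by their `process_mesh` exactly once, caching the result in `mesh2params`. A minimal, framework-agnostic sketch of that grouping pattern (the standalone helper and the `param` objects below are hypothetical stand-ins, not Paddle API):

```python
# Hypothetical sketch of the grouping performed in master_grad_hook:
# bucket parameters by their process_mesh, skipping params without one.
def classify_params_by_mesh(parameters):
    mesh2params = {}
    for param in parameters:
        if param is None or param.process_mesh is None:
            continue
        # setdefault creates the bucket on first sight of a mesh, then appends.
        mesh2params.setdefault(param.process_mesh, []).append(param)
    return mesh2params
```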
4 changes: 2 additions & 2 deletions python/paddle/amp/debugging.py
@@ -270,7 +270,7 @@ def _set_seed(self, flag):
self.seed = self.initial_seed

if self.seed > np.iinfo(np.uint32).max or self.seed < 0:
- print("[Warnning: Seed must be between 0 and 2**32 - 1")
+ print("[Warning: Seed must be between 0 and 2**32 - 1")
self.seed = 123

# get random seed
@@ -616,7 +616,7 @@ def compare_accuracy(
... [1, 5, 2, 0], dtype="float32"
... )
... z1 = x + y
- ... out_excel = "compary_accuracy_out_excel.csv"
+ ... out_excel = "compare_accuracy_out_excel.csv"
... paddle.amp.debugging.compare_accuracy(
... path, path, out_excel, loss_scale=1, dump_all_tensors=False
... )
4 changes: 2 additions & 2 deletions python/paddle/autograd/py_layer.py
@@ -18,7 +18,7 @@
__all__ = []


- def with_mateclass(meta, *bases):
+ def with_metaclass(meta, *bases):
class impl(meta):
def __new__(cls, name, temp_bases, attrs):
return meta(name, bases, attrs)
@@ -267,7 +267,7 @@ def __init__(cls, name, bases, attrs):
return super().__init__(name, bases, attrs)


- class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
+ class PyLayer(with_metaclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
"""
Paddle implements Python custom operators on the PaddlePaddle framework by creating a subclass of
``PyLayer``, which must comply with the following rules:
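For readers unfamiliar with the renamed helper: `with_metaclass(meta, *bases)` returns a temporary class whose `__new__` throws itself away and builds the real class with the requested metaclass and bases. A self-contained sketch of the pattern outside Paddle (the `Tracked` metaclass is hypothetical, used only to show that both the metaclass and the bases take effect):

```python
# Generic form of the helper renamed in this PR.
def with_metaclass(meta, *bases):
    class impl(meta):
        def __new__(cls, name, temp_bases, attrs):
            # Discard the temporary base; build the real class with `bases`.
            return meta(name, bases, attrs)

    return type.__new__(impl, 'impl', (), {})


class Tracked(type):
    """Hypothetical metaclass that records every class it creates."""
    registry = []

    def __init__(cls, name, bases, attrs):
        Tracked.registry.append(name)
        super().__init__(name, bases, attrs)


class Base:
    pass


class MyLayer(with_metaclass(Tracked, Base)):
    pass


assert type(MyLayer) is Tracked      # metaclass applied
assert issubclass(MyLayer, Base)     # intended bases preserved
print(Tracked.registry)              # ['MyLayer']
```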
8 changes: 4 additions & 4 deletions python/paddle/base/dygraph/tensor_patch_methods.py
@@ -104,7 +104,7 @@ def _to_static_var(self, to_parameter=False, **kwargs):
"""

# Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph.
- # It will fail. So, for propery that different between dynamic and static graph, should not getattr(self, attr, None).
+ # It will fail. So, for property that different between dynamic and static graph, should not getattr(self, attr, None).
attr_not_need_keys = [
'grad',
'T',
@@ -227,7 +227,7 @@ def set_value(self, value):

# NOTE(wuweilong): self could be Tensor, the subsequent behavior are defined in different files
# if self is Tensor, method value() return self that defined in this file, get_tensor() defined in eager_method.cc
- # this Interface behavior will be unifed in the future.
+ # this Interface behavior will be unified in the future.
if self.is_dist():
if isinstance(value, paddle.Tensor) and value.is_dist():
from paddle.distributed.auto_parallel.placement_type import (
@@ -702,7 +702,7 @@ def get_device_dtype_from_tensor(other):

if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
raise TypeError(
- "to() received too mant arguments - expected one of:\n \
+ "to() received too many arguments - expected one of:\n \
* (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] \
device, Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
* (Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
@@ -976,7 +976,7 @@ def __array__(self, dtype=None):
return array

def pre_deal_index(self, item):
- # since in pybind there is no effiency way to transfer Py_Tuple/Py_List/Py_Range to Tensor
+ # since in pybind there is no efficiency way to transfer Py_Tuple/Py_List/Py_Range to Tensor
# we call this function in python level.
item = list(item) if isinstance(item, tuple) else [item]
for i, slice_item in enumerate(item):
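The corrected error message in the `to()` hunk above enumerates the accepted overloads: (device[, dtype[, blocking]]), (dtype[, blocking]), and (other Tensor[, blocking]). A hedged usage sketch of those three forms (assuming a recent Paddle release; the accepted dtype/place strings may vary by version):

```python
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])

# Overload 1: device (and optionally dtype) as strings or Place objects.
y = x.to("cpu", "float64")

# Overload 2: dtype only.
z = x.to("int32")

# Overload 3: follow another tensor's place and dtype.
template = paddle.to_tensor([0.0], dtype="float64")
w = x.to(template)

print(y.dtype, z.dtype, w.dtype)
```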
4 changes: 2 additions & 2 deletions python/paddle/base/incubate/checkpoint/auto_checkpoint.py
@@ -419,7 +419,7 @@ def _serialize(self, pop_keys=["restored_from", "checkpoint_epoch_no"]):
for k in pop_keys:
d.pop(k, None)

- # registerd exes
+ # registered exes
d["exe_status"] = {}
e = d["exe_status"]
for k, t in self._exe_status.items():
@@ -625,7 +625,7 @@ def train_epoch_range(max_epoch_num, save_checkpoint_inter=None):
global g_acp_type
if not _get_checker().valid():
logger.warning(
- "auto checkpoint will take effect automaticly on PaddleCloud"
+ "auto checkpoint will take effect automatically on PaddleCloud"
)
for i in _normal_yield(max_epoch_num):
yield i
4 changes: 2 additions & 2 deletions python/paddle/base/layers/io.py
@@ -74,7 +74,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
- startop_op = startup_blk.append_op(
+ startup_op = startup_blk.append_op(
type=op_type,
inputs={'UnderlyingReader': reader},
outputs={'Out': [startup_var]},
@@ -83,7 +83,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
startup_var.persistable = True
main_prog_block = default_main_program().current_block()
main_prog_var = _copy_reader_var_(main_prog_block, startup_var)
- _copy_reader_create_op_(main_prog_block, startop_op)
+ _copy_reader_create_op_(main_prog_block, startup_op)
return monkey_patch_reader_methods(main_prog_var)


4 changes: 2 additions & 2 deletions python/paddle/base/layers/layer_function_generator.py
@@ -86,7 +86,7 @@ def _generate_doc_string_(
buf.write(" (Tensor): ")
buf.write(escape_math(each_input.comment))
if each_input.duplicable:
- buf.write(" Duplicatable.")
+ buf.write(" Duplicable.")
if each_input.dispensable:
buf.write(" Optional.")
buf.write('\n')
@@ -327,7 +327,7 @@ def func(x, name=None):
and x.is_view_var
):
raise ValueError(
- 'Sorry about what\'s happend. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {} = {}.assign().'.format(
+ 'Sorry about what\'s happened. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {} = {}.assign().'.format(
inplace_op_type, x.name, x.name, x.nameb
)
)
4 changes: 2 additions & 2 deletions python/paddle/base/reader.py
@@ -137,7 +137,7 @@ def _check_input_array(cls, item):
arr = np.asarray(item)
if arr.dtype == np.object_:
raise TypeError(
- "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
+ "\n\tFailed to convert input data to a regular ndarray :\n\t* Usually "
"this means the input data contains nested lists with different lengths. "
"\n\t* Check the reader function passed to 'decorate_batch_generator'"
" to locate the data causes this issue.\n\t* Please consider using "
@@ -532,7 +532,7 @@ def __init__(
# NOTE: the C++ LoDTensorBlockingQueue instance
self._blocking_queue = None
# NOTE: 1. In multiprocess mode, this thread is used to get next batch data from
- # self._data_queue, then push it into self._blocking_queue; 2. In singleprocess
+ # self._data_queue, then push it into self._blocking_queue; 2. In single process
# mode, this thread is used to get next batch data from self._batch_reader, then
# push it into self._blocking_queue
self._thread = None
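The corrected message in `_check_input_array` above fires when a sample is a nested list with unequal lengths, which cannot be converted to a regular ndarray. A small NumPy sketch of that detection (`looks_ragged` is a hypothetical helper; the try/except covers newer NumPy versions that raise instead of returning an object array):

```python
import numpy as np

def looks_ragged(item):
    # Ragged nested lists cannot form a regular ndarray. Older NumPy returns
    # an object-dtype array (the case the Paddle check tests for); newer NumPy
    # raises ValueError instead, so treat both outcomes as "ragged".
    try:
        arr = np.asarray(item)
    except ValueError:
        return True
    return arr.dtype == np.object_

print(looks_ragged([[1, 2, 3], [4, 5, 6]]))  # False: regular 2x3 array
print(looks_ragged([[1, 2, 3], [4, 5]]))     # True: rows differ in length
```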
46 changes: 23 additions & 23 deletions python/paddle/hapi/model.py
@@ -293,7 +293,7 @@ def _update_input_info(inputs):
class StaticGraphAdapter:
"""

- Model traning/inference with a static graph.
+ Model training/inference with a static graph.

"""

@@ -633,7 +633,7 @@ def _make_program(self, mode):
prog = self._orig_prog.clone()
# NOTE: When defining learning rate scheduling in static-graph, ops to
# increase the global step var and calculate learning rate would be
- # prepended into _orig_prog. test program maked by `_orig_prog.clone`
+ # prepended into _orig_prog. test program marked by `_orig_prog.clone`
# also would include these ops. Thus must prune these ops in test
# program, otherwise the global step would be changed in test.
if mode != 'train':
@@ -794,16 +794,16 @@ def __init__(self, model):

if self._nranks > 1:
dist.init_parallel_env()
- stradegy = paddle.distributed.parallel.ParallelStrategy()
- stradegy.nranks = paddle.distributed.ParallelEnv().nranks
- stradegy.local_rank = paddle.distributed.ParallelEnv().local_rank
- stradegy.trainer_endpoints = (
+ strategy = paddle.distributed.parallel.ParallelStrategy()
+ strategy.nranks = paddle.distributed.ParallelEnv().nranks
+ strategy.local_rank = paddle.distributed.ParallelEnv().local_rank
+ strategy.trainer_endpoints = (
paddle.distributed.ParallelEnv().trainer_endpoints
)
- stradegy.current_endpoint = (
+ strategy.current_endpoint = (
paddle.distributed.ParallelEnv().current_endpoint
)
- self.ddp_model = paddle.DataParallel(self.model.network, stradegy)
+ self.ddp_model = paddle.DataParallel(self.model.network, strategy)

@property
def mode(self):
@@ -879,7 +879,7 @@ def eval_batch(self, inputs, labels=None):

outputs = self.model.network(*[paddle.to_tensor(x) for x in inputs])

- # Transfrom data to expected device
+ # Transform data to expected device
expected_device = paddle.device.get_device()
for o in to_list(outputs):
o._to(device=expected_device)
@@ -966,7 +966,7 @@ def load(self, param_state_pairs, optim_state, scaler_state=None):
if scaler_state:
self.model._scaler.load_state_dict(scaler_state)

- # resotre optimizer states
+ # restore optimizer states
if not self.model._optimizer or not optim_state:
return

@@ -1077,7 +1077,7 @@ class Model:
or dict ({name: InputSpec}), and it couldn't be None in static
graph. Default: None.
labels (InputSpec|list|tuple|None, optional): `labels`, entry points of network,
- could be a InputSpec instnace or list/tuple of InputSpec instances,
+ could be a InputSpec instance or list/tuple of InputSpec instances,
or None. For static graph, if labels is required in loss,
labels must be set. Otherwise, it could be None. Default: None.

@@ -1676,7 +1676,7 @@ def prepare(
):
"""

- Configures the model before runing.
+ Configures the model before running.

Args:
optimizer (Optimizer|None, optional): Optimizer must be set in training
@@ -1777,16 +1777,16 @@ def fit(
Args:
train_data (Dataset|DataLoader, optional): An iterable data loader is used for
train. An instance of paddle paddle.io.Dataset or
- paddle.io.Dataloader is recomended. Default: None.
+ paddle.io.Dataloader is recommended. Default: None.
eval_data (Dataset|DataLoader, optional): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.io.Dataset or paddle.io.Dataloader
- is recomended. Default: None.
+ is recommended. Default: None.
batch_size (int|list, optional): The batch size of train_data and eval_data. When
train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored. Default: 1.
epochs (int, optional): The number of epochs to train the model. Default: 1.
- eval_freq (int, optional): The frequency, in number of epochs, an evalutation
+ eval_freq (int, optional): The frequency, in number of epochs, an evaluation
is performed. Default: 1.
log_freq (int, optional): The frequency, in number of steps, the training logs
are printed. Default: 10.
@@ -1800,7 +1800,7 @@ def fit(
train_data when dataset size is not divisible by the batch size.
When train_data is an instance of Dataloader, this parameter
will be ignored. Default: False.
- shuffle (bool, optional): Whther to shuffle train_data. When train_data is
+ shuffle (bool, optional): Whether to shuffle train_data. When train_data is
an instance of Dataloader, this parameter will be ignored.
Default: True.
num_workers (int, optional): The number of subprocess to load data, 0 for no
@@ -1810,7 +1810,7 @@ def fit(
callbacks (Callback|None, optional): A list of `Callback` instances to apply
during training. If None, :ref:`api_paddle_callbacks_ProgBarLogger` and
:ref:`api_paddle_callbacks_ModelCheckpoint` are automatically inserted. Default: None.
- accumulate_grad_batches (int, optional): The number of batches to accumulate gradident
+ accumulate_grad_batches (int, optional): The number of batches to accumulate gradient
during training process before optimizer updates. It can mimic large batch
size. Default: 1.
num_iters (int|None, optional): The number of iterations to evaluate the model.
@@ -2016,7 +2016,7 @@ def evaluate(
Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.io.Dataset or
- paddle.io.Dataloader is recomended.
+ paddle.io.Dataloader is recommended.
batch_size (int, optional): The batch size of train_data and eval_data.
When eval_data is the instance of Dataloader, this argument will be
ignored. Default: 1.
@@ -2126,7 +2126,7 @@ def predict(
Args:
test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
- is recomended.
+ is recommended.
batch_size (int, optional): The batch size of test_data. When test_data is the
instance of Dataloader, this argument will be ignored. Default: 1.
num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess
@@ -2300,13 +2300,13 @@ def _run_one_epoch(
# Data might come from different types of data_loader and have
# different format, as following:
# 1. DataLoader in static graph:
- # [[input1, input2, ..., label1, lable2, ...]]
+ # [[input1, input2, ..., label1, label2, ...]]
# 2. DataLoader in dygraph
- # [input1, input2, ..., label1, lable2, ...]
+ # [input1, input2, ..., label1, label2, ...]
# 3. custumed iterator yield concated inputs and labels:
- # [input1, input2, ..., label1, lable2, ...]
+ # [input1, input2, ..., label1, label2, ...]
# 4. custumed iterator yield separated inputs and labels:
- # ([input1, input2, ...], [label1, lable2, ...])
+ # ([input1, input2, ...], [label1, label2, ...])
# To handle all of these, flatten (nested) list to list.
data = paddle.utils.flatten(data)
# LoDTensor.shape is callable, where LoDTensor comes from
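Most of the edits in this file touch docstrings of the high-level `Model` workflow (`prepare`, `fit`, `evaluate`, `predict`). A compact usage sketch tying those methods together (the LeNet/MNIST choices are illustrative only; exact argument defaults may differ between Paddle releases):

```python
import paddle
from paddle.static import InputSpec
from paddle.vision.datasets import MNIST
from paddle.vision.models import LeNet
from paddle.vision.transforms import ToTensor

train_ds = MNIST(mode="train", transform=ToTensor())
eval_ds = MNIST(mode="test", transform=ToTensor())

# Entry points of the network, as described in the Model docstring.
inputs = [InputSpec([None, 1, 28, 28], "float32", "image")]
labels = [InputSpec([None, 1], "int64", "label")]

model = paddle.Model(LeNet(), inputs, labels)
model.prepare(
    optimizer=paddle.optimizer.Adam(
        learning_rate=1e-3, parameters=model.parameters()
    ),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)

model.fit(train_ds, eval_ds, batch_size=64, epochs=1, eval_freq=1, shuffle=True)
eval_result = model.evaluate(eval_ds, batch_size=64)
predictions = model.predict(eval_ds, batch_size=64)
```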
14 changes: 7 additions & 7 deletions python/paddle/incubate/asp/supported_layer_list.py
@@ -35,16 +35,16 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
shape = weight_nparray.shape
weight_pruned_nparray = copy.deepcopy(weight_nparray)
weight_sparse_mask = np.ones_like(weight_pruned_nparray)
- exlude_cond_shape2 = len(shape) == 2 and shape[0] < m
- exlude_cond_shape4 = len(shape) == 4 and shape[1] < m
- if exlude_cond_shape2:
+ exclude_cond_shape2 = len(shape) == 2 and shape[0] < m
+ exclude_cond_shape4 = len(shape) == 4 and shape[1] < m
+ if exclude_cond_shape2:
_logger.warning(
'{} is not pruned because the first dimension of {} is smaller than {}'.format(
param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask
- if exlude_cond_shape4:
+ if exclude_cond_shape4:
_logger.warning(
'{} is not pruned because the second dimension of {} is smaller than {}'.format(
param_name, shape, m
@@ -58,12 +58,12 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
# SPMMA in cuSparseLt: D = (AxB) + C, where matrix A (mxk) is sparse matrix.
# cuSparseLt would prune matrix A along k dimension.
# In sparse training, layer weight matrices is viewed sparse matrix A, so
- # the math fomula should be 'Act(WX + b)'. However, default fomula in PaddlePaddle
+ # the math formula should be 'Act(WX + b)'. However, default formula in PaddlePaddle
# is 'Act(XW + b)'. For enabling SPMMA, weights and inputs should be transposed
# for computing, Act( (W^T X^T)^T + b). Therefore, we have to prune alog k dimension
- # of W^T, which is m dimension of W. Moreove, all mask generating functions in
+ # of W^T, which is m dimension of W. Moreover, all mask generating functions in
# asp/utils is row-major pruning. That is the reason we have to transpose weight
- # matrices beforce invoking create_mask. Then we transpose the result mask to make
+ # matrices before invoking create_mask. Then we transpose the result mask to make
# sure its shape to be the same as the input weight.
weight_sparse_mask = asp.create_mask(
weight_nparray.T, func_name=func_name, n=n, m=m
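The comment block fixed above explains the transpose dance: Paddle computes `Act(XW + b)` while cuSparseLt prunes the A operand of `D = (A x B) + C` along k, and the asp mask helpers prune row-major, so the weight is transposed before `create_mask` and the resulting mask is transposed back. A hedged NumPy sketch of that flow with a generic n:m magnitude rule (`create_nm_mask` is a stand-in for `asp.create_mask`, not the actual implementation):

```python
import numpy as np

def create_nm_mask(weight, n=2, m=4):
    """Row-major n:m mask: in each group of m values along the last axis,
    keep the n entries with the largest magnitude."""
    flat = weight.reshape(-1, m)
    mask = np.zeros_like(flat)
    keep = np.argsort(np.abs(flat), axis=1)[:, -n:]  # indices of the n largest
    np.put_along_axis(mask, keep, 1.0, axis=1)
    return mask.reshape(weight.shape)

rng = np.random.default_rng(0)
w = rng.standard_normal((8, 16)).astype("float32")  # dense weight used as XW

# Transpose, prune row-major, then transpose the mask back to W's layout,
# mirroring the flow described in the comment above.
mask = create_nm_mask(w.T, n=2, m=4).T
w_pruned = w * mask
assert mask.shape == w.shape
print(int(mask.sum()), "of", mask.size, "weights kept")  # exactly n/m of the entries
```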