Fix with_mateclass → with_metaclass, etc. #62162

Merged: 4 commits, Feb 29, 2024
6 changes: 3 additions & 3 deletions python/paddle/amp/auto_cast.py
@@ -53,7 +53,7 @@ def __init__(self):
self.model_parameters = []
self.use_master_grad = False
self.already_register_final_backward_hook = False
- self.already_classify_params_meshs = False # For dist
+ self.already_classify_params_meshes = False # For dist
self.mesh2params = {} # For dist
self.amp_dtype = 'float32'

@@ -471,7 +471,7 @@ def master_grad_hook():
# NOTE(lizhiyu): To support semi-auto of dygraph mode, we must
# classify the params of model into different calsses according to their process_mesh.
# Otherwise, fault will occur.
- if not amp_global_state().already_classify_params_meshs:
+ if not amp_global_state().already_classify_params_meshes:
for param in amp_global_state().model_parameters:
if param is not None and param.process_mesh is not None:
if (
@@ -485,7 +485,7 @@ def master_grad_hook():
amp_global_state().mesh2params[
param.process_mesh
].append(param)
- amp_global_state().already_classify_params_meshs = True
+ amp_global_state().already_classify_params_meshes = True

if len(amp_global_state().mesh2params):
for _, params in amp_global_state().mesh2params.items():
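Side note on the hunk above: `master_grad_hook` buckets model parameters by their `process_mesh` exactly once, caching the result in `mesh2params`. A minimal, framework-agnostic sketch of that grouping pattern (the standalone helper and the `param` objects below are hypothetical stand-ins, not Paddle API):

```python
# Hypothetical sketch of the grouping performed in master_grad_hook:
# bucket parameters by their process_mesh, skipping params without one.
def classify_params_by_mesh(parameters):
    mesh2params = {}
    for param in parameters:
        if param is None or param.process_mesh is None:
            continue
        # setdefault creates the bucket on first sight of a mesh, then appends.
        mesh2params.setdefault(param.process_mesh, []).append(param)
    return mesh2params
```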
4 changes: 2 additions & 2 deletions python/paddle/amp/debugging.py
@@ -270,7 +270,7 @@ def _set_seed(self, flag):
self.seed = self.initial_seed

if self.seed > np.iinfo(np.uint32).max or self.seed < 0:
- print("[Warnning: Seed must be between 0 and 2**32 - 1")
+ print("[Warning: Seed must be between 0 and 2**32 - 1")
self.seed = 123

# get random seed
@@ -616,7 +616,7 @@ def compare_accuracy(
... [1, 5, 2, 0], dtype="float32"
... )
... z1 = x + y
- ... out_excel = "compary_accuracy_out_excel.csv"
+ ... out_excel = "compare_accuracy_out_excel.csv"
... paddle.amp.debugging.compare_accuracy(
... path, path, out_excel, loss_scale=1, dump_all_tensors=False
... )
4 changes: 2 additions & 2 deletions python/paddle/autograd/py_layer.py
@@ -18,7 +18,7 @@
__all__ = []


- def with_mateclass(meta, *bases):
+ def with_metaclass(meta, *bases):
class impl(meta):
def __new__(cls, name, temp_bases, attrs):
return meta(name, bases, attrs)
@@ -267,7 +267,7 @@ def __init__(cls, name, bases, attrs):
return super().__init__(name, bases, attrs)


- class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
+ class PyLayer(with_metaclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
"""
Paddle implements Python custom operators on the PaddlePaddle framework by creating a subclass of
``PyLayer``, which must comply with the following rules:
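For readers unfamiliar with the renamed helper: `with_metaclass(meta, *bases)` returns a temporary class whose `__new__` throws itself away and builds the real class with the requested metaclass and bases. A self-contained sketch of the pattern outside Paddle (the `Tracked` metaclass is hypothetical, used only to show that both the metaclass and the bases take effect):

```python
# Generic form of the helper renamed in this PR.
def with_metaclass(meta, *bases):
    class impl(meta):
        def __new__(cls, name, temp_bases, attrs):
            # Discard the temporary base; build the real class with `bases`.
            return meta(name, bases, attrs)

    return type.__new__(impl, 'impl', (), {})


class Tracked(type):
    """Hypothetical metaclass that records every class it creates."""
    registry = []

    def __init__(cls, name, bases, attrs):
        Tracked.registry.append(name)
        super().__init__(name, bases, attrs)


class Base:
    pass


class MyLayer(with_metaclass(Tracked, Base)):
    pass


assert type(MyLayer) is Tracked      # metaclass applied
assert issubclass(MyLayer, Base)     # intended bases preserved
print(Tracked.registry)              # ['MyLayer']
```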
8 changes: 4 additions & 4 deletions python/paddle/base/dygraph/tensor_patch_methods.py
@@ -104,7 +104,7 @@ def _to_static_var(self, to_parameter=False, **kwargs):
"""

# Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph.
- # It will fail. So, for propery that different between dynamic and static graph, should not getattr(self, attr, None).
+ # It will fail. So, for property that different between dynamic and static graph, should not getattr(self, attr, None).
attr_not_need_keys = [
'grad',
'T',
@@ -227,7 +227,7 @@ def set_value(self, value):

# NOTE(wuweilong): self could be Tensor, the subsequent behavior are defined in different files
# if self is Tensor, method value() return self that defined in this file, get_tensor() defined in eager_method.cc
- # this Interface behavior will be unifed in the future.
+ # this Interface behavior will be unified in the future.
if self.is_dist():
if isinstance(value, paddle.Tensor) and value.is_dist():
from paddle.distributed.auto_parallel.placement_type import (
@@ -702,7 +702,7 @@ def get_device_dtype_from_tensor(other):

if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
raise TypeError(
- "to() received too mant arguments - expected one of:\n \
+ "to() received too many arguments - expected one of:\n \
* (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] \
device, Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
* (Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
@@ -976,7 +976,7 @@ def __array__(self, dtype=None):
return array

def pre_deal_index(self, item):
- # since in pybind there is no effiency way to transfer Py_Tuple/Py_List/Py_Range to Tensor
+ # since in pybind there is no efficiency way to transfer Py_Tuple/Py_List/Py_Range to Tensor
# we call this function in python level.
item = list(item) if isinstance(item, tuple) else [item]
for i, slice_item in enumerate(item):
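The corrected error message in the `to()` hunk above enumerates the accepted overloads: (device[, dtype[, blocking]]), (dtype[, blocking]), and (other Tensor[, blocking]). A hedged usage sketch of those three forms (assuming a recent Paddle release; the accepted dtype/place strings may vary by version):

```python
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])

# Overload 1: device (and optionally dtype) as strings or Place objects.
y = x.to("cpu", "float64")

# Overload 2: dtype only.
z = x.to("int32")

# Overload 3: follow another tensor's place and dtype.
template = paddle.to_tensor([0.0], dtype="float64")
w = x.to(template)

print(y.dtype, z.dtype, w.dtype)
```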
4 changes: 2 additions & 2 deletions python/paddle/base/incubate/checkpoint/auto_checkpoint.py
@@ -419,7 +419,7 @@ def _serialize(self, pop_keys=["restored_from", "checkpoint_epoch_no"]):
for k in pop_keys:
d.pop(k, None)

- # registerd exes
+ # registered exes
d["exe_status"] = {}
e = d["exe_status"]
for k, t in self._exe_status.items():
@@ -625,7 +625,7 @@ def train_epoch_range(max_epoch_num, save_checkpoint_inter=None):
global g_acp_type
if not _get_checker().valid():
logger.warning(
- "auto checkpoint will take effect automaticly on PaddleCloud"
+ "auto checkpoint will take effect automatically on PaddleCloud"
)
for i in _normal_yield(max_epoch_num):
yield i
4 changes: 2 additions & 2 deletions python/paddle/base/layers/io.py
@@ -74,7 +74,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
- startop_op = startup_blk.append_op(
+ startup_op = startup_blk.append_op(
type=op_type,
inputs={'UnderlyingReader': reader},
outputs={'Out': [startup_var]},
@@ -83,7 +83,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
startup_var.persistable = True
main_prog_block = default_main_program().current_block()
main_prog_var = _copy_reader_var_(main_prog_block, startup_var)
- _copy_reader_create_op_(main_prog_block, startop_op)
+ _copy_reader_create_op_(main_prog_block, startup_op)
return monkey_patch_reader_methods(main_prog_var)


4 changes: 2 additions & 2 deletions python/paddle/base/layers/layer_function_generator.py
@@ -86,7 +86,7 @@ def _generate_doc_string_(
buf.write(" (Tensor): ")
buf.write(escape_math(each_input.comment))
if each_input.duplicable:
- buf.write(" Duplicatable.")
+ buf.write(" Duplicable.")
if each_input.dispensable:
buf.write(" Optional.")
buf.write('\n')
@@ -327,7 +327,7 @@ def func(x, name=None):
and x.is_view_var
):
raise ValueError(
- 'Sorry about what\'s happend. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {} = {}.assign().'.format(
+ 'Sorry about what\'s happened. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {} = {}.assign().'.format(
inplace_op_type, x.name, x.name, x.nameb
)
)
4 changes: 2 additions & 2 deletions python/paddle/base/reader.py
@@ -137,7 +137,7 @@ def _check_input_array(cls, item):
arr = np.asarray(item)
if arr.dtype == np.object_:
raise TypeError(
- "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
+ "\n\tFailed to convert input data to a regular ndarray :\n\t* Usually "
"this means the input data contains nested lists with different lengths. "
"\n\t* Check the reader function passed to 'decorate_batch_generator'"
" to locate the data causes this issue.\n\t* Please consider using "
@@ -532,7 +532,7 @@ def __init__(
# NOTE: the C++ LoDTensorBlockingQueue instance
self._blocking_queue = None
# NOTE: 1. In multiprocess mode, this thread is used to get next batch data from
- # self._data_queue, then push it into self._blocking_queue; 2. In singleprocess
+ # self._data_queue, then push it into self._blocking_queue; 2. In single process
# mode, this thread is used to get next batch data from self._batch_reader, then
# push it into self._blocking_queue
self._thread = None
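The corrected message in `_check_input_array` above fires when a sample is a nested list with unequal lengths, which cannot be converted to a regular ndarray. A small NumPy sketch of that detection (`looks_ragged` is a hypothetical helper; the try/except covers newer NumPy versions that raise instead of returning an object array):

```python
import numpy as np

def looks_ragged(item):
    # Ragged nested lists cannot form a regular ndarray. Older NumPy returns
    # an object-dtype array (the case the Paddle check tests for); newer NumPy
    # raises ValueError instead, so treat both outcomes as "ragged".
    try:
        arr = np.asarray(item)
    except ValueError:
        return True
    return arr.dtype == np.object_

print(looks_ragged([[1, 2, 3], [4, 5, 6]]))  # False: regular 2x3 array
print(looks_ragged([[1, 2, 3], [4, 5]]))     # True: rows differ in length
```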
46 changes: 23 additions & 23 deletions python/paddle/hapi/model.py
@@ -293,7 +293,7 @@ def _update_input_info(inputs):
class StaticGraphAdapter:
"""

- Model traning/inference with a static graph.
+ Model training/inference with a static graph.

"""

@@ -633,7 +633,7 @@ def _make_program(self, mode):
prog = self._orig_prog.clone()
# NOTE: When defining learning rate scheduling in static-graph, ops to
# increase the global step var and calculate learning rate would be
- # prepended into _orig_prog. test program maked by `_orig_prog.clone`
+ # prepended into _orig_prog. test program marked by `_orig_prog.clone`
# also would include these ops. Thus must prune these ops in test
# program, otherwise the global step would be changed in test.
if mode != 'train':
@@ -794,16 +794,16 @@ def __init__(self, model):

if self._nranks > 1:
dist.init_parallel_env()
- stradegy = paddle.distributed.parallel.ParallelStrategy()
- stradegy.nranks = paddle.distributed.ParallelEnv().nranks
- stradegy.local_rank = paddle.distributed.ParallelEnv().local_rank
- stradegy.trainer_endpoints = (
+ strategy = paddle.distributed.parallel.ParallelStrategy()
+ strategy.nranks = paddle.distributed.ParallelEnv().nranks
+ strategy.local_rank = paddle.distributed.ParallelEnv().local_rank
+ strategy.trainer_endpoints = (
paddle.distributed.ParallelEnv().trainer_endpoints
)
- stradegy.current_endpoint = (
+ strategy.current_endpoint = (
paddle.distributed.ParallelEnv().current_endpoint
)
- self.ddp_model = paddle.DataParallel(self.model.network, stradegy)
+ self.ddp_model = paddle.DataParallel(self.model.network, strategy)

@property
def mode(self):
@@ -879,7 +879,7 @@ def eval_batch(self, inputs, labels=None):

outputs = self.model.network(*[paddle.to_tensor(x) for x in inputs])

- # Transfrom data to expected device
+ # Transform data to expected device
expected_device = paddle.device.get_device()
for o in to_list(outputs):
o._to(device=expected_device)
@@ -966,7 +966,7 @@ def load(self, param_state_pairs, optim_state, scaler_state=None):
if scaler_state:
self.model._scaler.load_state_dict(scaler_state)

- # resotre optimizer states
+ # restore optimizer states
if not self.model._optimizer or not optim_state:
return

@@ -1077,7 +1077,7 @@ class Model:
or dict ({name: InputSpec}), and it couldn't be None in static
graph. Default: None.
labels (InputSpec|list|tuple|None, optional): `labels`, entry points of network,
- could be a InputSpec instnace or list/tuple of InputSpec instances,
+ could be a InputSpec instance or list/tuple of InputSpec instances,
or None. For static graph, if labels is required in loss,
labels must be set. Otherwise, it could be None. Default: None.

@@ -1676,7 +1676,7 @@ def prepare(
):
"""

- Configures the model before runing.
+ Configures the model before running.

Args:
optimizer (Optimizer|None, optional): Optimizer must be set in training
@@ -1777,16 +1777,16 @@ def fit(
Args:
train_data (Dataset|DataLoader, optional): An iterable data loader is used for
train. An instance of paddle paddle.io.Dataset or
- paddle.io.Dataloader is recomended. Default: None.
+ paddle.io.Dataloader is recommended. Default: None.
eval_data (Dataset|DataLoader, optional): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.io.Dataset or paddle.io.Dataloader
- is recomended. Default: None.
+ is recommended. Default: None.
batch_size (int|list, optional): The batch size of train_data and eval_data. When
train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored. Default: 1.
epochs (int, optional): The number of epochs to train the model. Default: 1.
- eval_freq (int, optional): The frequency, in number of epochs, an evalutation
+ eval_freq (int, optional): The frequency, in number of epochs, an evaluation
is performed. Default: 1.
log_freq (int, optional): The frequency, in number of steps, the training logs
are printed. Default: 10.
@@ -1800,7 +1800,7 @@ def fit(
train_data when dataset size is not divisible by the batch size.
When train_data is an instance of Dataloader, this parameter
will be ignored. Default: False.
- shuffle (bool, optional): Whther to shuffle train_data. When train_data is
+ shuffle (bool, optional): Whether to shuffle train_data. When train_data is
an instance of Dataloader, this parameter will be ignored.
Default: True.
num_workers (int, optional): The number of subprocess to load data, 0 for no
@@ -1810,7 +1810,7 @@ def fit(
callbacks (Callback|None, optional): A list of `Callback` instances to apply
during training. If None, :ref:`api_paddle_callbacks_ProgBarLogger` and
:ref:`api_paddle_callbacks_ModelCheckpoint` are automatically inserted. Default: None.
- accumulate_grad_batches (int, optional): The number of batches to accumulate gradident
+ accumulate_grad_batches (int, optional): The number of batches to accumulate gradient
during training process before optimizer updates. It can mimic large batch
size. Default: 1.
num_iters (int|None, optional): The number of iterations to evaluate the model.
@@ -2016,7 +2016,7 @@ def evaluate(
Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.io.Dataset or
- paddle.io.Dataloader is recomended.
+ paddle.io.Dataloader is recommended.
batch_size (int, optional): The batch size of train_data and eval_data.
When eval_data is the instance of Dataloader, this argument will be
ignored. Default: 1.
@@ -2126,7 +2126,7 @@ def predict(
Args:
test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
- is recomended.
+ is recommended.
batch_size (int, optional): The batch size of test_data. When test_data is the
instance of Dataloader, this argument will be ignored. Default: 1.
num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess
@@ -2300,13 +2300,13 @@ def _run_one_epoch(
# Data might come from different types of data_loader and have
# different format, as following:
# 1. DataLoader in static graph:
- # [[input1, input2, ..., label1, lable2, ...]]
+ # [[input1, input2, ..., label1, label2, ...]]
# 2. DataLoader in dygraph
- # [input1, input2, ..., label1, lable2, ...]
+ # [input1, input2, ..., label1, label2, ...]
# 3. custumed iterator yield concated inputs and labels:
- # [input1, input2, ..., label1, lable2, ...]
+ # [input1, input2, ..., label1, label2, ...]
# 4. custumed iterator yield separated inputs and labels:
- # ([input1, input2, ...], [label1, lable2, ...])
+ # ([input1, input2, ...], [label1, label2, ...])
# To handle all of these, flatten (nested) list to list.
data = paddle.utils.flatten(data)
# LoDTensor.shape is callable, where LoDTensor comes from
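Most of the edits in this file touch docstrings of the high-level `Model` workflow (`prepare`, `fit`, `evaluate`, `predict`). A compact usage sketch tying those methods together (the LeNet/MNIST choices are illustrative only; exact argument defaults may differ between Paddle releases):

```python
import paddle
from paddle.static import InputSpec
from paddle.vision.datasets import MNIST
from paddle.vision.models import LeNet
from paddle.vision.transforms import ToTensor

train_ds = MNIST(mode="train", transform=ToTensor())
eval_ds = MNIST(mode="test", transform=ToTensor())

# Entry points of the network, as described in the Model docstring.
inputs = [InputSpec([None, 1, 28, 28], "float32", "image")]
labels = [InputSpec([None, 1], "int64", "label")]

model = paddle.Model(LeNet(), inputs, labels)
model.prepare(
    optimizer=paddle.optimizer.Adam(
        learning_rate=1e-3, parameters=model.parameters()
    ),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)

model.fit(train_ds, eval_ds, batch_size=64, epochs=1, eval_freq=1, shuffle=True)
eval_result = model.evaluate(eval_ds, batch_size=64)
predictions = model.predict(eval_ds, batch_size=64)
```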
14 changes: 7 additions & 7 deletions python/paddle/incubate/asp/supported_layer_list.py
@@ -35,16 +35,16 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
shape = weight_nparray.shape
weight_pruned_nparray = copy.deepcopy(weight_nparray)
weight_sparse_mask = np.ones_like(weight_pruned_nparray)
- exlude_cond_shape2 = len(shape) == 2 and shape[0] < m
- exlude_cond_shape4 = len(shape) == 4 and shape[1] < m
- if exlude_cond_shape2:
+ exclude_cond_shape2 = len(shape) == 2 and shape[0] < m
+ exclude_cond_shape4 = len(shape) == 4 and shape[1] < m
+ if exclude_cond_shape2:
_logger.warning(
'{} is not pruned because the first dimension of {} is smaller than {}'.format(
param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask
- if exlude_cond_shape4:
+ if exclude_cond_shape4:
_logger.warning(
'{} is not pruned because the second dimension of {} is smaller than {}'.format(
param_name, shape, m
@@ -58,12 +58,12 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
# SPMMA in cuSparseLt: D = (AxB) + C, where matrix A (mxk) is sparse matrix.
# cuSparseLt would prune matrix A along k dimension.
# In sparse training, layer weight matrices is viewed sparse matrix A, so
- # the math fomula should be 'Act(WX + b)'. However, default fomula in PaddlePaddle
+ # the math formula should be 'Act(WX + b)'. However, default formula in PaddlePaddle
# is 'Act(XW + b)'. For enabling SPMMA, weights and inputs should be transposed
# for computing, Act( (W^T X^T)^T + b). Therefore, we have to prune alog k dimension
- # of W^T, which is m dimension of W. Moreove, all mask generating functions in
+ # of W^T, which is m dimension of W. Moreover, all mask generating functions in
# asp/utils is row-major pruning. That is the reason we have to transpose weight
- # matrices beforce invoking create_mask. Then we transpose the result mask to make
+ # matrices before invoking create_mask. Then we transpose the result mask to make
# sure its shape to be the same as the input weight.
weight_sparse_mask = asp.create_mask(
weight_nparray.T, func_name=func_name, n=n, m=m
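The comment block fixed above explains the transpose dance: Paddle computes `Act(XW + b)` while cuSparseLt prunes the A operand of `D = (A x B) + C` along k, and the asp mask helpers prune row-major, so the weight is transposed before `create_mask` and the resulting mask is transposed back. A hedged NumPy sketch of that flow with a generic n:m magnitude rule (`create_nm_mask` is a stand-in for `asp.create_mask`, not the actual implementation):

```python
import numpy as np

def create_nm_mask(weight, n=2, m=4):
    """Row-major n:m mask: in each group of m values along the last axis,
    keep the n entries with the largest magnitude."""
    flat = weight.reshape(-1, m)
    mask = np.zeros_like(flat)
    keep = np.argsort(np.abs(flat), axis=1)[:, -n:]  # indices of the n largest
    np.put_along_axis(mask, keep, 1.0, axis=1)
    return mask.reshape(weight.shape)

rng = np.random.default_rng(0)
w = rng.standard_normal((8, 16)).astype("float32")  # dense weight used as XW

# Transpose, prune row-major, then transpose the mask back to W's layout,
# mirroring the flow described in the comment above.
mask = create_nm_mask(w.T, n=2, m=4).T
w_pruned = w * mask
assert mask.shape == w.shape
print(int(mask.sum()), "of", mask.size, "weights kept")  # exactly n/m of the entries
```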