diff --git a/python/paddle/distributed/auto_parallel/static/engine.py b/python/paddle/distributed/auto_parallel/static/engine.py
index 853ff7d0aedbf..1bae7ee701993 100644
--- a/python/paddle/distributed/auto_parallel/static/engine.py
+++ b/python/paddle/distributed/auto_parallel/static/engine.py
@@ -79,39 +79,39 @@ class Engine:
         .. code-block:: python
 
-            import paddle
-            import paddle.vision.transforms as T
-            from paddle.distributed.fleet import auto
-            from paddle.vision.datasets import MNIST
-
-            transform = T.Compose([
-                T.Transpose(),
-                T.Normalize([127.5], [127.5])
-            ])
-            train_dataset = MNIST(mode='train', transform=transform)
-            valid_dataset = MNIST(mode='test', transform=transform)
-
-            model = paddle.vision.models.LeNet()
-            loss = paddle.nn.CrossEntropyLoss()
-            optimizer = paddle.optimizer.Adam(
-                learning_rate=0.001, parameters=model.parameters())
-            metrics = paddle.metric.Accuracy(topk=(1, 2))
-
-            engine = auto.Engine(model, loss, optimizer, metrics)
-            # fit
-            engine.fit(train_dataset,
-                       epochs=2,
-                       batch_size=64)
-            # evaluate
-            engine.evaluate(valid_dataset,
-                            batch_size=64)
-            # predict
-            engine.predict(valid_dataset,
-                           batch_size=64)
-            # save
-            engine.save("./my_model")
-            # load
-            engine.load("./my_model")
+            >>> import paddle
+            >>> import paddle.vision.transforms as T
+            >>> from paddle.distributed.fleet import auto
+            >>> from paddle.vision.datasets import MNIST
+
+            >>> transform = T.Compose([
+            ...     T.Transpose(),
+            ...     T.Normalize([127.5], [127.5])
+            ... ])
+            >>> train_dataset = MNIST(mode='train', transform=transform)
+            >>> valid_dataset = MNIST(mode='test', transform=transform)
+
+            >>> model = paddle.vision.models.LeNet()
+            >>> loss = paddle.nn.CrossEntropyLoss()
+            >>> optimizer = paddle.optimizer.Adam(
+            ...     learning_rate=0.001, parameters=model.parameters())
+            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
+
+            >>> engine = auto.Engine(model, loss, optimizer, metrics)
+            >>> # fit
+            >>> engine.fit(train_dataset,
+            ...     epochs=2,
+            ...     batch_size=64)
+            >>> # evaluate
+            >>> engine.evaluate(valid_dataset,
+            ...     batch_size=64)
+            >>> # predict
+            >>> engine.predict(valid_dataset,
+            ...     batch_size=64)
+            >>> # save
+            >>> engine.save("./my_model")
+            >>> # load
+            >>> engine.load("./my_model")
 
     """
 
@@ -916,27 +916,27 @@ def fit(
             .. code-block:: python
 
-                import paddle
-                import paddle.vision.transforms as T
-                from paddle.distributed.fleet import auto
-                from paddle.vision.datasets import MNIST
-
-                transform = T.Compose([
-                    T.Transpose(),
-                    T.Normalize([127.5], [127.5])
-                ])
-                train_dataset = MNIST(mode='train', transform=transform)
-
-                model = paddle.vision.models.LeNet()
-                loss = paddle.nn.CrossEntropyLoss()
-                optimizer = paddle.optimizer.Adam(
-                    learning_rate=0.001, parameters=model.parameters())
-                metrics = paddle.metric.Accuracy(topk=(1, 2))
-
-                engine = auto.Engine(model, loss, optimizer, metrics)
-                engine.fit(train_dataset,
-                           epochs=2,
-                           batch_size=64)
+                >>> import paddle
+                >>> import paddle.vision.transforms as T
+                >>> from paddle.distributed.fleet import auto
+                >>> from paddle.vision.datasets import MNIST
+
+                >>> transform = T.Compose([
+                ...     T.Transpose(),
+                ...     T.Normalize([127.5], [127.5])
+                ... ])
+                >>> train_dataset = MNIST(mode='train', transform=transform)
+
+                >>> model = paddle.vision.models.LeNet()
+                >>> loss = paddle.nn.CrossEntropyLoss()
+                >>> optimizer = paddle.optimizer.Adam(
+                ...     learning_rate=0.001, parameters=model.parameters())
+                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
+
+                >>> engine = auto.Engine(model, loss, optimizer, metrics)
+                >>> engine.fit(train_dataset,
+                ...     epochs=2,
+                ...     batch_size=64)
 
         """
         self._mode = 'train'
         self._inputs_spec, self._labels_spec = self._prepare_data_spec(
@@ -1069,23 +1069,23 @@ def evaluate(
             .. code-block:: python
 
-                import paddle
-                import paddle.vision.transforms as T
-                from paddle.distributed.fleet import auto
-                from paddle.vision.datasets import MNIST
+                >>> import paddle
+                >>> import paddle.vision.transforms as T
+                >>> from paddle.distributed.fleet import auto
+                >>> from paddle.vision.datasets import MNIST
 
-                transform = T.Compose([
-                    T.Transpose(),
-                    T.Normalize([127.5], [127.5])
-                ])
-                valid_dataset = MNIST(mode='test', transform=transform)
+                >>> transform = T.Compose([
+                ...     T.Transpose(),
+                ...     T.Normalize([127.5], [127.5])
+                ... ])
+                >>> valid_dataset = MNIST(mode='test', transform=transform)
 
-                model = paddle.vision.models.LeNet()
-                loss = paddle.nn.CrossEntropyLoss()
-                metrics = paddle.metric.Accuracy(topk=(1, 2))
+                >>> model = paddle.vision.models.LeNet()
+                >>> loss = paddle.nn.CrossEntropyLoss()
+                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
 
-                engine = auto.Engine(model, loss, metrics=metrics)
-                engine.evaluate(valid_dataset, batch_size=64)
+                >>> engine = auto.Engine(model, loss, metrics=metrics)
+                >>> engine.evaluate(valid_dataset, batch_size=64)
 
         """
         self._mode = 'eval'
@@ -1179,21 +1179,21 @@ def predict(
             .. code-block:: python
 
-                import paddle
-                import paddle.vision.transforms as T
-                from paddle.distributed.fleet import auto
-                from paddle.vision.datasets import MNIST
+                >>> import paddle
+                >>> import paddle.vision.transforms as T
+                >>> from paddle.distributed.fleet import auto
+                >>> from paddle.vision.datasets import MNIST
 
-                transform = T.Compose([
-                    T.Transpose(),
-                    T.Normalize([127.5], [127.5])
-                ])
-                valid_dataset = MNIST(mode='test', transform=transform)
+                >>> transform = T.Compose([
+                ...     T.Transpose(),
+                ...     T.Normalize([127.5], [127.5])
+                ... ])
+                >>> valid_dataset = MNIST(mode='test', transform=transform)
 
-                model = paddle.vision.models.LeNet()
+                >>> model = paddle.vision.models.LeNet()
 
-                engine = auto.Engine(model)
-                engine.predict(valid_dataset, batch_size=64)
+                >>> engine = auto.Engine(model)
+                >>> engine.predict(valid_dataset, batch_size=64)
 
         """
         self._mode = 'predict'
         self._inputs_spec, self._labels_spec = self._prepare_data_spec(
@@ -1648,28 +1648,29 @@ def save(self, path, training=True):
         Examples:
             .. code-block:: python
 
-                import paddle
-                import paddle.vision.transforms as T
-                from paddle.distributed.fleet import auto
-                from paddle.vision.datasets import MNIST
-
-                transform = T.Compose([
-                    T.Transpose(),
-                    T.Normalize([127.5], [127.5])
-                ])
-                train_dataset = MNIST(mode='train', transform=transform)
-
-                model = paddle.vision.models.LeNet()
-                loss = paddle.nn.CrossEntropyLoss()
-                optimizer = paddle.optimizer.Adam(
-                    learning_rate=0.001, parameters=model.parameters())
-                metrics = paddle.metric.Accuracy(topk=(1, 2))
-
-                engine = auto.Engine(model, loss, optimizer, metrics)
-                engine.fit(train_dataset,
-                           epochs=1,
-                           batch_size=64)
-                engine.save("./my_model")
+
+                >>> import paddle
+                >>> import paddle.vision.transforms as T
+                >>> from paddle.distributed.fleet import auto
+                >>> from paddle.vision.datasets import MNIST
+
+                >>> transform = T.Compose([
+                ...     T.Transpose(),
+                ...     T.Normalize([127.5], [127.5])
+                ... ])
+                >>> train_dataset = MNIST(mode='train', transform=transform)
+
+                >>> model = paddle.vision.models.LeNet()
+                >>> loss = paddle.nn.CrossEntropyLoss()
+                >>> optimizer = paddle.optimizer.Adam(
+                ...     learning_rate=0.001, parameters=model.parameters())
+                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
+
+                >>> engine = auto.Engine(model, loss, optimizer, metrics)
+                >>> engine.fit(train_dataset,
+                ...     epochs=1,
+                ...     batch_size=64)
+                >>> engine.save("./my_model")
 
         """
         if training:
@@ -1732,29 +1733,30 @@ def load(self, path, strict=True, load_optimizer=True):
         Examples:
             .. code-block:: python
 
-                import paddle
-                import paddle.vision.transforms as T
-                from paddle.distributed.fleet import auto
-                from paddle.vision.datasets import MNIST
-
-                transform = T.Compose([
-                    T.Transpose(),
-                    T.Normalize([127.5], [127.5])
-                ])
-                train_dataset = MNIST(mode='train', transform=transform)
-
-                model = paddle.vision.models.LeNet()
-                loss = paddle.nn.CrossEntropyLoss()
-                optimizer = paddle.optimizer.Adam(
-                    learning_rate=0.001, parameters=model.parameters())
-                metrics = paddle.metric.Accuracy(topk=(1, 2))
-
-                engine = auto.Engine(model, loss, optimizer, metrics)
-                engine.fit(train_dataset,
-                           epochs=1,
-                           batch_size=64)
-                engine.save("./my_model")
-                engine.load("./my_model")
+
+                >>> import paddle
+                >>> import paddle.vision.transforms as T
+                >>> from paddle.distributed.fleet import auto
+                >>> from paddle.vision.datasets import MNIST
+
+                >>> transform = T.Compose([
+                ...     T.Transpose(),
+                ...     T.Normalize([127.5], [127.5])
+                ... ])
+                >>> train_dataset = MNIST(mode='train', transform=transform)
+
+                >>> model = paddle.vision.models.LeNet()
+                >>> loss = paddle.nn.CrossEntropyLoss()
+                >>> optimizer = paddle.optimizer.Adam(
+                ...     learning_rate=0.001, parameters=model.parameters())
+                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
+
+                >>> engine = auto.Engine(model, loss, optimizer, metrics)
+                >>> engine.fit(train_dataset,
+                ...     epochs=1,
+                ...     batch_size=64)
+                >>> engine.save("./my_model")
+                >>> engine.load("./my_model")
 
         """
         self._strict = strict
diff --git a/python/paddle/distributed/auto_parallel/static/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
index 23fe66ab4bd28..76b3ead1e3bdd 100644
--- a/python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
+++ b/python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
@@ -33,14 +33,13 @@ class ProcessMesh(core.ProcessMesh):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.distributed as dist
+            >>> import paddle
+            >>> import paddle.distributed as dist
+            >>> paddle.enable_static()
 
-            paddle.enable_static()
-
-            mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]])
-            assert mesh.shape == [2, 3]
-            assert mesh.processe_ids == [2, 4, 5, 0, 1, 3]
+            >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]])
+            >>> assert mesh.shape == [2, 3]
+            >>> assert mesh.process_ids == [2, 4, 5, 0, 1, 3]
 
     """
 
diff --git a/python/paddle/distributed/auto_parallel/static/utils.py b/python/paddle/distributed/auto_parallel/static/utils.py
index a5e19baf7cf12..80aac2e257530 100644
--- a/python/paddle/distributed/auto_parallel/static/utils.py
+++ b/python/paddle/distributed/auto_parallel/static/utils.py
@@ -611,10 +611,19 @@ def save_distributed_checkpoint(
     Examples:
        .. code-block:: python
 
-            path = os.path.join("./output", "step_%d" % step)
-            os.makedirs(path, exist_ok=True)
-            add_info = {'batch': step, "batch_size": global_batch_size}
-            save_distributed_checkpoint(program, path, path, add_info)
+            >>> import os
+            >>> import paddle
+            >>> from paddle.distributed.auto_parallel.static.utils import save_distributed_checkpoint
+
+            >>> step = 16000
+            >>> global_batch_size = 32
+            >>> path = os.path.join("./output", "step_%d" % step)
+            >>> os.makedirs(path, exist_ok=True)
+            >>> program = paddle.static.Program()
+
+            >>> add_info = {'batch': step, "batch_size": global_batch_size}
+            >>> save_distributed_checkpoint(program, path, path, add_info)
+
    """
    from .dist_context import get_default_distributed_context
 
@@ -653,11 +661,18 @@ def load_distributed_checkpoint(checkpoint_path, dist_attr_path):
     Examples:
         .. code-block:: python
 
-            ckpt_path = ['./model_state_rank0.pdmodel',
-                         './model_state_rank1.pdmodel']
-            dist_attr_path = ['./dist_attr_rank0.pdattr',
-                              './dist_attr_rank1.pdattr']
-            param_dict, dist_attr, add_info = load_distributed_checkpoint(ckpt_path, dist_attr_path)
+            >>> # doctest: +SKIP('Depends on external files.')
+            >>> from paddle.distributed.auto_parallel.static.utils import load_distributed_checkpoint
+
+            >>> ckpt_path = [
+            ...     './model_state_rank0.pdmodel',
+            ...     './model_state_rank1.pdmodel',
+            ... ]
+            >>> dist_attr_path = [
+            ...     './dist_attr_rank0.pdattr',
+            ...     './dist_attr_rank1.pdattr',
+            ... ]
+            >>> param_dict, dist_attr, add_info = load_distributed_checkpoint(ckpt_path, dist_attr_path)
    """
    assert _check_valid_path(
        checkpoint_path
@@ -692,12 +707,19 @@ def load_checkpoint_into_program(
     Examples:
         .. code-block:: python
 
-            exe.run(startup_program)
-            ckpt_path = ['./model_state_rank0.pdmodel',
-                         './model_state_rank1.pdmodel']
-            dist_attr_path = ['./dist_attr_rank0.pdattr',
-                              './dist_attr_rank1.pdattr']
-            load_checkpoint_into_program(ckpt_path, dist_attr_path, main_program)
+            >>> # doctest: +SKIP('Depends on external files.')
+            >>> from paddle.distributed.auto_parallel.static.utils import load_checkpoint_into_program
+
+            >>> exe.run(startup_program)
+            >>> ckpt_path = [
+            ...     './model_state_rank0.pdmodel',
+            ...     './model_state_rank1.pdmodel',
+            ... ]
+            >>> dist_attr_path = [
+            ...     './dist_attr_rank0.pdattr',
+            ...     './dist_attr_rank1.pdattr',
+            ... ]
+            >>> load_checkpoint_into_program(ckpt_path, dist_attr_path, main_program)
    """
    from .dist_context import get_default_distributed_context
 
@@ -999,13 +1021,18 @@ def _merge_parameter(
     Examples:
         .. code-block:: python
 
-            import numpy as np
-            partition_param_list = [(np.array([[[1.11, 1.12]]]), [[0,1],[0,1],[0,2]])]
-            param = np.array([[[1.13, 1.14]]])
-            partition_index = [[0,1],[0,1],[2,4]]
+            >>> import numpy as np
+            >>> from paddle.distributed.auto_parallel.static.utils import _merge_parameter
+
+            >>> partition_param_list = [(np.array([[[1.11, 1.12]]]), [[0, 1], [0, 1], [0, 2]])]
+            >>> param = np.array([[[1.13, 1.14]]])
+            >>> partition_index = [[0, 1], [0, 1], [2, 4]]
+            >>> complete_shape = [2, 2, 4]
+
+            >>> _merge_parameter(partition_param_list, param, partition_index, complete_shape)
+            >>> print(partition_param_list)
+            [(array([[[1.11, 1.12, 1.13, 1.14]]]), [[0, 1], [0, 1], [0, 4]])]
 
-            _merge_parameter(partition_param_list, param, partition_index)
-            # partition_param_list: [(np.array([[[1.11, 1.12, 1.13, 1.14]]]), [[0,1],[0,1],[0,4]])]
    """
    from .reshard import Resharder
 
@@ -1061,16 +1088,20 @@ def _slice_parameter(complete_param, partition_index_list, length):
     Examples:
        .. code-block:: python
 
-            import numpy as np
-            complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-            rank = 2
-            complete_shape = [1, 1, 6]
-            dims_mapping = [-1, -1, 0]
-            process_shape = [3]
-            process_group = [0, 1, 2]
+            >>> import numpy as np
+            >>> from paddle.distributed.auto_parallel.static.utils import _slice_parameter
+
+            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+            >>> rank = 2
+            >>> complete_shape = [1, 1, 6]
+            >>> dims_mapping = [-1, -1, 0]
+            >>> process_shape = [3]
+            >>> process_group = [0, 1, 2]
+
+            >>> sliced_param_list = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
+            >>> print(sliced_param_list)
+            [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
 
-            sliced_param_list = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
-            # [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
    """
    sliced_param_list = []
    axis = len(complete_param.shape) - length
@@ -1098,21 +1129,24 @@ def _get_sliced_param_index(
     Examples:
         .. code-block:: python
 
-            import numpy as np
-            complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-            rank = 2
-            complete_shape = [1, 1, 6]
-            dims_mapping = [-1, -1, 0]
-            process_shape = [3]
-            process_group = [0, 1, 2]
-
-            slice_param = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
-            # slice_param:
-            # [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
-
-            index = _get_sliced_param_index(rank, complete_shape, dims_mapping
-                                            process_shape, process_group)
-            # index: 2
+            >>> import numpy as np
+            >>> from paddle.distributed.auto_parallel.static.utils import _get_sliced_param_index, _slice_parameter
+
+            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+            >>> rank = 2
+            >>> complete_shape = [1, 1, 6]
+            >>> dims_mapping = [-1, -1, 0]
+            >>> process_shape = [3]
+            >>> process_group = [0, 1, 2]
+
+            >>> slice_param = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
+            >>> print(slice_param)
+            [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
+
+            >>> index = _get_sliced_param_index(rank, complete_shape, dims_mapping,
+            ...     process_shape, process_group)
+            >>> print(index)
+            2
    """
    from .reshard import Resharder
 
@@ -1145,15 +1179,18 @@ def _get_split_indices(
     Examples:
        .. code-block:: python
 
-            import numpy as np
-            complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
-            complete_shape = [1, 1, 6]
-            dims_mapping = [-1, -1, 0]
-            process_shape = [3]
-            process_group = [0, 1, 2]
+            >>> import numpy as np
+            >>> from paddle.distributed.auto_parallel.static.utils import _get_split_indices
+
+            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
+            >>> complete_shape = [1, 1, 6]
+            >>> dims_mapping = [-1, -1, 0]
+            >>> process_shape = [3]
+            >>> process_group = [0, 1, 2]
 
-            index = _get_split_indices(complete_shape, dims_mapping, process_shape, process_group)
-            # index: [[], [], [2, 4]]
+            >>> index = _get_split_indices(complete_shape, dims_mapping, process_shape, process_group)
+            >>> print(index)
+            [[], [], [2, 4]]
    """
    from .reshard import Resharder
 
@@ -2410,19 +2447,26 @@ def wrap_data_for_completion(
         output_specs: list, DistTensorSpec for each output tensor of the dist_op
         attrs: dict, attribute map of the dist op
 
-    Usage:
-      op_desc = dist_op.serial_op.desc
-      input_name_list = []
-      output_name_list = []
-      input_name_list.append(op_desc.input('X')[0]) # 'X' is the arg name for op
-      input_name_list.append(op_desc.input('Y')[0])
-      output_name_list.append(op_desc.output('Out')[0])
-      attr_name_list = ['trans_x', 'trans_y']
-      input_specs, output_specs, attrs = wrap_data_for_completion(
-          dist_op,
-          input_name_list,
-          output_name_list,
-          attr_name_list)
+    Examples:
+        .. code-block:: python
+
+            >>> # doctest: +SKIP('Depends on other ops.')
+            >>> from paddle.distributed.auto_parallel.static.utils import wrap_data_for_completion
+
+            >>> op_desc = dist_op.serial_op.desc
+            >>> input_name_list = []
+            >>> output_name_list = []
+
+            >>> input_name_list.append(op_desc.input('X')[0]) # 'X' is the arg name for op
+            >>> input_name_list.append(op_desc.input('Y')[0])
+            >>> output_name_list.append(op_desc.output('Out')[0])
+
+            >>> attr_name_list = ['trans_x', 'trans_y']
+            >>> input_specs, output_specs, attrs = wrap_data_for_completion(
+            ...     dist_op,
+            ...     input_name_list,
+            ...     output_name_list,
+            ...     attr_name_list)
 
    """
 
diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py
index 8392ef7f37633..36607f0903bc7 100644
--- a/python/paddle/distributed/auto_parallel/strategy.py
+++ b/python/paddle/distributed/auto_parallel/strategy.py
@@ -152,20 +152,21 @@ class Strategy(BaseConfig):
     Examples:
        .. code-block:: python
 
-            import paddle
-            from paddle.distributed.fleet import auto
-
-            strategy = auto.Strategy()
-            sharding = strategy.sharding
-            self.assertEqual(sharding.enabled, False)
-            self.assertEqual(sharding.stage, 1)
-            self.assertEqual(sharding.degree, 8)
-            sharding.enabled = True
-            sharding.stage = 2
-            sharding.degree = 2
-            self.assertEqual(sharding.enabled, True)
-            self.assertEqual(sharding.stage, 2)
-            self.assertEqual(sharding.degree, 2)
+            >>> import paddle
+            >>> from paddle.distributed.fleet import auto
+
+            >>> strategy = auto.Strategy()
+            >>> sharding = strategy.sharding
+            >>> assert sharding.enable == False
+            >>> assert sharding.stage == 1
+            >>> assert sharding.degree == 8
+
+            >>> sharding.enable = True
+            >>> sharding.stage = 2
+            >>> sharding.degree = 2
+            >>> assert sharding.enable == True
+            >>> assert sharding.stage == 2
+            >>> assert sharding.degree == 2
 
    """
 
diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index a14d67429e917..e27557df6af9c 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -188,12 +188,13 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout):
     Examples:
         .. code-block:: python
 
-            import paddle
+            >>> # doctest: +REQUIRES(env:DISTRIBUTED)
+            >>> import paddle
 
-            paddle.distributed.init_parallel_env()
-            tindata = paddle.randn(shape=[2, 3])
-            gp = paddle.distributed.new_group([2,4,6])
-            paddle.distributed.all_reduce(tindata, group=gp, sync_op=False)
+            >>> paddle.distributed.init_parallel_env()
+            >>> tindata = paddle.randn(shape=[2, 3])
+            >>> gp = paddle.distributed.new_group([2, 4, 6])
+            >>> paddle.distributed.all_reduce(tindata, group=gp, sync_op=False)
 
    """
    global _custom_gid
 
@@ -310,9 +311,8 @@ def is_available():
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            print(paddle.distributed.is_available())
+            >>> import paddle
+            >>> print(paddle.distributed.is_available())
 
    """
    return core.is_compiled_with_dist()