Related to #2114: reformat docstrings in the 'distributed' folder #2218

Merged · 5 commits · Sep 27, 2021
83 changes: 39 additions & 44 deletions ignite/distributed/auto.py
@@ -35,21 +35,6 @@ def auto_dataloader(dataset: Dataset, **kwargs: Any) -> Union[DataLoader, "_MpDe
A custom batch sampler is not adapted for the distributed configuration. Please make sure that the provided
batch sampler is compatible with the distributed configuration.

Examples:

.. code-block:: python

import ignite.distributed as idist

train_loader = idist.auto_dataloader(
train_dataset,
batch_size=32,
num_workers=4,
shuffle=True,
pin_memory="cuda" in idist.device().type,
drop_last=True,
)

Args:
dataset: input torch dataset. If the input dataset is a `torch IterableDataset`_ then the dataloader will be
created without any distributed sampling. Please make sure that the dataset itself produces
@@ -59,6 +44,20 @@ def auto_dataloader(dataset: Dataset, **kwargs: Any) -> Union[DataLoader, "_MpDe
Returns:
`torch DataLoader`_ or `XLA MpDeviceLoader`_ for XLA devices

Examples:
.. code-block:: python

import ignite.distributed as idist

train_loader = idist.auto_dataloader(
train_dataset,
batch_size=32,
num_workers=4,
shuffle=True,
pin_memory="cuda" in idist.device().type,
drop_last=True,
)

.. _torch DataLoader: https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
.. _XLA MpDeviceLoader: https://github.com/pytorch/xla/blob/master/torch_xla/distributed/parallel_loader.py#L178
.. _torch DistributedSampler:
@@ -150,23 +149,6 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
- wrap the model in `torch DataParallel`_ if no distributed context is found and more than one CUDA device is available.
- broadcast the initial variable states from rank 0 to all other processes if the Horovod distributed framework is used.

Examples:

.. code-block:: python

import ignite.distributed as idist

model = idist.auto_model(model)

In addition, with NVidia/Apex it can be used in the following way:

.. code-block:: python

import ignite.distributed as idist

model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
model = idist.auto_model(model)

Args:
model: model to adapt.
sync_bn: if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
@@ -178,6 +160,22 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
Returns:
torch.nn.Module

Examples:
.. code-block:: python

import ignite.distributed as idist

model = idist.auto_model(model)

In addition, with NVidia/Apex it can be used in the following way:

.. code-block:: python

import ignite.distributed as idist

model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
model = idist.auto_model(model)

.. _torch DistributedDataParallel: https://pytorch.org/docs/stable/generated/torch.nn.parallel.
DistributedDataParallel.html
.. _torch DataParallel: https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html
@@ -246,21 +244,20 @@ def auto_optim(optimizer: Optimizer, **kwargs: Any) -> Optimizer:
For the Horovod distributed configuration, the optimizer is wrapped with the Horovod Distributed Optimizer and
its state is broadcast from rank 0 to all other processes.

Examples:

.. code-block:: python

import ignite.distributed as idist

optimizer = idist.auto_optim(optimizer)

Args:
optimizer: input torch optimizer
kwargs: kwargs to Horovod backend's DistributedOptimizer.

Returns:
Optimizer

Examples:
.. code-block:: python

import ignite.distributed as idist

optimizer = idist.auto_optim(optimizer)

.. _xm.optimizer_step: http://pytorch.org/xla/release/1.5/index.html#torch_xla.core.xla_model.optimizer_step

.. versionchanged:: 0.4.2
@@ -289,15 +286,13 @@ class DistributedProxySampler(DistributedSampler):

Code is based on https://github.com/pytorch/pytorch/issues/23430#issuecomment-562350407


.. note::
Input sampler is assumed to have a constant size.

Args:
sampler: Input torch data sampler.
num_replicas: Number of processes participating in distributed training.
rank: Rank of the current process within ``num_replicas``.

.. note::
Input sampler is assumed to have a constant size.
"""

def __init__(self, sampler: Sampler, num_replicas: Optional[int] = None, rank: Optional[int] = None) -> None:
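For reference, a minimal sketch combining the three helpers documented in this file; the toy dataset, model, and hyperparameters are illustrative assumptions and are not part of the diff.

.. code-block:: python

    # Sketch only: adapt a toy dataset, model and optimizer to whatever
    # distributed configuration (if any) is currently active.
    import torch
    import torch.nn as nn
    import ignite.distributed as idist

    def training(local_rank):
        device = idist.device()

        # Assumed toy data and model, purely for illustration.
        train_dataset = torch.utils.data.TensorDataset(
            torch.randn(256, 10), torch.randint(0, 2, (256,))
        )
        model = nn.Linear(10, 2)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

        # The helpers adapt each object to the current distributed setup
        # (or return it essentially unchanged when no backend is configured).
        train_loader = idist.auto_dataloader(train_dataset, batch_size=32, shuffle=True)
        model = idist.auto_model(model)
        optimizer = idist.auto_optim(optimizer)

        criterion = nn.CrossEntropyLoss().to(device)
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    if __name__ == "__main__":
        training(local_rank=0)  # single-process run; use idist.Parallel to scale out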
6 changes: 3 additions & 3 deletions ignite/distributed/comp_models/native.py
@@ -431,13 +431,13 @@ def _expand_hostlist(nodelist: str) -> List[str]:

Source : https://github.com/LLNL/py-hostlist/blob/master/hostlist/hostlist.py

Args:
nodelist: Compressed hostlist string

.. note::
The host names can be composed of any character except the special ones `[`, `]`, `,`. Only one
sequence `[...]` is supported per hostname.

Args:
nodelist: Compressed hostlist string

.. versionadded:: 0.4.6
"""
result_hostlist = []
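The note above describes the accepted hostlist syntax; the following standalone sketch (an assumed equivalent, not ignite's implementation) shows the kind of expansion ``_expand_hostlist`` performs for at most one `[...]` sequence per hostname.

.. code-block:: python

    # Standalone illustration of compressed-hostlist expansion.
    import re
    from typing import List

    def expand_hostlist(nodelist: str) -> List[str]:
        hosts: List[str] = []
        # Split on commas that are not inside a [...] sequence.
        for item in re.split(r",(?![^\[]*\])", nodelist):
            match = re.match(r"(.*)\[([^\]]+)\](.*)", item)
            if match is None:
                hosts.append(item)
                continue
            prefix, ranges, suffix = match.groups()
            for part in ranges.split(","):
                if "-" in part:
                    low, high = part.split("-")
                    width = len(low)  # preserve zero-padding such as "01"
                    hosts.extend(
                        f"{prefix}{i:0{width}d}{suffix}"
                        for i in range(int(low), int(high) + 1)
                    )
                else:
                    hosts.append(f"{prefix}{part}{suffix}")
        return hosts

    print(expand_hostlist("node[1-3,7],gpu-host"))
    # ['node1', 'node2', 'node3', 'node7', 'gpu-host']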
73 changes: 35 additions & 38 deletions ignite/distributed/launcher.py
@@ -25,8 +25,31 @@ class Parallel:
2) Only initialize a process group given the ``backend``
(useful with tools like `torch.distributed.launch`_, `horovodrun`_, etc.).

Examples:
Args:
backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
configuration.
nproc_per_node: optional argument, number of processes per
node to specify. If not None, :meth:`~ignite.distributed.launcher.Parallel.run`
will spawn ``nproc_per_node`` processes that run input function with its arguments.
nnodes: optional argument, number of nodes participating in distributed configuration.
If not None, :meth:`~ignite.distributed.launcher.Parallel.run` will spawn ``nproc_per_node``
processes that run input function with its arguments. Total world size is `nproc_per_node * nnodes`.
This option is only supported by the native torch distributed module. For other modules, please set up
``spawn_kwargs`` with backend-specific arguments.
node_rank: optional argument, current machine index. Mandatory argument if ``nnodes`` is
specified and larger than one.
This option is only supported by the native torch distributed module. For other modules, please set up
``spawn_kwargs`` with backend-specific arguments.
master_addr: optional argument, master node TCP/IP address for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``nnodes`` is specified and larger than one.
master_port: optional argument, master node port for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``master_addr`` is specified.
init_method: optional argument to specify the process group initialization method for torch native
backends (`nccl`, `gloo`). Default: "env://".
See more info: `dist.init_process_group`_.
spawn_kwargs: kwargs to ``idist.spawn`` function.

Examples:
1) Single node or Multi-node, Multi-GPU training launched with `torch.distributed.launch`_ or `horovodrun`_
tools

@@ -172,31 +195,6 @@ def training(local_rank, config, **kwargs):

.. _torch.distributed.launch: https://pytorch.org/docs/stable/distributed.html#launch-utility
.. _horovodrun: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run

Args:
backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
configuration.
nproc_per_node: optional argument, number of processes per
node to specify. If not None, :meth:`~ignite.distributed.launcher.Parallel.run`
will spawn ``nproc_per_node`` processes that run input function with its arguments.
nnodes: optional argument, number of nodes participating in distributed configuration.
If not None, :meth:`~ignite.distributed.launcher.Parallel.run` will spawn ``nproc_per_node``
processes that run input function with its arguments. Total world size is `nproc_per_node * nnodes`.
This option is only supported by the native torch distributed module. For other modules, please set up
``spawn_kwargs`` with backend-specific arguments.
node_rank: optional argument, current machine index. Mandatory argument if ``nnodes`` is
specified and larger than one.
This option is only supported by the native torch distributed module. For other modules, please set up
``spawn_kwargs`` with backend-specific arguments.
master_addr: optional argument, master node TCP/IP address for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``nnodes`` is specified and larger than one.
master_port: optional argument, master node port for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``master_addr`` is specified.
init_method: optional argument to specify the process group initialization method for torch native
backends (`nccl`, `gloo`). Default: "env://".
See more info: `dist.init_process_group`_.
spawn_kwargs: kwargs to ``idist.spawn`` function.

.. _dist.init_process_group: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group
.. versionchanged:: 0.4.2
``backend`` now accepts `horovod` distributed framework.
@@ -282,24 +280,23 @@ def _setup_spawn_params(
def run(self, func: Callable, *args: Any, **kwargs: Any) -> None:
"""Execute ``func`` with provided arguments in distributed context.

Example

.. code-block:: python

def training(local_rank, config, **kwargs):
# ...
print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
# ...

with idist.Parallel(backend=backend) as parallel:
parallel.run(training, config, a=1, b=2)

Args:
func: function to execute. First argument of the function should be `local_rank` - local process
index.
args: positional arguments of ``func`` (without `local_rank`).
kwargs: keyword arguments of ``func``.

Examples:
.. code-block:: python

def training(local_rank, config, **kwargs):
# ...
print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
# ...

with idist.Parallel(backend=backend) as parallel:
parallel.run(training, config, a=1, b=2)

"""
if self._spawn_params is not None and self.backend is not None:
self._logger.info( # type: ignore[attr-defined]
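To complement the docstring examples above, here is a minimal runnable sketch of the ``Parallel`` launcher; the `gloo` backend, the process count, and the toy ``training`` body are assumptions chosen for illustration.

.. code-block:: python

    # Sketch: spawn 2 local processes with the "gloo" backend (assumed values)
    # and execute `training` in each of them via Parallel.run.
    import ignite.distributed as idist

    def training(local_rank, config, **kwargs):
        # Parallel.run always passes the local process index first; `config`
        # and the keyword arguments come from the call below.
        print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())

    if __name__ == "__main__":
        config = {"batch_size": 32}
        with idist.Parallel(backend="gloo", nproc_per_node=2) as parallel:
            parallel.run(training, config, a=1, b=2)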