diff --git a/distributed/ddp-tutorial-series/multigpu.py b/distributed/ddp-tutorial-series/multigpu.py
index 7ddb14e524..029731b5d2 100644
--- a/distributed/ddp-tutorial-series/multigpu.py
+++ b/distributed/ddp-tutorial-series/multigpu.py
@@ -19,6 +19,7 @@ def ddp_setup(rank, world_size):
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = "12355"
     init_process_group(backend="nccl", rank=rank, world_size=world_size)
+    torch.cuda.set_device(rank)
 
 class Trainer:
     def __init__(
diff --git a/distributed/ddp-tutorial-series/multigpu_torchrun.py b/distributed/ddp-tutorial-series/multigpu_torchrun.py
index f38eaa5f73..66d8187346 100644
--- a/distributed/ddp-tutorial-series/multigpu_torchrun.py
+++ b/distributed/ddp-tutorial-series/multigpu_torchrun.py
@@ -12,6 +12,7 @@
 
 def ddp_setup():
     init_process_group(backend="nccl")
+    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
 
 class Trainer:
     def __init__(
diff --git a/distributed/ddp-tutorial-series/multinode.py b/distributed/ddp-tutorial-series/multinode.py
index 96e067db77..e80636bcc4 100644
--- a/distributed/ddp-tutorial-series/multinode.py
+++ b/distributed/ddp-tutorial-series/multinode.py
@@ -12,6 +12,7 @@
 
 def ddp_setup():
     init_process_group(backend="nccl")
+    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
 
 class Trainer:
     def __init__(
diff --git a/distributed/minGPT-ddp/mingpt/main.py b/distributed/minGPT-ddp/mingpt/main.py
index 8a2c5e3d9d..861a69e1e1 100644
--- a/distributed/minGPT-ddp/mingpt/main.py
+++ b/distributed/minGPT-ddp/mingpt/main.py
@@ -8,6 +8,7 @@
 
 def ddp_setup():
     init_process_group(backend="nccl")
+    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
 
 def get_train_objs(gpt_cfg: GPTConfig, opt_cfg: OptimizerConfig, data_cfg: DataConfig):
     dataset = CharDataset(data_cfg)
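
For readers applying the same fix to their own scripts, here is a minimal standalone sketch of the torchrun-style setup after this change. The `__main__` body and the `device` variable are illustrative placeholders, not part of this PR; `LOCAL_RANK` is the per-process environment variable that `torchrun` sets:

```python
import os
import torch
from torch.distributed import init_process_group, destroy_process_group


def ddp_setup():
    # torchrun provides MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE,
    # and LOCAL_RANK through environment variables.
    init_process_group(backend="nccl")
    # Bind this process to its own GPU so NCCL collectives and tensors
    # created on the default device do not all land on cuda:0.
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))


if __name__ == "__main__":
    ddp_setup()
    device = torch.device(f"cuda:{int(os.environ['LOCAL_RANK'])}")
    # ... build the model on `device`, wrap it in DDP, run training ...
    destroy_process_group()
```

Launched as, e.g., `torchrun --nproc_per_node=4 script.py`. Calling `torch.cuda.set_device` right after `init_process_group` keeps every rank off the default `cuda:0`; without it, collectives that rely on the current device (e.g. a plain `barrier()`) can emit NCCL warnings or hang when multiple ranks share one GPU.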