You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I get an error when I use 2 GPUs to train the model on ImageNet. I followed the steps in the README, but I got the following error.
Traceback (most recent call last):
File "imagenet.py", line 340, in <module>
main()
File "imagenet.py", line 336, in main
engine.train(h, iter_train, opt.epochs, optimizer)
File "/usr/local/lib/python3.6/site-packages/torchnet/engine/engine.py", line 63, in train
state['optimizer'].step(closure)
File "/usr/local/lib/python3.6/site-packages/torch/optim/sgd.py", line 80, in step
loss = closure()
File "/usr/local/lib/python3.6/site-packages/torchnet/engine/engine.py", line 52, in closure
loss, output = state['network'](state['sample'])
File "imagenet.py", line 265, in h
y_s, y_t, loss_groups = utils.data_parallel(f, inputs, params, mode, range(opt.ngpu))
File "/opt/ml/job/utils.py", line 64, in data_parallel
return gather(outputs, output_device)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 68, in gather
return gather_map(outputs)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 63, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 63, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 55, in gather_map
return Gather.apply(target_device, dim, *outputs)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 54, in forward
ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 54, in <lambda>
ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
RuntimeError: dimension specified as 0 but tensor has no dimensions
I would appreciate any suggestions on how to fix this. Thank you.
The text was updated successfully, but these errors were encountered:
I get an error when I use 2 GPUs to train the model on ImageNet. I followed the steps in the README, but I got the following error.
Traceback (most recent call last):
File "imagenet.py", line 340, in <module>
main()
File "imagenet.py", line 336, in main
engine.train(h, iter_train, opt.epochs, optimizer)
File "/usr/local/lib/python3.6/site-packages/torchnet/engine/engine.py", line 63, in train
state['optimizer'].step(closure)
File "/usr/local/lib/python3.6/site-packages/torch/optim/sgd.py", line 80, in step
loss = closure()
File "/usr/local/lib/python3.6/site-packages/torchnet/engine/engine.py", line 52, in closure
loss, output = state['network'](state['sample'])
File "imagenet.py", line 265, in h
y_s, y_t, loss_groups = utils.data_parallel(f, inputs, params, mode, range(opt.ngpu))
File "/opt/ml/job/utils.py", line 64, in data_parallel
return gather(outputs, output_device)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 68, in gather
return gather_map(outputs)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 63, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 63, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 55, in gather_map
return Gather.apply(target_device, dim, *outputs)
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 54, in forward
ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
File "/usr/local/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 54, in <lambda>
ctx.input_sizes = tuple(map(lambda i: i.size(ctx.dim), inputs))
RuntimeError: dimension specified as 0 but tensor has no dimensions
I would appreciate any suggestions on how to fix this. Thank you.
The text was updated successfully, but these errors were encountered: