[AMP] For amp.decorate() optimizers set to None is ok #37541

Merged 4 commits on Nov 29, 2021
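
In short, this change lets paddle.amp.decorate be called without an optimizer: when optimizers is left as None, only the decorated model(s) are returned instead of a (models, optimizers) tuple. A minimal usage sketch, adapted from the Demo3 docstring added below (it assumes a GPU build, since O2 decoration casts parameters to FP16):

import paddle

model = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)

# With this change, no optimizer is needed: decorate() returns just the model.
model = paddle.amp.decorate(models=model, level='O2')

data = paddle.rand([10, 3, 32, 32])
with paddle.amp.auto_cast(enable=True, level='O2'):
    output = model(data)
    print(output.dtype)  # FP16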
13 changes: 13 additions & 0 deletions python/paddle/amp/auto_cast.py
@@ -129,5 +129,18 @@ def decorate(models,
output2 = models[1](data)
print(output.dtype) # FP16
print(output2.dtype) # FP16

# required: gpu
# Demo3: optimizers is None:
model3 = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
optimizer3 = paddle.optimizer.Adam(parameters=model3.parameters())

model = paddle.amp.decorate(models=model3, level='O2')

data = paddle.rand([10, 3, 32, 32])

with paddle.amp.auto_cast(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
output = model(data)
print(output.dtype) # FP16
"""
return amp_decorate(models, optimizers, level, master_weight, save_dtype)
79 changes: 51 additions & 28 deletions python/paddle/fluid/dygraph/amp/auto_cast.py
@@ -332,14 +332,30 @@ def amp_decorate(models,
output2 = models[1](data)
print(output.dtype) # FP16
print(output2.dtype) # FP16

# required: gpu
# Demo3: optimizers is None:
model3 = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
optimizer3 = paddle.optimizer.Adam(parameters=model3.parameters())

model = paddle.fluid.dygraph.amp_decorate(models=model3, level='O2')

data = paddle.rand([10, 3, 32, 32])

with paddle.fluid.dygraph.amp_guard(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
output = model(data)
print(output.dtype) # FP16
"""
if not (level in ['O1', 'O2']):
raise ValueError(
"level should be O1 or O2, O1 represent AMP train mode, O2 represent Pure fp16 train mode."
)

if level == 'O1':
return models, optimizers
if optimizers is None:
return models
else:
return models, optimizers

models_is_list = False
if isinstance(models, paddle.nn.Layer):
@@ -353,29 +369,30 @@ def amp_decorate(models,
raise TypeError(
"models must be either a single model or a list of models.")

optimizers_is_list = False
if isinstance(optimizers, (paddle.optimizer.Optimizer,
paddle.fluid.optimizer.Optimizer)):
optimizers_is_list = False
optimizers = [optimizers]
check_optimizers(optimizers)
elif isinstance(optimizers, list):
check_optimizers(optimizers)
optimizers_is_list = True
else:
raise TypeError(
"optimizers must be either a single optimizer or a list of optimizers."
)

models = pure_fp16_initialize(models=models)

# support master_weight
for idx_opt in range(len(optimizers)):
if hasattr(optimizers[idx_opt], '_multi_precision'):
if master_weight is False:
optimizers[idx_opt]._multi_precision = False
else:
optimizers[idx_opt]._multi_precision = True
if optimizers is not None:
# check optimizers
optimizers_is_list = False
if isinstance(optimizers, (paddle.optimizer.Optimizer,
paddle.fluid.optimizer.Optimizer)):
optimizers_is_list = False
optimizers = [optimizers]
check_optimizers(optimizers)
elif isinstance(optimizers, list):
check_optimizers(optimizers)
optimizers_is_list = True
else:
raise TypeError(
"optimizers must be either a single optimizer or a list of optimizers."
)
# support master_weight
for idx_opt in range(len(optimizers)):
if hasattr(optimizers[idx_opt], '_multi_precision'):
if master_weight is False:
optimizers[idx_opt]._multi_precision = False
else:
optimizers[idx_opt]._multi_precision = True

if save_dtype is not None:
if not (save_dtype in ['float16', 'float32', 'float64']):
@@ -387,12 +404,18 @@
layer.register_state_dict_hook(StateDictHook(save_dtype))

if models_is_list:
if optimizers_is_list:
return models, optimizers
if optimizers is not None:
if optimizers_is_list:
return models, optimizers
else:
return models, optimizers[0]
else:
return models, optimizers[0]
return models
else:
if optimizers_is_list:
return models[0], optimizers
if optimizers is not None:
if optimizers_is_list:
return models[0], optimizers
else:
return models[0], optimizers[0]
else:
return models[0], optimizers[0]
return models[0]
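
The branching above determines what amp_decorate (and the public paddle.amp.decorate that wraps it) returns: single model in, single model out; list in, list out; and the optimizer half of the result is simply dropped when optimizers is None. A rough sketch of that contract under the new behavior (assumes a GPU build; the variable names are illustrative only):

import paddle

model = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
opt = paddle.optimizer.Adam(parameters=model.parameters())

# Single model, no optimizer -> a single decorated model.
decorated = paddle.amp.decorate(models=model, level='O2')

# Single model and single optimizer -> a (model, optimizer) pair.
decorated, opt = paddle.amp.decorate(models=model, optimizers=opt, level='O2')

# Lists in -> lists out, one entry per input.
models, optimizers = paddle.amp.decorate(
    models=[model], optimizers=[opt], level='O2')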
@@ -524,38 +524,29 @@ def func():

self.assertRaises(ValueError, func)

def test_input_formate_exception(self):
def test_model_error():
with fluid.dygraph.guard():
model = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None)
opt = paddle.optimizer.SGD(parameters=model.parameters())
paddle.amp.decorate(models=None, optimizers=opt, level='O2')

self.assertRaises(TypeError, test_model_error)

def test_optimizer_error():
with fluid.dygraph.guard():
model = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None)
paddle.amp.decorate(models=model, optimizers=None, level='O2')

self.assertRaises(TypeError, test_optimizer_error)

def test_input_type_exception(self):
def test_error_model_optimizer():
def test_error_model():
class MyModel(object):
def __init__(self):
print("A fake Model")

model = MyModel()
with fluid.dygraph.guard():
paddle.amp.decorate(models=model, optimizers=None, level='O2')

self.assertRaises(TypeError, test_error_model)

def test_error_optimizer():
class MyOptimizer(object):
def __init__(self):
print("A fake Optimizer")

model = MyModel()
model = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None)
opt = MyOptimizer()
with fluid.dygraph.guard():
paddle.amp.decorate(models=model, optimizers=opt, level='O2')

self.assertRaises(TypeError, test_error_model_optimizer)
self.assertRaises(TypeError, test_error_optimizer)

def test_set_master_weight(self):
model1 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None)
@@ -586,9 +577,16 @@ def test_set_master_weight(self):
model4 = fluid.dygraph.Conv2D(3, 2, 3, bias_attr=False, act=None)
opt4 = paddle.optimizer.Adam(
learning_rate=0.0001, parameters=model4.parameters())
model4, opt4 = paddle.amp.decorate(
models=model4, optimizers=opt4, level='O2', master_weight=False)
self.assertEqual(opt4._multi_precision, False)

models = [model3, model4]
optimizers = [opt3, opt4]
models, optimizers = paddle.amp.decorate(
models=models,
optimizers=optimizers,
level='O2',
master_weight=False)
self.assertEqual(optimizers[0]._multi_precision, False)
self.assertEqual(optimizers[1]._multi_precision, False)


class TestPureFp16SaveLoad(unittest.TestCase):
@@ -893,8 +891,7 @@ def train_resnet(self,
train_reader = train_loader

if enable_amp and (level == 'O2'):
resnet, optimizer = paddle.amp.decorate(
models=resnet, optimizers=optimizer, level='O2')
resnet = paddle.amp.decorate(models=resnet, level='O2')

for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num: