[TIPC] Fix dy2st tipc gpt 8 error #3586

Merged 3 commits on Oct 28, 2022
examples/language_model/gpt-3/dygraph/run_pretrain.py (7 additions, 7 deletions)
@@ -266,6 +266,13 @@ def do_train(args):
         # and add it to 'paddle.amp.decorate'
         multi_precision=args.use_pure_fp16)

+    # decorate @to_static for benchmark, skip it by default.
+    if args.to_static:
+        specs = None
+        model = paddle.jit.to_static(model, input_spec=specs)
+        logger.info(
+            "Successfully applied @to_static with specs: {}".format(specs))
+
     if args.use_pure_fp16:
         scaler = paddle.amp.GradScaler(init_loss_scaling=args.scale_loss)
         # level O2 means converting the network to FP16
@@ -301,13 +308,6 @@ def do_train(args):
             logger.warning("No optimizer checkpoint file found in %s." %
                            opt_path)

-    # decorate @to_static for benchmark, skip it by default.
-    if args.to_static:
-        specs = None
-        model = paddle.jit.to_static(model, input_spec=specs)
-        logger.info(
-            "Successfully applied @to_static with specs: {}".format(specs))
-
     global_step = 0
     tic_train = time.time()
     for epoch in range(args.num_train_epochs):
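The net effect of this hunk pair is a pure move: paddle.jit.to_static is now applied right after the model and AMP setup, rather than after the optimizer checkpoint is restored. With input_spec=None, input shapes are traced from the first real batch. As a hedged illustration (not part of this diff), explicit specs could pin shapes and dtypes at conversion time; the tensor name and shape below are assumptions for a GPT-style token input:

    import paddle
    from paddle.static import InputSpec

    # Illustrative only: a single int64 token-id tensor with dynamic
    # batch and sequence dimensions (None means "any size").
    specs = [InputSpec(shape=[None, None], dtype="int64", name="input_ids")]
    # model = paddle.jit.to_static(model, input_spec=specs)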
examples/machine_translation/transformer/train.py (2 additions, 1 deletion)
@@ -416,7 +416,8 @@ def do_train(args):
     args.unk_token = ARGS.unk_token
     args.bos_token = ARGS.bos_token
     args.eos_token = ARGS.eos_token
-    args.to_static = ARGS.to_static
+    if ARGS.to_static:
+        args.to_static = ARGS.to_static
     args.device = ARGS.device
     pprint(args)
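The added guard means ARGS.to_static only overwrites args.to_static when the command-line flag is actually set, which suggests the intent is to stop the argparse default from clobbering a to_static value already present in the loaded config. A toy sketch of the pattern (all names here are illustrative, not from the repo):

    from types import SimpleNamespace

    args = SimpleNamespace(to_static=True)   # e.g. loaded from a YAML config
    ARGS = SimpleNamespace(to_static=False)  # argparse default, flag omitted

    if ARGS.to_static:            # guarded assignment: config value survives
        args.to_static = ARGS.to_static
    assert args.to_static is True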
model_zoo/gpt/run_pretrain.py (7 additions, 7 deletions)
@@ -150,6 +150,13 @@ def do_train(args):
     # Create the criterion for the gpt model
     criterion = GPTPretrainingCriterion()

+    # decorate @to_static for benchmark, skip it by default.
+    if args.to_static:
+        specs = None
+        model = paddle.jit.to_static(model, input_spec=specs)
+        logger.info(
+            "Successfully applied @to_static with specs: {}".format(specs))
+
     if paddle.distributed.get_world_size() > 1:
         model = paddle.DataParallel(model)
@@ -201,13 +208,6 @@ def do_train(args):
             logger.warning("No optimizer checkpoint file found in %s." %
                            opt_path)

-    # decorate @to_static for benchmark, skip it by default.
-    if args.to_static:
-        specs = None
-        model = paddle.jit.to_static(model, input_spec=specs)
-        logger.info(
-            "Successfully applied @to_static with specs: {}".format(specs))
-
     global_step = 0
     epoch = 0
     tic_train = time.time()
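In this file the move also changes the order relative to distributed wrapping: to_static now converts the base Layer before paddle.DataParallel wraps it. A minimal runnable sketch of that ordering, using a stand-in model (TinyModel is an assumption, not the repo's class):

    import paddle

    class TinyModel(paddle.nn.Layer):
        # Stand-in for the GPT model; illustrative only.
        def __init__(self):
            super().__init__()
            self.linear = paddle.nn.Linear(8, 8)

        def forward(self, x):
            return self.linear(x)

    model = TinyModel()
    # Convert the base dygraph Layer first, as the moved hunk now does...
    model = paddle.jit.to_static(model, input_spec=None)
    # ...then wrap for multi-GPU, so DataParallel sees the converted Layer.
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)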