[PPMiniLM P0] Add PretrainedConfig and unit tests (#5520)
* update ppminilm

* update copyright

* update ppminilm tokenizer unittest

* fix format of tinybert

* remove useless arg
LiuChiachi authored Apr 4, 2023
1 parent 2bb0965 commit 17d0664
Showing 14 changed files with 840 additions and 371 deletions.
16 changes: 10 additions & 6 deletions examples/model_compression/pp-minilm/finetuning/export_model.py
@@ -15,6 +15,8 @@
 import os
 import sys
 
+import paddle
+
 from paddlenlp.trainer.argparser import strtobool
 from paddlenlp.transformers import PPMiniLMForSequenceClassification
 
@@ -53,13 +55,15 @@ def parse_args():
 def do_export(args):
     save_path = os.path.join(os.path.dirname(args.model_path), "inference")
     model = PPMiniLMForSequenceClassification.from_pretrained(args.model_path)
-    is_text_pair = True
     args.task_name = args.task_name.lower()
-    if args.task_name in ("tnews", "iflytek", "cluewsc2020"):
-        is_text_pair = False
-    model.to_static(
-        save_path, use_faster_tokenizer=args.save_inference_model_with_tokenizer, is_text_pair=is_text_pair
-    )
+
+    input_spec = [
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
+    ]
+    model = paddle.jit.to_static(model, input_spec=input_spec)
+
+    paddle.jit.save(model, save_path)
 
 
 def print_arguments(args):
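Editor's note: the replacement code above follows Paddle's standard dynamic-to-static export recipe — declare InputSpecs with variable batch and sequence dimensions, trace the model with paddle.jit.to_static, and serialize with paddle.jit.save. A minimal self-contained sketch of that flow; the checkpoint path is assumed for illustration, not taken from the commit:

import paddle
from paddlenlp.transformers import PPMiniLMForSequenceClassification

# Hypothetical fine-tuned checkpoint directory.
model = PPMiniLMForSequenceClassification.from_pretrained("./best_clue_model")
model.eval()

# None dims let one static graph accept any batch size / sequence length.
input_spec = [
    paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
    paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
]
static_model = paddle.jit.to_static(model, input_spec=input_spec)

# Writes inference.pdmodel and inference.pdiparams next to the checkpoint.
paddle.jit.save(static_model, "./best_clue_model/inference")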
66 changes: 17 additions & 49 deletions examples/model_compression/pp-minilm/pruning/export_model.py
@@ -19,11 +19,9 @@
 import sys
 
 import paddle
-from paddle.common_ops_import import core
 from paddleslim.nas.ofa import OFA, utils
 from paddleslim.nas.ofa.convert_super import Convert, supernet
 
-from paddlenlp.trainer.argparser import strtobool
 from paddlenlp.transformers import PPMiniLMModel
 
 sys.path.append("../")
@@ -32,13 +30,7 @@
 
 def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None):
     wtype = self.pooler.dense.fn.weight.dtype if hasattr(self.pooler.dense, "fn") else self.pooler.dense.weight.dtype
-    if self.use_faster_tokenizer:
-        input_ids, token_type_ids = self.tokenizer(
-            text=input_ids,
-            text_pair=token_type_ids,
-            max_seq_len=self.max_seq_len,
-            pad_to_max_seq_len=self.pad_to_max_seq_len,
-        )
+
     if attention_mask is None:
         attention_mask = paddle.unsqueeze((input_ids == self.pad_token_id).astype(wtype) * -1e9, axis=[1, 2])
     embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids)
@@ -103,12 +95,6 @@ def parse_args():
     parser.add_argument("--n_gpu", type=int, default=1, help="number of gpus to use, 0 for cpu.")
     parser.add_argument("--width_mult", type=float, default=1.0, help="width mult you want to export")
     parser.add_argument("--depth_mult", type=float, default=1.0, help="depth mult you want to export")
-    parser.add_argument(
-        "--use_faster_tokenizer",
-        type=strtobool,
-        default=True,
-        help="Whether to use FasterTokenizer to accelerate training or further inference.",
-    )
     args = parser.parse_args()
     return args
 
@@ -118,7 +104,7 @@ def do_export(args):
     args.model_type = args.model_type.lower()
     args.task_name = args.task_name.lower()
     model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
-    config_path = os.path.join(args.model_name_or_path, "model_config.json")
+    config_path = os.path.join(args.model_name_or_path, "config.json")
     cfg_dict = dict(json.loads(open(config_path).read()))
 
     kept_layers_index = {}
@@ -132,12 +118,9 @@ def do_export(args):
     with open(config_path, "w", encoding="utf-8") as f:
         f.write(json.dumps(cfg_dict, ensure_ascii=False))
 
-    num_labels = cfg_dict["num_classes"]
-
-    model = model_class.from_pretrained(args.model_name_or_path, num_classes=num_labels)
-    model.use_faster_tokenizer = args.use_faster_tokenizer
+    model = model_class.from_pretrained(args.model_name_or_path)
 
-    origin_model = model_class.from_pretrained(args.model_name_or_path, num_classes=num_labels)
+    origin_model = model_class.from_pretrained(args.model_name_or_path)
 
     os.rename(config_path + "_bak", config_path)
 
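Editor's note: the surrounding lines rely on a backup-edit-restore dance — config.json is copied aside, rewritten so from_pretrained builds the depth-pruned sub-network, and the os.rename at the end puts the original back. A hedged sketch of that pattern; the path and the exact config key are illustrative, not from the commit:

import json
import os
import shutil

config_path = "ofa_model/config.json"  # hypothetical model directory
shutil.copyfile(config_path, config_path + "_bak")  # keep the original safe

with open(config_path) as f:
    cfg = json.load(f)
cfg["num_hidden_layers"] = 3  # e.g. depth_mult=0.5 on a 6-layer model
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, ensure_ascii=False)

# ... model_class.from_pretrained(...) now sees the pruned config ...

os.rename(config_path + "_bak", config_path)  # restore the original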
@@ -164,30 +147,12 @@ def do_export(args):
         if isinstance(sublayer, paddle.nn.MultiHeadAttention):
             sublayer.num_heads = int(args.width_mult * sublayer.num_heads)
 
-    is_text_pair = True
-    if args.task_name in ("tnews", "iflytek", "cluewsc2020"):
-        is_text_pair = False
-
-    if args.use_faster_tokenizer:
-        ofa_model.model.add_faster_tokenizer_op()
-        if is_text_pair:
-            origin_model_new = ofa_model.export(
-                best_config,
-                input_shapes=[[1], [1]],
-                input_dtypes=[core.VarDesc.VarType.STRINGS, core.VarDesc.VarType.STRINGS],
-                origin_model=origin_model,
-            )
-        else:
-            origin_model_new = ofa_model.export(
-                best_config, input_shapes=[1], input_dtypes=core.VarDesc.VarType.STRINGS, origin_model=origin_model
-            )
-    else:
-        origin_model_new = ofa_model.export(
-            best_config,
-            input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
-            input_dtypes=["int64", "int64"],
-            origin_model=origin_model,
-        )
+    origin_model_new = ofa_model.export(
+        best_config,
+        input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
+        input_dtypes=["int64", "int64"],
+        origin_model=origin_model,
+    )
 
     for name, sublayer in origin_model_new.named_sublayers():
         if isinstance(sublayer, paddle.nn.MultiHeadAttention):
@@ -199,10 +164,13 @@ def do_export(args):
     model_to_save = origin_model_new
     model_to_save.save_pretrained(output_dir)
 
-    if args.static_sub_model is not None:
-        origin_model_new.to_static(
-            args.static_sub_model, use_faster_tokenizer=args.use_faster_tokenizer, is_text_pair=is_text_pair
-        )
+    input_spec = [
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
+    ]
+    origin_model_new = paddle.jit.to_static(origin_model_new, input_spec=input_spec)
+
+    paddle.jit.save(origin_model_new, args.static_sub_model)
 
 
 def print_arguments(args):
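Editor's note: either export path ends in a static graph that can be served without the Python model definition. A minimal consumer sketch using Paddle Inference; the file names assume the default suffixes paddle.jit.save emits, and the output directory is illustrative:

import numpy as np
import paddle.inference as paddle_infer

# paddle.jit.save(model, "sub_static/float32") writes float32.pdmodel / float32.pdiparams.
config = paddle_infer.Config("sub_static/float32.pdmodel", "sub_static/float32.pdiparams")
predictor = paddle_infer.create_predictor(config)

ids = np.zeros((1, 128), dtype="int64")   # dummy pre-tokenized batch
for name in predictor.get_input_names():  # input_ids, token_type_ids
    handle = predictor.get_input_handle(name)
    handle.copy_from_cpu(ids)

predictor.run()
logits = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
print(logits.shape)  # (1, num_classes)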
43 changes: 21 additions & 22 deletions examples/model_compression/pp-minilm/pruning/prune.py
@@ -27,16 +27,28 @@
 from paddle.io import DataLoader
 from paddleslim.nas.ofa import OFA, DistillConfig, utils
 from paddleslim.nas.ofa.convert_super import Convert, supernet
-from paddleslim.nas.ofa.utils import nlp_utils
 
 from paddlenlp.data import Pad, Stack, Tuple
 from paddlenlp.datasets import load_dataset
 from paddlenlp.transformers import LinearDecayWithWarmup, PPMiniLMModel
+from paddlenlp.transformers.ofa_utils import (
+    compute_neuron_head_importance,
+    encoder_layer_ofa_forward,
+    encoder_ofa_forward,
+    mha_ofa_forward,
+    prepare_qkv_ofa,
+    reorder_neuron_head,
+)
 from paddlenlp.utils.log import logger
 
 sys.path.append("../")
 from data import METRIC_CLASSES, MODEL_CLASSES, convert_example  # noqa: E402
 
+paddle.nn.MultiHeadAttention.forward = mha_ofa_forward
+paddle.nn.MultiHeadAttention._prepare_qkv = prepare_qkv_ofa
+paddle.nn.TransformerEncoder.forward = encoder_ofa_forward
+paddle.nn.TransformerEncoderLayer.forward = encoder_layer_ofa_forward
+
 
 def parse_args():
     parser = argparse.ArgumentParser()
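Editor's note on the four class-level assignments above: Python resolves methods on the class at call time, so patching paddle.nn.MultiHeadAttention.forward changes the behavior of every instance, including models built later by from_pretrained. A toy illustration of the mechanism (names here are hypothetical, not part of the commit):

class Attention:
    def forward(self, x):
        return x

original_forward = Attention.forward

def traced_forward(self, x):
    print("patched forward called")  # extra behavior, e.g. accepting a head mask
    return original_forward(self, x)

attn = Attention()                 # instance created before the patch
Attention.forward = traced_forward
attn.forward(1)                    # prints "patched forward called", returns 1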
@@ -125,7 +137,11 @@ def parse_args():
         help="The device to select to train the model, is must be cpu/gpu/xpu.",
     )
     parser.add_argument(
-        "--width_mult_list", nargs="+", type=float, default=[1.0, 5 / 6, 2 / 3, 0.5], help="width mult in compress"
+        "--width_mult_list",
+        nargs="+",
+        type=str,
+        default=["1.0", "5 / 6", "2 / 3", "0.5"],
+        help="width mult of compression",
     )
     args = parser.parse_args()
     return args
@@ -161,10 +177,6 @@ def evaluate(model, metric, data_loader, width_mult, student=False):
 
 # monkey patch for ppminilm forward to accept [attention_mask, head_mask] as attention_mask
 def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=[None, None]):
-    if self.use_faster_tokenizer:
-        input_ids, token_type_ids = self.tokenizer(
-            text=input_ids, text_pair=token_type_ids, max_seq_len=self.max_seq_len
-        )
     wtype = self.pooler.dense.fn.weight.dtype if hasattr(self.pooler.dense, "fn") else self.pooler.dense.weight.dtype
     if attention_mask[0] is None:
         attention_mask[0] = paddle.unsqueeze((input_ids == self.pad_token_id).astype(wtype) * -1e9, axis=[1, 2])
@@ -178,19 +190,6 @@ def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=[None, None]):
 PPMiniLMModel.forward = ppminilm_forward
 
 
-# reorder weights according head importance and neuron importance
-def reorder_neuron_head(model, head_importance, neuron_importance):
-    # reorder heads and ffn neurons
-    for layer, current_importance in enumerate(neuron_importance):
-        # reorder heads
-        idx = paddle.argsort(head_importance[layer], descending=True)
-        nlp_utils.reorder_head(model.ppminilm.encoder.layers[layer].self_attn, idx)
-        # reorder neurons
-        idx = paddle.argsort(paddle.to_tensor(current_importance), descending=True)
-        nlp_utils.reorder_neuron(model.ppminilm.encoder.layers[layer].linear1.fn, idx, dim=1)
-        nlp_utils.reorder_neuron(model.ppminilm.encoder.layers[layer].linear2.fn, idx, dim=0)
-
-
 def soft_cross_entropy(inp, target):
     inp_likelihood = F.log_softmax(inp, axis=-1)
     target_prob = F.softmax(target, axis=-1)
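Editor's note: the body of soft_cross_entropy is truncated by the diff context; reading it as a knowledge-distillation loss, the likely completion is the teacher-weighted negative log-likelihood averaged over the batch. A hedged reconstruction, not verbatim from the file:

import paddle
import paddle.nn.functional as F

def soft_cross_entropy(inp, target):
    inp_likelihood = F.log_softmax(inp, axis=-1)  # student log-probabilities
    target_prob = F.softmax(target, axis=-1)      # teacher soft labels
    # Cross-entropy against the soft distribution, mean over the batch.
    return paddle.mean(-paddle.sum(inp_likelihood * target_prob, axis=-1))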
@@ -273,8 +272,7 @@ def do_train(args):
 
     # Step6: Calculate the importance of neurons and head,
     # and then reorder them according to the importance.
-    head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
-        args.task_name,
+    head_importance, neuron_importance = compute_neuron_head_importance(
         ofa_model.model,
         dev_data_loader,
         loss_fct=criterion,
@@ -315,14 +313,15 @@ def do_train(args):
     global_step = 0
     tic_train = time.time()
     best_res = 0.0
+    args.width_mult_list = [eval(width_mult) for width_mult in args.width_mult_list]
     for epoch in range(num_train_epochs):
         # Step7: Set current epoch and task.
         ofa_model.set_epoch(epoch)
         ofa_model.set_task("width")
 
         for step, batch in enumerate(train_data_loader):
             global_step += 1
-            input_ids, segment_ids, labels = batch
+            input_ids, segment_ids, _ = batch
 
             for width_mult in args.width_mult_list:
                 # Step8: Broadcast supernet config from width_mult,
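Editor's note: the --width_mult_list switch from type=float to type=str pairs with the eval call added before the training loop — values such as 5 / 6 are not valid float literals for argparse, so they arrive as strings and are evaluated once. A quick demonstration of the conversion:

width_mult_list = ["1.0", "5 / 6", "2 / 3", "0.5"]  # as parsed by argparse
width_mult_list = [eval(width_mult) for width_mult in width_mult_list]
print(width_mult_list)  # [1.0, 0.8333333333333334, 0.6666666666666666, 0.5]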
(file name not captured)
@@ -70,7 +70,7 @@
     parser.add_argument(
         "--use_faster_tokenizer",
         type=strtobool,
-        default=True,
+        default=False,
         help="Whether to use FasterTokenizer to accelerate training or further inference.",
     )
 
16 changes: 6 additions & 10 deletions model_zoo/tinybert/data_augmentation.py
@@ -14,19 +14,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import random
-import sys
+import argparse
+import csv
+import logging
 import os
-import unicodedata
+import random
 import re
-import logging
-import csv
-import argparse
+import unicodedata
 
 import numpy as np
 import paddle
 
-from paddlenlp.transformers import BertTokenizer, BertForPretraining
+from paddlenlp.transformers import BertForPretraining, BertTokenizer
 
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
@@ -241,8 +240,6 @@ def _read_tsv(input_file, quotechar=None):
         reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
         lines = []
         for line in reader:
-            if sys.version_info[0] == 2:
-                line = list(unicode(cell, "utf-8") for cell in line)
             lines.append(line)
         return lines
 
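Editor's note: with the Python 2 branch gone (and import sys dropped in the header hunk), the reader reduces to plain csv iteration. A sketch of the resulting helper; the enclosing open() line sits outside the shown context, so it is assumed here:

import csv

def _read_tsv(input_file, quotechar=None):
    # Python-3-only TSV reader; the old unicode() compatibility branch is removed.
    with open(input_file, "r", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        return [line for line in reader]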
@@ -477,7 +474,6 @@ def main():
         "RTE": {"N": 30},
     }
 
-    device = paddle.set_device(args.device)
     if args.task_name in default_params:
         args.N = default_params[args.task_name]["N"]
 