[PPMiniLM P0] Add PretrainedConfig and unit tests (#5520)
* update ppminilm

* update copyright

* update ppminilm tokenizer unittest

* fix format of tinybert

* remove useless arg
LiuChiachi authored Apr 4, 2023
1 parent 2bb0965 commit 17d0664
Showing 14 changed files with 840 additions and 371 deletions.
16 changes: 10 additions & 6 deletions examples/model_compression/pp-minilm/finetuning/export_model.py
@@ -15,6 +15,8 @@
 import os
 import sys
 
+import paddle
+
 from paddlenlp.trainer.argparser import strtobool
 from paddlenlp.transformers import PPMiniLMForSequenceClassification
 
@@ -53,13 +55,15 @@ def parse_args():
 def do_export(args):
     save_path = os.path.join(os.path.dirname(args.model_path), "inference")
     model = PPMiniLMForSequenceClassification.from_pretrained(args.model_path)
-    is_text_pair = True
     args.task_name = args.task_name.lower()
-    if args.task_name in ("tnews", "iflytek", "cluewsc2020"):
-        is_text_pair = False
-    model.to_static(
-        save_path, use_faster_tokenizer=args.save_inference_model_with_tokenizer, is_text_pair=is_text_pair
-    )
+
+    input_spec = [
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
+    ]
+    model = paddle.jit.to_static(model, input_spec=input_spec)
+
+    paddle.jit.save(model, save_path)
 
 
 def print_arguments(args):
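Editor's note: the replacement code above follows Paddle's standard dynamic-to-static export recipe — declare InputSpecs with variable batch and sequence dimensions, trace the model with paddle.jit.to_static, and serialize with paddle.jit.save. A minimal self-contained sketch of that flow; the checkpoint path is assumed for illustration, not taken from the commit:

import paddle
from paddlenlp.transformers import PPMiniLMForSequenceClassification

# Hypothetical fine-tuned checkpoint directory.
model = PPMiniLMForSequenceClassification.from_pretrained("./best_clue_model")
model.eval()

# None dims let one static graph accept any batch size / sequence length.
input_spec = [
    paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
    paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
]
static_model = paddle.jit.to_static(model, input_spec=input_spec)

# Writes inference.pdmodel and inference.pdiparams next to the checkpoint.
paddle.jit.save(static_model, "./best_clue_model/inference")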
66 changes: 17 additions & 49 deletions examples/model_compression/pp-minilm/pruning/export_model.py
@@ -19,11 +19,9 @@
 import sys
 
 import paddle
-from paddle.common_ops_import import core
 from paddleslim.nas.ofa import OFA, utils
 from paddleslim.nas.ofa.convert_super import Convert, supernet
 
-from paddlenlp.trainer.argparser import strtobool
 from paddlenlp.transformers import PPMiniLMModel
 
 sys.path.append("../")
@@ -32,13 +30,7 @@
 
 def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None):
     wtype = self.pooler.dense.fn.weight.dtype if hasattr(self.pooler.dense, "fn") else self.pooler.dense.weight.dtype
-    if self.use_faster_tokenizer:
-        input_ids, token_type_ids = self.tokenizer(
-            text=input_ids,
-            text_pair=token_type_ids,
-            max_seq_len=self.max_seq_len,
-            pad_to_max_seq_len=self.pad_to_max_seq_len,
-        )
+
     if attention_mask is None:
         attention_mask = paddle.unsqueeze((input_ids == self.pad_token_id).astype(wtype) * -1e9, axis=[1, 2])
     embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids)
@@ -103,12 +95,6 @@ def parse_args():
     parser.add_argument("--n_gpu", type=int, default=1, help="number of gpus to use, 0 for cpu.")
     parser.add_argument("--width_mult", type=float, default=1.0, help="width mult you want to export")
     parser.add_argument("--depth_mult", type=float, default=1.0, help="depth mult you want to export")
-    parser.add_argument(
-        "--use_faster_tokenizer",
-        type=strtobool,
-        default=True,
-        help="Whether to use FasterTokenizer to accelerate training or further inference.",
-    )
     args = parser.parse_args()
     return args
 
@@ -118,7 +104,7 @@ def do_export(args):
     args.model_type = args.model_type.lower()
     args.task_name = args.task_name.lower()
     model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
-    config_path = os.path.join(args.model_name_or_path, "model_config.json")
+    config_path = os.path.join(args.model_name_or_path, "config.json")
     cfg_dict = dict(json.loads(open(config_path).read()))
 
     kept_layers_index = {}
@@ -132,12 +118,9 @@ def do_export(args):
     with open(config_path, "w", encoding="utf-8") as f:
         f.write(json.dumps(cfg_dict, ensure_ascii=False))
 
-    num_labels = cfg_dict["num_classes"]
-
-    model = model_class.from_pretrained(args.model_name_or_path, num_classes=num_labels)
-    model.use_faster_tokenizer = args.use_faster_tokenizer
+    model = model_class.from_pretrained(args.model_name_or_path)
 
-    origin_model = model_class.from_pretrained(args.model_name_or_path, num_classes=num_labels)
+    origin_model = model_class.from_pretrained(args.model_name_or_path)
 
     os.rename(config_path + "_bak", config_path)
 
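Editor's note: the surrounding lines rely on a backup-edit-restore dance — config.json is copied aside, rewritten so from_pretrained builds the depth-pruned sub-network, and the os.rename at the end puts the original back. A hedged sketch of that pattern; the path and the exact config key are illustrative, not from the commit:

import json
import os
import shutil

config_path = "ofa_model/config.json"  # hypothetical model directory
shutil.copyfile(config_path, config_path + "_bak")  # keep the original safe

with open(config_path) as f:
    cfg = json.load(f)
cfg["num_hidden_layers"] = 3  # e.g. depth_mult=0.5 on a 6-layer model
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, ensure_ascii=False)

# ... model_class.from_pretrained(...) now sees the pruned config ...

os.rename(config_path + "_bak", config_path)  # restore the original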
@@ -164,30 +147,12 @@ def do_export(args):
         if isinstance(sublayer, paddle.nn.MultiHeadAttention):
             sublayer.num_heads = int(args.width_mult * sublayer.num_heads)
 
-    is_text_pair = True
-    if args.task_name in ("tnews", "iflytek", "cluewsc2020"):
-        is_text_pair = False
-
-    if args.use_faster_tokenizer:
-        ofa_model.model.add_faster_tokenizer_op()
-        if is_text_pair:
-            origin_model_new = ofa_model.export(
-                best_config,
-                input_shapes=[[1], [1]],
-                input_dtypes=[core.VarDesc.VarType.STRINGS, core.VarDesc.VarType.STRINGS],
-                origin_model=origin_model,
-            )
-        else:
-            origin_model_new = ofa_model.export(
-                best_config, input_shapes=[1], input_dtypes=core.VarDesc.VarType.STRINGS, origin_model=origin_model
-            )
-    else:
-        origin_model_new = ofa_model.export(
-            best_config,
-            input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
-            input_dtypes=["int64", "int64"],
-            origin_model=origin_model,
-        )
+    origin_model_new = ofa_model.export(
+        best_config,
+        input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
+        input_dtypes=["int64", "int64"],
+        origin_model=origin_model,
+    )
 
     for name, sublayer in origin_model_new.named_sublayers():
         if isinstance(sublayer, paddle.nn.MultiHeadAttention):
@@ -199,10 +164,13 @@ def do_export(args):
     model_to_save = origin_model_new
     model_to_save.save_pretrained(output_dir)
 
-    if args.static_sub_model is not None:
-        origin_model_new.to_static(
-            args.static_sub_model, use_faster_tokenizer=args.use_faster_tokenizer, is_text_pair=is_text_pair
-        )
+    input_spec = [
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # input_ids
+        paddle.static.InputSpec(shape=[None, None], dtype="int64"),  # token_type_ids
+    ]
+    origin_model_new = paddle.jit.to_static(origin_model_new, input_spec=input_spec)
+
+    paddle.jit.save(origin_model_new, args.static_sub_model)
 
 
 def print_arguments(args):
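Editor's note: either export path ends in a static graph that can be served without the Python model definition. A minimal consumer sketch using Paddle Inference; the file names assume the default suffixes paddle.jit.save emits, and the output directory is illustrative:

import numpy as np
import paddle.inference as paddle_infer

# paddle.jit.save(model, "sub_static/float32") writes float32.pdmodel / float32.pdiparams.
config = paddle_infer.Config("sub_static/float32.pdmodel", "sub_static/float32.pdiparams")
predictor = paddle_infer.create_predictor(config)

ids = np.zeros((1, 128), dtype="int64")   # dummy pre-tokenized batch
for name in predictor.get_input_names():  # input_ids, token_type_ids
    handle = predictor.get_input_handle(name)
    handle.copy_from_cpu(ids)

predictor.run()
logits = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
print(logits.shape)  # (1, num_classes)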
43 changes: 21 additions & 22 deletions examples/model_compression/pp-minilm/pruning/prune.py
@@ -27,16 +27,28 @@
 from paddle.io import DataLoader
 from paddleslim.nas.ofa import OFA, DistillConfig, utils
 from paddleslim.nas.ofa.convert_super import Convert, supernet
-from paddleslim.nas.ofa.utils import nlp_utils
 
 from paddlenlp.data import Pad, Stack, Tuple
 from paddlenlp.datasets import load_dataset
 from paddlenlp.transformers import LinearDecayWithWarmup, PPMiniLMModel
+from paddlenlp.transformers.ofa_utils import (
+    compute_neuron_head_importance,
+    encoder_layer_ofa_forward,
+    encoder_ofa_forward,
+    mha_ofa_forward,
+    prepare_qkv_ofa,
+    reorder_neuron_head,
+)
 from paddlenlp.utils.log import logger
 
 sys.path.append("../")
 from data import METRIC_CLASSES, MODEL_CLASSES, convert_example  # noqa: E402
 
+paddle.nn.MultiHeadAttention.forward = mha_ofa_forward
+paddle.nn.MultiHeadAttention._prepare_qkv = prepare_qkv_ofa
+paddle.nn.TransformerEncoder.forward = encoder_ofa_forward
+paddle.nn.TransformerEncoderLayer.forward = encoder_layer_ofa_forward
+
 
 def parse_args():
     parser = argparse.ArgumentParser()
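Editor's note on the four class-level assignments above: Python resolves methods on the class at call time, so patching paddle.nn.MultiHeadAttention.forward changes the behavior of every instance, including models built later by from_pretrained. A toy illustration of the mechanism (names here are hypothetical, not part of the commit):

class Attention:
    def forward(self, x):
        return x

original_forward = Attention.forward

def traced_forward(self, x):
    print("patched forward called")  # extra behavior, e.g. accepting a head mask
    return original_forward(self, x)

attn = Attention()                 # instance created before the patch
Attention.forward = traced_forward
attn.forward(1)                    # prints "patched forward called", returns 1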
@@ -125,7 +137,11 @@ def parse_args():
         help="The device to select to train the model, is must be cpu/gpu/xpu.",
     )
     parser.add_argument(
-        "--width_mult_list", nargs="+", type=float, default=[1.0, 5 / 6, 2 / 3, 0.5], help="width mult in compress"
+        "--width_mult_list",
+        nargs="+",
+        type=str,
+        default=["1.0", "5 / 6", "2 / 3", "0.5"],
+        help="width mult of compression",
     )
     args = parser.parse_args()
     return args
@@ -161,10 +177,6 @@ def evaluate(model, metric, data_loader, width_mult, student=False):
 
 # monkey patch for ppminilm forward to accept [attention_mask, head_mask] as attention_mask
 def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=[None, None]):
-    if self.use_faster_tokenizer:
-        input_ids, token_type_ids = self.tokenizer(
-            text=input_ids, text_pair=token_type_ids, max_seq_len=self.max_seq_len
-        )
     wtype = self.pooler.dense.fn.weight.dtype if hasattr(self.pooler.dense, "fn") else self.pooler.dense.weight.dtype
     if attention_mask[0] is None:
         attention_mask[0] = paddle.unsqueeze((input_ids == self.pad_token_id).astype(wtype) * -1e9, axis=[1, 2])
@@ -178,19 +190,6 @@ def ppminilm_forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=[None, None]):
 PPMiniLMModel.forward = ppminilm_forward
 
 
-# reorder weights according head importance and neuron importance
-def reorder_neuron_head(model, head_importance, neuron_importance):
-    # reorder heads and ffn neurons
-    for layer, current_importance in enumerate(neuron_importance):
-        # reorder heads
-        idx = paddle.argsort(head_importance[layer], descending=True)
-        nlp_utils.reorder_head(model.ppminilm.encoder.layers[layer].self_attn, idx)
-        # reorder neurons
-        idx = paddle.argsort(paddle.to_tensor(current_importance), descending=True)
-        nlp_utils.reorder_neuron(model.ppminilm.encoder.layers[layer].linear1.fn, idx, dim=1)
-        nlp_utils.reorder_neuron(model.ppminilm.encoder.layers[layer].linear2.fn, idx, dim=0)
-
-
 def soft_cross_entropy(inp, target):
     inp_likelihood = F.log_softmax(inp, axis=-1)
     target_prob = F.softmax(target, axis=-1)
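Editor's note: the body of soft_cross_entropy is truncated by the diff context; reading it as a knowledge-distillation loss, the likely completion is the teacher-weighted negative log-likelihood averaged over the batch. A hedged reconstruction, not verbatim from the file:

import paddle
import paddle.nn.functional as F

def soft_cross_entropy(inp, target):
    inp_likelihood = F.log_softmax(inp, axis=-1)  # student log-probabilities
    target_prob = F.softmax(target, axis=-1)      # teacher soft labels
    # Cross-entropy against the soft distribution, mean over the batch.
    return paddle.mean(-paddle.sum(inp_likelihood * target_prob, axis=-1))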
@@ -273,8 +272,7 @@ def do_train(args):
 
     # Step6: Calculate the importance of neurons and head,
     # and then reorder them according to the importance.
-    head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
-        args.task_name,
+    head_importance, neuron_importance = compute_neuron_head_importance(
         ofa_model.model,
         dev_data_loader,
         loss_fct=criterion,
@@ -315,14 +313,15 @@ def do_train(args):
     global_step = 0
     tic_train = time.time()
     best_res = 0.0
+    args.width_mult_list = [eval(width_mult) for width_mult in args.width_mult_list]
     for epoch in range(num_train_epochs):
         # Step7: Set current epoch and task.
         ofa_model.set_epoch(epoch)
         ofa_model.set_task("width")
 
         for step, batch in enumerate(train_data_loader):
             global_step += 1
-            input_ids, segment_ids, labels = batch
+            input_ids, segment_ids, _ = batch
 
             for width_mult in args.width_mult_list:
                 # Step8: Broadcast supernet config from width_mult,
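Editor's note: the --width_mult_list switch from type=float to type=str pairs with the eval call added before the training loop — values such as 5 / 6 are not valid float literals for argparse, so they arrive as strings and are evaluated once. A quick demonstration of the conversion:

width_mult_list = ["1.0", "5 / 6", "2 / 3", "0.5"]  # as parsed by argparse
width_mult_list = [eval(width_mult) for width_mult in width_mult_list]
print(width_mult_list)  # [1.0, 0.8333333333333334, 0.6666666666666666, 0.5]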
(file name not captured)
@@ -70,7 +70,7 @@
     parser.add_argument(
         "--use_faster_tokenizer",
         type=strtobool,
-        default=True,
+        default=False,
         help="Whether to use FasterTokenizer to accelerate training or further inference.",
     )
 
16 changes: 6 additions & 10 deletions model_zoo/tinybert/data_augmentation.py
@@ -14,19 +14,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import random
-import sys
+import argparse
+import csv
+import logging
 import os
-import unicodedata
+import random
 import re
-import logging
-import csv
-import argparse
+import unicodedata
 
 import numpy as np
 import paddle
 
-from paddlenlp.transformers import BertTokenizer, BertForPretraining
+from paddlenlp.transformers import BertForPretraining, BertTokenizer
 
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
@@ -241,8 +240,6 @@ def _read_tsv(input_file, quotechar=None):
         reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
         lines = []
         for line in reader:
-            if sys.version_info[0] == 2:
-                line = list(unicode(cell, "utf-8") for cell in line)
             lines.append(line)
         return lines
 
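Editor's note: with the Python 2 branch gone (and import sys dropped in the header hunk), the reader reduces to plain csv iteration. A sketch of the resulting helper; the enclosing open() line sits outside the shown context, so it is assumed here:

import csv

def _read_tsv(input_file, quotechar=None):
    # Python-3-only TSV reader; the old unicode() compatibility branch is removed.
    with open(input_file, "r", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        return [line for line in reader]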
@@ -477,7 +474,6 @@ def main():
         "RTE": {"N": 30},
     }
 
-    device = paddle.set_device(args.device)
     if args.task_name in default_params:
         args.N = default_params[args.task_name]["N"]
 