Merge pull request FlagAI-Open#238 from Anhforth/add_bminf

Add bminf
FlagOpen · Mar 2, 2023 · 9c6ec92 · 9c6ec92
2 parents 2cffcb5 + 5fb5fc8
commit 9c6ec92
Show file tree

Hide file tree

Showing 42 changed files with 208 additions and 59 deletions.
diff --git a/doc_zh/TUTORIAL_15_BERT_EXAMPLE_TITLE_GENERATION.md b/doc_zh/TUTORIAL_15_BERT_EXAMPLE_TITLE_GENERATION.md
@@ -24,7 +24,7 @@
 ### 1. 数据加载
 样例数据位于 /examples/bert_title_generation/data/
 
-需要在 ```trianer.py```文件中定义数据读取过程，例如：
+需要在 ```trainer.py```文件中定义数据读取过程，例如：
 ```python
 def read_file():
     src = []

diff --git a/doc_zh/TUTORIAL_3_MODEL.md b/doc_zh/TUTORIAL_3_MODEL.md
@@ -20,7 +20,7 @@
 ## From_pretrain
 
 `From_pretrain` 函数用于加载模型。同一个模型结构的模型可以用同一个class进行加载，比如`BERT-base-ch` 和`Roberta-base-ch`模型都能用`BertModel`这个`Class`进行加载。`From_pretrain`为了数据/模型并行的模型加载进行了特定优化，避免重复下载导致的资源浪费。
-通过调用`ClassName.from_pretrian()`来进行加载.
+通过调用`ClassName.from_pretrain()`来进行加载.
 ### 从modelhub加载
 现在我们支持从modelhub中下载[常用模型](#所有支持模型)，可以直接通过`from_pretrain`下载模型配置文件`config.json`，模型权重`pytorch_model.bin`，以及字典文件`vocab.txt`。例子：
 ```python

diff --git a/docs/TUTORIAL_3_MODEL.md b/docs/TUTORIAL_3_MODEL.md
@@ -25,7 +25,7 @@ All supported models now support the three most common model types [encoder, dec
 
 ### load model from modelhub
 
-By calling `ClassName.from_pretrian()` to load following [supported models](#all-supported-models), it will automatically download the model configuration file `config.json`, model weights `pytorch_model.bin`, and dictionary files `vocab .txt`.
+By calling `ClassName.from_pretrain()` to load the following [supported models](#all-supported-models), it will automatically download the model configuration file `config.json`, model weights `pytorch_model.bin`, and dictionary file `vocab.txt`.
 
 ```python
 >>> # Downloading GLM-large-ch from modelhub

diff --git a/examples/bert_title_generation_english/train.py b/examples/bert_title_generation_english/train.py
@@ -1,7 +1,6 @@
 # Copyright © 2022 BAAI. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License")
-import sys
 import os
 import torch
 from torch.utils.data import Dataset

diff --git a/examples/bminf_generate/README.md b/examples/bminf_generate/README.md
@@ -0,0 +1,45 @@
+
+# BMInf
+
+## 简介/Overview
+
+BMInf is a low-resource inference package for large-scale pretrained language models. 
+
+At a minimum, BMInf can run models with more than 10 billion parameters on a single NVIDIA GTX 1060 GPU; better GPUs give better performance. Even when GPU memory is large enough for ordinary large-model inference (such as on a V100 or A100), BMInf still offers a significant performance improvement over the existing PyTorch implementation.
+
+BMInf Github Repository address: https://github.com/OpenBMB/BMInf
+
+BMInf (Big Model Inference) 是一个用于大规模预训练语言模型（pretrained language models, PLM）推理阶段的低资源工具包。
+
+BMInf最低支持在NVIDIA GTX 1060单卡运行百亿大模型。在此基础上，使用更好的gpu运行会有更好的性能。在显存支持进行大模型推理的情况下（如V100或A100显卡），BMInf的实现较现有PyTorch版本仍有较大性能提升。
+
+BMInf 仓库地址：https://github.com/OpenBMB/BMInf
+
+## 应用/Application
+
+在模型加载参数之后，使用如下代码来用BMInf转换模型
+
+```Python
+with torch.cuda.device(0):
+    model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
+```
+The `quantization` parameter controls whether the model quantization technique is used; for generative models it must be set to `False`.
+
+You can use the `memory_limit` parameter to set the maximum amount of memory available, in bytes (for example, `20 << 30` is 20 GiB).
+
+`quantization`参数代表是否使用了模型量化的技巧，但如果是生成类模型，则需要设置成`False`
+
+可以用`memory_limit`参数设置最大的可用存储，单位为字节（例如 `20 << 30` 即 20 GiB）
+
+如果`bminf.wrapper`不能很好的适配你的模型，你可以用以下的方法来进行手动适配。
+
+* 将 `torch.nn.ModuleList` 替换为 `bminf.TransformerBlockList`.
+```python
+module_list = bminf.TransformerBlockList([
+], [CUDA_DEVICE_INDEX])
+```
+
+* 将 `torch.nn.Linear` 替换为 `bminf.QuantizedLinear`.
+```python
+linear = bminf.QuantizedLinear(torch.nn.Linear(...))
+```
diff --git a/examples/bminf_generate/cpm1_generate.py b/examples/bminf_generate/cpm1_generate.py
@@ -0,0 +1,35 @@
+import torch
+from flagai.auto_model.auto_loader import AutoLoader
+from flagai.model.predictor.predictor import Predictor
+import bminf
+import time
+
+
+if __name__ == '__main__':
+
+    text = '''默写古诗:
+    白日依山尽，黄河入海流。
+    床前明月光，'''
+
+    loader = AutoLoader(task_name="lm",
+                        model_name="CPM-large-ch",
+                        model_dir="./checkpoints",
+                        device="cpu")
+
+    model = loader.get_model()
+    time_start=time.time()
+    with torch.cuda.device(0):
+        model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
+    tokenizer = loader.get_tokenizer()
+
+    predictor = Predictor(model=model,
+                          tokenizer=tokenizer,
+                          )
+
+    out = predictor.predict_generate_randomsample(text,
+                                                  top_p=0.9,
+                                                  out_max_length=50)
+    time_end=time.time()
+    print('time cost',time_end-time_start,'s')
+
+    print(out)
diff --git a/examples/bminf_generate/galactica_6.7b_generate.py b/examples/bminf_generate/galactica_6.7b_generate.py
@@ -0,0 +1,37 @@
+
+from flagai.model.predictor.predictor import Predictor
+from flagai.auto_model.auto_loader import AutoLoader
+import torch
+import bminf
+import time
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+
+loader = AutoLoader(task_name="lm",
+                    model_name="galactica-6.7b-en",
+                    model_dir="./checkpoints/")
+
+model = loader.get_model()
+with torch.cuda.device(0):
+    model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
+model.to(device)
+model.eval()
+tokenizer = loader.get_tokenizer()
+predictor = Predictor(model, tokenizer)
+print("model loaded")
+time_start=time.time()
+
+text = "Please write a abstract about the computer vision. \n"
+out = predictor.predict_generate_randomsample(text,
+                                            out_max_length=700,
+                                            top_k=50,
+                                            repetition_penalty=1.2,
+                                            temperature=0.7
+                                            )
+
+time_end=time.time()
+print('time cost',time_end-time_start,'s')
+print(out)
+
+
+
diff --git a/examples/bminf_generate/gpt2_generate.py b/examples/bminf_generate/gpt2_generate.py
@@ -0,0 +1,35 @@
+# Copyright © 2022 BAAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License")
+import torch
+from flagai.auto_model.auto_loader import AutoLoader
+from flagai.model.predictor.predictor import Predictor
+import bminf
+import time
+
+if __name__ == '__main__':
+
+    loader = AutoLoader("seq2seq",
+                        "GPT2-base-ch",
+                        model_dir="./checkpoints/")
+    model = loader.get_model()
+    model = model.to('cpu')
+    tokenizer = loader.get_tokenizer()
+    time_start=time.time()
+    with torch.cuda.device(0):
+        model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
+    predictor = Predictor(model, tokenizer)
+
+    text = "今天天气不错"
+
+    out_2 = predictor.predict_generate_randomsample(text,
+                                                    input_max_length=512,
+                                                    out_max_length=100,
+                                                    repetition_penalty=1.5,
+                                                    top_k=20,
+                                                    top_p=0.8)
+
+    time_end=time.time()
+    print('time cost',time_end-time_start,'s')
+    # print(f"out_1 is {out_1}")
+    print(f"out_2 is {out_2}")
diff --git a/examples/cpm3_generation/generation.py b/examples/cpm3_generation/generation.py
@@ -103,7 +103,7 @@ def calc_banned_ngram_tokens(
 
     return banned_tokens
 
-# min_length_constriant
+# min_length_constraint
 def min_length_constraint(logits, cur_len, min_len, tokenizer):
     # This enforcing a min-length by setting EOS probability to 0.
     if cur_len <= min_len:

diff --git a/examples/cpm3_pretrain/data_analyze.py b/examples/cpm3_pretrain/data_analyze.py
@@ -1,6 +1,6 @@
 import json
 
-fout = open('{}'.format('/sharefs/baai-mrnd/xw/cpm3_trian_data/cpm3_train_data.jsonl'), "w", encoding='utf-8')
+fout = open('{}'.format('/sharefs/baai-mrnd/xw/cpm3_train_data/cpm3_train_data.jsonl'), "w", encoding='utf-8')
 fin = open('{}'.format('/sharefs/webbrain-lijijie/data/CEPSUM/test_public.jsonl'), 'r', encoding='utf-8')
 
 def random_mask(source: str):

diff --git a/examples/cpm_1/generate.py b/examples/cpm_1/generate.py
@@ -9,7 +9,7 @@
 
     loader = AutoLoader(task_name="lm",
                         model_name="CPM-large-ch",
-                        model_dir="./state_dict/")
+                        model_dir="./checkpoints")
 
     model = loader.get_model()
     tokenizer = loader.get_tokenizer()

diff --git a/examples/galactica/generate_galactica_1.3b.py b/examples/galactica/generate_galactica_1.3b.py
@@ -22,6 +22,4 @@
                                               repetition_penalty=1.2,
                                               temperature=0.7
                                               )
-print(out)
-
-
+print(out)
diff --git a/examples/glm_blank_filling/glm_generate_samples.py b/examples/glm_blank_filling/glm_generate_samples.py
@@ -43,4 +43,4 @@
     for t in text:
         output = predictor.predict_generate_randomsample(
             t, top_k=50, repetition_penalty=4.0, top_p=1.0)
-        print(t, '\n', output)
+        print(t, '\n', output)
diff --git a/examples/glm_blank_filling/glm_generate_samples_en.py b/examples/glm_blank_filling/glm_generate_samples_en.py
@@ -1,7 +1,6 @@
 # Copyright © 2022 BAAI. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License")
-
 import torch
 from flagai.model.glm_model import GLMModel
 from flagai.data.tokenizer import Tokenizer
@@ -13,7 +12,7 @@
     print('Generate Samples')
 
     loader = AutoLoader(task_name='lm',
-                            model_name='GLM-large-en-generation',
+                            model_name='GLM-large-en',
                             only_download_config=False)
     model = loader.get_model()
     tokenizer = loader.get_tokenizer()

diff --git a/examples/gpt2_title_generation/train_multi_gpu.py b/examples/gpt2_title_generation/train_multi_gpu.py
@@ -1,7 +1,6 @@
 # Copyright © 2022 BAAI. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License")
-import sys
 import os
 import torch
 from torch.utils.data import Dataset
@@ -39,7 +38,7 @@
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 src_dir = cur_dir + '/data/train.src'
 tgt_dir = cur_dir + '/data/train.tgt'
-model_dir = "./state_dict/"
+model_dir = "./checkpoints/"
 os.makedirs(model_dir, exist_ok=True)
 maxlen = 256
 

diff --git a/examples/roberta_ner/generate.py b/examples/roberta_ner/generate.py
@@ -7,11 +7,13 @@
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
+
 task_name = "ner"
-model_dir = "./state_dict/"
+model_dir = "./checkpoints"
 
 # Note "./checkpoints_ner/{}/mp_rank_00_model_states.pt", {} is a directory in the checkpoints_ner.
-model_save_path = "./checkpoints_ner/3913/mp_rank_00_model_states.pt"
+# model_save_path = "./checkpoints_ner/3913/mp_rank_00_model_states.pt"
 
 target = ["O", "B-LOC", "I-LOC", "B-ORG", "I-ORG", "B-PER", "I-PER"]
 
@@ -25,8 +27,8 @@
 tokenizer = auto_loader.get_tokenizer()
 
 predictor = Predictor(model, tokenizer)
-model.load_state_dict(
-    torch.load(model_save_path, map_location=device)["module"])
+# model.load_state_dict(
+#     torch.load(model_save_path, map_location=device)["module"])
 
 model.to(device)
 model.eval()

diff --git a/examples/roberta_ner/train.py b/examples/roberta_ner/train.py
@@ -69,7 +69,7 @@ def load_data(filename):
 val_data = load_data(valid_path)
 test_data = load_data(test_path)
 
-print(f"trian_data is {len(train_data)}")
+print(f"train_data is {len(train_data)}")
 print(f"val_data is {len(val_data)}")
 print(f"test_data is {len(test_data)}")
 print(f"target is {target}")

diff --git a/examples/roberta_ner/train_crf.py b/examples/roberta_ner/train_crf.py
@@ -65,7 +65,7 @@ def load_data(filename):
 val_data = load_data(valid_path)
 test_data = load_data(test_path)
 
-print(f"trian_data is {len(train_data)}")
+print(f"train_data is {len(train_data)}")
 print(f"val_data is {len(val_data)}")
 print(f"test_data is {len(test_data)}")
 print(f"target is {target}")

diff --git a/examples/roberta_ner/train_global_pointer.py b/examples/roberta_ner/train_global_pointer.py
@@ -61,7 +61,7 @@ def load_data(filename):
 val_data = load_data(valid_path)
 test_data = load_data(test_path)
 
-print(f"trian_data is {len(train_data)}")
+print(f"train_data is {len(train_data)}")
 print(f"val_data is {len(val_data)}")
 print(f"test_data is {len(test_data)}")
 print(f"target is {target}")

diff --git a/examples/roberta_title_generation/generate.py b/examples/roberta_title_generation/generate.py
@@ -7,7 +7,7 @@
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-model_dir = "./state_dict"
+model_dir = "./checkpoints"
 
 # Note "./checkpoints_seq2seq/{}/mp_rank_00_model_states.pt", {} is a directory in the checkpoints_seq2seq.
 model_save_path = "./checkpoints_seq2seq/10/mp_rank_00_model_states.pt"

diff --git a/examples/t5_title_generation/generate.py b/examples/t5_title_generation/generate.py
@@ -5,7 +5,7 @@
 from flagai.model.predictor.predictor import Predictor
 
 if __name__ == '__main__':
-    loader = AutoLoader("title-generation", "T5-base-ch", model_dir="./state_dict/")
+    loader = AutoLoader("title-generation", "T5-base-ch", model_dir="./checkpoints")
     model = loader.get_model()
     tokenizer = loader.get_tokenizer()
     predictor = Predictor(model, tokenizer)

diff --git a/flagai/auto_model/auto_loader.py b/flagai/auto_model/auto_loader.py
@@ -46,6 +46,7 @@ def __getattr__(self, name):
     "cpm_lm": ("flagai.model.gpt2_model", "GPT2Model"),
     "t5_seq2seq": ["flagai.model.t5_model", "T5Model"],
     "t5_lm": ["flagai.model.t5_model", "T5Model"],
+    "t5_title-generation": ["flagai.model.t5_model", "T5Model"],
     "alm_lm": ["flagai.model.alm_model", "ALMModel"],
     "glm_lm": ["flagai.model.glm_model", "GLMModel"],
     "glm_seq2seq": ["flagai.model.glm_model", "GLMForSeq2Seq"],
@@ -62,7 +63,7 @@ def __getattr__(self, name):
     "swinv2_classification": ("flagai.model.vision.swinv2",
                               "SwinTransformerV2"),
     "cpm3_lm": ("flagai.model.cpm3_model", "CPM3"),
-    "cpm3_trian": ("flagai.model.cpm3_trian_model", "CPM3"),
+    "cpm3_train": ("flagai.model.cpm3_train_model", "CPM3"),
     "diffusion_text2img": ("flagai.model.mm.AltDiffusion", "LatentDiffusion"),
     "altclip_txt_img_matching": ("flagai.model.mm.AltCLIP", "AltCLIP"),
     "evaclip_txt_img_matching": ("flagai.model.mm.eva_clip_model", "EVA_CLIP"),

diff --git a/flagai/data/file_utils.py b/flagai/data/file_utils.py
@@ -20,7 +20,6 @@
 from hashlib import sha256
 import sys
 from io import open
-
 import boto3
 import requests
 from botocore.exceptions import ClientError

diff --git a/flagai/data/tokenizer/__init__.py b/flagai/data/tokenizer/__init__.py
@@ -6,5 +6,6 @@
 from .bert.bert_tokenizer import BertWordPieceTokenizer
 from .cpm_1.cpm1_tokenizer import CPMTokenizer
 from .opt.opt_en_tokenizer import OPTTokenizer
+from .t5.t5_pegasus_tokenizer import T5PegasusTokenizer
 from .uni_tokenizer.tokenizer import Tokenizer
 # from .uni_tokenizer.base_tokenizer import BaseTokenizer
diff --git a/flagai/data/tokenizer/bert/bert_tokenizer.py b/flagai/data/tokenizer/bert/bert_tokenizer.py
@@ -74,7 +74,7 @@ def __init__(self, tokenizer_model_type=None, cache_dir=None):
 
         self._command_tokens = [
             CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
-            CommandToken('ENC', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
+            CommandToken('cls', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
             CommandToken('MASK', '[MASK]',
                          self.get_specialid_from_text_tokenizer('mask')),
             CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),

diff --git a/flagai/data/tokenizer/galactica/galactica_tokenizer.py b/flagai/data/tokenizer/galactica/galactica_tokenizer.py
@@ -14,7 +14,7 @@ def __init__(self, download_dir) -> None:
 
         self._command_tokens = [
             CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
-            CommandToken('ENC', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
+            CommandToken('cls', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
             CommandToken('MASK', '[MASK]',
                          self.get_specialid_from_text_tokenizer('mask')),
             CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),

diff --git a/flagai/data/tokenizer/opt/opt_en_tokenizer.py b/flagai/data/tokenizer/opt/opt_en_tokenizer.py
@@ -34,7 +34,7 @@ def __init__(self, tokenizer_model_type="facebook/opt-125m", cache_dir=None):
 
         self._command_tokens = [
             CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
-            CommandToken('ENC', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
+            CommandToken('cls', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
             CommandToken('MASK', '[MASK]',
                          self.get_specialid_from_text_tokenizer('mask')),
             CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),