
Commit

[CodeStyle] fix taskflow (#4982)
* dependency parsing

* fix lexical

* fix lexical

* fix lexical

* poetry

* poetry

* qa

* text correction

* text generation

* seg
sijunhe authored Feb 24, 2023
1 parent 1a0af2b commit edf6fef
Showing 9 changed files with 43 additions and 122 deletions.
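The bulk of this commit reorders imports and drops unused ones; the grouping the hunks below converge on matches what isort produces by default (standard library, then third-party, then first-party, alphabetized within each group and separated by blank lines). A minimal sketch of that layout, assuming paddle, numpy, and paddlenlp are installed:

    # Import layout assumed by this commit's style fixes (isort defaults):
    import copy            # --- standard library ---
    import os

    import numpy as np     # --- third-party ---
    import paddle

    import paddlenlp       # --- first-party ---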
24 changes: 10 additions & 14 deletions paddlenlp/taskflow/dependency_parsing.py
@@ -15,17 +15,17 @@

import copy
import os
import itertools

import numpy as np
import paddle
from ..data import Vocab, Pad
from .utils import download_file, dygraph_mode_guard
from .task import Task

from ..data import Pad, Vocab
from .models import BiAffineParser
from .task import Task
from .utils import download_file

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
ddp = Taskflow("dependency_parsing")
ddp("三亚是一座美丽的城市")
@@ -35,7 +35,7 @@
ddp(["三亚是一座美丽的城市", "他送了一本书"])
'''
[{'word': ['三亚', '是', '一座', '美丽', '的', '城市'], 'head': [2, 0, 6, 6, 4, 2], 'deprel': ['SBV', 'HED', 'ATT', 'ATT', 'MT', 'VOB']}, {'word': ['他', '送', '了', '一本', '书'], 'head': [2, 0, 2, 5, 2], 'deprel': ['SBV', 'HED', 'MT', 'ATT', 'VOB']}]
'''
'''
ddp = Taskflow("dependency_parsing", prob=True, use_pos=True)
ddp("三亚是一座美丽的城市")
@@ -64,7 +64,7 @@
ddp.from_segments([['三亚', '是', '一座', '美丽', '的', '城市'], ['他', '送', '了', '一本', '书']])
'''
[{'word': ['三亚', '是', '一座', '美丽', '的', '城市'], 'head': [2, 0, 6, 6, 4, 2], 'deprel': ['SBV', 'HED', 'ATT', 'ATT', 'MT', 'VOB']}, {'word': ['他', '送', '了', '一本', '书'], 'head': [2, 0, 2, 5, 2], 'deprel': ['SBV', 'HED', 'MT', 'ATT', 'VOB']}]
'''
'''
"""


@@ -181,7 +181,7 @@ def __init__(

try:
from LAC import LAC
except:
except Exception:
raise ImportError("Please install the dependencies first, pip install LAC --upgrade")

self.use_cuda = use_cuda
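The hunk above narrows a bare `except:` around the optional LAC import to `except Exception:`, which keeps KeyboardInterrupt and SystemExit propagating while still turning a missing dependency into an actionable message. A small self-contained sketch of that guard pattern (the optional_import helper is illustrative, not part of this commit):

    import importlib

    def optional_import(module_name, hint):
        """Import an optional dependency, or raise an actionable ImportError."""
        try:
            return importlib.import_module(module_name)
        except ImportError as exc:  # narrower than a bare "except:"
            raise ImportError(hint) from exc

    # Hypothetical usage mirroring the guard above:
    # lac = optional_import("LAC", "Please install the dependencies first, pip install LAC --upgrade")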
@@ -274,10 +274,6 @@ def _preprocess(self, inputs):
2) Generate the other model inputs from the raw text and token ids.
"""

# Get the config from the kwargs
num_workers = self.kwargs["num_workers"] if "num_workers" in self.kwargs else 0
lazy_load = self.kwargs["lazy_load"] if "lazy_load" in self.kwargs else False

outputs = {}

lac_results = []
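The deleted lines above were config lookups whose results were never used; where such a value is actually needed, dict.get with a default is the shorter idiom. A standalone illustration, not taken from the commit:

    def read_config(kwargs):
        # dict.get() with a default is the short form of the removed
        # "x if key in kwargs else default" lookups.
        num_workers = kwargs.get("num_workers", 0)
        lazy_load = kwargs.get("lazy_load", False)
        return num_workers, lazy_load

    print(read_config({"num_workers": 4}))  # -> (4, False)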
@@ -368,9 +364,9 @@ def _visualize(self, data):
data: a numpy array, use cv2.imshow to show it or cv2.imwrite to save it.
"""
try:
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
except:
import matplotlib.pyplot as plt
except Exception:
raise ImportError("Please install the dependencies first, pip install matplotlib --upgrade")

self.plt = plt
26 changes: 8 additions & 18 deletions paddlenlp/taskflow/lexical_analysis.py
@@ -13,26 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ..datasets import load_dataset, MapDataset
from ..data import Stack, Pad, Tuple, Vocab, JiebaTokenizer
from .utils import download_file, add_docstrings, static_mode_guard, dygraph_mode_guard
from .utils import Customization
from .task import Task

from ..data import Pad, Stack, Tuple
from ..datasets import load_dataset
from .models import BiGruCrf
from .task import Task
from .utils import Customization

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
lac = Taskflow("lexical_analysis")
lac("LAC是个优秀的分词工具")
@@ -42,7 +34,7 @@
lac(["LAC是个优秀的分词工具", "三亚是一个美丽的城市"])
'''
[{'text': 'LAC是个优秀的分词工具', 'segs': ['LAC', '是', '个', '优秀', '的', '分词', '工具'], 'tags': ['nz', 'v', 'q', 'a', 'u', 'n', 'n']},
[{'text': 'LAC是个优秀的分词工具', 'segs': ['LAC', '是', '个', '优秀', '的', '分词', '工具'], 'tags': ['nz', 'v', 'q', 'a', 'u', 'n', 'n']},
{'text': '三亚是一个美丽的城市', 'segs': ['三亚', '是', '一个', '美丽', '的', '城市'], 'tags': ['LOC', 'v', 'm', 'a', 'u', 'n']}
]
'''
Expand All @@ -60,7 +52,7 @@ def load_vocab(dict_path):
for i, line in enumerate(fin):
terms = line.strip("\n").split("\t")
if len(terms) == 2:
if reverse == None:
if reverse is None:
reverse = True if terms[0].isdigit() else False
if reverse:
value, key = terms
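Above, `reverse == None` becomes `reverse is None`: None is a singleton, so the identity check is the idiomatic comparison and cannot be fooled by a custom `__eq__`. A tiny standalone illustration:

    class AlwaysEqual:
        def __eq__(self, other):  # a pathological __eq__ to show the difference
            return True

    obj = AlwaysEqual()
    print(obj == None)  # True  -- __eq__ can lie
    print(obj is None)  # False -- identity cannot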
Expand Down Expand Up @@ -174,7 +166,6 @@ def _preprocess(self, inputs, padding=True, add_special_tokens=True):
batch_size = self.kwargs["batch_size"] if "batch_size" in self.kwargs else 1
num_workers = self.kwargs["num_workers"] if "num_workers" in self.kwargs else 0
self._split_sentence = self.kwargs["split_sentence"] if "split_sentence" in self.kwargs else False
infer_data = []
oov_token_id = self._word_vocab.get("OOV")

filter_inputs = []
@@ -238,7 +229,6 @@ def _postprocess(self, inputs):
"""
The model output is the tag ids, this function will convert the model output to raw text.
"""
batch_out = []
lengths = inputs["lens"]
preds = inputs["result"]
sents = inputs["text"]
5 changes: 2 additions & 3 deletions paddlenlp/taskflow/models/lexical_analysis_model.py
@@ -15,13 +15,12 @@

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlenlp.layers.crf import LinearChainCrf, LinearChainCrfLoss
from paddlenlp.utils.tools import compare_version

try:
from paddle.text import ViterbiDecoder
except:
except Exception:
raise ImportError(
"Taskflow requires paddle version >= 2.2.0, but current paddle version is {}".format(
paddle.version.full_version
12 changes: 1 addition & 11 deletions paddlenlp/taskflow/poetry_generation.py
@@ -13,20 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
from .utils import download_file
from .text_generation import TextGenerationTask
from .task import Task

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
poetry = Taskflow("poetry_generation")
poetry("林密不见人")
15 changes: 2 additions & 13 deletions paddlenlp/taskflow/pos_tagging.py
@@ -13,19 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
from .utils import download_file
from .lexical_analysis import load_vocab, LacTask
from .lexical_analysis import LacTask

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
pos = Taskflow("pos_tagging")
pos("第十四届全运会在西安举办")
@@ -56,13 +47,11 @@ def _postprocess(self, inputs):
"""
The model output is the tag ids, this function will convert the model output to raw text.
"""
batch_out = []
lengths = inputs["lens"]
preds = inputs["result"]
sents = inputs["text"]
final_results = []
for sent_index in range(len(lengths)):
single_result = {}
tags = [self._id2tag_dict[str(index)] for index in preds[sent_index][: lengths[sent_index]]]
sent = sents[sent_index]
if self._custom:
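Several hunks in this file and the ones below simply delete locals such as batch_out and single_result that were assigned but never read (the pattern flake8 reports as F841). A short illustrative sketch, not from the commit itself:

    def postprocess(lengths):
        # batch_out = []      # assigned but never read (flake8 F841) -> removed
        final_results = []    # actually consumed below, so it stays
        for n in lengths:
            final_results.append(n * 2)
        return final_results

    print(postprocess([1, 2, 3]))  # [2, 4, 6]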
13 changes: 2 additions & 11 deletions paddlenlp/taskflow/question_answering.py
@@ -13,19 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
from .utils import download_file
from .text_generation import TextGenerationTask

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
qa = Taskflow("question_answering")
qa("中国的国土面积有多大?")
@@ -37,7 +28,7 @@
'''
[{'text': '中国国土面积有多大?', 'answer': '960万平方公里。'}, {'text': '中国的首都在哪里?', 'answer': '北京。'}]
'''
"""

URLS = {
29 changes: 10 additions & 19 deletions paddlenlp/taskflow/text_correction.py
@@ -13,24 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ..transformers import ErnieTokenizer, ErnieModel
from ..transformers import is_chinese_char
from ..datasets import load_dataset
from ..data import Stack, Pad, Tuple, Vocab
from .utils import download_file, add_docstrings, static_mode_guard

from ..data import Pad, Stack, Tuple, Vocab
from ..transformers import ErnieModel, ErnieTokenizer, is_chinese_char
from .models import ErnieForCSC
from .task import Task
from .utils import static_mode_guard

usage = r"""
from paddlenlp import Taskflow
@@ -47,11 +38,11 @@
text_correction(['遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。',
'人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。'])
'''
[{'source': '遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。',
'target': '遇到逆境时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。',
'errors': [{'position': 3, 'correction': {'竟': '境'}}]},
{'source': '人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。',
'target': '人生就是如此,经过磨练才能让自己更加茁壮,才能使自己更加乐观。',
[{'source': '遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。',
'target': '遇到逆境时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。',
'errors': [{'position': 3, 'correction': {'竟': '境'}}]},
{'source': '人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。',
'target': '人生就是如此,经过磨练才能让自己更加茁壮,才能使自己更加乐观。',
'errors': [{'position': 18, 'correction': {'拙': '茁'}}]}
]
'''
@@ -93,7 +84,7 @@ def __init__(self, task, model, **kwargs):
self._construct_tokenizer(model)
try:
import pypinyin
except:
except ImportError:
raise ImportError("Please install the dependencies first, pip install pypinyin --upgrade")
self._pypinyin = pypinyin
self._batchify_fn = lambda samples, fn=Tuple(
23 changes: 4 additions & 19 deletions paddlenlp/taskflow/text_generation.py
@@ -13,23 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ..transformers import GPTForGreedyGeneration
from ..transformers import GPTChineseTokenizer, GPTTokenizer
from ..datasets import load_dataset
from ..data import Stack, Pad, Tuple
from .utils import download_file, add_docstrings, static_mode_guard, dygraph_mode_guard

from ..data import Pad, Stack, Tuple
from ..transformers import GPTChineseTokenizer, GPTForGreedyGeneration, GPTTokenizer
from .task import Task
from .utils import download_file, static_mode_guard

usage = r"""
"""
@@ -99,9 +88,7 @@ def _preprocess(self, inputs, padding=True, add_special_tokens=True):
inputs = self._check_input_text(inputs)
# Get the config from the kwargs
batch_size = self.kwargs["batch_size"] if "batch_size" in self.kwargs else 1
num_workers = self.kwargs["num_workers"] if "num_workers" in self.kwargs else 0
generation_task = self.kwargs["generation_task"] if "generation_task" in self.kwargs else "question_answering"
max_seq_len = 32

def select_few_shot_input(model_name, generation_task):
pre_input = ""
@@ -116,8 +103,6 @@ def select_few_shot_input(model_name, generation_task):

pre_input = select_few_shot_input(self.model, generation_task)

infer_data = []

examples = []
filter_inputs = []
for input_text in inputs:
18 changes: 4 additions & 14 deletions paddlenlp/taskflow/word_segmentation.py
@@ -13,22 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import json
import math
import os
import copy
import itertools

import numpy as np
import jieba
from .utils import download_file
from .task import Task
from .lexical_analysis import load_vocab, LacTask

from .lexical_analysis import LacTask
from .named_entity_recognition import NERWordTagTask
from .task import Task

usage = r"""
from paddlenlp import Taskflow
from paddlenlp import Taskflow
# Taskflow base模式
seg = Taskflow("word_segmentation")
@@ -124,13 +116,11 @@ def _postprocess(self, inputs):
"""
The model output is the tag ids, this function will convert the model output to raw text.
"""
batch_out = []
lengths = inputs["lens"]
preds = inputs["result"]
sents = inputs["text"]
final_results = []
for sent_index in range(len(lengths)):
single_result = {}
tags = [self._id2tag_dict[str(index)] for index in preds[sent_index][: lengths[sent_index]]]
sent = sents[sent_index]
if self._custom: