From c3d1df6e9e9b77df7af74b7f8a819182ee9c3a7f Mon Sep 17 00:00:00 2001 From: Steffy-zxf <48793257+Steffy-zxf@users.noreply.github.com> Date: Mon, 24 May 2021 16:58:11 +0800 Subject: [PATCH] add cote-dp & seabsa16_phns dataset docs (#436) * add cote-dp & seabsa16_phns dataset docs --- docs/data_prepare/dataset_list.rst | 5 +++++ examples/language_model/bert/run_pretrain.py | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/data_prepare/dataset_list.rst b/docs/data_prepare/dataset_list.rst index d9706e0a5a80d..66868e099d928 100644 --- a/docs/data_prepare/dataset_list.rst +++ b/docs/data_prepare/dataset_list.rst @@ -48,6 +48,11 @@ PaddleNLP提供了以下数据集的快速读取API,实际使用时请根据 +---------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------+ | `ChnSentiCorp `__ | 中文评论情感分析语料 | ``paddlenlp.datasets.load_dataset('chnsenticorp')`` | +---------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------+ +| `COTE-DP`__ | 中文观点抽取语料 | ``paddlenlp.datasets.load_dataset('cote', 'dp')`` | ++---------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------+ +| `SE-ABSA16_PHNS`__ | 中文评价对象级情感分析语料 | ``paddlenlp.datasets.load_dataset('seabsa16', 'phns')`` | ++---------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------+ + 序列标注 -------- diff --git a/examples/language_model/bert/run_pretrain.py b/examples/language_model/bert/run_pretrain.py index 0377f1e7c08c0..d6eeaaaabf5b3 100644 --- a/examples/language_model/bert/run_pretrain.py +++ b/examples/language_model/bert/run_pretrain.py @@ -333,8 +333,7 @@ def do_train(args): for epoch in range(args.num_train_epochs): files = [ os.path.join(args.input_dir, f) for f in os.listdir(args.input_dir) - if os.path.isfile(os.path.join(args.input_dir, f)) and "training" in - f + if os.path.isfile(os.path.join(args.input_dir, f)) and "train" in f ] files.sort() num_files = len(files)