
Gagansh7171 pos en hi fg #104

Open · wants to merge 16 commits into base: master
28 changes: 14 additions & 14 deletions Code/BertToken.py
@@ -16,7 +16,7 @@
 from seqeval.metrics import precision_score, recall_score, f1_score, accuracy_score
 
 
-logger = logging.getLogger(__name__)
+#logger = logging.getLogger(__name__)
 
 
 def set_seed(args):
@@ -119,10 +119,10 @@ def train(args, train_dataset, valid_dataset, model, tokenizer, labels):
 scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=t_total // 10, num_training_steps=t_total)
 
 # Training
-logger.info("***** Running training *****")
-logger.info(" Num examples = %d", len(train_dataset))
-logger.info(" Num Epochs = %d", args.num_train_epochs)
-logger.info(" Instantaneous batch size per GPU = %d",
+print("***** Running training *****")
+print(" Num examples = %d", len(train_dataset))
+print(" Num Epochs = %d", args.num_train_epochs)
+print(" Instantaneous batch size per GPU = %d",
 args.train_batch_size)
 
 global_step = 0
@@ -175,9 +175,9 @@ def evaluate(args, model, tokenizer, labels, mode, prefix=""):
 eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, collate_fn=collate)
 
 # Evaluation
-logger.info("***** Running evaluation %s *****", prefix)
-logger.info(" Num examples = %d", len(eval_dataset))
-logger.info(" Batch size = %d", args.eval_batch_size)
+print("***** Running evaluation %s *****", prefix)
+print(" Num examples = %d", len(eval_dataset))
+print(" Batch size = %d", args.eval_batch_size)
 eval_loss = 0.0
 nb_eval_steps = 0
 preds = []
@@ -229,9 +229,9 @@ def evaluate(args, model, tokenizer, labels, mode, prefix=""):
 "accuracy": accuracy_score(out_label_list, preds_list)
 }
 
-logger.info("***** Eval results %s *****", prefix)
+print("***** Eval results %s *****", prefix)
 for key in sorted(results.keys()):
-logger.info(" %s = %s", key, str(results[key]))
+print(" %s = %s", key, str(results[key]))
 
 return results
 
@@ -274,7 +274,7 @@ def collate(examples):
 
 def load_and_cache_examples(args, tokenizer, labels, mode):
 
-logger.info("Creating features from dataset file at %s", args.data_dir)
+print("Creating features from dataset file at %s", args.data_dir)
 examples = read_examples_from_file(args.data_dir, mode)
 features = convert_examples_to_features(examples, labels, tokenizer, args.max_seq_length)
 
@@ -352,17 +352,17 @@ def main():
 
 model.to(args.device)
 
-logger.info("Training/evaluation parameters %s", args)
+print("Training/evaluation parameters %s", args)
 
 train_dataset = load_and_cache_examples(
 args, tokenizer, labels, mode="train")
 valid_dataset = load_and_cache_examples(
 args, tokenizer, labels, mode="validation")
 global_step, tr_loss = train(
 args, train_dataset, valid_dataset, model, tokenizer, labels)
-logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
+print(" global_step = %s, average loss = %s", global_step, tr_loss)
 
-logger.info("Saving model checkpoint to %s", args.output_dir)
+print("Saving model checkpoint to %s", args.output_dir)
 # Save a trained model, configuration and tokenizer using `save_pretrained()`.
 # They can then be reloaded using `from_pretrained()`
 
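Note on the logger-to-print swap above: logging.Logger.info applies %-style interpolation to its trailing arguments, but print() does not, so the new calls emit the raw template followed by the argument. A minimal sketch of the difference (the example count is hypothetical):

    # print() space-separates its arguments; no %-interpolation happens.
    print(" Num examples = %d", 120)          # prints:  Num examples = %d 120

    # An f-string reproduces the interpolated output logger.info used to give:
    num_examples = 120  # hypothetical value for illustration
    print(f" Num examples = {num_examples}")  # prints:  Num examples = 120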
8 changes: 4 additions & 4 deletions Data/Preprocess_Scripts/preprocess_pos_en_hi_ud.py
@@ -35,9 +35,9 @@ def scrape_tweets(original_path):
 outfile.write(i)
 
 #scraping tweets
-call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_train.txt -a train-annot.json -o tweets_train.conll'))
-call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_dev.txt -a dev-annot.json -o tweets_dev.conll'))
-call(shlex.split('python crawl_tweets_copy.py -i tweet_ids_test.txt -a test-annot.json -o tweets_test.conll'))
+call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_train.txt -a train-annot.json -o tweets_train.conll'))
+call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_dev.txt -a dev-annot.json -o tweets_dev.conll'))
+call(shlex.split('python3 crawl_tweets_copy.py -i tweet_ids_test.txt -a test-annot.json -o tweets_test.conll'))
 
 def make_files(original_path,new_path):
 
@@ -176,4 +176,4 @@ def main():
 open(new_path+'Devanagari/all.txt', 'a').writelines([l for l in open(new_path+'Devanagari/validation.txt').readlines() ])
 
 if __name__=="__main__":
-main()
\ No newline at end of file
+main()
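The call() lines above hard-code the python3 launcher, which assumes a python3 binary is on PATH. A common alternative, shown here only as a sketch and not as what this PR does, is sys.executable, which reuses the interpreter already running the preprocessing script:

    import sys
    from subprocess import call

    # sys.executable is the absolute path of the current interpreter, so the
    # child process runs under the same Python as this script.
    call([sys.executable, 'crawl_tweets_copy.py',
          '-i', 'tweet_ids_train.txt',
          '-a', 'train-annot.json',
          '-o', 'tweets_train.conll'])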
12 changes: 6 additions & 6 deletions Data/Preprocess_Scripts/preprocess_qa.sh
@@ -9,17 +9,17 @@ PART1=`dirname "$INP_FILE"`
 PART2=`basename "$INP_FILE"`
 
 #preprocesss for DrQA
-python $PREPROCESS_DIR/preprocess_drqa.py --data_dir $ORIGINAL_DATA_DIR
+python3 $PREPROCESS_DIR/preprocess_drqa.py --data_dir $ORIGINAL_DATA_DIR
 
 #run DrQA
 git clone https://github.com/facebookresearch/DrQA.git
 cd DrQA
 git checkout 96f343c
 pip install elasticsearch==7.8.0 nltk==3.5 scipy==1.5.0 prettytable==0.7.2 tqdm==4.46.1 regex==2020.6.8 termcolor==1.1.0 scikit-learn==0.23.1 numpy==1.18.5 torch==1.4.0
-python setup.py develop
+python3 setup.py develop
 pip install spacy==2.3.0
-python -m spacy download xx_ent_wiki_sm
-python -c "import nltk;nltk.download(['punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'])"
+python3 -m spacy download xx_ent_wiki_sm
+python3 -c "import nltk;nltk.download(['punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'])"
 ./download.sh
 sed -i 's/np.load(filename)/np.load(filename, allow_pickle=True)/g' drqa/retriever/utils.py
 sed -i 's/\[\x27tokenizer_class\x27\], {},/\[\x27tokenizer_class\x27\], {\x27model\x27: \x27xx_ent_wiki_sm\x27},/g' scripts/distant/generate.py
@@ -30,8 +30,8 @@ patch scripts/distant/generate.py <<EOF
 263a264
 > random.seed(0)
 EOF
-python scripts/distant/generate.py $PART1 $PART2 $PREPROCESS_DIR --tokenizer spacy --dev-split 0.2 --n-docs 1 --workers 1
+python3 scripts/distant/generate.py $PART1 $PART2 $PREPROCESS_DIR --tokenizer spacy --dev-split 0.2 --n-docs 1 --workers 1
 
 cd ./..
 # Squad format processor
-python $PREPROCESS_DIR/preprocess_qa_en_hi.py --output_dir $PROCESSED_DIR
+python3 $PREPROCESS_DIR/preprocess_qa_en_hi.py --output_dir $PROCESSED_DIR
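For context on the two sed edits above: the \x27 sequences are hex escapes for single quotes (so the sed program can itself be wrapped in single quotes), and the np.load edit works around NumPy 1.16.3+ defaulting to allow_pickle=False, which makes loading DrQA's pickled object arrays raise a ValueError under the pinned numpy==1.18.5. A small sketch of that failure mode and the patched call (the file name is illustrative):

    import numpy as np

    arr = np.array([{'answer': 42}], dtype=object)   # toy pickled object array
    np.save('demo.npy', arr, allow_pickle=True)

    # np.load('demo.npy')  # raises ValueError: Object arrays cannot be loaded
    #                      # when allow_pickle=False
    loaded = np.load('demo.npy', allow_pickle=True)  # what the sed patch enables
    print(loaded[0]['answer'])                       # -> 42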
2 changes: 1 addition & 1 deletion Data/Preprocess_Scripts/preprocess_sent_en_es.py
@@ -215,4 +215,4 @@ def main():
 os.unlink('sentiment_annotated.txt')
 
 if __name__=='__main__':
-main()
\ No newline at end of file
+main()