-
Notifications
You must be signed in to change notification settings - Fork 3
/
format_checker.py
59 lines (45 loc) · 1.76 KB
/
format_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import argparse
import logging
import json
import pandas as pd
"""
This script checks whether the results format is correct.
It also provides some warnings about possible errors.
The submission of the result file should be in jsonl format.
Each line should be a JSON object of the form:
{
id -> identifier of the test sample,
label -> label (0 or 1),
}
"""
# Root logger setup: INFO and above, rendered as "<LEVEL> : <message>".
logging.basicConfig(level=logging.INFO, format='%(levelname)s : %(message)s')

# Keys every record of a submission file has to provide.
COLUMNS = ['id', 'label']
def check_format(file_path):
    """Check that a submission file follows the expected JSON-lines format.

    The file passes when it exists, can be read by ``pd.read_json`` with
    ``lines=True``, provides every column listed in ``COLUMNS`` with no
    missing values, and holds only 0 or 1 in the ``label`` column. Every
    failed check is reported through ``logging.error``.

    Args:
        file_path: Path of the prediction file to validate.

    Returns:
        bool: True when every check passes, False otherwise.
    """
    if not os.path.exists(file_path):
        logging.error("File doesnt exists: {}".format(file_path))
        return False

    try:
        submission = pd.read_json(file_path, lines=True)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still propagate while any read/parse failure is reported.
        logging.error("File is not a valid json file: {}".format(file_path))
        return False

    # A parseable file lacking a required key is a schema problem, not a JSON
    # problem, so it is reported separately from the parse failure above.
    missing = [column for column in COLUMNS if column not in submission.columns]
    if missing:
        logging.error("Missing column(s) {} in file {}".format(missing, file_path))
        return False
    submission = submission[COLUMNS]

    for column in COLUMNS:
        if submission[column].isna().any():
            logging.error("NA value in file {} in column {}".format(file_path, column))
            return False

    if not submission['label'].isin(range(0, 2)).all():
        logging.error("Unknown Label in file {}".format(file_path))
        logging.error("Unique Labels in the file are {}".format(submission['label'].unique()))
        return False

    return True
if __name__ == "__main__":
    # Command-line entry point: run check_format on every file passed via
    # --prediction_file_path / -p and log one verdict per file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--prediction_file_path", "-p", nargs='+', required=True,
                        help="Path to the files you want to check.", type=str)
    args = parser.parse_args()

    logging.info("Checking files: {}".format(args.prediction_file_path))
    for pred_file_path in args.prediction_file_path:
        check_result = check_format(pred_file_path)
        result = 'Format is correct' if check_result else 'Something wrong in file format'
        # Name the file that was just checked rather than the whole argument
        # list, so each verdict can be matched to its file.
        logging.info("Checking file: {}. Result: {}".format(pred_file_path, result))