This repository has been archived by the owner on Jul 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
apply_corrections.py
40 lines (37 loc) · 1.67 KB
/
apply_corrections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import csv
import json
import os
def _is_excluded(corrections, row_id):
    """Return True when the corrections entry for *row_id* has a truthy "exclude"."""
    entry = corrections.get(row_id)
    return bool(entry) and bool(entry.get("exclude", False))


def _patch_row(corrections, row):
    """Overlay the corrections for ``row["id"]`` onto *row* in place and return it.

    Only keys that already exist in the row are overwritten, so control keys
    in the corrections entry never create new columns. An entry whose "apply"
    value is falsy is reported but left unapplied.
    """
    fixes = corrections.get(row["id"])
    if not fixes:
        return row
    print(f"PATCH: Found corrections for ID {row['id']} --", end=' ')
    if fixes.get("apply", True):
        print("Applied.")
        row.update({key: value for key, value in fixes.items() if key in row})
    else:
        print("Skipped.")
    return row


def _patch_file(corrections, source_path, target_path):
    """Copy one CSV from *source_path* to *target_path*, patching or dropping rows."""
    # newline="" lets the csv module handle line endings itself; without it,
    # newlines inside quoted fields are mangled and Windows output gets an
    # extra "\r" on every line.
    with open(source_path, newline="") as read_file:
        with open(target_path, "w", newline="") as write_file:
            reader = csv.DictReader(read_file)
            # Take the header from the reader rather than from the first data
            # row, so a header-only file no longer raises StopIteration.
            fieldnames = reader.fieldnames
            if fieldnames is None:
                # Completely empty input: leave an empty output file.
                return
            writer = csv.DictWriter(write_file, fieldnames)
            writer.writeheader()
            for row in reader:
                if _is_excluded(corrections, row["id"]):
                    print(f"PATCH: Excluding ID {row['id']}.")
                    continue
                writer.writerow(_patch_row(corrections, row))


def run(
    corrections_path="raw_data/corrections.json",
    input_dir="csv/unpatched",
    output_dir="csv",
):
    """Apply the JSON corrections to every CSV file in *input_dir*.

    The corrections file is a JSON object keyed by the value of each row's
    "id" column. Each entry is an object that may contain:

    * "exclude": when truthy, the row is dropped from the output;
    * "apply": when falsy, the entry is reported but not applied
      (defaults to True);
    * any other key: if the row has a column with that name, its value is
      replaced; keys that match no column are ignored.

    Each ``*.csv`` file in *input_dir* is rewritten under the same name in
    *output_dir*. Progress messages are printed to stdout.

    Args:
        corrections_path: Path to the JSON corrections file.
        input_dir: Directory holding the unpatched CSV files.
        output_dir: Existing directory that receives the patched CSV files.

    Raises:
        OSError: If a path cannot be opened or listed.
        json.JSONDecodeError: If the corrections file is not valid JSON.
        KeyError: If a CSV data row has no "id" column.
    """
    print("Applying corrections...")
    # JSON is UTF-8 by specification, so do not depend on the locale default.
    with open(corrections_path, encoding="utf-8") as corrections_file:
        corrections = json.load(corrections_file)

    # Sorted so the processing order (and therefore the log) is deterministic.
    for filename in sorted(os.listdir(input_dir)):
        # Match the real extension; a bare "csv" suffix would also accept
        # names such as "backupcsv".
        if not filename.endswith(".csv"):
            continue
        _patch_file(
            corrections,
            os.path.join(input_dir, filename),
            os.path.join(output_dir, filename),
        )
    print("-"*140)
# Script entry point: apply the corrections only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()