-
Notifications
You must be signed in to change notification settings - Fork 3
/
augment_prep.py
41 lines (36 loc) · 1.42 KB
/
augment_prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import json
from pathlib import Path
# Define the input and output paths
input_filepath = Path('./data/questions.json')
output_directory = Path('./data/qna')


def _collapse_blank_lines(text):
    """Return *text* with every run of consecutive newlines reduced to one."""
    # Loop until stable: a single replace pass only halves a long run of
    # newlines, so a fixed number of passes leaves blank lines behind.
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    return text


def _group_entries_by_source(records):
    """Render each record as a Q&A entry and bucket the entries by source.

    Args:
        records: iterable of mappings with "source", "question" and "answer"
            keys.

    Returns:
        A dict mapping each source string to the list of its rendered
        entries, in input order. Grouping by key (instead of relying on
        adjacent records) keeps every entry of a source even when the
        records of that source are not contiguous in the input.

    Raises:
        KeyError: if a record lacks one of the three expected keys.
    """
    grouped = {}
    for record in records:
        source = record["source"]
        answer = _collapse_blank_lines(record["answer"])
        entry = f"Question: {record['question']}\nAnswer: {answer}\n"
        grouped.setdefault(source, []).append(entry)
    return grouped


def _qna_file_name(source):
    """Return the output file name for *source*: ``<stub>_qna.md``.

    The stub is the last "/"-separated component of *source*, cut at its
    first ".".
    """
    file_name_stub = source.split("/")[-1].split(".")[0]
    return f"{file_name_stub}_qna.md"


def main(input_path=input_filepath, output_dir=output_directory):
    """Write one ``*_qna.md`` file per source found in the questions file.

    Expecting the questions.json with an array of
    { source, question, answer } pair tuples.

    Args:
        input_path: path of the JSON file to read.
        output_dir: directory receiving the generated files; it is created
            (including parents) when missing.

    An empty input array produces no files.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(input_path, 'r', encoding='utf-8') as input_file:
        records = json.load(input_file)

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for source, entries in _group_entries_by_source(records).items():
        file_path = output_dir / _qna_file_name(source)
        # The context manager closes the file even if the write fails.
        with open(file_path, 'w', encoding='utf-8') as output_file:
            # Entries are separated by a "---" line; none follows the last.
            output_file.write("---\n".join(entries))


if __name__ == "__main__":
    main()