-
Notifications
You must be signed in to change notification settings - Fork 1
/
notion_tools.py
147 lines (124 loc) · 5.01 KB
/
notion_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import pickle as pkl
from os.path import join
import textwrap
from notion_client import Client
def wrap_breakline(s, width=80):
    """Wrap every line of ``s`` to ``width`` columns, keeping its line breaks.

    :param s: str, possibly containing several lines
    :param width: maximum column width passed to ``textwrap.wrap``
    :return: str with each original line wrapped independently
    """
    wrapped_lines = []
    for line in s.splitlines():
        # textwrap.wrap returns [] for an empty line, so the join yields ""
        # and the blank line survives in the output.
        pieces = textwrap.wrap(line, width=width)
        wrapped_lines.append("\n".join(pieces))
    return "\n".join(wrapped_lines)
def _notion_text_block(block_type, content, bold=False):
    """Return a Notion block dict of ``block_type`` holding a single text run."""
    text_run = {"text": {"content": content}}
    if bold:
        text_run["annotations"] = {"bold": True}
    return {block_type: {"rich_text": [text_run]}}


def _split_by_length(text, max_len):
    """Return consecutive slices of ``text``, each at most ``max_len`` characters."""
    return [text[start:start + max_len] for start in range(0, len(text), max_len)]


def QA_notion_blocks(Q, A, refs=(), max_len=1950):
    """
    Build the list of Notion blocks describing one question/answer exchange.

    Example usage:
        notion.blocks.children.append(page_id, children=QA_notion_blocks("Q1", "A1"))
        notion.blocks.children.append(page_id, children=QA_notion_blocks("Q1", "A1", ("ref1", "ref2")))

    The result is: a divider, a bold "Question:" paragraph, the question text,
    a bold "Answer:" paragraph, the answer text, and a bold "Reference:" toggle
    whose children are one quote block per reference.

    :param Q: str question
    :param A: str answer
    :param refs: list or tuple of str references
    :param max_len: maximum number of characters placed in a single block.
        Longer question/answer text is split over several paragraph blocks;
        each reference is truncated to this length.
    :return: list of block dicts suitable as ``children`` of an append request
    :raises ValueError: if ``max_len`` is not a positive number
    """
    if max_len <= 0:
        # A negative step makes range() empty (all text silently dropped) and a
        # zero step raises an unrelated-looking error, so reject both up front.
        raise ValueError(f"max_len must be positive, got {max_len!r}")

    question_blocks = [
        _notion_text_block("paragraph", chunk)
        for chunk in _split_by_length(Q, max_len)
    ]

    # Split the answer on "\n" first, then cap each paragraph at max_len. This
    # avoids overly long blocks, which the Notion API does not allow. Empty
    # paragraphs (blank lines) produce no block.
    ans_blocks = []
    for paragraph in A.split("\n"):
        ans_blocks.extend(
            _notion_text_block("paragraph", chunk)
            for chunk in _split_by_length(paragraph, max_len)
        )

    ref_blocks = [_notion_text_block("quote", ref[:max_len]) for ref in refs]

    reference_toggle = _notion_text_block("toggle", "Reference:", bold=True)
    reference_toggle["toggle"]["children"] = ref_blocks

    return [
        {"divider": {}},
        _notion_text_block("paragraph", "Question:", bold=True),
        *question_blocks,
        _notion_text_block("paragraph", "Answer:", bold=True),
        *ans_blocks,
        reference_toggle,
    ]
def append_chathistory_to_notion_page(notion: Client, page_id: str, chat_history: list, ref_maxlen=250):
    """
    Append chat history to notion page

    Each (query, answer_struct) pair is converted with ``QA_notion_blocks`` and
    appended to the page in order.

    :param notion: notion client
    :param page_id: str
    :param chat_history: list of tuple (query, answer_struct); answer_struct is
        indexed with "answer" and "source_documents", and every source document
        must expose a ``page_content`` string
    :param ref_maxlen: number of leading characters of each source document
        kept as the reference excerpt
    :return:
    """
    # The Notion API accepts at most 100 children per append request, and an
    # answer with many paragraphs can exceed that, so send the blocks in
    # batches. Appending adds to the end of the page, so order is preserved.
    # NOTE(review): the reference toggle's nested children are not batched;
    # confirm the number of source documents stays small.
    max_children_per_request = 100
    for query, ans_struct in chat_history:
        answer = ans_struct["answer"]
        refdocs = ans_struct['source_documents']
        refstrs = [refdoc.page_content[:ref_maxlen] for refdoc in refdocs]
        blocks = QA_notion_blocks(query, answer, refstrs)
        for start in range(0, len(blocks), max_children_per_request):
            batch = blocks[start:start + max_children_per_request]
            notion.blocks.children.append(page_id, children=batch)
def print_entries(entries_return, print_prop=()):
    """
    Print one line per entry: its id, its name and optionally its url.

    :param entries_return: list of entry dicts, or a dict whose "results" key
        holds that list. Each entry is indexed with "id" and
        ["properties"]["Name"]["title"] (a list of dicts with "plain_text"),
        and with "url" when the url is requested.
    :param print_prop: collection of extra properties to print; only "url" is
        recognised
    :return:
    """
    if isinstance(entries_return, dict):
        entries_return = entries_return["results"]
    show_url = "url" in print_prop
    # Format the output so "Name" starts at the same column as the names
    # below: the id header is padded to 36 characters.
    print("id".ljust(36), "\t", "Name",)
    for entry in entries_return:
        title_runs = entry["properties"]["Name"]["title"]
        # Join every title fragment: an empty title list prints as "" instead
        # of raising IndexError, and a title split into several runs is
        # printed in full rather than cut after its first run.
        name = "".join(run["plain_text"] for run in title_runs)
        print(entry["id"], "\t", name, entry["url"] if show_url else "")
def clean_metadata(metadata):
    """Return a copy of ``metadata`` without entries whose value is None or [].

    :param metadata: dict of metadata fields
    :return: new dict holding only the remaining key/value pairs; other falsy
        values such as 0 or "" are kept
    """
    return {
        key: value
        for key, value in metadata.items()
        if not (value is None or value == [])
    }
def save_qa_history(query, result, qa_path,):
    """
    Persist one question/answer exchange under ``qa_path`` in three forms.

    * ``QA<uid>.pkl``: the (query, result) pair, using the first unused
      five-digit uid;
    * ``chat_history.pkl``: the accumulated list of (query, result) pairs;
    * ``QA.md``: a human-readable markdown log, appended to.

    :param query: str question
    :param result: answer struct indexed with "answer" (str) and
        "source_documents" (iterable of objects with a ``page_content`` str)
    :param qa_path: existing directory in which the files are written
    :return:
    """
    # Find the first uid whose pickle file does not exist yet.
    uid = 0
    while os.path.exists(join(qa_path, f"QA{uid:05d}.pkl")):
        uid += 1
    # Context managers make sure every handle is flushed and closed, rather
    # than left to the garbage collector.
    with open(join(qa_path, f"QA{uid:05d}.pkl"), "wb") as f:
        pkl.dump((query, result), f)

    pkl_path = join(qa_path, "chat_history.pkl")
    if os.path.exists(pkl_path):
        # NOTE: unpickling can execute arbitrary code; qa_path must only ever
        # contain history files written by this tool.
        with open(pkl_path, "rb") as f:
            chat_history = pkl.load(f)
    else:
        chat_history = []
    chat_history.append((query, result))
    with open(pkl_path, "wb") as f:
        pkl.dump(chat_history, f)

    with open(join(qa_path, "QA.md"), "a", encoding="utf-8") as f:
        f.write("\n**Question:**\n\n")
        f.write(query)
        f.write("\n\n**Answer:**\n\n")
        f.write(result["answer"])
        f.write("\n\nReferences:\n\n")
        for doc in result["source_documents"]:
            # Each reference is a markdown quote of the first 250 characters.
            f.write("> ")
            f.write(doc.page_content[:250])
            f.write("\n\n")
        f.write("-------------------------\n\n")
def load_qa_history(qa_path):
    """
    Load the pickled chat history stored in ``qa_path``.

    :param qa_path: directory that may contain ``chat_history.pkl``
    :return: tuple (chat_history, queries, results) where chat_history is the
        list of (query, result) pairs and the other two are its columns; all
        three are empty lists when the history file does not exist
    """
    pkl_path = join(qa_path, "chat_history.pkl")
    try:
        # NOTE: unpickling can execute arbitrary code; qa_path must only ever
        # contain history files written by this tool.
        with open(pkl_path, "rb") as f:
            chat_history = pkl.load(f)
    except FileNotFoundError:
        chat_history = []
    queries = [q for q, _ in chat_history]
    results = [r for _, r in chat_history]
    return chat_history, queries, results
def print_qa_result(result, ref_maxlen=200, line_width=80):
    """
    Pretty-print an answer struct: the wrapped answer, then its references.

    :param result: answer struct indexed with "answer" (str) and
        "source_documents" (iterable of objects with ``metadata`` and
        ``page_content`` attributes)
    :param ref_maxlen: number of leading characters of each source document
        that are printed
    :param line_width: column width used when wrapping the printed text
    :return:
    """
    print("\nAnswer:")
    wrapped_answer = wrap_breakline(result["answer"], line_width)
    print(wrapped_answer)

    print("\nReference:")
    for source in result["source_documents"]:
        print("Ref doc:\n", source.metadata)
        excerpt = source.page_content[:ref_maxlen]
        print(textwrap.fill(excerpt, line_width))
    # NOTE(review): indentation was lost in the copy this was restored from;
    # the trailing blank-line print is assumed to follow the loop rather than
    # run once per reference. Confirm against the upstream file.
    print("\n")
def update_title(notion: Client, page_id, title):
    """
    Set the title of a Notion page.

    :param notion: notion client
    :param page_id: id of the page to update
    :param title: str used as the content of the page's "title" property
    :return:
    """
    # The title property holds a list of text runs; a single run is sent.
    title_runs = [{"text": {"content": title}}]
    new_properties = {"title": {"title": title_runs}}
    notion.pages.update(page_id, properties=new_properties)