-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_static_site.py
100 lines (84 loc) · 3.47 KB
/
generate_static_site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python3
# generate a static html site from the data
# usage: python3 ./generate_static_site.py ud.json
# this generator enumerates pages.
# why not escape or base64-encode the names and use them as file names?
# because there's a word that is 422 UTF-8 bytes long, and Linux ext4 only
# allows file names of up to 255 bytes (see linux-kernel/fs/ext4/ext4.h EXT4_NAME_LEN)
from typing import List, Dict, Union, Generator
from html import escape as he
from urllib.parse import unquote
from base64 import urlsafe_b64encode
import json
def main(json_file: str) -> None:
    """Generate the static HTML site from a JSON dump of definitions.

    Writes ``./static_html/index.html`` plus one numbered page per distinct
    page name found in the dump, printing progress to stdout.

    Args:
        json_file: Path to a JSON file containing a list of definition
            objects, each of which has a ``"pagenames"`` list.
    """
    with open(json_file, encoding="utf-8") as fh:
        json_data = json.load(fh)

    # Group the definitions by page name in one pass instead of rescanning
    # the whole dump for every word.
    definitions_by_page: Dict[str, List[Dict[str, Union[str, int, List[str]]]]] = {}
    for definition in json_data:
        # dict.fromkeys drops repeated names (keeping order) so a definition
        # is listed at most once per page.
        for pagename in dict.fromkeys(definition["pagenames"]):
            definitions_by_page.setdefault(pagename, []).append(definition)
    all_words: List[str] = sorted(definitions_by_page)

    print("generating index..")
    with open("./static_html/index.html", "w", encoding="utf-8") as fh:
        fh.write(generate_index(all_words))

    print("generating words..")
    total = len(all_words)
    last_percent = 0
    for idx, word in enumerate(all_words):
        word_page_html: str = generate_word_page(
            word,
            # generate_word_page is annotated as taking a generator.
            (definition for definition in definitions_by_page[word]),
        )
        with open(f"./static_html/{idx_to_filename(idx)}.html", "w", encoding="utf-8") as fh:
            fh.write(word_page_html)
        del word_page_html
        # Derive the percentage from the position so it works for fewer than
        # 100 words (no division by zero) and never exceeds 100.
        percent = (idx + 1) * 100 // total
        if percent != last_percent:
            last_percent = percent
            print(f"{percent}% done")
def generate_index(all_words: List[str]) -> str:
    """Build the HTML index page that links to every word page.

    Args:
        all_words: Page names. Each name is passed through ``unquote`` and
            HTML-escaped for display; its position in the list selects the
            linked file name via ``idx_to_filename``.

    Returns:
        The complete HTML document as a single string.
    """
    links = (
        f'<li><a href="{idx_to_filename(idx)}.html">{he(unquote(word))}</a></li>'
        for idx, word in enumerate(all_words)
    )
    return ''.join([
        '<!DOCTYPE HTML><html><head>',
        # Declare the encoding so browsers do not have to guess it.
        # NOTE(review): assumes the caller writes the page as UTF-8.
        '<meta charset="utf-8">',
        '<title>UDOffline Index</title>',
        '<link href="style.css" rel="stylesheet">',
        '</head><body>',
        '<h1>UDOffliner Index</h1>',
        '<p>Use Ctrl+F</p>',
        '<ul>',
        *links,
        '</ul>',
        '</body></html>',
    ])
def generate_word_page(pagename: str, definitions: Generator[Dict[str, Union[str, int, List[str]]], None, None]) -> str:
    """Build the HTML page listing every definition for one page name.

    Args:
        pagename: Page name; passed through ``unquote`` and HTML-escaped
            before being used as the title and heading.
        definitions: The definitions to show. Each is a dict with the string
            fields ``"word"``, ``"meaning"`` and ``"author_name"``, the
            integer fields ``"upvotes"`` and ``"downvotes"``, and a list
            under ``"examples"``.

    Returns:
        The complete HTML document as a single string.

    Raises:
        KeyError: If a definition lacks one of the fields above.
        AssertionError: If a field has an unexpected type (only while
            assertions are enabled).
    """
    title = he(unquote(pagename))
    definition_html_list: List[str] = []
    for definition in definitions:
        # These asserts also narrow the Union-typed values for type checkers.
        assert isinstance(definition["examples"], list)
        assert isinstance(definition["word"], str)
        assert isinstance(definition["meaning"], str)
        assert isinstance(definition["author_name"], str)
        assert isinstance(definition["upvotes"], int)
        assert isinstance(definition["downvotes"], int)
        examples_html: str = "".join(
            f'<p>{he(example)}</p>' for example in definition["examples"]
        )
        # Adjacent literals concatenate into one string; no join is needed.
        definition_html_list.append(
            '<div class="def">'
            f'<h2>{he(definition["word"])}</h2>'
            f'<p>{he(definition["meaning"])}</p>'
            f'<div class="ex">{examples_html}</div>'
            f'<div class="meta"><b>Author:</b> {he(definition["author_name"])} <b>Upvotes:</b> {definition["upvotes"]} <b>Downvotes:</b> {definition["downvotes"]}</div>'
            '</div>'
        )
    return "".join([
        '<!DOCTYPE HTML><html><head>',
        # Declare the encoding so browsers do not have to guess it.
        # NOTE(review): assumes the caller writes the page as UTF-8.
        '<meta charset="utf-8">',
        f'<title>{title}</title>',
        '<link href="style.css" rel="stylesheet">',
        '</head><body>',
        '<nav><a href="index.html">index</a></nav>',
        f'<h1>{title}</h1>',
        *definition_html_list,
        '</body></html>',
    ])
def idx_to_filename(idx: int) -> str:
    """Return the file-name stem (without extension) for page number ``idx``.

    The index is encoded as 32 big-endian bytes, URL-safe base64 encoded, and
    stripped of ``=`` padding.

    Args:
        idx: Non-negative page index.

    Returns:
        An ASCII string of URL-safe base64 characters.

    Raises:
        OverflowError: If ``idx`` is negative or does not fit in 32 bytes.
    """
    # byteorder only defaults to "big" from Python 3.11 on; passing it
    # explicitly keeps the script working on older interpreters.
    raw = idx.to_bytes(length=32, byteorder="big")
    return urlsafe_b64encode(raw).decode("ascii").rstrip("=")
if __name__ == "__main__":
    from sys import argv

    # Exit with a usage hint rather than an IndexError traceback when the
    # path to the JSON dump is missing.
    if len(argv) < 2:
        raise SystemExit(f"usage: python3 {argv[0]} ud.json")
    main(argv[1])