-
Notifications
You must be signed in to change notification settings - Fork 72
/
buildIndex.py
79 lines (66 loc) · 2.42 KB
/
buildIndex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python
# Before running, make sure the third-party dependencies are installed:
# > python -m pip install bottle beautifulsoup4 hjson
# I used version bottle-0.12.19
from bottle import SimpleTemplate
from bs4 import BeautifulSoup
import json
import hjson
import os.path
import sys
# Directory names to skip (currently none).
IGNORE_DIRS = []

# Folder holding the chapter templates, one sub-folder per section.
TEMPLATE_DIR = 'chapters'
ROOT = 'pythonreader'
OUT_DIR = 'en'

# Deployment toggle. The original note says to use the -t flag to compile
# for local tests; no flag parsing is visible in this file.
DEPLOY = False
class IndexBuilder(object):
    """Build ``searchIndex.json`` from the book outline and section HTML.

    The outline (``bookOutline.hjson``) maps each part key to a mapping
    that has a ``'sections'`` entry and, optionally, an ``'examples'``
    entry; each of those maps a section key to its title. Every ``.html``
    file found in a section's folder under ``TEMPLATE_DIR`` becomes one
    entry of the search index.
    """

    def run(self):
        """Read the outline, index every listed section, write the index.

        Reads ``bookOutline.hjson`` from the current working directory and
        writes the collected entries to ``searchIndex.json`` as a JSON list.
        """
        self.index = []

        # Context managers guarantee the handles are closed (and the output
        # flushed) even if indexing raises part-way through.
        with open('bookOutline.hjson', encoding='utf-8') as outline_file:
            book_data = hjson.load(outline_file)

        for part_key, part in book_data.items():
            for section_key, title in part['sections'].items():
                path = '{}/{}'.format(part_key, section_key)
                self.addSectionToIndex(path, section_key, title)

            if 'examples' not in part:
                continue

            # Examples live under a shared 'examples' folder rather than
            # under the folder of the part that lists them.
            for section_key, title in part['examples'].items():
                path = '{}/{}'.format('examples', section_key)
                self.addSectionToIndex(path, section_key, title)

        with open('searchIndex.json', 'w', encoding='utf-8') as index_file:
            json.dump(self.index, index_file)

    #####################
    # Private Helpers
    #####################

    def addSectionToIndex(self, rel_path, key, title):
        """Index every ``.html`` file directly inside one section folder.

        Args:
            rel_path: Section folder relative to ``TEMPLATE_DIR``; also
                stored as the entry's ``'url'``.
            key: Section key, stored as the entry's ``'id'``.
            title: Section title, stored as the entry's ``'title'``.
        """
        sectionDirPath = os.path.join(TEMPLATE_DIR, rel_path)
        # os.listdir returns names in arbitrary order; sorting keeps the
        # generated index identical from one run/machine to the next.
        for fileName in sorted(os.listdir(sectionDirPath)):
            if fileName.endswith('.html'):
                filePath = os.path.join(sectionDirPath, fileName)
                self.addFileToIndex(filePath, rel_path, key, title)

    def addFileToIndex(self, filePath, url, key, title):
        """Extract the text of one HTML file and append it to the index.

        Args:
            filePath: Path of the HTML template to read.
            url: Value stored under ``'url'`` in the index entry.
            key: Value stored under ``'id'`` in the index entry.
            title: Value stored under ``'title'`` in the index entry.
        """
        print(filePath)
        with open(filePath, encoding='utf-8') as templateFile:
            templateText = templateFile.read()

        # The template is parsed as-is, not rendered through SimpleTemplate;
        # sanitizeText later drops lines starting with '%' (presumably the
        # template's embedded code lines -- confirm against the templates).
        soup = BeautifulSoup(templateText, 'html.parser')
        newItem = {
            'id': key,
            'title': title,
            'url': url,
            'text': self.sanitizeText(soup.get_text()),
        }
        self.index.append(newItem)

    def sanitizeText(self, text):
        """Return ``text`` without the lines whose first non-blank char is '%'.

        Kept lines are emitted unchanged (original whitespace preserved),
        each followed by a newline, so the result always ends with a newline.
        """
        return ''.join(
            line + '\n'
            for line in text.split('\n')
            if not line.strip().startswith('%')
        )
# Script entry point: build the search index when executed directly.
if __name__ == '__main__':
    builder = IndexBuilder()
    builder.run()