-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_or_dictionary.py
75 lines (53 loc) · 1.87 KB
/
create_or_dictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import random
def load_dictionary_from_file(word_list_file, encoding=None):
    """Load a word list and group its words into pages by length.

    The file is read one word per line. A line is kept only if, after
    its line terminator is removed, it consists solely of alphabetic
    characters and is at least two characters long. Kept words are
    upper-cased.

    Args:
        word_list_file: Path of the text file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the
            default) uses the platform default encoding.

    Returns:
        A list of pages where ``result[n]`` is the list of upper-cased
        words whose length is ``n``, in file order. The list is only as
        long as needed to hold the longest kept word, so it is empty
        when no line qualifies.
    """
    dictionary = []
    with open(word_list_file, encoding=encoding) as f:
        for line in f:
            word = line.rstrip("\r\n")
            if not word.isalpha() or len(word) < 2:
                continue
            # Upper-case BEFORE measuring: some case mappings change the
            # length (e.g. "\u00df" becomes "SS"), and every word must be
            # filed under the length of the form that is actually stored.
            word = word.upper()
            length = len(word)
            # Grow the page list until index `length` exists.
            while len(dictionary) <= length:
                dictionary.append([])
            dictionary[length].append(word)
    return dictionary
def shuffle_words(dictionary):
    """Shuffle the words of every page in place.

    Each page (inner list) of ``dictionary`` is reordered with
    ``random.shuffle``; the order of the pages themselves is untouched.
    Nothing is returned.
    """
    for words in dictionary:
        random.shuffle(words)
def make_dictionary_matrix(dictionary, max_word_length):
    """Encode the word pages as fixed-width integer rows.

    Pages are visited in index order. A page is skipped when its index
    is below 2, when it is empty, or when its index exceeds
    ``max_word_length``. Every word of a remaining page becomes one row:
    each character is mapped to ``ord(char) - (ord('A') - 1)`` (so 'A'
    is 1) and the row is right-padded with zeros up to
    ``max_word_length`` entries.

    Args:
        dictionary: Sequence of pages; page ``n`` is expected to hold
            words of length ``n`` (checked with an assertion).
        max_word_length: Longest word length to include, and the width
            of every row.

    Returns:
        A ``(matrix, domain_ranges)`` tuple. ``matrix`` is the list of
        rows; ``domain_ranges`` maps each included word length to the
        inclusive ``(first_row, last_row)`` indices of its rows.
    """
    letter_offset = ord('A') - 1
    rows = []
    spans = {}
    for length, words in enumerate(dictionary):
        if length < 2 or not words or length > max_word_length:
            continue
        first_row = len(rows)
        for word in words:
            assert length == len(word)
            encoded = [ord(ch) - letter_offset for ch in word]
            encoded.extend([0] * (max_word_length - length))
            rows.append(encoded)
        # Rows of this page occupy a contiguous, inclusive index range.
        spans[length] = (first_row, len(rows) - 1)
    return rows, spans
def main():
    """Load ``words.txt``, encode words of up to 5 letters, print the ranges.

    Only the mapping of word length to row range is printed; the encoded
    matrix itself is discarded.
    """
    pages = load_dictionary_from_file("words.txt")
    _, domain_ranges = make_dictionary_matrix(pages, 5)
    print(domain_ranges)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()