# finder.py
#
# Sorts the words of ./a.txt into positive, negative and neutral groups
# using two word-list files and a synonym lookup, then saves each group
# to its own text file.
from naturalprocessingwordnet import getsynonyms
def _wordsFromMarker(lines, first_entry):
    """Return the cleaned entries of *lines* from the marker line onwards.

    Every line before the first one equal to *first_entry* (compared with
    its trailing newline) is skipped. The marker line itself and all lines
    after it are lower-cased, stripped and returned in order. If the marker
    never appears the result is empty.
    """
    words = []
    started = False
    for line in lines:
        started = started or line == first_entry
        if started:
            words.append(line.lower().strip())
    return words


# Words to classify: the input text, lower-cased and split on single spaces.
# NOTE(review): only the last token has surrounding whitespace and a trailing
# "." removed; newlines or punctuation inside other tokens are kept as-is.
with open("./a.txt", "r") as textFile:
    wordsToFind = textFile.read().lower().split(" ")
wordsToFind[-1] = wordsToFind[-1].strip().strip(".")

# Raw lines of both word lists; the files are closed as soon as they are read.
with open("./list of positive words.txt", "r") as positiveWordsFile:
    positiveFile = list(positiveWordsFile)
with open("./list of negative words.txt", "r") as negativeWordsFile:
    negativeFile = list(negativeWordsFile)

# Result sets filled while classifying.
posSet = set()
negSet = set()
neutralSet = set()

# Each list starts at a fixed first entry; whatever precedes it in the file
# is ignored.
positiveWords = _wordsFromMarker(positiveFile, "a+\n")
negativeWords = _wordsFromMarker(negativeFile, "2-faced\n")
class TrieNode:
    """One node of the trie: its outgoing edges and an end-of-word flag."""

    def __init__(self):
        # Maps a character index (see Trie._charToIndex) to the child node.
        # A dict is used instead of a fixed 256-slot list so that characters
        # with a code point above 255 do not raise IndexError, and so that
        # a node only stores the edges it actually has.
        self.children = {}
        # True when the path from the root to this node spells an inserted key.
        self.isEndOfWord = False


class Trie:
    """Collection of strings stored character by character for exact lookup."""

    def __init__(self):
        self.root = self.getNode()

    def getNode(self):
        """Return a new, empty trie node."""
        return TrieNode()

    def _charToIndex(self, ch):
        """Return the edge key for character *ch* (its Unicode code point)."""
        return ord(ch)

    def insert(self, key):
        """Add *key* to the trie, creating any nodes that are missing.

        Inserting a key that is already present, or that is a prefix of an
        existing key, only marks the final node as the end of a word.
        """
        node = self.root
        for ch in key:
            index = self._charToIndex(ch)
            child = node.children.get(index)
            if child is None:
                child = self.getNode()
                node.children[index] = child
            node = child
        node.isEndOfWord = True

    def search(self, key):
        """Return True if *key* was inserted as a whole word, else False.

        A key that is only a prefix of inserted words does not match.
        """
        node = self.root
        for ch in key:
            node = node.children.get(self._charToIndex(ch))
            if node is None:
                return False
        return node.isEndOfWord
def removeDuplicatesSave(file_name, set_data):
    """Merge the words saved in *file_name* into *set_data* and rewrite the file.

    Each existing line of the file is stripped and added to *set_data*
    (the caller's set is updated in place). The file is then overwritten
    with one word per line, without duplicates, in sorted order so that
    repeated runs produce the same file.

    A file that does not exist yet is treated as empty and gets created.
    """
    try:
        with open(file_name, 'r') as fp:
            for line in fp:
                set_data.add(line.strip())
    except FileNotFoundError:
        # First run: nothing has been saved yet, so there is nothing to merge.
        pass

    with open(file_name, 'w') as fp:
        fp.writelines(word + "\n" for word in sorted(set_data))
def main():
    """Classify every word of ``wordsToFind`` and save the three result sets.

    A word found in the positive or negative word list goes into ``posSet``
    or ``negSet``. Otherwise its synonyms are tried in order, and the first
    synonym found in either list is recorded (the synonym, not the word).
    A word with no matching synonym goes into ``neutralSet``.

    The result files are written once, after all words are processed,
    rather than after every match. Each save merges the file's existing
    contents, so the final files come out the same.
    """
    positiveTrie = Trie()
    negativeTrie = Trie()

    # Build one lookup trie per word list.
    for key in positiveWords:
        positiveTrie.insert(key)
    for key in negativeWords:
        negativeTrie.insert(key)

    for word in wordsToFind:
        word = word.lower()
        if positiveTrie.search(word):
            posSet.add(word)
        elif negativeTrie.search(word):
            negSet.add(word)
        else:
            # assumes getsynonyms returns an iterable of strings -- TODO confirm
            for syn in getsynonyms(word):
                if positiveTrie.search(syn):
                    posSet.add(syn)
                    break
                if negativeTrie.search(syn):
                    negSet.add(syn)
                    break
            else:
                # No synonym matched either list (or there were none).
                neutralSet.add(word)

    removeDuplicatesSave("./destinationposfile.txt", posSet)
    removeDuplicatesSave("./destinationnegfile.txt", negSet)
    removeDuplicatesSave("./destinationneufile.txt", neutralSet)


if __name__ == '__main__':
    main()