-
Notifications
You must be signed in to change notification settings - Fork 1
/
cnf.py
75 lines (73 loc) · 2.66 KB
/
cnf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
RESULT = {}


# Function to remove unit productions.
def remove_unit_production(keyList) -> None:
    """Expand unit productions, in place, for the ``RESULT`` entries in *keyList*.

    A unit production is an alternative that consists of exactly one symbol
    which is itself a key of ``RESULT`` (i.e. another non-terminal).  Each
    such alternative is replaced by the alternatives currently stored for
    that symbol; alternatives copied in this way are not added twice.

    Every other alternative is kept unchanged: two-symbol alternatives,
    single symbols that are not keys of ``RESULT`` (terminals), and
    alternatives with more than two symbols.

    Notes:
        * Entries are processed in the iteration order of ``RESULT``, not in
          the order of *keyList*.
        * Expansion is one level deep: if the referenced symbol still holds
          unit productions when it is copied, they are copied as they are.

    Args:
        keyList: Container of ``RESULT`` keys whose alternatives should be
            rewritten.  Keys not present in ``RESULT`` are ignored.
    """
    for key, alternatives in RESULT.items():
        if key not in keyList:
            continue

        expanded = []
        for alternative in alternatives:
            is_unit = " " not in alternative and alternative in RESULT
            if not is_unit:
                # Binary rules, terminals and longer rules are not unit
                # productions; looking them up in RESULT would raise KeyError.
                expanded.append(alternative)
                continue

            if alternative == key:
                # "A -> A" contributes nothing and would only re-insert itself.
                continue

            for replacement in RESULT[alternative]:
                if replacement not in expanded:
                    expanded.append(replacement)

        # Re-assigning an existing key does not change the dict's size, so it
        # is safe while iterating over RESULT.items().
        RESULT[key] = expanded
def _binarize_alternatives(alternatives, prefix, taken_names):
    """Rewrite *alternatives* so that none of them has more than two symbols.

    The two leftmost symbols of an over-long alternative are repeatedly
    replaced by a helper symbol named ``prefix + <number>``.  The same pair
    of symbols always maps to the same helper.

    Args:
        alternatives: Right-hand sides as space-separated symbol strings.
        prefix: Text the generated helper names start with.
        taken_names: Container of names that must not be reused for helpers.

    Returns:
        A tuple ``(rewritten, helper_rules)``: the rewritten alternatives in
        their original order, and a dict mapping each helper name to the
        two-symbol string it stands for, in creation order.
    """
    name_for_pair = {}
    rewritten = []
    counter = 1
    for alternative in alternatives:
        symbols = alternative.split(" ")
        while len(symbols) > 2:
            pair = symbols[0] + " " + symbols[1]
            helper = name_for_pair.get(pair)
            if helper is None:
                # Skip numbers whose name already exists in the grammar so an
                # existing rule is never overwritten by a helper.
                while prefix + str(counter) in taken_names:
                    counter += 1
                helper = prefix + str(counter)
                counter += 1
                name_for_pair[pair] = helper
            symbols[:2] = [helper]
        rewritten.append(" ".join(symbols))
    helper_rules = {helper: pair for pair, helper in name_for_pair.items()}
    return rewritten, helper_rules


# Function to open and process the raw grammar file into a set of productions.
def get_set_of_production():
    """Load ``model/raw copy.txt`` into ``RESULT`` and normalise the grammar.

    Every non-blank line of the file is expected to have the form
    ``LHS -> alt1 | alt2 | ...``.  Alternatives of a left-hand side that
    appears on several lines are merged.  After loading:

    1. The alternatives of ``PropNoun`` are lowercased and de-duplicated.
    2. ``remove_unit_production`` is applied to the symbols listed in
       ``phrases`` and then to those listed in ``patterns``.
    3. Alternatives of ``K`` with more than two symbols are reduced to two
       symbols by introducing helper rules ``K1``, ``K2``, ...

    Returns:
        The module-level ``RESULT`` dict (the same object, cleared and
        refilled on every call), mapping each left-hand side to its list of
        alternatives.

    Raises:
        OSError: If the grammar file cannot be opened.
        IndexError: If a non-blank line does not contain ``" -> "``.
    """
    RESULT.clear()

    # "with" guarantees the file is closed even if a line fails to parse.
    with open("model/raw copy.txt", "r", encoding="utf-8") as grammar_file:
        for raw_line in grammar_file:
            if not raw_line.strip():
                # Blank lines carry no rule; previously they raised IndexError.
                continue
            fields = raw_line.splitlines()[0].split(" -> ")
            lhs = fields[0]
            rhs = fields[1].split(" | ")
            RESULT.setdefault(lhs, []).extend(rhs)

    if "PropNoun" in RESULT:
        # De-duplicate on the lowercased form; comparing the original casing
        # against already-lowercased entries let duplicates through.
        lowered = (name.lower() for name in RESULT["PropNoun"])
        RESULT["PropNoun"] = list(dict.fromkeys(lowered))

    phrases = ["NumP", "AdvP", "AdjP", "PP", "NP", "VP"]
    remove_unit_production(phrases)
    patterns = ["S", "P", "O", "Pel", "Ket"]
    remove_unit_production(patterns)

    if "K" in RESULT:
        RESULT["K"], helper_rules = _binarize_alternatives(
            RESULT["K"], "K", RESULT
        )
        # Helpers are added only after "K" has been rewritten.
        for helper, pair in helper_rules.items():
            RESULT[helper] = [pair]

    return RESULT