-
Notifications
You must be signed in to change notification settings - Fork 0
/
data2.py
95 lines (85 loc) · 3.36 KB
/
data2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Build relation2id.txt and entity2id.txt from tab-separated triple files.
#
# train.txt, test.txt and valid.txt are read in that order. On every line the
# second field is treated as a relation and the first and third fields as
# entities. The first time a name is seen it receives the next integer id
# (starting at 0) and an "<id>\t<name>\n" record is appended to the matching
# output file.

TRIPLE_FILES = ("train.txt", "test.txt", "valid.txt")
RELATION_FILE = "relation2id.txt"
ENTITY_FILE = "entity2id.txt"


def _iter_triples(path):
    """Yield ``(head, relation, tail)`` for every non-empty line of *path*.

    The three values are the first, second and third tab-separated fields of
    the line; any further fields are ignored. Completely empty lines are
    skipped.

    Raises:
        ValueError: if a non-empty line has fewer than three fields.
    """
    with open(path, "r", encoding="utf-8") as source:
        for line_number, raw in enumerate(source, start=1):
            # Drop the line terminator before splitting so the last field is
            # compared and stored without a trailing newline. Keeping it made
            # the same entity look different as tail ("x\n") and head ("x").
            text = raw.rstrip("\n")
            if not text:
                continue
            fields = text.split("\t")
            if len(fields) < 3:
                message = "{}:{}: expected at least 3 tab-separated fields, got {}"
                raise ValueError(message.format(path, line_number, len(fields)))
            yield fields[0], fields[1], fields[2]


def _assign_id(name, ids, sink):
    """Give *name* the next free id and write its record to *sink* if new.

    *ids* maps every already-seen name to its id; the next id is its size.
    """
    if name in ids:
        return
    ids[name] = len(ids)
    sink.write("{}\t{}\n".format(ids[name], name))


def main(directory="."):
    """Create the relation and entity id files for the triple files.

    Args:
        directory: folder that holds the input files and receives the two
            output files. Defaults to the current working directory.

    Returns:
        A ``(relation_ids, entity_ids)`` tuple of ``name -> id`` dicts.

    Raises:
        ValueError: if an input line has fewer than three fields.
        OSError: if an input or output file cannot be opened.
    """
    import os  # kept local so this block brings its own dependency

    relation_ids = {}
    entity_ids = {}
    relation_path = os.path.join(directory, RELATION_FILE)
    entity_path = os.path.join(directory, ENTITY_FILE)

    # NOTE(review): append mode is kept from the original script so existing
    # files are never truncated. Ids restart at 0 on every run, so running
    # twice against existing output files appends a second, overlapping set
    # of records -- confirm whether "w" is what is actually wanted.
    with open(relation_path, "a", encoding="utf-8") as relation_out:
        with open(entity_path, "a", encoding="utf-8") as entity_out:
            for filename in TRIPLE_FILES:
                triples = _iter_triples(os.path.join(directory, filename))
                for head, relation, tail in triples:
                    _assign_id(relation, relation_ids, relation_out)
                    # The tail is registered before the head to keep the id
                    # order the original script produced.
                    _assign_id(tail, entity_ids, entity_out)
                    _assign_id(head, entity_ids, entity_out)
    return relation_ids, entity_ids


if __name__ == "__main__":
    main()