-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine_arffs.py
123 lines (89 loc) · 2.63 KB
/
combine_arffs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import sys
import os
import getopt
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, FileTransferSpeed, FormatLabel, Percentage, \
ProgressBar, ReverseBar, RotatingMarker, \
SimpleProgress, Timer
import subprocess
import time
from datetime import datetime
import random
def usage():
    '''Print the accepted command line options to standard output.'''
    rule = '-------------------------------------------------------'
    # A single-argument print(...) emits the same text under Python 2 and 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print('\n'.join((
        rule,
        'Usage',
        '-h or --help print list of commands',
        '--file1=file file1',
        '--file2=file file2',
        '--outfile=combine.arff',
        '-v verbose',
        rule,
    )))
def main():
    '''Merge two row-aligned ARFF files named on the command line.

    Options are read from ``sys.argv``: ``--file1`` and ``--file2`` are
    required, ``--outfile`` defaults to ``combine.arff``, ``-v`` turns on a
    confirmation message and ``-h``/``--help`` prints the usage text.

    The output consists of:
      * the header of file 1 (everything before ``@DATA``) minus its last
        three lines,
      * the header of file 2 that follows its ``@RELATION BUGGY`` line,
      * one row per input row pair: every field of the file 1 row except the
        last one, followed by the complete file 2 row.

    Rows are paired by position.  When the last comma-separated field of a
    pair differs, the row index is reported and merging stops; rows already
    written stay in the output file.

    Exits with status 2 on bad options or ``--help``, and with status 1 when
    a required file is missing from the command line, a section marker is
    absent, or file 2 has fewer data rows than file 1.
    '''
    try:
        options, _ = getopt.getopt(
            sys.argv[1:], 'hv', ["help", "file1=", "file2=", "outfile="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    file1 = None
    file2 = None
    combine = 'combine.arff'
    verb = False
    for opt, arg in options:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(2)
        elif opt == '--file1':
            file1 = arg
        elif opt == '--file2':
            file2 = arg
        elif opt == '--outfile':
            combine = arg
        elif opt == '-v':
            # Advertised in the usage text and accepted by getopt, so it must
            # not fall through to the "unhandled option" branch.
            verb = True
        else:
            usage()
            raise AssertionError("unhandled option " + opt)

    if file1 is None:
        print('File 1 not specified')
        usage()
        sys.exit(1)
    if file2 is None:
        print('File 2 not specified')
        usage()
        sys.exit(1)

    with open(file1, 'r') as handle:
        text1 = handle.read()
    with open(file2, 'r') as handle:
        text2 = handle.read()

    # Validate both inputs before the output file is opened, so a malformed
    # input can no longer leave a truncated output behind.
    for path, text in ((file1, text1), (file2, text2)):
        if '@DATA\n' not in text:
            sys.stderr.write(path + ': no @DATA section found\n')
            sys.exit(1)
    header2_full = text2.split('@DATA')[0]
    if '@RELATION BUGGY\n' not in header2_full:
        sys.stderr.write(file2 + ': no "@RELATION BUGGY" line found\n')
        sys.exit(1)

    header1_lines = text1.split('@DATA\n')[0].split('\n')
    # Keep only what follows the relation line; file 1 already supplies one.
    header2 = header2_full.split('@RELATION BUGGY\n')[1]
    data1_lines = text1.split('@DATA\n')[1].split('\n')
    data2_lines = text2.split('@DATA\n')[1].split('\n')

    print(len(data1_lines))
    print(len(data2_lines))

    # File 1 drives the merge: surplus rows in file 2 are ignored, but a
    # shorter file 2 would leave file 1 rows without a partner.
    if len(data2_lines) < len(data1_lines):
        sys.stderr.write(
            'File 2 has fewer data rows than file 1 (%d < %d)\n'
            % (len(data2_lines), len(data1_lines)))
        sys.exit(1)

    with open(combine, 'w') as out_file:
        # The last three header lines of file 1 are dropped (presumably its
        # final attribute plus blank lines -- confirm against the inputs);
        # file 2's header is written in full after them.
        for line in header1_lines[:-3]:
            out_file.write(line + '\n')
        out_file.write(header2 + '@DATA\n')

        for ndx, (row1, row2) in enumerate(zip(data1_lines, data2_lines)):
            # head + sep is every field of row1 except the last, each
            # followed by a comma; both are empty when row1 has no comma.
            head, sep, last1 = row1.rpartition(',')
            if last1 != row2.rpartition(',')[2]:
                print('Question Yo data')
                print(ndx)
                break
            out_file.write(head + sep + row2 + '\n')

    if verb:
        print('Wrote ' + combine)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()