-
Notifications
You must be signed in to change notification settings - Fork 0
/
remove_attrib.py
115 lines (83 loc) · 2.52 KB
/
remove_attrib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import sys
import os
import getopt
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, FileTransferSpeed, FormatLabel, Percentage, \
ProgressBar, ReverseBar, RotatingMarker, \
SimpleProgress, Timer
import subprocess
import time
from datetime import datetime
import random
def usage():
    '''Print the list of accepted command line options to stdout.

    The text is written with a single-argument ``print(...)`` call so the
    function behaves the same on Python 2 and Python 3.
    '''
    print('''-------------------------------------------------------
Usage
-h or --help      print list of commands
--file1=file      input ARFF file (required)
--attrib=attrib   attribute to remove, matched as a substring
                  of its header line (required)
--outfile=file    output ARFF file (default: removed.arff)
-v                verbose
-------------------------------------------------------''')
def _is_attribute_line(line):
    '''Return True when a header line is an ARFF attribute declaration.'''
    # ARFF keywords are case-insensitive, so compare in lower case.
    return line.strip().lower().startswith('@attribute')


def _remove_attribute(text, attrib):
    '''Strip every attribute whose declaration contains ``attrib``.

    ``text`` is the full content of an ARFF file.  The header is everything
    before the first ``'@DATA\\n'`` marker, the data section everything after.

    Returns a tuple ``(new_text, removed)`` where ``new_text`` is the
    rewritten file content and ``removed`` is the list of zero-based column
    indices that were dropped (in header order).

    Raises ValueError when the ``@DATA`` marker is missing or when no
    attribute declaration matches ``attrib``.

    Limitation: data rows are split on plain commas, so quoted values that
    contain commas and the sparse ``{...}`` row format are not supported.
    '''
    header, marker, data = text.partition('@DATA\n')
    if not marker:
        raise ValueError("No '@DATA' line found in input")

    pieces = []
    removed = []
    column = 0
    # The piece after the header's final newline is dropped; it is normally
    # just the empty string left over from the split.
    for line in header.split('\n')[:-1]:
        if _is_attribute_line(line):
            matched = attrib in line
            if matched:
                removed.append(column)
            # Columns are numbered by attribute declarations only, so blank
            # or comment lines in the header cannot shift the index.
            column += 1
            if matched:
                continue
        pieces.append(line.strip() + '\n')

    if not removed:
        raise ValueError("Attrib '%s' not found in header" % attrib)

    pieces.append('@DATA\n')

    rows = data.split('\n')
    if rows and rows[-1] == '':
        # A data section ending in a newline yields one empty trailing piece;
        # it is not a row and must not become an extra blank output line.
        rows.pop()

    drop = set(removed)
    for row in rows:
        fields = row.split(',')
        # Filtering by index (rather than always emitting the last field)
        # lets the final column be removed as well.
        kept = [field for ndx, field in enumerate(fields) if ndx not in drop]
        pieces.append(','.join(kept) + '\n')

    return ''.join(pieces), removed


def main():
    '''Remove an attribute from an ARFF file, driven by ``sys.argv``.

    Options:
        -h, --help        print usage and exit with status 2
        --file1=FILE      input ARFF file (required)
        --attrib=TEXT     attribute to remove; any ``@ATTRIBUTE`` header line
                          containing TEXT is dropped together with its data
                          column (required)
        --outfile=FILE    output file, defaults to ``removed.arff``
        -v                also print a one-line summary

    The zero-based index of every removed column is printed to stdout.

    Exits with status 2 on bad or unhandled options and with status 1 when a
    required option is missing, the input has no ``@DATA`` line, or no
    attribute matches.  The output file is only written once the whole input
    has been processed successfully.
    '''
    try:
        options, _unused = getopt.getopt(
            sys.argv[1:], 'hv',
            ["help", "file1=", "attrib=", "outfile=", "classConfig="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    file1 = None
    attrib = None
    combine = 'removed.arff'
    verb = False
    for opt, arg in options:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(2)
        elif opt == '--file1':
            file1 = arg
        elif opt == '--attrib':
            attrib = arg
        elif opt == '--outfile':
            combine = arg
        elif opt == '-v':
            verb = True
        else:
            # Accepted by getopt but not implemented (e.g. --classConfig).
            # Exit explicitly: an assert would vanish under ``python -O``.
            print('unhandled option ' + opt)
            usage()
            sys.exit(2)

    if not file1:
        print('File 1 not specified')
        usage()
        sys.exit(1)
    # An empty pattern is a substring of every line and would strip all
    # attributes, so it is treated the same as a missing option.
    if not attrib:
        print('Attrib not specified')
        usage()
        sys.exit(1)

    with open(file1, 'r') as source:
        text = source.read()

    try:
        result, removed = _remove_attribute(text, attrib)
    except ValueError as err:
        print(str(err))
        sys.exit(1)

    with open(combine, 'w') as target:
        target.write(result)

    for ndx in removed:
        print(ndx)
    if verb:
        print('Removed %d attribute(s) matching %r from %s -> %s'
              % (len(removed), attrib, file1, combine))
# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()