-
Notifications
You must be signed in to change notification settings - Fork 8
/
icst_grabber.py
56 lines (51 loc) · 2.12 KB
/
icst_grabber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
'''
Akond Rahman
Dec 22, 2017
Friday
Code to extract data for ICST paper
'''
import csv
import os
import numpy as np
import cPickle as pickle
def getContent(list_of_ds):
    '''
    Build a labelled dump of script contents from categorisation CSV files.

    Every path in ``list_of_ds`` is opened as a CSV file.  The first row is
    skipped as a header; for each remaining row, column index 3 is read as
    the category and column index 4 as the full path of a script.  Rows that
    point at a path which is not a regular file on disk are ignored, as are
    rows with fewer than five columns (e.g. blank lines).

    Args:
        list_of_ds: sequence of CSV file paths, one per dataset.

    Returns:
        dict mapping a running integer id (starting at 1, assigned in the
        order scripts are first encountered) to a tuple
        ``(content, label, dataset_index)``:
          * content        -- full text of the script file
          * label          -- '0' if every category recorded for the script
                              is 'N', otherwise '1'
          * dataset_index  -- 1-based position in ``list_of_ds`` of the
                              dataset in which the script was first seen

    Raises:
        IOError / OSError: if a dataset CSV or a listed script cannot be
        opened for reading.
    '''
    # Imported locally so this function does not depend on a file-level import.
    import sys

    # Python 3 text mode already uses universal newlines and rejects the 'U'
    # flag from 3.11 on; Python 2 still needs it for the same behaviour.
    read_mode = 'rU' if sys.version_info[0] < 3 else 'r'

    categories_by_path = {}
    dataset_by_path = {}
    # First-seen order of the scripts, so the ids in the result are
    # reproducible instead of following dict iteration order.
    paths_in_order = []

    for ds_index, ds_ in enumerate(list_of_ds, 1):
        print('Processing: %s' % ds_)
        print('-' * 100)
        with open(ds_, read_mode) as file_:
            reader_ = csv.reader(file_)
            next(reader_, None)  # skip the header row
            for row_ in reader_:
                if len(row_) < 5:
                    # csv.reader yields [] for blank lines; nothing to index.
                    continue
                categ_of_file, full_path_of_file = row_[3], row_[4]
                if full_path_of_file in categories_by_path:
                    categories_by_path[full_path_of_file].append(categ_of_file)
                elif os.path.isfile(full_path_of_file):
                    categories_by_path[full_path_of_file] = [categ_of_file]
                    dataset_by_path[full_path_of_file] = ds_index
                    paths_in_order.append(full_path_of_file)

    print('Total valid scripts: %d' % len(categories_by_path))
    print('-' * 100)

    dict2Ret = {}
    for counter, path_ in enumerate(paths_in_order, 1):
        with open(path_, read_mode) as the_file:
            the_content = the_file.read()
        # A script is labelled '0' only when 'N' is the sole category seen.
        only_n = set(categories_by_path[path_]) == set(['N'])
        label = '0' if only_n else '1'
        dict2Ret[counter] = (the_content, label, dataset_by_path[path_])
    return dict2Ret
if __name__=='__main__':
    # Categorisation CSVs, one per dataset; the order here fixes the 1-based
    # dataset index stored with every script by getContent().
    moz='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Mozilla.Final.Categ.csv'
    ost='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Openstack.WithoutBadBoys.Final.Categ.csv'
    wik='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Wikimedia.Final.Categ.csv'
    ds_list = [moz, ost, wik]
    all_file_dump = getContent(ds_list)
    # Use context managers so the dump is flushed and closed before it is
    # read back; bare open() calls left that to the garbage collector.
    with open("SCRIPT.LABELS.DUMP", "wb") as dump_file:
        pickle.dump(all_file_dump, dump_file)
    # Read the dump back as a round-trip check that it is loadable.
    with open('SCRIPT.LABELS.DUMP', 'rb') as dump_file:
        all_script_dict = pickle.load(dump_file)