-
Notifications
You must be signed in to change notification settings - Fork 21
/
gendata.py
83 lines (63 loc) · 2.13 KB
/
gendata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import csv
import datetime
import pickle
import sys

import histdata
import news
import sentiment
# Raise the csv module's per-field size limit to sys.maxint so that very large
# fields can be parsed (presumably the news text column exceeds the default
# limit -- confirm).
# NOTE(review): sys.maxint exists only on Python 2.
csv.field_size_limit(sys.maxint)
def fetchData():
    """Refresh the input data sets.

    Calls ``histdata.getHistData()`` for the historical stock data and then
    ``news.init()`` for the news data, announcing each step on stdout.
    """
    print('Updating historical stock data')
    histdata.getHistData()

    print('Updating news data')
    news.init()
def getStockData(symbol, date):
    """Return two price columns for ``symbol`` on the day after ``date``.

    Reads ``data/hsd/<symbol>.csv`` and searches for the row whose first
    column equals ``date`` plus one day, formatted as ``YYYY-MM-DD``.

    Args:
        symbol: Stock symbol; used to build the CSV file name.
        date: The reference day. The lookup is done for the following day.
            Must support ``+ datetime.timedelta`` and ``strftime``.

    Returns:
        ``[float(row[1]), float(row[4])]`` from the first matching row, or
        ``-1`` when the file has no row for that day.
        NOTE(review): columns 1 and 4 are presumably the open and close
        prices -- confirm against the files histdata writes.

    Raises:
        IOError: If the symbol's CSV file cannot be opened.
        ValueError: If a matching row holds non-numeric values.
    """
    # Get stock data for the next day; format once instead of once per row.
    target = (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d')

    # The context manager closes the file on every exit path, including the
    # early return below.
    with open('data/hsd/' + symbol + '.csv') as hist_file:
        print('Getting stock data for %s for date %s' % (symbol, target))
        for row in csv.reader(hist_file):
            # Blank lines are yielded as [] and have no date column.
            if row and row[0] == target:
                return [float(row[1]), float(row[4])]

    # No data found for symbol for given date
    return -1
def genData():
    """Build the next numbered data file from news and stock data.

    Progress is tracked in ``dat.pkl``, a pickled dict with the keys
    ``data_file_number`` and ``last_updated``. For each day from
    ``last_updated`` up to, but not including, today, the news file
    ``data/news/<YYYY-MM-DD>.csv`` is read. Column 0 of each row is passed to
    ``getStockData`` as the symbol and column 1 to ``sentiment.analyzeText``.
    Rows for which ``getStockData`` returns ``-1`` are skipped. Every
    remaining row appends one line to ``data/dat_<n>.csv``:

        symbol, day of year, sentiment score, sentiment magnitude,
        followed by the values returned by ``getStockData``.

    When the loop finishes, ``dat.pkl`` is rewritten with the new file number
    and with ``last_updated`` set to today.

    NOTE(review): ``last_updated`` is presumably a ``datetime.date``; it is
    compared against ``datetime.date.today()`` -- confirm.

    Raises:
        IOError: If ``dat.pkl``, the output file or a day's news file cannot
            be opened.
    """
    one_day = datetime.timedelta(days=1)

    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # while dat.pkl is produced exclusively by this script.
    with open('dat.pkl', 'r+b') as state_file:
        state = pickle.load(state_file)
        file_number = state['data_file_number'] + 1
        date = state['last_updated']
        end_date = datetime.date.today()

        # file_number is an int, so it must be converted before concatenation.
        with open('data/dat_' + str(file_number) + '.csv', 'a') as out_file:
            writer = csv.writer(out_file)
            while date < end_date:
                day_str = date.strftime('%Y-%m-%d')
                print('Checking data for ' + day_str)

                # getStockData looks up the *following* day, so news from
                # Friday (4) and Saturday (5) would be paired with a weekend
                # day, which presumably has no stock data.
                if date.weekday() in (4, 5):
                    date += one_day
                    continue

                with open('data/news/' + day_str + '.csv') as news_file:
                    for row in csv.reader(news_file):
                        # Blank lines are yielded as [] and carry no symbol.
                        if not row:
                            continue
                        stockdata = getStockData(row[0], date)
                        if stockdata == -1:
                            continue
                        sentdata = sentiment.analyzeText(row[1])
                        data = [row[0], date.timetuple().tm_yday,
                                sentdata.score, sentdata.magnitude]
                        data.extend(stockdata)
                        writer.writerow(data)
                date += one_day

        state['data_file_number'] = file_number
        state['last_updated'] = end_date
        # Overwrite the state in place and drop any bytes left over from a
        # longer previous pickle.
        state_file.seek(0)
        pickle.dump(state, state_file, protocol=pickle.HIGHEST_PROTOCOL)
        state_file.truncate()
def init():
    """Run the whole pipeline: ``fetchData()`` followed by ``genData()``."""
    fetchData()
    genData()
# Run the pipeline whenever this module is executed or imported.
init()