-
Notifications
You must be signed in to change notification settings - Fork 0
/
DailyDataUpdates.py
175 lines (136 loc) · 7.76 KB
/
DailyDataUpdates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 7 13:46:22 2022
@author: mtang
"""
import json
import datetime
import pandas as pd
import numpy as np
import quantlib.data_utils as data_utils
import quantlib.general_utils as general_utils
class YF_OHLC_Update():
tickers = []
def __init__(self, update_name, market, ticker_file_path, raw_data_path, adjusted_data_path, return_path):
self.update_name = update_name
self.market = market
self.ticker_file_path = ticker_file_path
self.raw_data_path = raw_data_path
self.adjusted_data_path = adjusted_data_path
self.return_path = return_path
#self.simple_return_path = simple_return_path
#self.log_return_path = log_return_path
#self.daily_var_path = daily_var_path
self.tickers =[]
def update_tickers(self):
#For US stocks: get sp500 tickers and sp500 changes and save the table to ticker directory
#For HK stocks: examine the YF
#update the self.tickers list
if (self.market =="US") :
df_sp500 = data_utils.get_sp500_instruments()
df_sp500_chg = data_utils.get_sp500_changes()
df_ndq = data_utils.get_ndq_instruments()
df_sp500.to_excel(self.ticker_file_path+"\\Archive\\SP500 "+datetime.datetime.today().strftime("%Y%m%d")+".xlsx",engine="openpyxl", index=False)
df_sp500_chg.to_excel(self.ticker_file_path+"\\Archive\\SP500 Change "+datetime.datetime.today().strftime("%Y%m%d")+".xlsx",engine="openpyxl", index=False)
df_ndq.to_excel(self.ticker_file_path+"\\Archive\\NDQ "+datetime.datetime.today().strftime("%Y%m%d")+".xlsx",engine="openpyxl", index=False)
sp500_tickers = list(df_sp500['Symbol'])
sp500_chg_tickers = list(df_sp500_chg['Added Ticker'])
ndq_tickers = list(df_ndq['Ticker'] )
sp500_tickers = [x for x in sp500_tickers if pd.isnull(x) == False]
sp500_chg_tickers = [x for x in sp500_chg_tickers if pd.isnull(x) == False]
ndq_tickers = [x for x in ndq_tickers if pd.isnull(x) == False]
all_tickers = list(set(sp500_tickers+sp500_chg_tickers +ndq_tickers ))
all_tickers = sorted(list(map(lambda x: str.replace(x, ".", "-"), all_tickers)))
us_ticker_file = self.ticker_file_path + "\\USTickers.csv"
us_ticker_archive = self.ticker_file_path + "\\Archive\\USTickers " + datetime.datetime.today().strftime("%Y%m%d")+".csv"
with open(us_ticker_file, "w") as outfile:
outfile.write("\n".join(all_tickers))
with open(us_ticker_archive, "w") as outfile:
outfile.write("\n".join(all_tickers))
self.tickers = all_tickers
else:
if (self.market == "HK"):
hk_equities_datafile = self.ticker_file_path + "\\HKSecuritiesData.xlsx"
df = pd.read_excel(hk_equities_datafile, header=0, index_col=0, engine="openpyxl")
hk_etf_datafile = self.ticker_file_path + "\\HKETFData.xlsx"
df1 = pd.read_excel(hk_etf_datafile, header=0, index_col=0, engine="openpyxl")
self.tickers = sorted(list(df.index)+list(df1.index))
else:
if (self.market == "CN"):
sz_equities_datafile = self.ticker_file_path + "\\SZSecuritiesData.xlsx"
df = pd.read_excel(sz_equities_datafile, header=0, index_col=0, engine="openpyxl")
sh_equities_datafile = self.ticker_file_path + "\\SHSecuritiesData.xlsx"
df1 = pd.read_excel(sh_equities_datafile, header=0, index_col=0, engine="openpyxl")
self.tickers = sorted(list(df.index)+list(df1.index))
else:
if (self.market =="Indices") :
index_file = self.ticker_file_path + "\\GlobalIndices.csv"
df1 = pd.read_csv(index_file, header=None)
self.tickers = sorted(list(df1[0]))
return
def update_ohlc(self):
#update the ohlc files for tickers
if (len(self.tickers) <=0):
self.update_tickers()
for ticker in list(self.tickers):
#print(ticker)
output_file = self.raw_data_path + "\\" + ticker + ".csv"
if general_utils.check_file_exists(output_file):
try:
df1 = pd.read_csv(output_file, header=0, index_col=0, parse_dates=[0])
last_dt = df1.index[-1]
ndays_to_retrieve = (datetime.datetime.now() - last_dt).days + 10
df = data_utils.get_yf_daily_ohlcv(ticker, str(ndays_to_retrieve)+"d")
if not df.empty:
#check if there are dividend/split events in retrieved data
#yfinance returns dividend/split adjusted data by default
#hence in case of corp action, the entire history needs to be re-download
corp_action = sum(df['dividends']) + sum(df['stocksplits'])
if (corp_action >0):
df = data_utils.get_yf_daily_ohlcv(ticker)
else:
df = df.combine_first(df1)
df.to_csv(output_file)
except Exception as e:
print(e)
df = data_utils.get_yf_daily_ohlcv(ticker)
if not df.empty:
df.to_csv(output_file)
else:
df = data_utils.get_yf_daily_ohlcv(ticker)
if not df.empty:
df.to_csv(output_file)
return
#Calculate simple return, log return, 1-period variance for given OHLC
def update_return(self):
if (len(self.tickers) <=0):
self.update_tickers()
for ticker in list(self.tickers):
#print(ticker)
try:
ohlc_file = self.raw_data_path + "\\" + ticker + ".csv"
output_file = self.return_path+ "\\" + ticker + ".csv"
ohlc_data = pd.read_csv(ohlc_file, header=0, index_col=0, parse_dates=[0])
ohlc_data['SimpleRet'] = ohlc_data['close'].pct_change()
ohlc_data['LogRet'] = np.log(ohlc_data.close) - np.log(ohlc_data.close.shift(1))
ohlc_data['Var1'] = np.square(ohlc_data.LogRet)
ohlc_data[['SimpleRet', 'LogRet', 'Var1']].to_csv(output_file)
except Exception as e:
print(e)
return
def update_adjusted(self):
#update adjusted ohlc files, adjusting for dividend and splits
#pass for now -- yfinance downloads dividend/split adjusted OHLCV by default
return
hk_yf_update = YF_OHLC_Update("HK Update", "HK", "V:\\HKExFilings\\StockInfo", "V:\\Daily\\OHLC", "V:\\Daily\\Adjusted", "V:\\Daily\\Return")
hk_yf_update.update_ohlc()
hk_yf_update.update_return()
us_yf_update = YF_OHLC_Update("US Update", "US", "V:\\Daily\\USTickers", "V:\\Daily\\OHLC", "V:\\Daily\\Adjusted","V:\\Daily\\Return")
us_yf_update.update_ohlc()
us_yf_update.update_return()
cn_yf_update = YF_OHLC_Update("CN Update", "CN", "V:\\HKExFilings\\StockInfo", "V:\\Daily\\OHLC", "V:\\Daily\\Adjusted", "V:\\Daily\\Return")
cn_yf_update.update_ohlc()
cn_yf_update.update_return()
index_yf_update = YF_OHLC_Update("Index Update", "Indices", "V:\\Daily", "V:\\Daily\\OHLC", "V:\\Daily\\Adjusted","V:\\Daily\\Return")
index_yf_update.update_ohlc()
index_yf_update.update_return()