diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst index fc508670..4c510ce4 100644 --- a/docs/source/remote_data.rst +++ b/docs/source/remote_data.rst @@ -36,6 +36,8 @@ It should be noted, that various sources support different kinds of data, so not Yahoo! Finance ============== +Historical stock prices from Yahoo! Finance. + .. ipython:: python import pandas_datareader.data as web @@ -45,6 +47,18 @@ Yahoo! Finance f = web.DataReader("F", 'yahoo', start, end) f.ix['2010-01-04'] +Historical corporate actions (Dividends and Stock Splits) with ex-dates from Yahoo! Finance. + +.. ipython:: python + + import pandas_datareader.data as web + import datetime + + start = datetime.datetime(2010, 1, 1) + end = datetime.datetime(2015, 5, 9) + + web.DataReader('AAPL', 'yahoo-actions', start, end) + .. _remote_data.yahoo_options: Yahoo! Finance Options diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index 3e077bf5..70e44231 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -7,6 +7,7 @@ import tempfile import datetime as dt import time +import csv from collections import defaultdict @@ -45,7 +46,7 @@ def DataReader(name, data_source=None, start=None, end=None, the name of the dataset. Some data sources (yahoo, google, fred) will accept a list of names. data_source: str - the data source ("yahoo", "google", "fred", or "ff") + the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff") start : {datetime, None} left boundary for range (defaults to 1/1/2010) end : {datetime, None} @@ -57,6 +58,9 @@ def DataReader(name, data_source=None, start=None, end=None, # Data from Yahoo! Finance gs = DataReader("GS", "yahoo") + # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! 
Finance + gs = DataReader("GS", "yahoo-actions") + # Data from Google Finance aapl = DataReader("AAPL", "google") @@ -75,6 +79,9 @@ def DataReader(name, data_source=None, start=None, end=None, return get_data_yahoo(symbols=name, start=start, end=end, adjust_price=False, chunksize=25, retry_count=retry_count, pause=pause) + elif data_source == "yahoo-actions": + return get_data_yahoo_actions(symbol=name, start=start, end=end, + retry_count=retry_count, pause=pause) elif data_source == "google": return get_data_google(symbols=name, start=start, end=end, adjust_price=False, chunksize=25, @@ -423,6 +430,81 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, return _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price, ret_index, chunksize, 'yahoo') +_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?' + +def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3, + pause=0.001): + """ + Returns DataFrame of historical corporate actions (dividends and stock + splits) for a symbol, over date range, start to end. All dates in the + resulting DataFrame correspond with dividend and stock split ex-dates. + + Parameters + ---------- + symbol : string with a single stock symbol (ticker). + start : string, (defaults to '1/1/2010') + Starting date, timestamp. Parses many different kinds of date + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') + end : string, (defaults to today) + Ending date, timestamp. Same format as starting date. + retry_count : int, default 3 + Number of times to retry query request. + pause : float, default 0.001 + Time, in seconds, of the pause between retries. 
+ """ + + start, end = _sanitize_dates(start, end) + url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol + + '&a=%s' % (start.month - 1) + + '&b=%s' % start.day + + '&c=%s' % start.year + + '&d=%s' % (end.month - 1) + + '&e=%s' % end.day + + '&f=%s' % end.year + + '&g=v') + + for _ in range(retry_count): + time.sleep(pause) + + try: + with urlopen(url) as resp: + lines = resp.read() + except _network_error_classes: + pass + else: + actions_index = [] + actions_entries = [] + + for line in csv.reader(StringIO(bytes_to_str(lines))): + # Ignore lines that aren't dividends or splits (Yahoo + # add a bunch of irrelevant fields.) + if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'): + continue + + action, date, value = line + if action == 'DIVIDEND': + actions_index.append(to_datetime(date)) + actions_entries.append({ + 'action': action, + 'value': float(value) + }) + elif action == 'SPLIT' and ':' in value: + # Convert the split ratio to a fraction. For example a + # 4:1 split expressed as a fraction is 1/4 = 0.25. + denominator, numerator = value.split(':', 1) + split_fraction = float(numerator) / float(denominator) + + actions_index.append(to_datetime(date)) + actions_entries.append({ + 'action': action, + 'value': split_fraction + }) + + return DataFrame(actions_entries, index=actions_index) + + raise IOError("after %d tries, Yahoo! 
did not " + "return a 200 for url %r" % (retry_count, url)) + def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0.001, adjust_price=False, ret_index=False, diff --git a/pandas_datareader/tests/test_data.py b/pandas_datareader/tests/test_data.py index ecb6846d..cf205c78 100644 --- a/pandas_datareader/tests/test_data.py +++ b/pandas_datareader/tests/test_data.py @@ -265,6 +265,27 @@ def test_get_date_ret_index(self): # sanity checking assert np.issubdtype(pan.values.dtype, np.floating) + def test_get_data_yahoo_actions(self): + start = datetime(1990, 1, 1) + end = datetime(2000, 4, 5) + + actions = web.get_data_yahoo_actions('BHP.AX', start, end) + + self.assertEqual(sum(actions['action'] == 'DIVIDEND'), 20) + self.assertEqual(sum(actions['action'] == 'SPLIT'), 1) + + self.assertEqual(actions.ix['1995-05-11']['action'][0], 'SPLIT') + self.assertEqual(actions.ix['1995-05-11']['value'][0], 1/1.1) + + self.assertEqual(actions.ix['1993-05-10']['action'][0], 'DIVIDEND') + self.assertEqual(actions.ix['1993-05-10']['value'][0], 0.3) + + def test_get_data_yahoo_actions_invalid_symbol(self): + start = datetime(1990, 1, 1) + end = datetime(2000, 4, 5) + + self.assertRaises(IOError, web.get_data_yahoo_actions, 'UNKNOWN TICKER', start, end) + class TestYahooOptions(tm.TestCase): @classmethod