From 8196db95daa658737af929f68ec7cd45b826aa02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 17:41:24 -0400
Subject: [PATCH 1/6] Use google finance as datasource (test only, still
 pointing to yahoo finance)

---
 pandas/io/data.py              | 178 +++++++++++++++++++++++++++++++++
 pandas/io/tests/test_google.py |  95 ++++++++++++++++++
 2 files changed, 273 insertions(+)
 create mode 100644 pandas/io/tests/test_google.py

diff --git a/pandas/io/data.py b/pandas/io/data.py
index 43178fdcfddf1..f2b539fc795a7 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -58,6 +58,10 @@ def DataReader(name, data_source=None, start=None, end=None,
         return get_data_yahoo(symbols=name, start=start, end=end,
                               adjust_price=False, chunk=25,
                               retry_count=retry_count, pause=pause)
+    elif(data_source == "google"):
+        return get_data_google(symbols=name, start=start, end=end,
+                              adjust_price=False, chunk=25,
+                              retry_count=retry_count, pause=pause)
     elif(data_source == "fred"):
         return get_data_fred(name=name, start=start, end=end)
     elif(data_source == "famafrench"):
@@ -132,6 +136,56 @@ def get_quote_yahoo(symbols):
     return DataFrame(data, index=idx)
 
 
+def get_quote_google(symbols):
+    """
+    Get current yahoo quote
+
+    Returns a DataFrame
+    """
+    if isinstance(symbols, str):
+        sym_list = symbols
+    elif not isinstance(symbols, Series):
+        symbols  = Series(symbols)
+        sym_list = str.join('+', symbols)
+    else:
+        sym_list = str.join('+', symbols)
+
+    # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
+    codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r',
+             'time': 't1', 'short_ratio': 's7'}
+    request = str.join('', codes.values())  # code request string
+    header = codes.keys()
+
+    data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
+
+    urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
+        sym_list, request)
+
+    try:
+        lines = urllib2.urlopen(urlStr).readlines()
+    except Exception, e:
+        s = "Failed to download:\n{0}".format(e)
+        print s
+        return None
+
+    for line in lines:
+        fields = line.decode('utf-8').strip().split(',')
+        for i, field in enumerate(fields):
+            if field[-2:] == '%"':
+                data[header[i]].append(float(field.strip('"%')))
+            elif field[0] == '"':
+                data[header[i]].append(field.strip('"'))
+            else:
+                try:
+                    data[header[i]].append(float(field))
+                except ValueError:
+                    data[header[i]].append(np.nan)
+
+    idx = data.pop('symbol')
+
+    return DataFrame(data, index=idx)
+
+
 def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
                     pause=0, **kwargs):
     """
@@ -178,6 +232,52 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
                     "return a 200 for url %s" % (pause, url))
 
 
+def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
+                    pause=0, **kwargs):
+    """
+    Get historical data for the given name from yahoo.
+    Date format is datetime
+
+    Returns a DataFrame.
+    """
+    if(sym is None):
+        warnings.warn("Need to provide a name.")
+        return None
+
+    start, end = _sanitize_dates(start, end)
+
+    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
+
+    url = yahoo_URL + 's=%s' % sym + \
+        '&a=%s' % (start.month - 1) + \
+        '&b=%s' % start.day + \
+        '&c=%s' % start.year + \
+        '&d=%s' % (end.month - 1) + \
+        '&e=%s' % end.day + \
+        '&f=%s' % end.year + \
+        '&g=d' + \
+        '&ignore=.csv'
+
+    for _ in range(retry_count):
+        resp = urllib2.urlopen(url)
+        if resp.code == 200:
+            lines = resp.read()
+            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+                          parse_dates=True)[::-1]
+
+            # Yahoo! Finance sometimes does this awesome thing where they
+            # return 2 rows for the most recent business day
+            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
+                rs = rs[:-1]
+
+            return rs
+
+        time.sleep(pause)
+
+    raise Exception("after %d tries, Yahoo did not "
+                    "return a 200 for url %s" % (pause, url))
+
+
 def _adjust_prices(hist_data, price_list=['Open', 'High', 'Low', 'Close']):
     """
     Return modifed DataFrame or Panel with adjusted prices based on
@@ -347,6 +447,84 @@ def dl_mult_symbols(symbols):
 
     return hist_data
 
+def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
+                   adjust_price=False, ret_index=False, chunksize=25,
+                   **kwargs):
+    """
+    Returns DataFrame/Panel of historical stock prices from symbols, over date
+    range, start to end. To avoid being penalized by Yahoo! Finance servers,
+    pauses between downloading 'chunks' of symbols can be specified.
+
+    Parameters
+    ----------
+    symbols : string, array-like object (list, tuple, Series), or DataFrame
+        Single stock symbol (ticker), array-like object of symbols or
+        DataFrame with index containing stock symbols.
+    start : string, (defaults to '1/1/2010')
+        Starting date, timestamp. Parses many different kind of date
+        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
+    end : string, (defaults to today)
+        Ending date, timestamp. Same format as starting date.
+    retry_count : int, default 3
+        Number of times to retry query request.
+    pause : int, default 0
+        Time, in seconds, to pause between consecutive queries of chunks. If
+        single value given for symbol, represents the pause between retries.
+    adjust_price : bool, default False
+        If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
+        based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
+        'Adj Close'.
+    ret_index : bool, default False
+        If True, includes a simple return index 'Ret_Index' in hist_data.
+    chunksize : int, default 25
+        Number of symbols to download consecutively before intiating pause.
+
+    Returns
+    -------
+    hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
+    """
+
+    def dl_mult_symbols(symbols):
+        stocks = {}
+        for sym_group in _in_chunks(symbols, chunksize):
+            for sym in sym_group:
+                try:
+                    stocks[sym] = _get_hist_google(sym, start=start,
+                                                  end=end, **kwargs)
+                except:
+                    warnings.warn('Error with sym: ' + sym + '... skipping.')
+
+            time.sleep(pause)
+
+        return Panel(stocks).swapaxes('items', 'minor')
+
+    if 'name' in kwargs:
+        warnings.warn("Arg 'name' is deprecated, please use 'symbols' instead.",
+                      FutureWarning)
+        symbols = kwargs['name']
+
+    #If a single symbol, (e.g., 'GOOG')
+    if isinstance(symbols, (str, int)):
+        sym = symbols
+        hist_data = _get_hist_google(sym, start=start, end=end)
+    #Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
+    elif isinstance(symbols, DataFrame):
+        try:
+            hist_data = dl_mult_symbols(Series(symbols.index))
+        except ValueError:
+            raise
+    else: #Guess a Series
+        try:
+            hist_data = dl_mult_symbols(symbols)
+        except TypeError:
+            hist_data = dl_mult_symbols(Series(symbols))
+
+    if(ret_index):
+        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
+    if(adjust_price):
+        hist_data = _adjust_prices(hist_data)
+
+    return hist_data
 
 def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
                   end=dt.datetime.today()):
diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
new file mode 100644
index 0000000000000..9c3e81485f34d
--- /dev/null
+++ b/pandas/io/tests/test_google.py
@@ -0,0 +1,95 @@
+import unittest
+import nose
+from datetime import datetime
+
+import pandas as pd
+import pandas.io.data as web
+from pandas.util.testing import (network, assert_frame_equal,
+                                 assert_series_equal,
+                                 assert_almost_equal)
+from numpy.testing.decorators import slow
+
+import urllib2
+
+
+class TestGoogle(unittest.TestCase):
+
+    @slow
+    @network
+    def test_google(self):
+        # asserts that google is minimally working and that it throws
+        # an exception when DataReader can't get a 200 response from
+        # google
+        start = datetime(2010, 1, 1)
+        end = datetime(2013, 01, 27)
+
+        try:
+            self.assertEquals(
+                web.DataReader("F", 'google', start, end)['Close'][-1],
+                13.68)
+
+            self.assertRaises(
+                Exception,
+                lambda: web.DataReader("NON EXISTENT TICKER", 'google',
+                                      start, end))
+        except urllib2.URLError:
+            try:
+                urllib2.urlopen('http://www.google.com')
+            except urllib2.URLError:
+                raise nose.SkipTest
+            else:
+                raise
+
+
+    @slow
+    @network
+    def test_get_quote(self):
+        df = web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG']))
+        assert_series_equal(df.ix[0], df.ix[2])
+
+
+    @slow
+    @network
+    def test_get_data(self):
+        import numpy as np
+        df = web.get_data_google('GOOG')
+        assert df.Volume.ix['OCT-08-2010'] == 2859200
+
+        sl = ['AAPL', 'AMZN', 'GOOG']
+        pan = web.get_data_google(sl, '2012')
+        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
+        assert ts[0].dayofyear == 96
+
+        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
+        expected = [19.02, 28.23, 25.39]
+        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
+        assert result == expected
+
+        # sanity checking
+        t= np.array(result)
+        assert     np.issubdtype(t.dtype, np.floating)
+        assert     t.shape == (3,)
+
+        expected = [[ 18.99,  28.4 ,  25.18],
+                    [ 18.58,  28.31,  25.13],
+                    [ 19.03,  28.16,  25.52],
+                    [ 18.81,  28.82,  25.87]]
+        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
+        assert (result == expected).all()
+
+        #Check ret_index
+        pan = web.get_data_google(['GE', 'INTC', 'IBM'], '1977', '1987',
+                                 ret_index=True)
+        tstamp = pan.Ret_Index.INTC.first_valid_index()
+        result = pan.Ret_Index.ix[tstamp]['INTC']
+        expected = 1.0
+        assert result == expected
+
+        # sanity checking
+        t= np.array(pan)
+        assert     np.issubdtype(t.dtype, np.floating)
+
+
+if __name__ == '__main__':
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)

From ad89365c3870f192dde01568b584bec8b7ee1086 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 19:00:30 -0400
Subject: [PATCH 2/6] Remove unneeded import from test_google

---
 pandas/io/tests/test_google.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
index 9c3e81485f34d..5b5fdd59e4b55 100644
--- a/pandas/io/tests/test_google.py
+++ b/pandas/io/tests/test_google.py
@@ -4,9 +4,7 @@
 
 import pandas as pd
 import pandas.io.data as web
-from pandas.util.testing import (network, assert_frame_equal,
-                                 assert_series_equal,
-                                 assert_almost_equal)
+from pandas.util.testing import (network, assert_series_equal)
 from numpy.testing.decorators import slow
 
 import urllib2

From ee10caaaa30a81fc0e72ff53ca85f0937099b837 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 19:13:13 -0400
Subject: [PATCH 3/6] Implement _get_hist_google

---
 pandas/io/data.py              | 39 +++++++---------------------------
 pandas/io/tests/test_google.py | 11 ++--------
 2 files changed, 10 insertions(+), 40 deletions(-)

diff --git a/pandas/io/data.py b/pandas/io/data.py
index f2b539fc795a7..d178d0089e6d6 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -246,18 +246,12 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
 
     start, end = _sanitize_dates(start, end)
 
-    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
-
-    url = yahoo_URL + 's=%s' % sym + \
-        '&a=%s' % (start.month - 1) + \
-        '&b=%s' % start.day + \
-        '&c=%s' % start.year + \
-        '&d=%s' % (end.month - 1) + \
-        '&e=%s' % end.day + \
-        '&f=%s' % end.year + \
-        '&g=d' + \
-        '&ignore=.csv'
+    google_URL = 'http://www.google.com/finance/historical?'
 
+    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
+    url = google_URL + urllib.urlencode({"q": sym, \
+        "startdate": start.strftime('%b %d, %Y'), \
+        "enddate": end.strftime('%b %d, %Y'), "output": "csv" })
     for _ in range(retry_count):
         resp = urllib2.urlopen(url)
         if resp.code == 200:
@@ -265,16 +259,11 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
             rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                           parse_dates=True)[::-1]
 
-            # Yahoo! Finance sometimes does this awesome thing where they
-            # return 2 rows for the most recent business day
-            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
-                rs = rs[:-1]
-
             return rs
 
         time.sleep(pause)
 
-    raise Exception("after %d tries, Yahoo did not "
+    raise Exception("after %d tries, Google did not "
                     "return a 200 for url %s" % (pause, url))
 
 
@@ -448,11 +437,10 @@ def dl_mult_symbols(symbols):
     return hist_data
 
 def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
-                   adjust_price=False, ret_index=False, chunksize=25,
-                   **kwargs):
+                   chunksize=25, **kwargs):
     """
     Returns DataFrame/Panel of historical stock prices from symbols, over date
-    range, start to end. To avoid being penalized by Yahoo! Finance servers,
+    range, start to end. To avoid being penalized by Google Finance servers,
     pauses between downloading 'chunks' of symbols can be specified.
 
     Parameters
@@ -470,12 +458,6 @@ def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
     pause : int, default 0
         Time, in seconds, to pause between consecutive queries of chunks. If
         single value given for symbol, represents the pause between retries.
-    adjust_price : bool, default False
-        If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
-        based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
-        'Adj Close'.
-    ret_index : bool, default False
-        If True, includes a simple return index 'Ret_Index' in hist_data.
     chunksize : int, default 25
         Number of symbols to download consecutively before intiating pause.
 
@@ -519,11 +501,6 @@ def dl_mult_symbols(symbols):
         except TypeError:
             hist_data = dl_mult_symbols(Series(symbols))
 
-    if(ret_index):
-        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
-    if(adjust_price):
-        hist_data = _adjust_prices(hist_data)
-
     return hist_data
 
 def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
index 5b5fdd59e4b55..01868a70c3709 100644
--- a/pandas/io/tests/test_google.py
+++ b/pandas/io/tests/test_google.py
@@ -51,7 +51,8 @@ def test_get_quote(self):
     def test_get_data(self):
         import numpy as np
         df = web.get_data_google('GOOG')
-        assert df.Volume.ix['OCT-08-2010'] == 2859200
+        print(df.Volume.ix['OCT-08-2010'])
+        assert df.Volume.ix['OCT-08-2010'] == 2863473
 
         sl = ['AAPL', 'AMZN', 'GOOG']
         pan = web.get_data_google(sl, '2012')
@@ -75,14 +76,6 @@ def test_get_data(self):
         result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
         assert (result == expected).all()
 
-        #Check ret_index
-        pan = web.get_data_google(['GE', 'INTC', 'IBM'], '1977', '1987',
-                                 ret_index=True)
-        tstamp = pan.Ret_Index.INTC.first_valid_index()
-        result = pan.Ret_Index.ix[tstamp]['INTC']
-        expected = 1.0
-        assert result == expected
-
         # sanity checking
         t= np.array(pan)
         assert     np.issubdtype(t.dtype, np.floating)

From f43d24540a09cc2855569c6e8811669759cc065a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 19:20:23 -0400
Subject: [PATCH 4/6] No current finance data from Google

---
 pandas/io/data.py              | 49 +---------------------------------
 pandas/io/tests/test_google.py |  5 ++--
 2 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/pandas/io/data.py b/pandas/io/data.py
index d178d0089e6d6..13551272edae2 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -137,54 +137,7 @@ def get_quote_yahoo(symbols):
 
 
 def get_quote_google(symbols):
-    """
-    Get current yahoo quote
-
-    Returns a DataFrame
-    """
-    if isinstance(symbols, str):
-        sym_list = symbols
-    elif not isinstance(symbols, Series):
-        symbols  = Series(symbols)
-        sym_list = str.join('+', symbols)
-    else:
-        sym_list = str.join('+', symbols)
-
-    # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
-    codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r',
-             'time': 't1', 'short_ratio': 's7'}
-    request = str.join('', codes.values())  # code request string
-    header = codes.keys()
-
-    data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
-
-    urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
-        sym_list, request)
-
-    try:
-        lines = urllib2.urlopen(urlStr).readlines()
-    except Exception, e:
-        s = "Failed to download:\n{0}".format(e)
-        print s
-        return None
-
-    for line in lines:
-        fields = line.decode('utf-8').strip().split(',')
-        for i, field in enumerate(fields):
-            if field[-2:] == '%"':
-                data[header[i]].append(float(field.strip('"%')))
-            elif field[0] == '"':
-                data[header[i]].append(field.strip('"'))
-            else:
-                try:
-                    data[header[i]].append(float(field))
-                except ValueError:
-                    data[header[i]].append(np.nan)
-
-    idx = data.pop('symbol')
-
-    return DataFrame(data, index=idx)
-
+    raise NotImplementedError("Google Finance doesn't have this functionality")
 
 def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
                     pause=0, **kwargs):
diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
index 01868a70c3709..9db7964c1acfe 100644
--- a/pandas/io/tests/test_google.py
+++ b/pandas/io/tests/test_google.py
@@ -42,9 +42,8 @@ def test_google(self):
     @slow
     @network
     def test_get_quote(self):
-        df = web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG']))
-        assert_series_equal(df.ix[0], df.ix[2])
-
+        self.assertRaises(NotImplementedError,
+                lambda: web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG'])))
 
     @slow
     @network

From c0529576e1bcd4369954539fbafc82a5e8c42502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 20:11:02 -0400
Subject: [PATCH 5/6] Corrected typo in data

---
 pandas/io/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/data.py b/pandas/io/data.py
index 13551272edae2..8bc3df561cadb 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -188,7 +188,7 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
 def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
                     pause=0, **kwargs):
     """
-    Get historical data for the given name from yahoo.
+    Get historical data for the given name from google.
     Date format is datetime
 
     Returns a DataFrame.

From 0aadb1195219269b38e551e9044a52c33898e437 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= <gliptak@gmail.com>
Date: Sat, 8 Jun 2013 20:16:28 -0400
Subject: [PATCH 6/6] Change google finance tests to @network only

---
 pandas/io/tests/test_google.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
index 9db7964c1acfe..7f4ca13c27e58 100644
--- a/pandas/io/tests/test_google.py
+++ b/pandas/io/tests/test_google.py
@@ -12,7 +12,6 @@
 
 class TestGoogle(unittest.TestCase):
 
-    @slow
     @network
     def test_google(self):
         # asserts that google is minimally working and that it throws
@@ -39,13 +38,11 @@ def test_google(self):
                 raise
 
 
-    @slow
     @network
     def test_get_quote(self):
         self.assertRaises(NotImplementedError,
                 lambda: web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG'])))
 
-    @slow
     @network
     def test_get_data(self):
         import numpy as np