diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 57480a244f3083..86370709cde417 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -505,13 +505,18 @@ two ``Series`` or any combination of ``DataFrame/Series`` or - ``DataFrame/DataFrame``: by default compute the statistic for matching column names, returning a DataFrame. If the keyword argument ``pairwise=True`` is passed then computes the statistic for each pair of columns, returning a - ``Panel`` whose ``items`` are the dates in question (see :ref:`the next section + ``MultiIndexed DataFrame`` whose ``index`` holds the dates in question (see :ref:`the next section <stats.moments.corr_pairwise>`). For example: .. ipython:: python + df = pd.DataFrame(np.random.randn(1000, 4), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C', 'D']) + df = df.cumsum() + df2 = df[:20] df2.rolling(window=5).corr(df2['B']) @@ -520,11 +525,16 @@ For example: Computing rolling pairwise covariances and correlations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: + + Prior to version 0.20.0, if ``pairwise=True`` was passed, a ``Panel`` would be returned. + This will now return a 2-level MultiIndexed DataFrame; see the whatsnew :ref:`here <whatsnew_0200.api_breaking.rolling_pairwise>`. + In financial data analysis and other fields it's common to compute covariance and correlation matrices for a collection of time series. Often one is also interested in moving-window covariance and correlation matrices. This can be done by passing the ``pairwise`` keyword argument, which in the case of -``DataFrame`` inputs will yield a ``Panel`` whose ``items`` are the dates in +``DataFrame`` inputs will yield a ``MultiIndexed DataFrame`` whose ``index`` holds the dates in question. In the case of a single DataFrame argument the ``pairwise`` argument can even be omitted: @@ -539,12 +549,12 @@ can even be omitted: .. 
ipython:: python covs = df[['B','C','D']].rolling(window=50).cov(df[['A','B','C']], pairwise=True) - covs[df.index[-50]] + covs.iloc[-50].unstack() .. ipython:: python correls = df.rolling(window=50).corr() - correls[df.index[-50]] + correls.iloc[-50].unstack() You can efficiently retrieve the time series of correlations between two columns using ``.loc`` indexing: @@ -557,7 +567,7 @@ columns using ``.loc`` indexing: .. ipython:: python @savefig rolling_corr_pairwise_ex.png - correls.loc[:, 'A', 'C'].plot() + correls[('A', 'C')].plot() .. _stats.aggregate: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 680aefc4041fb4..7113b1832cdf05 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -13,6 +13,8 @@ Highlights include: - The ``.ix`` indexer has been deprecated, see :ref:`here ` - Switched the test framework to `pytest`_ (:issue:`13097`) - A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` +- Window Binary Corr/Cov operations return a MultiIndex DataFrame rather than a Panel, see :ref:`here <whatsnew_0200.api_breaking.rolling_pairwise>` + .. _pytest: http://doc.pytest.org/en/latest/ @@ -677,6 +679,48 @@ New Behavior: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) +.. _whatsnew_0200.api_breaking.rolling_pairwise: + +Window Binary Corr/Cov operations return a MultiIndex DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A binary window operation, like ``.corr()`` or ``.cov()``, when operating on a ``.rolling(..)``, ``.expanding(..)``, or ``.ewm(..)`` object, +will now return a 2-level ``MultiIndexed DataFrame`` rather than a ``Panel``. These are equivalent in function, +but MultiIndexed DataFrames enjoy more support in pandas. +See the section on :ref:`Windowed Binary Operations <stats.moments.binary>` for more information. (:issue:`15677`) + +.. ipython:: python + + np.random.seed(1234) + df = DataFrame(np.random.rand(100, 2)) + df + +Old Behavior: + +.. 
code-block:: ipython + + In [28]: df.rolling(12).corr() + Out[28]: + <class 'pandas.core.panel.Panel'> + Dimensions: 100 (items) x 2 (major_axis) x 2 (minor_axis) + Items axis: 0 to 99 + Major_axis axis: 0 to 1 + Minor_axis axis: 0 to 1 + +New Behavior: + +.. ipython:: python + + res = df.rolling(12).corr() + res + +Retrieving a correlation matrix for a specified index: + +.. ipython:: python + + res.iloc[-1].unstack() + + .. _whatsnew_0200.api_breaking.hdfstore_where: HDFStore where string comparison diff --git a/pandas/core/window.py b/pandas/core/window.py index 9c9f861451309d..71559c344e90b7 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1652,7 +1652,8 @@ def _cov(x, y): def _flex_binary_moment(arg1, arg2, f, pairwise=False): - from pandas import Series, DataFrame, Panel + from pandas import Series, DataFrame + if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and isinstance(arg2, (np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " @@ -1703,12 +1704,19 @@ def dataframe_from_int_dict(data, frame_template): else: results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) + + from pandas import Panel p = Panel.from_dict(results).swapaxes('items', 'major') if len(p.major_axis) > 0: p.major_axis = arg1.columns[p.major_axis] if len(p.minor_axis) > 0: p.minor_axis = arg2.columns[p.minor_axis] - return p + + result = (p.to_frame(filter_observations=False) + .T + ) + return result + else: raise ValueError("'pairwise' is not True/False") else: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index fe03d7886e6614..a3ad2448a27765 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -10,7 +10,7 @@ from distutils.version import LooseVersion import pandas as pd -from pandas import (Series, DataFrame, Panel, bdate_range, isnull, +from pandas import (Series, DataFrame, bdate_range, isnull, notnull, concat, Timestamp) import pandas.stats.moments as mom import pandas.core.window as 
rwindow @@ -1688,6 +1688,162 @@ def _check_ew_structures(self, func, name): self.assertEqual(type(frame_result), DataFrame) +class TestPairwise(object): + + # GH 7738 + df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], + columns=['C', 'C']), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), + DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], + columns=[1, 0.]), + DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], + columns=[0, 1.]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], + columns=[1., 'X']), ] + df2 = DataFrame([[None, 1, 1], [None, 1, 2], + [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) + s = Series([1, 1, 3, 8]) + + def compare(self, result, expected): + + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, f): + + # DataFrame methods (which do not call _flex_binary_moment()) + + with warnings.catch_warnings(record=True): + + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.columns) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: 
x.ewm(com=3).corr(pairwise=True)]) + def test_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=True + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.columns.levels[0], + df.columns, + check_names=False) + tm.assert_index_equal(result.columns.levels[1], + df.columns, + check_names=False) + tm.assert_index_equal(result.index, df.index) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), ]) + def test_no_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=False + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]) + def test_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=True + results = [f(df, self.df2) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.columns.levels[0], + df.columns, + check_names=False) + tm.assert_index_equal(result.columns.levels[1], + self.df2.columns, + check_names=False) + for i, result in enumerate(results): + if i > 0: + 
self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]) + def test_no_pairwise_with_other(self, f): + + with warnings.catch_warnings(record=True): + + # DataFrame with another DataFrame, pairwise=False + results = [f(df, self.df2) if df.columns.is_unique else None + for df in self.df1s] + for (df, result) in zip(self.df1s, results): + if result is not None: + expected_index = df.index.union(self.df2.index) + expected_columns = df.columns.union(self.df2.columns) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + tm.assertRaisesRegexp( + ValueError, "'arg1' columns are not unique", f, df, + self.df2) + tm.assertRaisesRegexp( + ValueError, "'arg2' columns are not unique", f, + self.df2, df) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), ]) + def test_pairwise_with_series(self, f): + + # DataFrame with a Series + results = ([f(df, self.s) for df in self.df1s] + + [f(self.s, df) for df in self.df1s]) + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + # create the data only once as we are not setting it def _create_consistency_data(): def create_series(): @@ -2083,21 +2239,6 @@ def test_expanding_consistency(self): assert_equal(expanding_f_result, 
expanding_apply_f_result) - if (name in ['cov', 'corr']) and isinstance(x, - DataFrame): - # test pairwise=True - expanding_f_result = expanding_f(x, pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = getattr( - x.iloc[:, i].expanding( - min_periods=min_periods), - name)(x.iloc[:, j]) - tm.assert_panel_equal(expanding_f_result, expected) - @tm.slow def test_rolling_consistency(self): @@ -2203,25 +2344,6 @@ def cases(): assert_equal(rolling_f_result, rolling_apply_f_result) - if (name in ['cov', 'corr']) and isinstance( - x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, - pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = ( - getattr( - x.iloc[:, i] - .rolling(window=window, - min_periods=min_periods, - center=center), - name)(x.iloc[:, j])) - tm.assert_panel_equal(rolling_f_result, expected) - # binary moments def test_rolling_cov(self): A = self.series @@ -2257,11 +2379,10 @@ def _check_pairwise_moment(self, dispatch, name, **kwargs): def get_result(obj, obj2=None): return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2) - panel = get_result(self.frame) - actual = panel.loc[:, 1, 5] + result = get_result(self.frame) + result = result[1][5] expected = get_result(self.frame[1], self.frame[5]) - tm.assert_series_equal(actual, expected, check_names=False) - self.assertEqual(actual.name, 5) + tm.assert_series_equal(result, expected, check_names=False) def test_flex_binary_moment(self): # GH3155 @@ -2429,17 +2550,14 @@ def test_expanding_cov_pairwise(self): rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - for i in result.items: - tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, 
rolling_result) def test_expanding_corr_pairwise(self): result = self.frame.expanding().corr() rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - - for i in result.items: - tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, rolling_result) def test_expanding_cov_diff_index(self): # GH 7512 @@ -2507,8 +2625,6 @@ def test_rolling_functions_window_non_shrinkage(self): s_expected = Series(np.nan, index=s.index) df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B']) df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) - df_expected_panel = Panel(items=df.index, major_axis=df.columns, - minor_axis=df.columns) functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=False)), @@ -2540,13 +2656,22 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue + def test_rolling_functions_window_non_shrinkage_binary(self): + + # corr/cov return a MI DataFrame + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B']) + df_expected = DataFrame( + columns=pd.MultiIndex.from_product([df.columns, df.columns], + names=['major', 'minor']), + index=df.index, + dtype='float64') functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=True)), lambda x: (x.rolling(window=10, min_periods=5) .corr(x, pairwise=True))] for f in functions: - df_result_panel = f(df) - tm.assert_panel_equal(df_result_panel, df_expected_panel) + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) def test_moment_functions_zero_length(self): # GH 8056 @@ -2554,13 +2679,9 @@ def test_moment_functions_zero_length(self): s_expected = s df1 = DataFrame() df1_expected = df1 - df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, - minor_axis=df1.columns) df2 = DataFrame(columns=['a']) df2['a'] = df2['a'].astype('float64') df2_expected = df2 - df2_expected_panel = Panel(items=df2.index, 
major_axis=df2.columns, - minor_axis=df2.columns) functions = [lambda x: x.expanding().count(), lambda x: x.expanding(min_periods=5).cov( @@ -2613,6 +2734,23 @@ def test_moment_functions_zero_length(self): # scipy needed for rolling_window continue + def test_moment_functions_zero_length_pairwise(self): + + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=['a']) + df2['a'] = df2['a'].astype('float64') + + df1_expected = DataFrame( + index=df1.index, + columns=pd.MultiIndex.from_product([df1.columns, df1.columns], + names=['major', 'minor'])) + df2_expected = DataFrame( + index=df2.index, + columns=pd.MultiIndex.from_product([df2.columns, df2.columns], + names=['major', 'minor']), + dtype='float64') + functions = [lambda x: (x.expanding(min_periods=5) .cov(x, pairwise=True)), lambda x: (x.expanding(min_periods=5) @@ -2623,11 +2761,11 @@ def test_moment_functions_zero_length(self): .corr(x, pairwise=True)), ] for f in functions: - df1_result_panel = f(df1) - tm.assert_panel_equal(df1_result_panel, df1_expected_panel) + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) - df2_result_panel = f(df2) - tm.assert_panel_equal(df2_result_panel, df2_expected_panel) + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) def test_expanding_cov_pairwise_diff_length(self): # GH 7512 @@ -2635,16 +2773,20 @@ def test_expanding_cov_pairwise_diff_length(self): df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B']) df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().cov(df2a, pairwise=True)[2] - result2 = df1.expanding().cov(df2a, pairwise=True)[2] - result3 = df1a.expanding().cov(df2, pairwise=True)[2] - result4 = df1a.expanding().cov(df2a, pairwise=True)[2] - expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'], - columns=['X', 'Y']) - tm.assert_frame_equal(result1, expected) - 
tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) - tm.assert_frame_equal(result4, expected) + result1 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = Series( + [-3.0, -5.0, -6.0, -10.0], + index=pd.MultiIndex.from_product([['A', 'B'], + ['X', 'Y']], + names=['major', 'minor']), + name=2) + tm.assert_series_equal(result1, expected) + tm.assert_series_equal(result2, expected) + tm.assert_series_equal(result3, expected) + tm.assert_series_equal(result4, expected) def test_expanding_corr_pairwise_diff_length(self): # GH 7512 @@ -2652,144 +2794,20 @@ def test_expanding_corr_pairwise_diff_length(self): df1a = DataFrame([[1, 2], [3, 4]], index=[0, 2], columns=['A', 'B']) df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().corr(df2, pairwise=True)[2] - result2 = df1.expanding().corr(df2a, pairwise=True)[2] - result3 = df1a.expanding().corr(df2, pairwise=True)[2] - result4 = df1a.expanding().corr(df2a, pairwise=True)[2] - expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A', 'B'], - columns=['X', 'Y']) - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) - tm.assert_frame_equal(result4, expected) - - def test_pairwise_stats_column_names_order(self): - # GH 7738 - df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], - columns=['C', 'C']), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), - 
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), - DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], - columns=[1, 0.]), - DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], - columns=[0, 1.]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], - columns=[1., 'X']), ] - df2 = DataFrame([[None, 1, 1], [None, 1, 2], - [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) - s = Series([1, 1, 3, 8]) - - # suppress warnings about incomparable objects, as we are deliberately - # testing with such column labels - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message=".*incomparable objects.*", - category=RuntimeWarning) - - # DataFrame methods (which do not call _flex_binary_moment()) - for f in [lambda x: x.cov(), lambda x: x.corr(), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.columns) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - # compare internal values, as columns can be different - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=True - for f in [lambda x: x.expanding().cov(pairwise=True), - lambda x: x.expanding().corr(pairwise=True), - lambda x: x.rolling(window=3).cov(pairwise=True), - lambda x: x.rolling(window=3).corr(pairwise=True), - lambda x: x.ewm(com=3).cov(pairwise=True), - lambda x: x.ewm(com=3).corr(pairwise=True), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=False - for f in [lambda x: x.expanding().cov(pairwise=False), - lambda x: x.expanding().corr(pairwise=False), - lambda x: 
x.rolling(window=3).cov(pairwise=False), - lambda x: x.rolling(window=3).corr(pairwise=False), - lambda x: x.ewm(com=3).cov(pairwise=False), - lambda x: x.ewm(com=3).corr(pairwise=False), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=True - for f in [lambda x, y: x.expanding().cov(y, pairwise=True), - lambda x, y: x.expanding().corr(y, pairwise=True), - lambda x, y: x.rolling(window=3).cov(y, pairwise=True), - lambda x, y: x.rolling(window=3).corr(y, pairwise=True), - lambda x, y: x.ewm(com=3).cov(y, pairwise=True), - lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]: - results = [f(df, df2) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df2.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=False - for f in [lambda x, y: x.expanding().cov(y, pairwise=False), - lambda x, y: x.expanding().corr(y, pairwise=False), - lambda x, y: x.rolling(window=3).cov(y, pairwise=False), - lambda x, y: x.rolling(window=3).corr(y, pairwise=False), - lambda x, y: x.ewm(com=3).cov(y, pairwise=False), - lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: - results = [f(df, df2) if df.columns.is_unique else None - for df in df1s] - for (df, result) in zip(df1s, results): - if result is not None: - expected_index = df.index.union(df2.index) - expected_columns = df.columns.union(df2.columns) - tm.assert_index_equal(result.index, expected_index) - tm.assert_index_equal(result.columns, expected_columns) - else: - 
tm.assertRaisesRegexp( - ValueError, "'arg1' columns are not unique", f, df, - df2) - tm.assertRaisesRegexp( - ValueError, "'arg2' columns are not unique", f, - df2, df) - - # DataFrame with a Series - for f in [lambda x, y: x.expanding().cov(y), - lambda x, y: x.expanding().corr(y), - lambda x, y: x.rolling(window=3).cov(y), - lambda x, y: x.rolling(window=3).corr(y), - lambda x, y: x.ewm(com=3).cov(y), - lambda x, y: x.ewm(com=3).corr(y), ]: - results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = Series( + [-1.0, -1.0, -1.0, -1.0], + index=pd.MultiIndex.from_product([['A', 'B'], + ['X', 'Y']], + names=['major', 'minor']), + name=2) + tm.assert_series_equal(result1, expected) + tm.assert_series_equal(result2, expected) + tm.assert_series_equal(result3, expected) + tm.assert_series_equal(result4, expected) def test_rolling_skew_edge_cases(self):