From a9495abd23a9ae2620abbb6adf96ba537ad7621a Mon Sep 17 00:00:00 2001 From: Jason Sexauer Date: Sun, 23 Mar 2014 10:40:15 -0400 Subject: [PATCH] Deprecate cols in to_csv, to_excel, drop_duplicates, and duplicated. Use decorator. Update docs and unit tests. [fix #6645, fix#6680] --- doc/source/comparison_with_r.rst | 8 ++-- doc/source/release.rst | 20 +++++++--- doc/source/reshaping.rst | 16 ++++---- doc/source/v0.14.0.txt | 19 ++++++--- pandas/core/frame.py | 49 +++++++++++++---------- pandas/io/tests/test_excel.py | 24 +++++++---- pandas/tests/test_format.py | 2 +- pandas/tests/test_frame.py | 68 +++++++++++++++++++++++++++++--- pandas/tools/pivot.py | 54 ++++--------------------- pandas/tools/tests/test_pivot.py | 2 +- pandas/util/decorators.py | 49 +++++++++++++++++++++++ 11 files changed, 205 insertions(+), 106 deletions(-) diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index 7de0b85ede51f..84bba77e0dfa3 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -171,7 +171,7 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: 'player': random.sample(list(string.ascii_lowercase),25), 'batting avg': np.random.uniform(.200, .400, 25) }) - baseball.pivot_table(values='batting avg', cols='team', aggfunc=np.max) + baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation `. 
@@ -402,8 +402,8 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: 'week': [1,2]*6 }) mdf = pd.melt(df, id_vars=['month', 'week']) - pd.pivot_table(mdf, values='value', rows=['variable','week'], - cols=['month'], aggfunc=np.mean) + pd.pivot_table(mdf, values='value', index=['variable','week'], + columns=['month'], aggfunc=np.mean) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -433,7 +433,7 @@ using :meth:`~pandas.pivot_table`: 'Amount': [10, 7, 4, 2, 5, 6, 2], }) - df.pivot_table(values='Amount', rows='Animal', cols='FeedType', aggfunc='sum') + df.pivot_table(values='Amount', index='Animal', columns='FeedType', aggfunc='sum') The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: diff --git a/doc/source/release.rst b/doc/source/release.rst index df0f472c390c7..6d7751266036b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -129,11 +129,6 @@ API Changes ``DataFrame.stack`` operations where the name of the column index is used as the name of the inserted column containing the pivoted data. -- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions - now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A - ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments - will not be supported in a future release (:issue:`5505`) - - Allow specification of a more complex groupby, via ``pd.Grouper`` (:issue:`3794`) - A tuple passed to ``DataFame.sort_index`` will be interpreted as the levels of @@ -149,6 +144,21 @@ API Changes Deprecations ~~~~~~~~~~~~ +- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions + now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. 
A + ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments + will not be supported in a future release (:issue:`5505`) + +- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods + now take argument ``subset`` instead of ``cols`` to better align with + :meth:`DataFrame.dropna`. A ``FutureWarning`` is raised to alert that the old + ``cols`` arguments will not be supported in a future release (:issue:`6680`) + +- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` functions + now takes argument ``columns`` instead of ``cols``. A + ``FutureWarning`` is raised to alert that the old ``cols`` arguments + will not be supported in a future release (:issue:`6645`) + Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 9aab5543aff92..274a2341c1a9f 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -283,9 +283,9 @@ We can produce pivot tables from this data very easily: .. ipython:: python - pivot_table(df, values='D', rows=['A', 'B'], cols=['C']) - pivot_table(df, values='D', rows=['B'], cols=['A', 'C'], aggfunc=np.sum) - pivot_table(df, values=['D','E'], rows=['B'], cols=['A', 'C'], aggfunc=np.sum) + pivot_table(df, values='D', index=['A', 'B'], columns=['C']) + pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum) + pivot_table(df, values=['D','E'], index=['B'], columns=['A', 'C'], aggfunc=np.sum) The result object is a DataFrame having potentially hierarchical indexes on the rows and columns. If the ``values`` column name is not given, the pivot table @@ -294,14 +294,14 @@ hierarchy in the columns: .. ipython:: python - pivot_table(df, rows=['A', 'B'], cols=['C']) + pivot_table(df, index=['A', 'B'], columns=['C']) You can render a nice output of the table omitting the missing values by calling ``to_string`` if you wish: .. 
ipython:: python - table = pivot_table(df, rows=['A', 'B'], cols=['C']) + table = pivot_table(df, index=['A', 'B'], columns=['C']) print(table.to_string(na_rep='')) Note that ``pivot_table`` is also available as an instance method on DataFrame. @@ -315,8 +315,8 @@ unless an array of values and an aggregation function are passed. It takes a number of arguments -- ``rows``: array-like, values to group by in the rows -- ``cols``: array-like, values to group by in the columns +- ``index``: array-like, values to group by in the rows +- ``columns``: array-like, values to group by in the columns - ``values``: array-like, optional, array of values to aggregate according to the factors - ``aggfunc``: function, optional, If no values array is passed, computes a @@ -350,7 +350,7 @@ rows and columns: .. ipython:: python - df.pivot_table(rows=['A', 'B'], cols='C', margins=True, aggfunc=np.std) + df.pivot_table(index=['A', 'B'], columns='C', margins=True, aggfunc=np.std) .. _reshaping.tile: diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index a2990644fa124..d845d0511815c 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -173,11 +173,6 @@ These are out-of-bounds selections # New output, 4-level MultiIndex df_multi.set_index([df_multi.index, df_multi.index]) -- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions - now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A - ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments - will not be supported in a future release (:issue:`5505`) - - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`. - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). 
(:issue:`6604`) @@ -313,8 +308,20 @@ Therse are prior version deprecations that are taking effect as of 0.14.0. Deprecations ~~~~~~~~~~~~ +- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions + now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A + ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments + will not be supported in a future release (:issue:`5505`) + +- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods + now take argument ``subset`` instead of ``cols`` to better align with + :meth:`DataFrame.dropna`. A ``FutureWarning`` is raised to alert that the old + ``cols`` arguments will not be supported in a future release (:issue:`6680`) -There are no deprecations of prior behavior in 0.14.0 +- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` functions + now takes argument ``columns`` instead of ``cols``. A + ``FutureWarning`` is raised to alert that the old ``cols`` arguments + will not be supported in a future release (:issue:`6645`) Enhancements ~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8cf164ba76c21..a410bb8be8c52 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -41,7 +41,8 @@ from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat -from pandas.util.decorators import deprecate, Appender, Substitution +from pandas.util.decorators import deprecate, Appender, Substitution, \ + deprecate_kwarg from pandas.tseries.period import PeriodIndex from pandas.tseries.index import DatetimeIndex @@ -1067,8 +1068,9 @@ def to_panel(self): to_wide = deprecate('to_wide', to_panel) + @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, - cols=None, header=True, index=True, index_label=None, + columns=None, header=True, index=True, 
index_label=None, mode='w', nanRep=None, encoding=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=False, date_format=None, doublequote=True, @@ -1086,7 +1088,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Missing data representation float_format : string, default None Format string for floating point numbers - cols : sequence, optional + columns : sequence, optional Columns to write header : boolean or list of string, default True Write out column names. If a list of string is given it is assumed @@ -1124,6 +1126,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, or new (expanded format) if False) date_format : string, default None Format string for datetime objects + cols : kwarg only alias of columns [deprecated] """ if nanRep is not None: # pragma: no cover warnings.warn("nanRep is deprecated, use na_rep", @@ -1134,7 +1137,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, line_terminator=line_terminator, sep=sep, encoding=encoding, quoting=quoting, na_rep=na_rep, - float_format=float_format, cols=cols, + float_format=float_format, cols=columns, header=header, index=index, index_label=index_label, mode=mode, chunksize=chunksize, quotechar=quotechar, @@ -1148,8 +1151,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, if path_or_buf is None: return formatter.path_or_buf.getvalue() + @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', - float_format=None, cols=None, header=True, index=True, + float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None): """ @@ -1189,6 +1193,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', encoding: string, default None encoding of the resulting excel file. 
Only necessary for xlwt, other writers support unicode natively. + cols : kwarg only alias of columns [deprecated] Notes ----- @@ -1202,6 +1207,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', >>> writer.save() """ from pandas.io.excel import ExcelWriter + need_save = False if encoding == None: encoding = 'ascii' @@ -1212,7 +1218,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', formatter = fmt.ExcelFormatter(self, na_rep=na_rep, - cols=cols, + cols=columns, header=header, float_format=float_format, index=index, @@ -2439,27 +2445,28 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: return result - def drop_duplicates(self, cols=None, take_last=False, inplace=False): + @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset') + def drop_duplicates(self, subset=None, take_last=False, inplace=False): """ Return DataFrame with duplicate rows removed, optionally only considering certain columns Parameters ---------- - cols : column label or sequence of labels, optional + subset : column label or sequence of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns take_last : boolean, default False Take the last observed row in a row. 
Defaults to the first row inplace : boolean, default False Whether to drop duplicates in place or to return a copy + cols : kwargs only argument of subset [deprecated] Returns ------- deduplicated : DataFrame """ - - duplicated = self.duplicated(cols, take_last=take_last) + duplicated = self.duplicated(subset, take_last=take_last) if inplace: inds, = (-duplicated).nonzero() @@ -2468,18 +2475,20 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False): else: return self[-duplicated] - def duplicated(self, cols=None, take_last=False): + @deprecate_kwarg(old_arg_name='cols', new_arg_name='subset') + def duplicated(self, subset=None, take_last=False): """ Return boolean Series denoting duplicate rows, optionally only considering certain columns Parameters ---------- - cols : column label or sequence of labels, optional + subset : column label or sequence of labels, optional Only consider certain columns for identifying duplicates, by default use all of the columns take_last : boolean, default False Take the last observed row in a row. 
Defaults to the first row + cols : kwargs only argument of subset [deprecated] Returns ------- @@ -2491,19 +2500,19 @@ def _m8_to_i8(x): return x.view(np.int64) return x - if cols is None: + if subset is None: values = list(_m8_to_i8(self.values.T)) else: - if np.iterable(cols) and not isinstance(cols, compat.string_types): - if isinstance(cols, tuple): - if cols in self.columns: - values = [self[cols].values] + if np.iterable(subset) and not isinstance(subset, compat.string_types): + if isinstance(subset, tuple): + if subset in self.columns: + values = [self[subset].values] else: - values = [_m8_to_i8(self[x].values) for x in cols] + values = [_m8_to_i8(self[x].values) for x in subset] else: - values = [_m8_to_i8(self[x].values) for x in cols] + values = [_m8_to_i8(self[x].values) for x in subset] else: - values = [self[cols].values] + values = [self[subset].values] keys = lib.fast_zip_fillna(values) duplicated = lib.duplicated(keys, take_last=take_last) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index e1d168b66420f..8ba2a5dfc3d9c 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -396,6 +396,14 @@ def test_excel_sheet_by_name_raise(self): self.assertRaises(xlrd.XLRDError, xl.parse, '0') + def test_excel_deprecated_options(self): + with ensure_clean(self.ext) as path: + with tm.assert_produces_warning(FutureWarning): + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + + with tm.assert_produces_warning(False): + self.frame.to_excel(path, 'test1', columns=['A', 'B']) + def test_excelwriter_contextmanager(self): _skip_if_no_xlrd() @@ -417,7 +425,7 @@ def test_roundtrip(self): self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) @@ -479,7 +487,7 @@ def test_basics_with_nan(self): with 
ensure_clean(self.ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) @@ -537,7 +545,7 @@ def test_sheets(self): self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) @@ -562,7 +570,7 @@ def test_colaliases(self): self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) @@ -583,7 +591,7 @@ def test_roundtrip_indexlabels(self): self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) @@ -630,7 +638,7 @@ def test_roundtrip_indexlabels(self): self.frame.to_excel(path, 'test1', - cols=['A', 'B', 'C', 'D'], + columns=['A', 'B', 'C', 'D'], index=False, merge_cells=self.merge_cells) # take 'A' and 'B' as indexes (same row as cols 'C', 'D') df = self.frame.copy() @@ -733,7 +741,7 @@ def test_to_excel_multiindex(self): with ensure_clean(self.ext) as path: frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', cols=['A', 'B']) + frame.to_excel(path, 'test1', columns=['A', 'B']) # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) @@ -1020,7 +1028,7 @@ def test_swapped_columns(self): with ensure_clean(self.ext) as path: write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) - 
write_frame.to_excel(path, 'test1', cols=['B', 'A']) + write_frame.to_excel(path, 'test1', columns=['B', 'A']) read_frame = read_excel(path, 'test1', header=0) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 80234c9c81023..3ce65c81592a9 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -725,7 +725,7 @@ def test_to_html_regression_GH6098(self): 'données1': np.random.randn(5), 'données2': np.random.randn(5)}) # it works - df.pivot_table(rows=[u('clé1')], cols=[u('clé2')])._repr_html_() + df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 32805d47821f4..2101c732893e3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5439,6 +5439,13 @@ def test_to_csv_deprecated_options(self): self.tsframe.to_csv(path, nanRep='foo') recons = read_csv(path,index_col=0,parse_dates=[0],na_values=['foo']) assert_frame_equal(self.tsframe, recons) + + with tm.assert_produces_warning(FutureWarning): + self.frame.to_csv(path, cols=['A', 'B']) + + with tm.assert_produces_warning(False): + self.frame.to_csv(path, columns=['A', 'B']) + def test_to_csv_from_csv(self): @@ -5448,7 +5455,7 @@ def test_to_csv_from_csv(self): self.frame['A'][:5] = nan self.frame.to_csv(path) - self.frame.to_csv(path, cols=['A', 'B']) + self.frame.to_csv(path, columns=['A', 'B']) self.frame.to_csv(path, header=False) self.frame.to_csv(path, index=False) @@ -5522,9 +5529,9 @@ def test_to_csv_cols_reordering(self): def _check_df(df,cols=None): with ensure_clean() as path: - df.to_csv(path,cols = cols,engine='python') + df.to_csv(path,columns = cols,engine='python') rs_p = pd.read_csv(path,index_col=0) - df.to_csv(path,cols = cols,chunksize=chunksize) + df.to_csv(path,columns = cols,chunksize=chunksize) rs_c = pd.read_csv(path,index_col=0) if cols: 
@@ -5550,7 +5557,7 @@ def test_to_csv_new_dupe_cols(self): import pandas as pd def _check_df(df,cols=None): with ensure_clean() as path: - df.to_csv(path,cols = cols,chunksize=chunksize) + df.to_csv(path,columns = cols,chunksize=chunksize) rs_c = pd.read_csv(path,index_col=0) # we wrote them in a different order @@ -5807,7 +5814,7 @@ def test_to_csv_multiindex(self): with ensure_clean(pname) as path: frame.to_csv(path, header=False) - frame.to_csv(path, cols=['A', 'B']) + frame.to_csv(path, columns=['A', 'B']) # round trip frame.to_csv(path) @@ -5925,7 +5932,7 @@ def _make_frame(names=None): # write with cols with assertRaisesRegexp(TypeError, 'cannot specify cols with a MultiIndex'): - df.to_csv(path, tupleize_cols=False, cols=['foo', 'bar']) + df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar']) with ensure_clean(pname) as path: # empty @@ -6937,6 +6944,32 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B'], take_last=True) assert_frame_equal(result, expected) + def test_drop_duplicates_deprecated_warning(self): + df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': [1, 1, 2, 2, 2, 2, 1, 2], + 'D': lrange(8)}) + expected = df[:2] + + # Raises warning + with tm.assert_produces_warning(False): + result = df.drop_duplicates(subset='AAA') + assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = df.drop_duplicates(cols='AAA') + assert_frame_equal(result, expected) + + # Does not allow both subset and cols + self.assertRaises(TypeError, df.drop_duplicates, + kwargs={'cols': 'AAA', 'subset': 'B'}) + + # Does not allow unknown kwargs + self.assertRaises(TypeError, df.drop_duplicates, + kwargs={'subset': 'AAA', 'bad_arg': True}) + def test_drop_duplicates_tuple(self): df = DataFrame({('AA', 'AB'): ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -7062,6 +7095,29 @@ def 
test_drop_duplicates_inplace(self): result = df2 assert_frame_equal(result, expected) + def test_duplicated_deprecated_warning(self): + df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'bar', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': [1, 1, 2, 2, 2, 2, 1, 2], + 'D': lrange(8)}) + + # Raises warning + with tm.assert_produces_warning(False): + result = df.duplicated(subset='AAA') + + with tm.assert_produces_warning(FutureWarning): + result = df.duplicated(cols='AAA') + + # Does not allow both subset and cols + self.assertRaises(TypeError, df.duplicated, + kwargs={'cols': 'AAA', 'subset': 'B'}) + + # Does not allow unknown kwargs + self.assertRaises(TypeError, df.duplicated, + kwargs={'subset': 'AAA', 'bad_arg': True}) + def test_drop_col_still_multiindex(self): arrays = [['a', 'b', 'c', 'top'], ['', '', '', 'OD'], diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 59f1bf3453b1b..6c4f55ae8a3b5 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -7,13 +7,15 @@ from pandas.tools.merge import concat from pandas.tools.util import cartesian_product from pandas.compat import range, lrange, zip +from pandas.util.decorators import deprecate_kwarg from pandas import compat import pandas.core.common as com import numpy as np - +@deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') +@deprecate_kwarg(old_arg_name='rows', new_arg_name='index') def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', - fill_value=None, margins=False, dropna=True, **kwarg): + fill_value=None, margins=False, dropna=True): """ Create a spreadsheet-style pivot table as a DataFrame. 
The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on @@ -67,28 +69,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', ------- table : DataFrame """ - # Parse old-style keyword arguments - rows = kwarg.pop('rows', None) - if rows is not None: - warnings.warn("rows is deprecated, use index", FutureWarning) - if index is None: - index = rows - else: - msg = "Can only specify either 'rows' or 'index'" - raise TypeError(msg) - - cols = kwarg.pop('cols', None) - if cols is not None: - warnings.warn("cols is deprecated, use columns", FutureWarning) - if columns is None: - columns = cols - else: - msg = "Can only specify either 'cols' or 'columns'" - raise TypeError(msg) - - if kwarg: - raise TypeError("Unexpected argument(s): %s" % kwarg.keys()) - index = _convert_by(index) columns = _convert_by(columns) @@ -324,9 +304,10 @@ def _convert_by(by): by = list(by) return by - +@deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') +@deprecate_kwarg(old_arg_name='rows', new_arg_name='index') def crosstab(index, columns, values=None, rownames=None, colnames=None, - aggfunc=None, margins=False, dropna=True, **kwarg): + aggfunc=None, margins=False, dropna=True): """ Compute a simple cross-tabulation of two (or more) factors. 
By default computes a frequency table of the factors unless an array of values and an @@ -381,27 +362,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, ------- crosstab : DataFrame """ - # Parse old-style keyword arguments - rows = kwarg.pop('rows', None) - if rows is not None: - warnings.warn("rows is deprecated, use index", FutureWarning) - if index is None: - index = rows - else: - msg = "Can only specify either 'rows' or 'index'" - raise TypeError(msg) - - cols = kwarg.pop('cols', None) - if cols is not None: - warnings.warn("cols is deprecated, use columns", FutureWarning) - if columns is None: - columns = cols - else: - msg = "Can only specify either 'cols' or 'columns'" - raise TypeError(msg) - - if kwarg: - raise TypeError("Unexpected argument(s): %s" % kwarg.keys()) index = com._maybe_make_list(index) columns = com._maybe_make_list(columns) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 12f0ffa6e8aa5..2255fdebc9fe3 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -430,7 +430,7 @@ def test_crosstab_pass_values(self): df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values': values}) - expected = df.pivot_table('values', index=['foo', 'bar'], cols='baz', + expected = df.pivot_table('values', index=['foo', 'bar'], columns='baz', aggfunc=np.sum) tm.assert_frame_equal(table, expected) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 56a90b9ba1d17..476a643b34ff7 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -2,6 +2,7 @@ from pandas.lib import cache_readonly import sys import warnings +from functools import wraps def deprecate(name, alternative, alt_name=None): @@ -13,6 +14,54 @@ def wrapper(*args, **kwargs): return alternative(*args, **kwargs) return wrapper + +def deprecate_kwarg(old_arg_name, new_arg_name): + """Decorator to deprecate a keyword argument of a function + + Parameters + ---------- + 
old_arg_name : str + Name of argument in function to deprecate + new_arg_name : str + Name of preferred argument in function + + Examples + -------- + The following deprecates 'cols', using 'columns' instead + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') + ... def f(columns=''): + ... print columns + ... + >>> f(columns='should work ok') + should work ok + >>> f(cols='should raise warning') + FutureWarning: cols is deprecated, use columns instead + warnings.warn(msg, FutureWarning) + should raise warning + >>> f(cols='should error', columns="can't pass do both") + TypeError: Can only specify 'cols' or 'columns', not both + + """ + def _deprecate_kwarg(func): + @wraps(func) + def wrapper(*args, **kwargs): + old_arg_value = kwargs.pop(old_arg_name, None) + if old_arg_value is not None: + msg = "%s is deprecated, use %s instead" % \ + (old_arg_name, new_arg_name) + warnings.warn(msg, FutureWarning) + if kwargs.get(new_arg_name, None) is not None: + msg = "Can only specify '%s' or '%s', not both" % \ + (old_arg_name, new_arg_name) + raise TypeError(msg) + else: + kwargs[new_arg_name] = old_arg_value + return func(*args, **kwargs) + return wrapper + return _deprecate_kwarg + + # Substitution and Appender are derived from matplotlib.docstring (1.1.0) # module http://matplotlib.sourceforge.net/users/license.html