From c81712c985eedce3118018e2f783d5048d22f4c3 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 21 Apr 2014 16:05:34 -0400 Subject: [PATCH] API: remove the copy kw from .xs to prevent accidental / confusing setting (GH6894) --- doc/source/api.rst | 2 +- doc/source/release.rst | 4 ++ doc/source/v0.14.0.txt | 4 ++ pandas/core/frame.py | 7 +++- pandas/core/generic.py | 33 +++++++--------- pandas/core/indexing.py | 7 +--- pandas/core/internals.py | 24 +++++------ pandas/core/panel.py | 70 +++++++++++++++++++++++++-------- pandas/tests/test_frame.py | 29 ++++---------- pandas/tests/test_multilevel.py | 23 ++++++++--- pandas/tests/test_panel.py | 14 +++---- pandas/tests/test_panel4d.py | 12 ++---- 12 files changed, 129 insertions(+), 100 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 5e5b84e0e80b2..7918d6930341a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -78,7 +78,7 @@ SQL .. autosummary:: :toctree: generated/ - + read_sql_table read_sql_query read_sql diff --git a/doc/source/release.rst b/doc/source/release.rst index 03b89f9077994..ec92613b67075 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -176,6 +176,10 @@ API Changes - ``.quantile`` on a ``datetime[ns]`` series now returns ``Timestamp`` instead of ``np.datetime64`` objects (:issue:`6810`) +- Remove the ``copy`` keyword from ``DataFrame.xs``,``Panel.major_xs``,``Panel.minor_xs``. A view will be + returned if possible, otherwise a copy will be made. Previously the user could think that ``copy=False`` would + ALWAYS return a view. (:issue:`6894`) + Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 187a757f53d45..d44c5b7a2f392 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -214,6 +214,10 @@ API changes (and numpy defaults) - add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- Remove the ``copy`` keyword from ``DataFrame.xs``,``Panel.major_xs``,``Panel.minor_xs``. A view will be + returned if possible, otherwise a copy will be made. Previously the user could think that ``copy=False`` would + ALWAYS return a view. (:issue:`6894`) + .. _whatsnew_0140.sql: SQL diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7346fd522f50d..bce09b673ad75 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1564,7 +1564,7 @@ def irow(self, i, copy=False): def icol(self, i): return self._ixs(i, axis=1) - def _ixs(self, i, axis=0, copy=False): + def _ixs(self, i, axis=0): """ i : int, slice, or sequence of integers axis : int @@ -1588,7 +1588,10 @@ def _ixs(self, i, axis=0, copy=False): result = self.take(i, axis=axis) copy=True else: - new_values, copy = self._data.fast_xs(i, copy=copy) + new_values = self._data.fast_xs(i) + + # if we are a copy, mark as such + copy = isinstance(new_values,np.ndarray) and new_values.base is None result = Series(new_values, index=self.columns, name=self.index[i], dtype=new_values.dtype) result._set_is_copy(self, copy=copy) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ebcdc600f6751..919ea4c0e74ce 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1240,7 +1240,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): return result - def xs(self, key, axis=0, level=None, copy=True, drop_level=True): + def xs(self, key, axis=0, level=None, copy=None, drop_level=True): """ Returns a cross-section (row(s) or column(s)) from the Series/DataFrame. Defaults to cross-section on the rows (axis=0). @@ -1254,7 +1254,7 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): level : object, defaults to first n levels (n=1 or len(key)) In case of a key partially contained in a MultiIndex, indicate which levels are used. Levels can be referred by label or position. - copy : boolean, default True + copy : boolean [deprecated] Whether to make a copy of the data drop_level : boolean, default True If False, returns object with same levels as self. @@ -1276,14 +1276,6 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): b 9 c 3 Name: C - >>> s = df.xs('a', copy=False) - >>> s['A'] = 100 - >>> df - A B C - a 100 5 2 - b 4 0 9 - c 9 7 3 - >>> df A B C D @@ -1310,16 +1302,24 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): ------- xs : Series or DataFrame + Notes + ----- + xs is only for getting, not setting values. + + MultiIndex Slicers is a generic way to get/set values on any level or levels + it is a superset of xs functionality, see :ref:`MultiIndex Slicers ` + """ + if copy is not None: + warnings.warn("copy keyword is deprecated, " + "default is to return a copy or a view if possible") + axis = self._get_axis_number(axis) labels = self._get_axis(axis) if level is not None: loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) - if not copy and not isinstance(loc, slice): - raise ValueError('Cannot retrieve view (copy=False)') - # convert to a label indexer if needed if isinstance(loc, slice): lev_num = labels._get_level_number(level) @@ -1336,10 +1336,7 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): return result if axis == 1: - data = self[key] - if copy: - data = data.copy() - return data + return self[key] self._consolidate_inplace() @@ -1362,7 +1359,7 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): if np.isscalar(loc): from pandas import Series - new_values, copy = self._data.fast_xs(loc, copy=copy) + new_values = self._data.fast_xs(loc) # may need to box a datelike-scalar # diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e0c5fa573ff69..1f284a9b7a7ff 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -76,17 +76,14 @@ def _get_label(self, label, axis=0): # but will fail when the index is not present # see GH5667 try: - return self.obj._xs(label, axis=axis, copy=False) + return self.obj._xs(label, axis=axis) except: return self.obj[label] elif (isinstance(label, tuple) and isinstance(label[axis], slice)): raise IndexingError('no slices here, handle elsewhere') - try: - return self.obj._xs(label, axis=axis, copy=False) - except Exception: - return self.obj._xs(label, axis=axis, copy=True) + return self.obj._xs(label, axis=axis) def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8d5fddc6b6029..792a310c8a554 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2861,9 +2861,7 @@ def xs(self, key, axis=1, copy=True, takeable=False): new_blocks = [] if len(self.blocks) > 1: - if not copy: - raise Exception('cannot get view of mixed-type or ' - 'non-consolidated DataFrame') + # we must copy here as we are mixed type for blk in self.blocks: newb = make_block(blk.values[slicer], blk.items, @@ -2884,18 +2882,16 @@ def xs(self, key, axis=1, copy=True, takeable=False): return self.__class__(new_blocks, new_axes) - def fast_xs(self, loc, copy=False): + def fast_xs(self, loc): """ get a cross sectional for a given location in the items ; handle dups - return the result and a flag if a copy was actually made + return the result, is *could* be a view in the case of a + single block """ if len(self.blocks) == 1: - result = self.blocks[0].values[:, loc] - if copy: - result = result.copy() - return result, copy + return self.blocks[0].values[:, loc] items = self.items @@ -2904,7 +2900,7 @@ def fast_xs(self, loc, copy=False): result = self._interleave(items) if self.ndim == 2: result = result.T - return result[loc], True + return result[loc] # unique dtype = _interleaved_dtype(self.blocks) @@ -2915,7 +2911,7 @@ def fast_xs(self, loc, copy=False): i = items.get_loc(item) result[i] = blk._try_coerce_result(blk.iget((j, loc))) - return result, True + return result def consolidate(self): """ @@ -3829,12 +3825,12 @@ def _consolidate_check(self): def _consolidate_inplace(self): pass - def fast_xs(self, loc, copy=False): + def fast_xs(self, loc): """ fast path for getting a cross-section + return a view of the data """ - result = self._block.values[loc] - return result, False + return self._block.values[loc] def construction_error(tot_items, block_shape, axes, e=None): """ raise a helpful message about our construction """ diff --git a/pandas/core/panel.py b/pandas/core/panel.py index f1c52a8facc0a..5a2a2f1d17d16 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -688,7 +688,7 @@ def _combine_panel(self, other, func): return self._constructor(result_values, items, major, minor) - def major_xs(self, key, copy=True): + def major_xs(self, key, copy=None): """ Return slice of panel along major axis @@ -696,17 +696,29 @@ def major_xs(self, key, copy=True): ---------- key : object Major axis label - copy : boolean, default True - Copy data + copy : boolean [deprecated] + Whether to make a copy of the data Returns ------- y : DataFrame index -> minor axis, columns -> items + + Notes + ----- + major_xs is only for getting, not setting values. + + MultiIndex Slicers is a generic way to get/set values on any level or levels + it is a superset of major_xs functionality, see :ref:`MultiIndex Slicers ` + """ - return self.xs(key, axis=self._AXIS_LEN - 2, copy=copy) + if copy is not None: + warnings.warn("copy keyword is deprecated, " + "default is to return a copy or a view if possible") - def minor_xs(self, key, copy=True): + return self.xs(key, axis=self._AXIS_LEN - 2) + + def minor_xs(self, key, copy=None): """ Return slice of panel along minor axis @@ -714,17 +726,29 @@ def minor_xs(self, key, copy=True): ---------- key : object Minor axis label - copy : boolean, default True - Copy data + copy : boolean [deprecated] + Whether to make a copy of the data Returns ------- y : DataFrame index -> major axis, columns -> items + + Notes + ----- + minor_xs is only for getting, not setting values. + + MultiIndex Slicers is a generic way to get/set values on any level or levels + it is a superset of minor_xs functionality, see :ref:`MultiIndex Slicers ` + """ - return self.xs(key, axis=self._AXIS_LEN - 1, copy=copy) + if copy is not None: + warnings.warn("copy keyword is deprecated, " + "default is to return a copy or a view if possible") + + return self.xs(key, axis=self._AXIS_LEN - 1) - def xs(self, key, axis=1, copy=True): + def xs(self, key, axis=1, copy=None): """ Return slice of panel along selected axis @@ -733,24 +757,36 @@ def xs(self, key, axis=1, copy=True): key : object Label axis : {'items', 'major', 'minor}, default 1/'major' - copy : boolean, default True - Copy data + copy : boolean [deprecated] + Whether to make a copy of the data Returns ------- y : ndim(self)-1 + + Notes + ----- + xs is only for getting, not setting values. + + MultiIndex Slicers is a generic way to get/set values on any level or levels + it is a superset of xs functionality, see :ref:`MultiIndex Slicers ` + """ + if copy is not None: + warnings.warn("copy keyword is deprecated, " + "default is to return a copy or a view if possible") + axis = self._get_axis_number(axis) if axis == 0: - data = self[key] - if copy: - data = data.copy() - return data + return self[key] self._consolidate_inplace() axis_number = self._get_axis_number(axis) - new_data = self._data.xs(key, axis=axis_number, copy=copy) - return self._construct_return_type(new_data) + new_data = self._data.xs(key, axis=axis_number, copy=False) + result = self._construct_return_type(new_data) + copy = new_data.is_mixed_type + result._set_is_copy(self, copy=copy) + return result _xs = xs diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 83fd56c676f8e..a9e48c62f9693 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8371,12 +8371,8 @@ def test_xs(self): expected = self.frame['A'] assert_series_equal(series, expected) - # no view by default - series[:] = 5 - self.assert_((expected != 5).all()) - - # view - series = self.frame.xs('A', axis=1, copy=False) + # view is returned if possible + series = self.frame.xs('A', axis=1) series[:] = 5 self.assert_((expected == 5).all()) @@ -11888,25 +11884,16 @@ def test_boolean_set_uncons(self): assert_almost_equal(expected, self.frame.values) def test_xs_view(self): + """ + in 0.14 this will return a view if possible + a copy otherwise, but this is numpy dependent + """ + dm = DataFrame(np.arange(20.).reshape(4, 5), index=lrange(4), columns=lrange(5)) - dm.xs(2, copy=False)[:] = 5 - self.assert_((dm.xs(2) == 5).all()) - dm.xs(2)[:] = 10 - self.assert_((dm.xs(2) == 5).all()) - - # prior to chained assignment (GH5390) - # this would raise, but now just returns a copy (and sets is_copy) - # TODO (?): deal with mixed-type fiasco? - # with assertRaisesRegexp(TypeError, 'cannot get view of mixed-type'): - # self.mixed_frame.xs(self.mixed_frame.index[2], copy=False) - - # unconsolidated - dm['foo'] = 6. - dm.xs(3, copy=False)[:] = 10 - self.assert_((dm.xs(3) == 10).all()) + self.assert_((dm.xs(2) == 10).all()) def test_boolean_indexing(self): idx = lrange(3) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index aef4e3a72c099..ac420ee5d78cd 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -444,9 +444,15 @@ def test_xs_level(self): expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) - # can't produce a view of a multiindex with a level without copying - with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): - self.frame.xs('two', level='second', copy=False) + + # this is a copy in 0.14 + result = self.frame.xs('two', level='second') + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + def f(x): + x[:] = 10 + self.assertRaises(com.SettingWithCopyError, f, result) def test_xs_level_multiple(self): from pandas import read_table @@ -461,8 +467,15 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') assert_frame_equal(result, expected) - with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): - df.xs(('a', 4), level=['one', 'four'], copy=False) + + # this is a copy in 0.14 + result = df.xs(('a', 4), level=['one', 'four']) + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + def f(x): + x[:] = 10 + self.assertRaises(com.SettingWithCopyError, f, result) # GH2107 dates = lrange(20111201, 20111205) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 176ef13d23d94..6aff61c4e2167 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -536,19 +536,15 @@ def test_xs(self): expected = self.panel['ItemA'] assert_frame_equal(itemA, expected) - # not view by default - itemA.values[:] = np.nan - self.assert_(not np.isnan(self.panel['ItemA'].values).all()) - - # but can get view - itemA_view = self.panel.xs('ItemA', axis=0, copy=False) + # get a view by default + itemA_view = self.panel.xs('ItemA', axis=0) itemA_view.values[:] = np.nan self.assert_(np.isnan(self.panel['ItemA'].values).all()) - # mixed-type + # mixed-type yields a copy self.panel['strings'] = 'foo' - self.assertRaises(Exception, self.panel.xs, 'D', axis=2, - copy=False) + result = self.panel.xs('D', axis=2) + self.assertIsNotNone(result.is_copy) def test_getitem_fancy_labels(self): p = self.panel diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index a7a87c998d839..77b70132d4bfb 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -452,19 +452,15 @@ def test_xs(self): expected = self.panel4d['l1'] assert_panel_equal(l1, expected) - # not view by default - l1.values[:] = np.nan - self.assert_(not np.isnan(self.panel4d['l1'].values).all()) - - # but can get view - l1_view = self.panel4d.xs('l1', axis=0, copy=False) + # view if possible + l1_view = self.panel4d.xs('l1', axis=0) l1_view.values[:] = np.nan self.assert_(np.isnan(self.panel4d['l1'].values).all()) # mixed-type self.panel4d['strings'] = 'foo' - self.assertRaises(Exception, self.panel4d.xs, 'D', axis=2, - copy=False) + result = self.panel4d.xs('D', axis=3) + self.assertIsNotNone(result.is_copy) def test_getitem_fancy_labels(self): panel4d = self.panel4d