From e9d6678ac98c56ac057baa1a7be2e32a8aed5896 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 9 Jul 2015 13:59:31 -0400 Subject: [PATCH] CLN: PEP 8 improvements --- doc/source/basics.rst | 16 ++++++++++------ doc/source/whatsnew/v0.17.0.txt | 33 ++++++++++++++++++++++++++++++--- pandas/core/common.py | 13 +++++++++---- pandas/core/internals.py | 6 ++++-- pandas/tests/test_common.py | 18 +++++++++++++++++- pandas/tests/test_series.py | 10 +++++----- 6 files changed, 75 insertions(+), 21 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index b5085ea1c55c4..f739d89295ac1 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1524,13 +1524,15 @@ object conversion .. note:: -The syntax of :meth:`~DataFrame.convert_objects` changed in 0.17.0. + The syntax of :meth:`~DataFrame.convert_objects` changed in 0.17.0. See + :ref:`API changes <whatsnew_0170.api_breaking.convert_objects>` + for more details. :meth:`~DataFrame.convert_objects` is a method to try to force conversion of types from the ``object`` dtype to other types. To try converting specific types that are *number like*, e.g. could be a string that represents a number, -pass ``numeric=True``. The force the conversion, add the keword argument -``coerce=True``. This will force strings and numbers alike to be numbers if +pass ``numeric=True``. To force the conversion, add the keyword argument +``coerce=True``. This will force strings and number-like objects to be numbers if possible, otherwise they will be set to ``np.nan``. .. ipython:: python @@ -1559,10 +1561,12 @@ but occasionally has non-dates intermixed and you want to represent as missing. s.convert_objects(datetime=True, coerce=True) Without passing ``coerce=True``, :meth:`~DataFrame.convert_objects` will attempt -the *soft* conversion of any *object* dtypes, meaning that if all +*soft* conversion of any *object* dtypes, meaning that if all the objects in a Series are of the same type, the Series will have that dtype. 
-Setting ``coerce=True`` will not *convert* - for example, a series of string -dates will not be converted to a series of datetimes. +Note that setting ``coerce=True`` does not *convert* arbitrary types to either +``datetime64[ns]`` or ``timedelta64[ns]``. For example, a series containing string +dates will not be converted to a series of datetimes. To convert between types, +see :ref:`converting to timestamps <timeseries.converting>`. gotchas ~~~~~~~ diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 41569fd1eda65..5abdd272b442b 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -48,6 +48,7 @@ Backwards incompatible API changes .. _whatsnew_0170.api_breaking.other: +.. _whatsnew_0170.api_breaking.convert_objects: Changes to convert_objects ^^^^^^^^^^^^^^^^^^^^^^^^^^ - ``DataFrame.convert_objects`` keyword arguments have been shortened. (:issue:`10265`) @@ -66,7 +67,9 @@ keyword argument to ``'coerce'`` instead of ``True``, as in ``convert_dates='coe .. ipython:: python - df = pd.DataFrame({'i': ['1','2'], 'f': ['apple', '4.2']}) + df = pd.DataFrame({'i': ['1','2'], + 'f': ['apple', '4.2'], + 's': ['apple','banana']}) df The old usage of ``DataFrame.convert_objects`` used `'coerce'` along with the @@ -82,8 +85,32 @@ keyword argument to ``'coerce'`` instead of ``True``, as in ``convert_dates='coe df.convert_objects(numeric=True, coerce=True) -- The new default behavior for ``DataFrame.convert_objects`` is to do nothing, -and so it is necessary to pass at least one conversion target when calling. +- In earlier versions of pandas, ``DataFrame.convert_objects`` would not coerce +numeric types when there were no values convertible to a numeric type. For example, + + .. code-block:: python + + In [1]: df = pd.DataFrame({'s': ['a','b']}) + In [2]: df.convert_objects(convert_numeric='coerce') + Out[2]: + s + 0 a + 1 b + +returns the original DataFrame with no conversion. This change alters +this behavior so that + + .. 
ipython:: python + + df = pd.DataFrame({'s': ['a','b']}) + df.convert_objects(numeric=True, coerce=True) + +converts all non-number-like strings to ``NaN``. + +- In earlier versions of pandas, the default behavior was to try and convert +datetimes and timestamps. The new default is for ``DataFrame.convert_objects`` +to do nothing, and so it is necessary to pass at least one conversion target +in the method call. Other API Changes diff --git a/pandas/core/common.py b/pandas/core/common.py index 2e20c25f2327d..33a2fc0aea732 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1892,15 +1892,18 @@ def _possibly_convert_objects(values, datetime=True, numeric=True, timedelta=True, - coerce=False): + coerce=False, + copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: import warnings - warnings.warn('Must explicitly pass type for conversion. Original ' - 'value returned.', RuntimeWarning) - return values + warnings.warn('Must explicitly pass type for conversion. 
Defaulting to ' + 'pre-0.17 behavior where datetime=True, numeric=True, ' + 'timedelta=True and coerce=False', DeprecationWarning) + datetime = numeric = timedelta = True + coerce = False if isinstance(values, (list, tuple)): # List or scalar @@ -1909,6 +1912,7 @@ def _possibly_convert_objects(values, values = np.array([values], dtype=np.object_) elif not is_object_dtype(values.dtype): # If not object, do not attempt conversion + values = values.copy() if copy else values return values # If 1 flag is coerce, ensure 2 others are False @@ -1942,6 +1946,7 @@ def _possibly_convert_objects(values, coerce_numeric=True) # If all NaNs, then do not-alter values = converted if not isnull(converted).all() else values + values = values.copy() if copy else values except: pass diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 360f0ca4685a0..6b7909086403e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1476,7 +1476,8 @@ def convert(self, datetime=True, numeric=True, timedelta=True, coerce=False, datetime=datetime, numeric=numeric, timedelta=timedelta, - coerce=coerce + coerce=coerce, + copy=copy ).reshape(values.shape) values = _block_shape(values, ndim=self.ndim) newb = make_block(values, @@ -1490,7 +1491,8 @@ def convert(self, datetime=True, numeric=True, timedelta=True, coerce=False, datetime=datetime, numeric=numeric, timedelta=timedelta, - coerce=coerce + coerce=coerce, + copy=copy ).reshape(self.values.shape) blocks.append(make_block(values, ndim=self.ndim, placement=self.mgr_locs)) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index f7121fa54a5b1..94f151efbe2a6 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -4,7 +4,7 @@ import re import nose -from nose.tools import assert_equal +from nose.tools import assert_equal, assert_true import numpy as np from pandas.tslib import iNaT, NaT from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp, Float64Index @@ 
-1026,6 +1026,22 @@ def test_dict_compat(): assert(com._dict_compat(expected) == expected) assert(com._dict_compat(data_unchanged) == data_unchanged) +def test_possibly_convert_objects_copy(): + values = np.array([1, 2]) + + out = com._possibly_convert_objects(values, copy=False) + assert_true(values is out) + + out = com._possibly_convert_objects(values, copy=True) + assert_true(values is not out) + + values = np.array(['apply','banana']) + out = com._possibly_convert_objects(values, copy=False) + assert_true(values is out) + + out = com._possibly_convert_objects(values, copy=True) + assert_true(values is not out) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 2bec54028fa35..7326d7a9d811d 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5959,7 +5959,7 @@ def test_convert_objects(self): # test pass-through and non-conversion when other types selected s = Series(['1.0','2.0','3.0']) - results = s.convert_objects(True,True,True) + results = s.convert_objects(datetime=True, numeric=True, timedelta=True) expected = Series([1.0,2.0,3.0]) assert_series_equal(results, expected) results = s.convert_objects(True,False,True) @@ -5967,15 +5967,15 @@ def test_convert_objects(self): s = Series([datetime(2001, 1, 1, 0, 0),datetime(2001, 1, 1, 0, 0)], dtype='O') - results = s.convert_objects(True,True,True) + results = s.convert_objects(datetime=True, numeric=True, timedelta=True) expected = Series([datetime(2001, 1, 1, 0, 0),datetime(2001, 1, 1, 0, 0)]) assert_series_equal(results, expected) - results = s.convert_objects(False,True,True) + results = s.convert_objects(datetime=False,numeric=True,timedelta=True) assert_series_equal(results, s) td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) s = Series([td, td], dtype='O') - results = s.convert_objects(True,True,True) + results = 
s.convert_objects(datetime=True, numeric=True, timedelta=True) expected = Series([td, td]) assert_series_equal(results, expected) results = s.convert_objects(True,True,False) @@ -6068,7 +6068,7 @@ def test_convert_objects(self): def test_convert_objects_no_arg_warning(self): s = Series(['1.0','2']) with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', RuntimeWarning) + warnings.simplefilter('always', DeprecationWarning) s.convert_objects() self.assertEqual(len(w), 1)