From 3cc2fae24b6f117e9ed7daccf72ef478608c5f74 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 2 Nov 2018 16:34:34 +0100 Subject: [PATCH] API: fix corner case of lib.infer_dtype (#23422) --- pandas/_libs/lib.pyx | 5 ++++- pandas/_libs/missing.pxd | 6 ++++++ pandas/_libs/missing.pyx | 2 +- pandas/tests/dtypes/test_inference.py | 16 ++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6ec9a7e93bc55b..3a02a4ac33192a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -57,7 +57,7 @@ from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare -from missing cimport (checknull, +from missing cimport (checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period) @@ -1177,6 +1177,9 @@ def infer_dtype(object value, bint skipna=False): values = construct_1d_object_array_from_listlike(value) values = getattr(values, 'values', values) + if skipna: + values = values[~isnaobj(values)] + val = _try_infer_map(values) if val is not None: return val diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index 9f660cc6785c8c..e171dc5f2c9624 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,8 +1,14 @@ # -*- coding: utf-8 -*- +from numpy cimport ndarray, uint8_t + +from tslibs.nattype cimport is_null_datetimelike + cpdef bint checknull(object val) cpdef bint checknull_old(object val) +cpdef ndarray[uint8_t] isnaobj(ndarray arr) + cdef bint is_null_datetime64(v) cdef bint is_null_timedelta64(v) cdef bint is_null_period(v) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 2590a30c57f33d..6776a4b6d7f7e7 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -124,7 +124,7 @@ cdef inline bint _check_none_nan_inf_neginf(object val): @cython.wraparound(False) @cython.boundscheck(False) -def isnaobj(ndarray arr): +cpdef ndarray[uint8_t] isnaobj(ndarray arr): """ Return boolean mask denoting which elements of a 1-D array are na-like, according to the criteria defined in `_check_all_nulls`: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d0dd03d6eb8df8..c5911da1666d25 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -591,6 +591,22 @@ def test_unicode(self): expected = 'unicode' if PY2 else 'string' assert result == expected + @pytest.mark.parametrize('dtype, missing, skipna, expected', [ + (float, np.nan, False, 'floating'), + (float, np.nan, True, 'floating'), + (object, np.nan, False, 'floating'), + (object, np.nan, True, 'empty'), + (object, None, False, 'mixed'), + (object, None, True, 'empty') + ]) + @pytest.mark.parametrize('box', [pd.Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + def test_datetime(self): dates = [datetime(2012, 1, x) for x in range(1, 20)]