diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 3ba79210a43ee..e1508cb7b4e16 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -1968,11 +1968,11 @@ dtype of the column will be chosen to accommodate all of the data types pd.Series([1, 2, 3, 6., 'foo']) The number of columns of each type in a ``DataFrame`` can be found by calling -:meth:`~DataFrame.get_dtype_counts`. +``DataFrame.dtypes.value_counts()``. .. ipython:: python - dft.get_dtype_counts() + dft.dtypes.value_counts() Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b9f90bf750482..9fe9452b2ee56 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3754,7 +3754,7 @@ defaults to `nan`. store.append('df_mixed', df_mixed, min_itemsize={'values': 50}) df_mixed1 = store.select('df_mixed') df_mixed1 - df_mixed1.get_dtype_counts() + df_mixed1.dtypes.value_counts() # we have provided a minimum string column size store.root.df_mixed.table diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index ef77826e9a444..6c36a6470f841 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -105,7 +105,7 @@ pandas objects provide compatibility between ``NaT`` and ``NaN``. df2 df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan df2 - df2.get_dtype_counts() + df2.dtypes.value_counts() .. 
_missing.inserting: diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst index 7d51ded1cad19..c4251f70d85b6 100644 --- a/doc/source/whatsnew/v0.10.1.rst +++ b/doc/source/whatsnew/v0.10.1.rst @@ -89,7 +89,7 @@ You can now store ``datetime64`` in data columns store.append('df_mixed', df_mixed) df_mixed1 = store.select('df_mixed') df_mixed1 - df_mixed1.get_dtype_counts() + df_mixed1.dtypes.value_counts() You can pass ``columns`` keyword to select to filter a list of the return columns, this is equivalent to passing a diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index 31fab6c9aeb74..03480ebeed78e 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -296,7 +296,7 @@ Furthermore ``datetime64[ns]`` columns are created by default, when passed datet df # datetime64[ns] out of the box - df.get_dtype_counts() + df.dtypes.value_counts() # use the traditional nan, which is mapped to NaT internally df.loc[df.index[2:4], ['A', 'timestamp']] = np.nan diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e9d23cfd8efc1..df6148772f744 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -762,6 +762,7 @@ Other deprecations - :meth:`Series.put` is deprecated. (:issue:`18262`) - :meth:`Index.item` and :meth:`Series.item` is deprecated. (:issue:`18262`) - :meth:`Index.contains` is deprecated. Use ``key in index`` (``__contains__``) instead (:issue:`17753`). +- :meth:`DataFrame.get_dtype_counts` is deprecated. Use ``DataFrame.dtypes.value_counts()`` instead (:issue:`18262`). .. 
_whatsnew_0250.prior_deprecations: diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index f293b3b33e8d3..b01000a7aee5b 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -79,11 +79,11 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): # check for dtype compatibility dtypes = set() for o in [a, b]: - if hasattr(o, 'get_dtype_counts'): - s = o.get_dtype_counts() + if hasattr(o, 'dtypes'): + s = o.dtypes.value_counts() if len(s) > 1: return False - dtypes |= set(s.index) + dtypes |= set(s.index.astype(str)) elif isinstance(o, np.ndarray): dtypes |= {o.dtype.name} diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ff3fff22f4f0..d3ce77c0684f9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2326,7 +2326,7 @@ def _sizeof_fmt(num, size_qualifier): else: _verbose_repr() - counts = self.get_dtype_counts() + counts = self._data.get_dtype_counts() dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k in sorted(counts.items())] lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 822428c6787be..0679aa27b1ad3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5263,6 +5263,10 @@ def get_dtype_counts(self): """ Return counts of unique dtypes in this object. + .. deprecated:: 0.25.0 + + Use `.dtypes.value_counts()` instead. + Returns ------- dtype : Series @@ -5288,6 +5292,10 @@ def get_dtype_counts(self): object 1 dtype: int64 """ + warnings.warn("`get_dtype_counts` has been deprecated and will be " + "removed in a future version. 
For DataFrames use " + "`.dtypes.value_counts()", FutureWarning, + stacklevel=2) from pandas import Series return Series(self._data.get_dtype_counts()) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index ed224e23fbe20..6372029f2efe7 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -7,8 +7,8 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, Series, SparseDataFrame, compat, date_range, - timedelta_range) + Categorical, DataFrame, Series, SparseDataFrame, SparseDtype, compat, + date_range, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) @@ -433,11 +433,11 @@ def test_with_datetimelikes(self): 'B': timedelta_range('1 day', periods=10)}) t = df.T - result = t.get_dtype_counts() + result = t.dtypes.value_counts() if self.klass is DataFrame: - expected = Series({'object': 10}) + expected = Series({np.dtype('object'): 10}) else: - expected = Series({'Sparse[object, nan]': 10}) + expected = Series({SparseDtype(dtype=object): 10}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 061e0d32e1f06..bcbea9d7a2236 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -273,8 +273,8 @@ def test_df_flex_cmp_constant_return_types(self, opname): df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) const = 2 - result = getattr(df, opname)(const).get_dtype_counts() - tm.assert_series_equal(result, pd.Series([2], ['bool'])) + result = getattr(df, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_df_flex_cmp_constant_return_types_empty(self, opname): @@ -283,8 +283,8 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): const 
= 2 empty = df.iloc[:0] - result = getattr(empty, opname)(const).get_dtype_counts() - tm.assert_series_equal(result, pd.Series([2], ['bool'])) + result = getattr(empty, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) # ------------------------------------------------------------------- diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 6fbc884829784..f1cbd7763474e 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -217,7 +217,7 @@ def test_construction_with_mixed(self, float_string_frame): df = DataFrame(data) # check dtypes - result = df.get_dtype_counts().sort_values() + result = df.dtypes expected = Series({'datetime64[ns]': 3}) # mixed-type frames @@ -225,11 +225,13 @@ def test_construction_with_mixed(self, float_string_frame): float_string_frame['timedelta'] = timedelta(days=1, seconds=1) assert float_string_frame['datetime'].dtype == 'M8[ns]' assert float_string_frame['timedelta'].dtype == 'm8[ns]' - result = float_string_frame.get_dtype_counts().sort_values() - expected = Series({'float64': 4, - 'object': 1, - 'datetime64[ns]': 1, - 'timedelta64[ns]': 1}).sort_values() + result = float_string_frame.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('object'), + np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]')], + index=list('ABCD') + ['foo', 'datetime', + 'timedelta']) assert_series_equal(result, expected) def test_construction_with_conversions(self): @@ -409,11 +411,12 @@ def test_get_numeric_data(self): df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'f': Timestamp('20010102')}, index=np.arange(10)) - result = df.get_dtype_counts() - expected = Series({'int64': 1, 'float64': 1, - datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('float64'), + np.dtype('int64'), + 
np.dtype(objectname), + np.dtype(datetime64name)], + index=['a', 'b', 'c', 'f']) assert_series_equal(result, expected) df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index faa86acb1584f..c1d057da91b8f 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -17,8 +17,10 @@ def test_concat_multiple_frames_dtypes(self): A = DataFrame(data=np.ones((10, 2)), columns=[ 'foo', 'bar'], dtype=np.float64) B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) - results = pd.concat((A, B), axis=1).get_dtype_counts() - expected = Series(dict(float64=2, float32=2)) + results = pd.concat((A, B), axis=1).dtypes + expected = Series([np.dtype('float64')] * 2 + + [np.dtype('float32')] * 2, + index=['foo', 'bar', 0, 1]) assert_series_equal(results, expected) @pytest.mark.parametrize('data', [ diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 981dc8b32b8cc..73a8720adb5cc 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1579,10 +1579,11 @@ def test_constructor_with_datetimes(self): 'D': Timestamp("20010101"), 'E': datetime(2001, 1, 2, 0, 0)}, index=np.arange(10)) - result = df.get_dtype_counts() - expected = Series({'int64': 1, datetime64name: 2, objectname: 2}) - result.sort_index() - expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('int64')] + + [np.dtype(objectname)] * 2 + + [np.dtype(datetime64name)] * 2, + index=list("ABCDE")) tm.assert_series_equal(result, expected) # check with ndarray construction ndim==0 (e.g. 
we are passing a ndim 0 @@ -1591,21 +1592,13 @@ def test_constructor_with_datetimes(self): floatname: np.array(1., dtype=floatname), intname: np.array(1, dtype=intname)}, index=np.arange(10)) - result = df.get_dtype_counts() - expected = {objectname: 1} - if intname == 'int64': - expected['int64'] = 2 - else: - expected['int64'] = 1 - expected[intname] = 1 - if floatname == 'float64': - expected['float64'] = 2 - else: - expected['float64'] = 1 - expected[floatname] = 1 - - result = result.sort_index() - expected = Series(expected).sort_index() + result = df.dtypes + expected = Series([np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('object')] + + [np.dtype('float64')] + + [np.dtype(intname)], + index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) # check with ndarray construction ndim>0 @@ -1613,8 +1606,13 @@ def test_constructor_with_datetimes(self): floatname: np.array([1.] * 10, dtype=floatname), intname: np.array([1] * 10, dtype=intname)}, index=np.arange(10)) - result = df.get_dtype_counts() - result = result.sort_index() + result = df.dtypes + expected = Series([np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('object')] + + [np.dtype('float64')] + + [np.dtype(intname)], + index=['a', 'b', 'c', floatname, intname]) tm.assert_series_equal(result, expected) # GH 2809 @@ -1622,22 +1620,16 @@ def test_constructor_with_datetimes(self): datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) assert datetime_s.dtype == 'M8[ns]' - df = DataFrame({'datetime_s': datetime_s}) - result = df.get_dtype_counts() - expected = Series({datetime64name: 1}) - result = result.sort_index() - expected = expected.sort_index() - tm.assert_series_equal(result, expected) # GH 2810 ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] dates = [ts.date() for ts in ind] - df = DataFrame({'datetimes': datetimes, 'dates': dates}) - result = df.get_dtype_counts() - 
expected = Series({datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + df = DataFrame(datetimes, columns=['datetimes']) + df['dates'] = dates + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('object')], + index=['datetimes', 'dates']) tm.assert_series_equal(result, expected) # GH 7594 @@ -1693,75 +1685,59 @@ def test_constructor_datetimes_with_nulls(self): for arr in [np.array([None, None, None, None, datetime.now(), None]), np.array([None, None, datetime.now(), None])]: - result = DataFrame(arr).get_dtype_counts() - expected = Series({'datetime64[ns]': 1}) + result = DataFrame(arr).dtypes + expected = Series([np.dtype('datetime64[ns]')]) tm.assert_series_equal(result, expected) def test_constructor_for_list_with_dtypes(self): - # TODO(wesm): unused - intname = np.dtype(np.int_).name # noqa - floatname = np.dtype(np.float_).name # noqa - datetime64name = np.dtype('M8[ns]').name - objectname = np.dtype(np.object_).name - # test list of lists/ndarrays df = DataFrame([np.arange(5) for x in range(5)]) - result = df.get_dtype_counts() - expected = Series({'int64': 5}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 5) + tm.assert_series_equal(result, expected) df = DataFrame([np.array(np.arange(5), dtype='int32') for x in range(5)]) - result = df.get_dtype_counts() - expected = Series({'int32': 5}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 5) + tm.assert_series_equal(result, expected) # overflow issue? 
(we always expecte int64 upcasting here) df = DataFrame({'a': [2 ** 31, 2 ** 31 + 1]}) - result = df.get_dtype_counts() - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') # GH #2751 (construction with no index specified), make sure we cast to # platform values df = DataFrame([1, 2]) - result = df.get_dtype_counts() - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame([1., 2.]) - result = df.get_dtype_counts() - expected = Series({'float64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') df = DataFrame({'a': [1, 2]}) - result = df.get_dtype_counts() - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame({'a': [1., 2.]}) - result = df.get_dtype_counts() - expected = Series({'float64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') df = DataFrame({'a': 1}, index=range(3)) - result = df.get_dtype_counts() - expected = Series({'int64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('int64') df = DataFrame({'a': 1.}, index=range(3)) - result = df.get_dtype_counts() - expected = Series({'float64': 1}) - tm.assert_series_equal(result, expected) + assert df.dtypes.iloc[0] == np.dtype('float64') # with object list df = DataFrame({'a': [1, 2, 4, 7], 'b': [1.2, 2.3, 5.1, 6.3], 'c': list('abcd'), 'd': [datetime(2000, 1, 1) for i in range(4)], 'e': [1., 2, 4., 7]}) - result = df.get_dtype_counts() - expected = Series( - {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) - result = result.sort_index() - expected = expected.sort_index() + result = df.dtypes + expected = Series([np.dtype('int64'), + np.dtype('float64'), + np.dtype('object'), + np.dtype('datetime64[ns]'), + 
np.dtype('float64')], + index=list('abcde')) tm.assert_series_equal(result, expected) def test_constructor_frame_copy(self, float_frame): @@ -2077,16 +2053,19 @@ def test_from_records_misc_brokenness(self): rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 'hi']) # test col upconverts to obj df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts() - expected = Series({'datetime64[ns]': 1, 'object': 1}) + result = df2_obj.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('object')], + index=['date', 'test']) + tm.assert_series_equal(result, expected) rows = [] rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 1]) df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts().sort_index() - expected = Series({'datetime64[ns]': 1, 'int64': 1}) - tm.assert_series_equal(results, expected) + result = df2_obj.dtypes + expected = Series([np.dtype('datetime64[ns]'), np.dtype('int64')], + index=['date', 'test']) + tm.assert_series_equal(result, expected) def test_from_records_empty(self): # 3562 diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index f68770d796292..51578ba20b047 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -836,23 +836,28 @@ def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), B=Series([timedelta(days=i) for i in range(3)]))) - result = df.get_dtype_counts().sort_index() - expected = Series( - {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_index() + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]')], + index=list("AB")) assert_series_equal(result, expected) df['C'] = df['A'] + df['B'] - expected = Series( - {'datetime64[ns]': 2, 'timedelta64[ns]': 1}).sort_values() - result = df.get_dtype_counts().sort_values() + result = df.dtypes + 
expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]'), + np.dtype('datetime64[ns]')], + index=list("ABC")) assert_series_equal(result, expected) # mixed int types df['D'] = 1 - expected = Series({'datetime64[ns]': 2, - 'timedelta64[ns]': 1, - 'int64': 1}).sort_values() - result = df.get_dtype_counts().sort_values() + result = df.dtypes + expected = Series([np.dtype('datetime64[ns]'), + np.dtype('timedelta64[ns]'), + np.dtype('datetime64[ns]'), + np.dtype('int64')], + index=list("ABCD")) assert_series_equal(result, expected) def test_arg_for_errors_in_astype(self): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 4c1abfb1a7f6f..f8af942f67657 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -300,15 +300,23 @@ def test_getitem_boolean_casting(self, datetime_frame): df['F1'] = df['F'].copy() casted = df[df > 0] - result = casted.get_dtype_counts() - expected = Series({'float64': 4, 'int32': 2, 'int64': 2}) + result = casted.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('int32')] * 2 + + [np.dtype('int64')] * 2, + index=['A', 'B', 'C', 'D', 'E', 'E1', 'F', 'F1']) assert_series_equal(result, expected) # int block splitting df.loc[df.index[1:3], ['E1', 'F1']] = 0 casted = df[df > 0] - result = casted.get_dtype_counts() - expected = Series({'float64': 6, 'int32': 1, 'int64': 1}) + result = casted.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('int32')] + + [np.dtype('float64')] + + [np.dtype('int64')] + + [np.dtype('float64')], + index=['A', 'B', 'C', 'D', 'E', 'E1', 'F', 'F1']) assert_series_equal(result, expected) # where dtype conversions @@ -615,8 +623,9 @@ def test_setitem_cast(self, float_frame): df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC')) df['event'] = np.nan df.loc[10, 'event'] = 'foo' - result = df.get_dtype_counts().sort_values() - expected = Series({'float64': 3, 'object': 1}).sort_values() + 
result = df.dtypes + expected = Series([np.dtype('float64')] * 3 + [np.dtype('object')], + index=['A', 'B', 'C', 'event']) assert_series_equal(result, expected) # Test that data type is preserved . #5782 @@ -1614,8 +1623,10 @@ def test_setitem_single_column_mixed_datetime(self): df['timestamp'] = Timestamp('20010102') # check our dtypes - result = df.get_dtype_counts() - expected = Series({'float64': 3, 'datetime64[ns]': 1}) + result = df.dtypes + expected = Series([np.dtype('float64')] * 3 + + [np.dtype('datetime64[ns]')], + index=['foo', 'bar', 'baz', 'timestamp']) assert_series_equal(result, expected) # set an allowable datetime64 type @@ -2637,13 +2648,17 @@ def _check_get(df, cond, check_dtypes=True): for c in ['float32', 'float64', 'int32', 'int64']}) df.iloc[1, :] = 0 - result = df.where(df >= 0).get_dtype_counts() + result = df.dtypes + expected = Series([np.dtype('float32'), + np.dtype('float64'), + np.dtype('int32'), + np.dtype('int64')], + index=['float32', 'float64', 'int32', 'int64']) # when we don't preserve boolean casts # # expected = Series({ 'float32' : 1, 'float64' : 3 }) - expected = Series({'float32': 1, 'float64': 1, 'int32': 1, 'int64': 1}) assert_series_equal(result, expected) # aligning diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 807931567847f..e40ae6dd5494d 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -417,15 +417,13 @@ def test_fillna_downcast(self): def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = df.get_dtype_counts().sort_values() - expected = Series({'object': 5}) + result = df.dtypes + expected = Series([np.dtype('object')] * 5, index=[1, 2, 3, 4, 5]) assert_series_equal(result, expected) result = df.fillna(1) expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) - result = result.get_dtype_counts().sort_values() 
- expected = Series({'int64': 5}) - assert_series_equal(result, expected) + assert_frame_equal(result, expected) # empty block df = DataFrame(index=range(3), columns=['A', 'B'], dtype='float64') diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index dc2ac5f728ec7..ffc2a515bc4b7 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -158,17 +158,26 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') - result = Series(dict(float32=1, float64=5)) - assert (df.get_dtype_counts().sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 5 + [np.dtype('float32')], + index=['foo', 'c', 'bar', 'b', 'a', 'x']) + tm.assert_series_equal(result, expected) # replacing current (in different block) df['a'] = df['a'].astype('float32') - result = Series(dict(float32=2, float64=4)) - assert (df.get_dtype_counts().sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('float32')] * 2, + index=['foo', 'c', 'bar', 'b', 'a', 'x']) + tm.assert_series_equal(result, expected) df['y'] = df['a'].astype('int32') - result = Series(dict(float32=2, float64=4, int32=1)) - assert (df.get_dtype_counts().sort_index() == result).all() + result = df.dtypes + expected = Series([np.dtype('float64')] * 4 + + [np.dtype('float32')] * 2 + + [np.dtype('int32')], + index=['foo', 'c', 'bar', 'b', 'a', 'x', 'y']) + tm.assert_series_equal(result, expected) with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index ac8d1557a4c43..04c1375418e67 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -502,29 +502,41 @@ def test_unstack_dtypes(self): [2, 2, 3, 4]] df = DataFrame(rows, columns=list('ABCD')) - result = df.get_dtype_counts() - expected = 
Series({'int64': 4}) + result = df.dtypes + expected = Series([np.dtype('int64')] * 4, + index=list('ABCD')) assert_series_equal(result, expected) # single dtype df2 = df.set_index(['A', 'B']) df3 = df2.unstack('B') - result = df3.get_dtype_counts() - expected = Series({'int64': 4}) + result = df3.dtypes + expected = Series([np.dtype('int64')] * 4, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) # mixed df2 = df.set_index(['A', 'B']) df2['C'] = 3. df3 = df2.unstack('B') - result = df3.get_dtype_counts() - expected = Series({'int64': 2, 'float64': 2}) + result = df3.dtypes + expected = Series([np.dtype('float64')] * 2 + [np.dtype('int64')] * 2, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) - df2['D'] = 'foo' df3 = df2.unstack('B') - result = df3.get_dtype_counts() - expected = Series({'float64': 2, 'object': 2}) + result = df3.dtypes + expected = Series([np.dtype('float64')] * 2 + [np.dtype('object')] * 2, + index=pd.MultiIndex.from_arrays([ + ['C', 'C', 'D', 'D'], + [1, 2, 1, 2] + ], names=(None, 'B'))) assert_series_equal(result, expected) # GH7405 diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 5b2f846eccdd5..b7c73daae0002 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -150,13 +150,18 @@ def test_frame_no_datetime64_dtype(self, tz): # GH#2810 (with timezones) datetimes_naive = [ts.to_pydatetime() for ts in dr] datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({'dr': dr, - 'dr_tz': dr_tz, - 'datetimes_naive': datetimes_naive, - 'datetimes_with_tz': datetimes_with_tz}) - result = df.get_dtype_counts().sort_index() - expected = Series({'datetime64[ns]': 2, - str(tz_expected): 2}).sort_index() + df = DataFrame({'dr': dr}) + df['dr_tz'] = dr_tz + df['datetimes_naive'] = 
datetimes_naive + df['datetimes_with_tz'] = datetimes_with_tz + result = df.dtypes + expected = Series([ + np.dtype('datetime64[ns]'), + DatetimeTZDtype(tz=tz), + np.dtype('datetime64[ns]'), + DatetimeTZDtype(tz=tz) + ], + index=['dr', 'dr_tz', 'datetimes_naive', 'datetimes_with_tz']) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index b1a083213debd..e8343a1cf318b 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -932,3 +932,9 @@ def test_deprecated_to_dense(self): with tm.assert_produces_warning(FutureWarning): result = ser.to_dense() tm.assert_series_equal(result, ser) + + def test_deprecated_get_dtype_counts(self): + # GH 18262 + df = DataFrame([1]) + with tm.assert_produces_warning(FutureWarning): + df.get_dtype_counts() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0fb8673e6274a..8f57254eae219 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -179,8 +179,9 @@ def test_apply_with_mixed_dtype(): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 df = DataFrame({'foo1': np.random.randn(6), 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) - result = df.apply(lambda x: x, axis=1) - tm.assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) + result = df.apply(lambda x: x, axis=1).dtypes + expected = df.dtypes + tm.assert_series_equal(result, expected) # GH 3610 incorrect dtype conversion with as_index=False df = DataFrame({"c1": [1, 2, 6, 6, 8]}) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index dcd0d3938c6a5..d13dddac79042 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -88,10 +88,11 @@ def max_value(group): return group.loc[group['value'].idxmax()] applied = 
df.groupby('A').apply(max_value) - result = applied.get_dtype_counts().sort_values() - expected = Series({'float64': 2, - 'int64': 1, - 'object': 2}).sort_values() + result = applied.dtypes + expected = Series([np.dtype('object')] * 2 + + [np.dtype('float64')] * 2 + + [np.dtype('int64')], + index=['A', 'B', 'C', 'D', 'value']) assert_series_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index ec34739672718..00062b04d07d8 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -1985,7 +1985,8 @@ def test_table_values_dtypes_roundtrip(self): df1['time2'] = Timestamp('20130102') store.append('df_mixed_dtypes1', df1) - result = store.select('df_mixed_dtypes1').get_dtype_counts() + result = store.select('df_mixed_dtypes1').dtypes.value_counts() + result.index = [str(i) for i in result.index] expected = Series({'float32': 2, 'float64': 1, 'int32': 1, 'bool': 1, 'int16': 1, 'int8': 1, 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 7def8e53859c7..7795c356bf43e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -245,8 +245,9 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.sum) - result = z.get_dtype_counts() - expected = Series(dict(int64=2)) + result = z.dtypes + expected = Series([np.dtype('int64')] * 2, + index=Index(list('ab'), name='i')) tm.assert_series_equal(result, expected) # cannot convert dtypes @@ -256,8 +257,9 @@ def test_pivot_dtypes(self): z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.mean) - result = z.get_dtype_counts() - expected = Series(dict(float64=2)) + result = z.dtypes + expected = Series([np.dtype('float64')] * 2, + index=Index(list('ab'), name='i')) tm.assert_series_equal(result, expected) 
@pytest.mark.parametrize('columns,values', diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 283814d2375b1..d0979fb86d36d 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -101,7 +101,9 @@ def test_basic_types(self, sparse, dtype): dtype_name = self.effective_dtype(dtype).name expected = Series({dtype_name: 8}) - tm.assert_series_equal(result.get_dtype_counts(), expected) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + tm.assert_series_equal(result, expected) result = get_dummies(s_df, columns=['a'], sparse=sparse, dtype=dtype) @@ -109,8 +111,10 @@ def test_basic_types(self, sparse, dtype): expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) expected = Series(expected_counts).sort_index() - tm.assert_series_equal(result.get_dtype_counts().sort_index(), - expected) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + result = result.sort_index() + tm.assert_series_equal(result, expected) def test_just_na(self, sparse): just_na_list = [np.nan] diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 43fcddea3d964..2cc2ad080eb4c 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -122,9 +122,9 @@ def test_ser_flex_cmp_return_dtypes(self, opname): # GH#15115 ser = Series([1, 3, 2], index=range(3)) const = 2 - - result = getattr(ser, opname)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([1], ['bool'])) + result = getattr(ser, opname)(const).dtypes + expected = np.dtype('bool') + assert result == expected @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_ser_flex_cmp_return_dtypes_empty(self, opname): @@ -132,9 +132,9 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): ser = Series([1, 3, 2], index=range(3)) empty = ser.iloc[:0] const = 2 - 
- result = getattr(empty, opname)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([1], ['bool'])) + result = getattr(empty, opname)(const).dtypes + expected = np.dtype('bool') + assert result == expected @pytest.mark.parametrize('op', [operator.eq, operator.ne, operator.le, operator.lt, diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 59566ad3232c7..89679e0c46908 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -56,8 +56,6 @@ def test_dtype(self, datetime_series): # GH 26705 - Assert .ftypes is deprecated with tm.assert_produces_warning(FutureWarning): assert datetime_series.ftypes == 'float64:dense' - tm.assert_series_equal(datetime_series.get_dtype_counts(), - Series(1, ['float64'])) # GH18243 - Assert .get_ftype_counts is deprecated with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(datetime_series.get_ftype_counts(), diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 2d0b338ef53c0..d3e2e1357f9d7 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -292,9 +292,8 @@ def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan sdf = df.to_sparse() - - result = sdf.get_dtype_counts() - expected = Series({'Sparse[float64, nan]': 4}) + result = sdf.dtypes + expected = Series(['Sparse[float64, nan]'] * 4) tm.assert_series_equal(result, expected) def test_shape(self, float_frame, float_frame_int_kind, @@ -902,7 +901,7 @@ def test_corr(self, float_frame): def test_describe(self, float_frame): float_frame['foo'] = np.nan - float_frame.get_dtype_counts() + float_frame.dtypes.value_counts() str(float_frame) desc = float_frame.describe() # noqa