-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DEPR: DataFrame.get_dtype_counts #27145
Changes from 5 commits
2fe603d
735a2fd
8473885
faeb972
95a7075
eb5213c
06af7fc
879485b
0c82f16
525fe51
592659f
0df4dd9
eba5396
eeba8a4
4419114
4ce1194
cb6c8bd
b3b839a
abe310c
aa172d6
6cb5d13
ca70a46
86c0a08
523ab60
852bd64
e700c63
475e361
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2325,7 +2325,7 @@ def _sizeof_fmt(num, size_qualifier): | |
else: | ||
_verbose_repr() | ||
|
||
counts = self.get_dtype_counts() | ||
counts = self._data.get_dtype_counts() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this one? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is okay. It's internal usage and slightly more performant I would think than There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you remove get_dtype_counts() from blocks; it's unnecessary as well There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks to be needed to get the dtypes later on for |
||
dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k | ||
in sorted(counts.items())] | ||
lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5290,6 +5290,9 @@ def get_dtype_counts(self): | |
object 1 | ||
dtype: int64 | ||
""" | ||
warnings.warn("`get_dtype_counts` has been deprecated and will be " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you update the docstring and add deprecated There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we recommend There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah unfortunately that solution does not work for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sure, just need something as a replacement (may also want to add in the doc-string itself) |
||
"removed in a future version.", FutureWarning, | ||
stacklevel=2) | ||
from pandas import Series | ||
return Series(self._data.get_dtype_counts()) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -433,9 +433,10 @@ def test_with_datetimelikes(self): | |
'B': timedelta_range('1 day', periods=10)}) | ||
t = df.T | ||
|
||
result = t.get_dtype_counts() | ||
#result = Series(t._data.get_dtype_counts()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. comment here |
||
result = t.dtypes.value_counts() | ||
if self.klass is DataFrame: | ||
expected = Series({'object': 10}) | ||
expected = Series({np.dtype('object'): 10}) | ||
else: | ||
expected = Series({'Sparse[object, nan]': 10}) | ||
tm.assert_series_equal(result, expected) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1579,7 +1579,7 @@ def test_constructor_with_datetimes(self): | |
'D': Timestamp("20010101"), | ||
'E': datetime(2001, 1, 2, 0, 0)}, | ||
index=np.arange(10)) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 1, datetime64name: 2, objectname: 2}) | ||
result.sort_index() | ||
expected.sort_index() | ||
|
@@ -1591,7 +1591,7 @@ def test_constructor_with_datetimes(self): | |
floatname: np.array(1., dtype=floatname), | ||
intname: np.array(1, dtype=intname)}, | ||
index=np.arange(10)) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = {objectname: 1} | ||
if intname == 'int64': | ||
expected['int64'] = 2 | ||
|
@@ -1613,7 +1613,7 @@ def test_constructor_with_datetimes(self): | |
floatname: np.array([1.] * 10, dtype=floatname), | ||
intname: np.array([1] * 10, dtype=intname)}, | ||
index=np.arange(10)) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
result = result.sort_index() | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
@@ -1623,7 +1623,7 @@ def test_constructor_with_datetimes(self): | |
datetime_s = Series(datetimes) | ||
assert datetime_s.dtype == 'M8[ns]' | ||
df = DataFrame({'datetime_s': datetime_s}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({datetime64name: 1}) | ||
result = result.sort_index() | ||
expected = expected.sort_index() | ||
|
@@ -1634,7 +1634,7 @@ def test_constructor_with_datetimes(self): | |
datetimes = [ts.to_pydatetime() for ts in ind] | ||
dates = [ts.date() for ts in ind] | ||
df = DataFrame({'datetimes': datetimes, 'dates': dates}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({datetime64name: 1, objectname: 1}) | ||
result = result.sort_index() | ||
expected = expected.sort_index() | ||
|
@@ -1693,7 +1693,7 @@ def test_constructor_datetimes_with_nulls(self): | |
for arr in [np.array([None, None, None, None, | ||
datetime.now(), None]), | ||
np.array([None, None, datetime.now(), None])]: | ||
result = DataFrame(arr).get_dtype_counts() | ||
result = Series(DataFrame(arr)._data.get_dtype_counts()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don’t move it to a private method; use .dtypes.value_counts() |
||
expected = Series({'datetime64[ns]': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
@@ -1706,49 +1706,49 @@ def test_constructor_for_list_with_dtypes(self): | |
|
||
# test list of lists/ndarrays | ||
df = DataFrame([np.arange(5) for x in range(5)]) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 5}) | ||
|
||
df = DataFrame([np.array(np.arange(5), dtype='int32') | ||
for x in range(5)]) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int32': 5}) | ||
|
||
# overflow issue? (we always expecte int64 upcasting here) | ||
df = DataFrame({'a': [2 ** 31, 2 ** 31 + 1]}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
# GH #2751 (construction with no index specified), make sure we cast to | ||
# platform values | ||
df = DataFrame([1, 2]) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
df = DataFrame([1., 2.]) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'float64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
df = DataFrame({'a': [1, 2]}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
df = DataFrame({'a': [1., 2.]}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'float64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
df = DataFrame({'a': 1}, index=range(3)) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'int64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
df = DataFrame({'a': 1.}, index=range(3)) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series({'float64': 1}) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
@@ -1757,7 +1757,7 @@ def test_constructor_for_list_with_dtypes(self): | |
'c': list('abcd'), | ||
'd': [datetime(2000, 1, 1) for i in range(4)], | ||
'e': [1., 2, 4., 7]}) | ||
result = df.get_dtype_counts() | ||
result = Series(df._data.get_dtype_counts()) | ||
expected = Series( | ||
{'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) | ||
result = result.sort_index() | ||
|
@@ -2077,14 +2077,14 @@ def test_from_records_misc_brokenness(self): | |
rows.append([datetime(2010, 1, 1), 1]) | ||
rows.append([datetime(2010, 1, 2), 'hi']) # test col upconverts to obj | ||
df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) | ||
results = df2_obj.get_dtype_counts() | ||
results = Series(df2_obj._data.get_dtype_counts()) | ||
expected = Series({'datetime64[ns]': 1, 'object': 1}) | ||
|
||
rows = [] | ||
rows.append([datetime(2010, 1, 1), 1]) | ||
rows.append([datetime(2010, 1, 2), 1]) | ||
df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) | ||
results = df2_obj.get_dtype_counts().sort_index() | ||
results = Series(df2_obj._data.get_dtype_counts()).sort_index() | ||
expected = Series({'datetime64[ns]': 1, 'int64': 1}) | ||
tm.assert_series_equal(results, expected) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can probably just do
`o.dtypes.value_counts()` (may need a `hasattr` check)