From 14b37f1190719e58c440f00a942e21a05f315fe0 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 26 Nov 2017 22:26:13 +0000 Subject: [PATCH 1/6] DEPR: Deprecate from_items --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/frame.py | 10 ++- pandas/io/stata.py | 8 +- pandas/tests/frame/test_constructors.py | 73 ++++++++++++------ pandas/tests/frame/test_nonunique_indexes.py | 7 +- pandas/tests/io/parser/common.py | 4 +- pandas/tests/io/test_excel.py | 78 ++++++++++---------- pandas/tests/io/test_stata.py | 2 +- 8 files changed, 111 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1890636bc8e1a..668eff917a32d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -329,7 +329,7 @@ Deprecations - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - +- :func:``DataFrame.from_items`` is deprecated. Use ``DataFrame.from_dict(OrderedDict())`` instead (:issue:`17320`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 788b236b0ec59..7a3e1690975a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -313,7 +313,7 @@ def _constructor(self): _constructor_sliced = Series _deprecations = NDFrame._deprecations | frozenset( - ['sortlevel', 'get_value', 'set_value', 'from_csv']) + ['sortlevel', 'get_value', 'set_value', 'from_csv', 'from_items']) @property def _constructor_expanddim(self): @@ -1246,6 +1246,9 @@ def to_records(self, index=True, convert_datetime64=True): @classmethod def from_items(cls, items, columns=None, orient='columns'): """ + DEPRECATED: from_items is deprecated and will be removed in a + future version. 
Use :meth:`DataFrame(dict())` instead. + Convert (key, value) pairs to DataFrame. The keys will be the axis index (usually the columns, but depends on the specified orientation). The values should be arrays or Series. @@ -1266,6 +1269,11 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ + + warnings.warn("from_items is deprecated. Please use " + "DataFrame(dict()) instead.", + FutureWarning, stacklevel=2) + keys, values = lzip(*items) if orient == 'columns': diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b409cf20e9a09..c511fcdba102c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1571,7 +1571,7 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_items(data_formatted) + data = DataFrame(dict(data_formatted), columns=dict(data_formatted).keys()) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1609,7 +1609,7 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_items(retyped_data) + data = DataFrame(dict(retyped_data), columns=dict(retyped_data).keys()) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1722,7 +1722,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_items(cat_converted_data) + data = DataFrame(dict(cat_converted_data), columns=dict(cat_converted_data).keys()) return data def data_label(self): @@ -1997,7 +1997,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_items(data_formatted) + return DataFrame(dict(data_formatted), columns=dict(data_formatted).keys()) def _replace_nans(self, data): # return data diff --git 
a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b24ae22162a34..732de0c2e968a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -871,7 +871,7 @@ def __len__(self, n): # GH 4297 # support Array import array - result = DataFrame.from_items([('A', array.array('i', range(10)))]) + result = DataFrame({'A': array.array('i', range(10))}) expected = DataFrame({'A': list(range(10))}) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -1175,28 +1175,35 @@ def test_constructor_manager_resize(self): def test_constructor_from_items(self): items = [(c, self.frame[c]) for c in self.frame.columns] - recons = DataFrame.from_items(items) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items) tm.assert_frame_equal(recons, self.frame) # pass some columns - recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']]) # orient='index' row_items = [(idx, self.mixed_frame.xs(idx)) for idx in self.mixed_frame.index] - - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert recons['A'].dtype == np.float64 with tm.assert_raises_regex(TypeError, "Must pass columns with " "orient='index'"): - DataFrame.from_items(row_items, orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples arr = construct_1d_object_array_from_listlike( @@ -1204,15 +1211,19 
@@ def test_constructor_from_items(self): self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) for idx in self.mixed_frame.index] - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert isinstance(recons['foo'][0], tuple) - rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], - orient='index', - columns=['one', 'two', 'three']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + orient='index', + columns=['one', 'two', 'three']) xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'], columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) @@ -1222,12 +1233,28 @@ def test_constructor_from_items_scalars(self): with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 4)]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 4)]) with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + orient='index') + + def test_from_items_deprecation(self): + # GH 17320 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + 
columns=['col1', 'col2', 'col3'], orient='index') def test_constructor_mix_series_nonseries(self): @@ -1256,13 +1283,17 @@ def test_constructor_column_duplicates(self): tm.assert_frame_equal(df, edf) - idf = DataFrame.from_items( - [('a', [8]), ('a', [5])], columns=['a', 'a']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + idf = DataFrame.from_items([('a', [8]), ('a', [5])], + columns=['a', 'a']) tm.assert_frame_equal(idf, edf) - pytest.raises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pytest.raises(ValueError, DataFrame.from_items, + [('a', [8]), ('a', [5]), ('b', [6])], + columns=['b', 'a', 'a']) def test_constructor_empty_with_string_dtype(self): # GH 9428 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index f0a21cde4fbd9..7c4565b477dc7 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -214,9 +214,10 @@ def check(result, expected=None): for index in [df.index, pd.Index(list('edcba'))]: this_df = df.copy() expected_ser = pd.Series(index.values, index=this_df.index) - expected_df = DataFrame.from_items([('A', expected_ser), - ('B', this_df['B']), - ('A', expected_ser)]) + expected_df = DataFrame({'A': expected_ser, + 'B': this_df['B'], + 'A': expected_ser}, + columns=['A', 'B', 'A']) this_df['A'] = index check(this_df, expected_df) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 8525cb42c2455..86717f63ce247 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -924,8 +924,8 @@ def test_float_parser(self): def test_scientific_no_exponent(self): # see gh-12215 - df = DataFrame.from_items([('w', ['2e']), ('x', ['3E']), - ('y', ['42e']), ('z', ['632E'])]) + df = DataFrame({'w': ['2e'], 'x': ['3E'], + 'y': ['42e'], 
'z': ['632E']}) data = df.to_csv(index=False) for prec in self.float_precision_choices: df_roundtrip = self.read_csv( diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index efbabcfd8fc4c..20ec968788385 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -315,17 +315,17 @@ def test_excel_table(self): def test_reader_special_dtypes(self): - expected = DataFrame.from_items([ - ("IntCol", [1, 2, -3, 4, 0]), - ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), - ("BoolCol", [True, False, True, True, False]), - ("StrCol", [1, 2, 3, 4, 5]), + expected = DataFrame({ + "IntCol": [1, 2, -3, 4, 0], + "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005], + "BoolCol": [True, False, True, True, False], + "StrCol": [1, 2, 3, 4, 5], # GH5394 - this is why convert_float isn't vectorized - ("Str2Col", ["a", 3, "c", "d", "e"]), - ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31), + "Str2Col": ["a", 3, "c", "d", "e"], + "DateCol": [datetime(2013, 10, 30), datetime(2013, 10, 31), datetime(1905, 1, 1), datetime(2013, 12, 14), - datetime(2015, 3, 14)]) - ]) + datetime(2015, 3, 14)] + }, columns=["IntCol", "FloatCol", "BoolCol", "StrCol", "Str2Col", "DateCol"]) basename = 'test_types' @@ -363,12 +363,12 @@ def test_reader_converters(self): basename = 'test_converters' - expected = DataFrame.from_items([ - ("IntCol", [1, 2, -3, -1000, 0]), - ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), - ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']), - ("StrCol", ['1', np.nan, '3', '4', '5']), - ]) + expected = DataFrame({ + "IntCol": [1, 2, -3, -1000, 0], + "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005], + "BoolCol": ['Found', 'Found', 'Found', 'Not found', 'Found'], + "StrCol": ['1', np.nan, '3', '4', '5'], + }, columns=['IntCol', 'FloatCol', 'BoolCol', 'StrCol']) converters = {'IntCol': lambda x: int(x) if x != '' else -1000, 'FloatCol': lambda x: 10 * x if x else np.nan, @@ -718,32 +718,30 @@ def 
test_reader_seconds(self): if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. - expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)])]) + expected = DataFrame({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) else: # Xlrd < 0.9.3 rounds Excel milliseconds. - expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)])]) + expected = DataFrame({"Time": [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)]}) actual = self.get_exceldf('times_1900', 'Sheet1') tm.assert_frame_equal(actual, expected) @@ -1988,7 +1986,7 @@ def test_datetimes(self): datetime(2013, 1, 13, 18, 20, 52)] with ensure_clean(self.ext) as path: - write_frame = DataFrame.from_items([('A', datetimes)]) + write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(path, 'Sheet1') read_frame = read_excel(path, 'Sheet1', header=0) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d0d7f881b37d0..719fb6257be33 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -945,7 +945,7 @@ def test_categorical_order(self, file): cols.append((col, 
pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame.from_items(cols) + expected = DataFrame(dict(cols), columns=dict(cols).keys()) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) From b01cdf8e2c3a7911c9da579a8f53baaed97ce992 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 27 Nov 2017 21:02:49 +0000 Subject: [PATCH 2/6] fixing some over-indentation --- pandas/io/stata.py | 12 ++++++++---- pandas/tests/frame/test_nonunique_indexes.py | 6 +++--- pandas/tests/io/test_excel.py | 7 ++++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c511fcdba102c..62a92cba85419 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1571,7 +1571,8 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame(dict(data_formatted), columns=dict(data_formatted).keys()) + data = DataFrame(dict(data_formatted), + columns=dict(data_formatted).keys()) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1609,7 +1610,8 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame(dict(retyped_data), columns=dict(retyped_data).keys()) + data = DataFrame(dict(retyped_data), + columns=dict(retyped_data).keys()) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1722,7 +1724,8 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame(dict(cat_converted_data), columns=dict(cat_converted_data).keys()) + data = DataFrame(dict(cat_converted_data), + columns=dict(cat_converted_data).keys()) return data def data_label(self): @@ -1997,7 +2000,8 @@ def _prepare_categoricals(self, data): 
data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame(dict(data_formatted), columns=dict(data_formatted).keys()) + return DataFrame(dict(data_formatted), + columns=dict(data_formatted).keys()) def _replace_nans(self, data): # return data diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 7c4565b477dc7..93379cb403ec5 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -217,7 +217,7 @@ def check(result, expected=None): expected_df = DataFrame({'A': expected_ser, 'B': this_df['B'], 'A': expected_ser}, - columns=['A', 'B', 'A']) + columns=['A', 'B', 'A']) this_df['A'] = index check(this_df, expected_df) @@ -328,8 +328,8 @@ def check(result, expected=None): df1r = df1.reindex_like(df2) result = df1r == df2 - expected = DataFrame([[False, True], [True, False], [False, False], [ - True, False]], columns=['A', 'A']) + expected = DataFrame([[False, True], [True, False], [False, False], + [True, False]], columns=['A', 'A']) assert_frame_equal(result, expected) # mixed column selection diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 20ec968788385..550a027e74042 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -323,9 +323,10 @@ def test_reader_special_dtypes(self): # GH5394 - this is why convert_float isn't vectorized "Str2Col": ["a", 3, "c", "d", "e"], "DateCol": [datetime(2013, 10, 30), datetime(2013, 10, 31), - datetime(1905, 1, 1), datetime(2013, 12, 14), - datetime(2015, 3, 14)] - }, columns=["IntCol", "FloatCol", "BoolCol", "StrCol", "Str2Col", "DateCol"]) + datetime(1905, 1, 1), datetime(2013, 12, 14), + datetime(2015, 3, 14)] + }, columns=["IntCol", "FloatCol", "BoolCol", + "StrCol", "Str2Col", "DateCol"]) basename = 'test_types' From 1ff77e2cb336ba65785ea8a6fe314d09517931ee Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 28 Nov 2017 
11:23:00 +0000 Subject: [PATCH 3/6] Use OrderedDict instead of dict in io/stata.py --- pandas/io/stata.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 62a92cba85419..4326c394f097e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -13,6 +13,7 @@ import datetime import struct import sys +from collections import OrderedDict import numpy as np from dateutil.relativedelta import relativedelta @@ -1571,8 +1572,8 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame(dict(data_formatted), - columns=dict(data_formatted).keys()) + data = DataFrame(OrderedDict(data_formatted), + columns=OrderedDict(data_formatted).keys()) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1610,8 +1611,8 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame(dict(retyped_data), - columns=dict(retyped_data).keys()) + data = DataFrame(OrderedDict(retyped_data), + columns=OrderedDict(retyped_data).keys()) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1724,8 +1725,8 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame(dict(cat_converted_data), - columns=dict(cat_converted_data).keys()) + data = DataFrame(OrderedDict(cat_converted_data), + columns=OrderedDict(cat_converted_data).keys()) return data def data_label(self): @@ -2000,8 +2001,8 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame(dict(data_formatted), - columns=dict(data_formatted).keys()) + return DataFrame(OrderedDict(data_formatted), + columns=OrderedDict(data_formatted).keys()) def 
_replace_nans(self, data): # return data From 4b986b36781869f83f35c567ab11a45a798ff734 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 28 Nov 2017 13:35:41 +0000 Subject: [PATCH 4/6] replace another dict with OrderedDict --- pandas/tests/io/test_stata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 719fb6257be33..0575f64fb12e1 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -8,6 +8,7 @@ import warnings from datetime import datetime from distutils.version import LooseVersion +from collections import OrderedDict import numpy as np import pandas as pd @@ -945,7 +946,7 @@ def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame(dict(cols), columns=dict(cols).keys()) + expected = DataFrame(dict(cols), columns=OrderedDict(cols).keys()) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) From c25f5419f0305b60bffb94e231f99158f96db61c Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 23 Jan 2018 22:40:23 +0000 Subject: [PATCH 5/6] recommend from_dict(dict()) and from_dict(OrderedDict()) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/frame.py | 8 +- pandas/io/stata.py | 12 +-- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/io/parser/common.py | 6 +- pandas/tests/io/test_excel.py | 79 ++++++++++---------- pandas/tests/io/test_stata.py | 2 +- 7 files changed, 57 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 668eff917a32d..4e59f2d0f844a 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -329,7 +329,7 @@ Deprecations - :func:`read_excel` has deprecated the ``skip_footer`` parameter. 
Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- :func:``DataFrame.from_items`` is deprecated. Use ``DataFrame.from_dict(OrderedDict())`` instead (:issue:`17320`) +- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7a3e1690975a8..0995e01238240 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1247,7 +1247,9 @@ def to_records(self, index=True, convert_datetime64=True): def from_items(cls, items, columns=None, orient='columns'): """ DEPRECATED: from_items is deprecated and will be removed in a - future version. Use :meth:`DataFrame(dict())` instead. + future version. Use :meth:`DataFrame.from_dict(dict())` + instead. :meth:`DataFrame.from_dict(OrderedDict(...))` may be used + to preserve the key order. Convert (key, value) pairs to DataFrame. The keys will be the axis index (usually the columns, but depends on the specified @@ -1271,7 +1273,9 @@ def from_items(cls, items, columns=None, orient='columns'): """ warnings.warn("from_items is deprecated. Please use " - "DataFrame(dict()) instead.", + "DataFrame.from_dict(dict()) instead. 
" + "DataFrame.from_dict(OrderedDict()) may be used to " + "preserve the key order.", FutureWarning, stacklevel=2) keys, values = lzip(*items) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 4326c394f097e..0922a4a9c3e9b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1572,8 +1572,7 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame(OrderedDict(data_formatted), - columns=OrderedDict(data_formatted).keys()) + data = DataFrame.from_dict(OrderedDict(data_formatted)) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1611,8 +1610,7 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame(OrderedDict(retyped_data), - columns=OrderedDict(retyped_data).keys()) + data = DataFrame.from_dict(OrderedDict(retyped_data)) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1725,8 +1723,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame(OrderedDict(cat_converted_data), - columns=OrderedDict(cat_converted_data).keys()) + data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data def data_label(self): @@ -2001,8 +1998,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame(OrderedDict(data_formatted), - columns=OrderedDict(data_formatted).keys()) + return DataFrame.from_dict(OrderedDict(data_formatted)) def _replace_nans(self, data): # return data diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 93379cb403ec5..36465db78361f 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -328,8 
+328,8 @@ def check(result, expected=None): df1r = df1.reindex_like(df2) result = df1r == df2 - expected = DataFrame([[False, True], [True, False], [False, False], - [True, False]], columns=['A', 'A']) + expected = DataFrame([[False, True], [True, False], [False, False], [ + True, False]], columns=['A', 'A']) assert_frame_equal(result, expected) # mixed column selection diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 86717f63ce247..f677b356a77a5 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -8,6 +8,7 @@ import re import sys from datetime import datetime +from collections import OrderedDict import pytest import numpy as np @@ -924,8 +925,9 @@ def test_float_parser(self): def test_scientific_no_exponent(self): # see gh-12215 - df = DataFrame({'w': ['2e'], 'x': ['3E'], - 'y': ['42e'], 'z': ['632E']}) + df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']), + ('y', ['42e']), + ('z', ['632E'])])) data = df.to_csv(index=False) for prec in self.float_precision_choices: df_roundtrip = self.read_csv( diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 550a027e74042..ebb8424b78ed4 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -6,6 +6,7 @@ from distutils.version import LooseVersion from functools import partial from warnings import catch_warnings +from collections import OrderedDict import numpy as np import pytest @@ -315,19 +316,17 @@ def test_excel_table(self): def test_reader_special_dtypes(self): - expected = DataFrame({ - "IntCol": [1, 2, -3, 4, 0], - "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005], - "BoolCol": [True, False, True, True, False], - "StrCol": [1, 2, 3, 4, 5], + expected = DataFrame.from_dict(OrderedDict([ + ("IntCol", [1, 2, -3, 4, 0]), + ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), + ("BoolCol", [True, False, True, True, False]), + ("StrCol", [1, 2, 3, 4, 5]), # GH5394 - this is why 
convert_float isn't vectorized - "Str2Col": ["a", 3, "c", "d", "e"], - "DateCol": [datetime(2013, 10, 30), datetime(2013, 10, 31), - datetime(1905, 1, 1), datetime(2013, 12, 14), - datetime(2015, 3, 14)] - }, columns=["IntCol", "FloatCol", "BoolCol", - "StrCol", "Str2Col", "DateCol"]) - + ("Str2Col", ["a", 3, "c", "d", "e"]), + ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31), + datetime(1905, 1, 1), datetime(2013, 12, 14), + datetime(2015, 3, 14)]) + ])) basename = 'test_types' # should read in correctly and infer types @@ -364,12 +363,12 @@ def test_reader_converters(self): basename = 'test_converters' - expected = DataFrame({ - "IntCol": [1, 2, -3, -1000, 0], - "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005], - "BoolCol": ['Found', 'Found', 'Found', 'Not found', 'Found'], - "StrCol": ['1', np.nan, '3', '4', '5'], - }, columns=['IntCol', 'FloatCol', 'BoolCol', 'StrCol']) + expected = DataFrame.from_dict(OrderedDict([ + ("IntCol", [1, 2, -3, -1000, 0]), + ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), + ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']), + ("StrCol", ['1', np.nan, '3', '4', '5']), + ])) converters = {'IntCol': lambda x: int(x) if x != '' else -1000, 'FloatCol': lambda x: 10 * x if x else np.nan, @@ -719,30 +718,30 @@ def test_reader_seconds(self): if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. 
- expected = DataFrame({"Time": [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)]}) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) else: # Xlrd < 0.9.3 rounds Excel milliseconds. - expected = DataFrame({"Time": [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)]}) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)]}) actual = self.get_exceldf('times_1900', 'Sheet1') tm.assert_frame_equal(actual, expected) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 0575f64fb12e1..89d76061329a3 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -946,7 +946,7 @@ def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame(dict(cols), columns=OrderedDict(cols).keys()) + expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) From 1838f65bf8ede9b83c3e227b95e50dde3abf4920 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Thu, 25 Jan 2018 23:01:35 +0000 Subject: [PATCH 
6/6] change DEPRECATED and remove from_items from some constructor tests --- pandas/core/frame.py | 9 +++++---- pandas/tests/frame/test_constructors.py | 14 +++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0995e01238240..96d28581cfdd9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1246,10 +1246,11 @@ def to_records(self, index=True, convert_datetime64=True): @classmethod def from_items(cls, items, columns=None, orient='columns'): """ - DEPRECATED: from_items is deprecated and will be removed in a - future version. Use :meth:`DataFrame.from_dict(dict())` - instead. :meth:`DataFrame.from_dict(OrderedDict(...))` may be used - to preserve the key order. + .. deprecated:: 0.23.0 + from_items is deprecated and will be removed in a + future version. Use :meth:`DataFrame.from_dict(dict())` + instead. :meth:`DataFrame.from_dict(OrderedDict(...))` may be used + to preserve the key order. Convert (key, value) pairs to DataFrame. 
The keys will be the axis index (usually the columns, but depends on the specified diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 732de0c2e968a..8abd88d8a379c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1283,17 +1283,13 @@ def test_constructor_column_duplicates(self): tm.assert_frame_equal(df, edf) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - idf = DataFrame.from_items([('a', [8]), ('a', [5])], - columns=['a', 'a']) + idf = DataFrame.from_records([(8, 5)], + columns=['a', 'a']) + tm.assert_frame_equal(idf, edf) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - pytest.raises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + pytest.raises(ValueError, DataFrame.from_dict, + OrderedDict([('b', 8), ('a', 5), ('a', 6)])) def test_constructor_empty_with_string_dtype(self): # GH 9428