recommend from_dict(dict()) and from_dict(OrderedDict())

pandas-dev · Jan 30, 2018 · c25f541 · c25f541
1 parent 4b986b3
commit c25f541
Show file tree

Hide file tree

Showing 7 changed files with 57 additions and 56 deletions.
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -329,7 +329,7 @@ Deprecations
 - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`)
 - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`).
 - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
-- :func:``DataFrame.from_items`` is deprecated. Use ``DataFrame.from_dict(OrderedDict())`` instead (:issue:`17320`)
+- :func:`DataFrame.from_items` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`)
 
 .. _whatsnew_0230.prior_deprecations:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1247,7 +1247,9 @@ def to_records(self, index=True, convert_datetime64=True):
     def from_items(cls, items, columns=None, orient='columns'):
         """
         DEPRECATED: from_items is deprecated and will be removed in a
-        future version. Use :meth:`DataFrame(dict())` instead.
+        future version. Use ``DataFrame.from_dict(dict())`` instead.
+        ``DataFrame.from_dict(OrderedDict(...))`` may be used to
+        preserve the key order.
 
         Convert (key, value) pairs to DataFrame. The keys will be the axis
         index (usually the columns, but depends on the specified
@@ -1271,7 +1273,9 @@ def from_items(cls, items, columns=None, orient='columns'):
         """
 
         warnings.warn("from_items is deprecated. Please use "
-                      "DataFrame(dict()) instead.",
+                      "DataFrame.from_dict(dict()) instead. "
+                      "DataFrame.from_dict(OrderedDict()) may be used to "
+                      "preserve the key order.",
                       FutureWarning, stacklevel=2)
 
         keys, values = lzip(*items)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -1572,8 +1572,7 @@ def read(self, nrows=None, convert_dates=None,
                 else:
                     data_formatted.append((col, data[col]))
         if requires_type_conversion:
-            data = DataFrame(OrderedDict(data_formatted),
-                             columns=OrderedDict(data_formatted).keys())
+            data = DataFrame.from_dict(OrderedDict(data_formatted))
         del data_formatted
 
         self._do_convert_missing(data, convert_missing)
@@ -1611,8 +1610,7 @@ def read(self, nrows=None, convert_dates=None,
                     convert = True
                 retyped_data.append((col, data[col].astype(dtype)))
             if convert:
-                data = DataFrame(OrderedDict(retyped_data),
-                                 columns=OrderedDict(retyped_data).keys())
+                data = DataFrame.from_dict(OrderedDict(retyped_data))
 
         if index_col is not None:
             data = data.set_index(data.pop(index_col))
@@ -1725,8 +1723,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist,
                 cat_converted_data.append((col, cat_data))
             else:
                 cat_converted_data.append((col, data[col]))
-        data = DataFrame(OrderedDict(cat_converted_data),
-                         columns=OrderedDict(cat_converted_data).keys())
+        data = DataFrame.from_dict(OrderedDict(cat_converted_data))
         return data
 
     def data_label(self):
@@ -2001,8 +1998,7 @@ def _prepare_categoricals(self, data):
                 data_formatted.append((col, values))
             else:
                 data_formatted.append((col, data[col]))
-        return DataFrame(OrderedDict(data_formatted),
-                         columns=OrderedDict(data_formatted).keys())
+        return DataFrame.from_dict(OrderedDict(data_formatted))
 
     def _replace_nans(self, data):
         # return data

diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py
@@ -328,8 +328,8 @@ def check(result, expected=None):
 
         df1r = df1.reindex_like(df2)
         result = df1r == df2
-        expected = DataFrame([[False, True], [True, False], [False, False],
-                             [True, False]], columns=['A', 'A'])
+        expected = DataFrame([[False, True], [True, False], [False, False], [
+                             True, False]], columns=['A', 'A'])
         assert_frame_equal(result, expected)
 
         # mixed column selection

diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
@@ -8,6 +8,7 @@
 import re
 import sys
 from datetime import datetime
+from collections import OrderedDict
 
 import pytest
 import numpy as np
@@ -924,8 +925,9 @@ def test_float_parser(self):
 
     def test_scientific_no_exponent(self):
         # see gh-12215
-        df = DataFrame({'w': ['2e'], 'x': ['3E'],
-                        'y': ['42e'], 'z': ['632E']})
+        df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']),
+                                              ('y', ['42e']),
+                                              ('z', ['632E'])]))
         data = df.to_csv(index=False)
         for prec in self.float_precision_choices:
             df_roundtrip = self.read_csv(

diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
@@ -6,6 +6,7 @@
 from distutils.version import LooseVersion
 from functools import partial
 from warnings import catch_warnings
+from collections import OrderedDict
 
 import numpy as np
 import pytest
@@ -315,19 +316,17 @@ def test_excel_table(self):
 
     def test_reader_special_dtypes(self):
 
-        expected = DataFrame({
-            "IntCol": [1, 2, -3, 4, 0],
-            "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005],
-            "BoolCol": [True, False, True, True, False],
-            "StrCol": [1, 2, 3, 4, 5],
+        expected = DataFrame.from_dict(OrderedDict([
+            ("IntCol", [1, 2, -3, 4, 0]),
+            ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
+            ("BoolCol", [True, False, True, True, False]),
+            ("StrCol", [1, 2, 3, 4, 5]),
             # GH5394 - this is why convert_float isn't vectorized
-            "Str2Col": ["a", 3, "c", "d", "e"],
-            "DateCol": [datetime(2013, 10, 30), datetime(2013, 10, 31),
-                        datetime(1905, 1, 1), datetime(2013, 12, 14),
-                        datetime(2015, 3, 14)]
-        }, columns=["IntCol", "FloatCol", "BoolCol",
-                    "StrCol", "Str2Col", "DateCol"])
-
+            ("Str2Col", ["a", 3, "c", "d", "e"]),
+            ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31),
+                         datetime(1905, 1, 1), datetime(2013, 12, 14),
+                         datetime(2015, 3, 14)])
+        ]))
         basename = 'test_types'
 
         # should read in correctly and infer types
@@ -364,12 +363,12 @@ def test_reader_converters(self):
 
         basename = 'test_converters'
 
-        expected = DataFrame({
-            "IntCol": [1, 2, -3, -1000, 0],
-            "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005],
-            "BoolCol": ['Found', 'Found', 'Found', 'Not found', 'Found'],
-            "StrCol": ['1', np.nan, '3', '4', '5'],
-        }, columns=['IntCol', 'FloatCol', 'BoolCol', 'StrCol'])
+        expected = DataFrame.from_dict(OrderedDict([
+            ("IntCol", [1, 2, -3, -1000, 0]),
+            ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]),
+            ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']),
+            ("StrCol", ['1', np.nan, '3', '4', '5']),
+        ]))
 
         converters = {'IntCol': lambda x: int(x) if x != '' else -1000,
                       'FloatCol': lambda x: 10 * x if x else np.nan,
@@ -719,30 +718,30 @@ def test_reader_seconds(self):
 
         if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
             # Xlrd >= 0.9.3 can handle Excel milliseconds.
-            expected = DataFrame({"Time": [time(1, 2, 3),
-                                           time(2, 45, 56, 100000),
-                                           time(4, 29, 49, 200000),
-                                           time(6, 13, 42, 300000),
-                                           time(7, 57, 35, 400000),
-                                           time(9, 41, 28, 500000),
-                                           time(11, 25, 21, 600000),
-                                           time(13, 9, 14, 700000),
-                                           time(14, 53, 7, 800000),
-                                           time(16, 37, 0, 900000),
-                                           time(18, 20, 54)]})
+            expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
+                                            time(2, 45, 56, 100000),
+                                            time(4, 29, 49, 200000),
+                                            time(6, 13, 42, 300000),
+                                            time(7, 57, 35, 400000),
+                                            time(9, 41, 28, 500000),
+                                            time(11, 25, 21, 600000),
+                                            time(13, 9, 14, 700000),
+                                            time(14, 53, 7, 800000),
+                                            time(16, 37, 0, 900000),
+                                            time(18, 20, 54)]})
         else:
             # Xlrd < 0.9.3 rounds Excel milliseconds.
-            expected = DataFrame({"Time": [time(1, 2, 3),
-                                           time(2, 45, 56),
-                                           time(4, 29, 49),
-                                           time(6, 13, 42),
-                                           time(7, 57, 35),
-                                           time(9, 41, 29),
-                                           time(11, 25, 22),
-                                           time(13, 9, 15),
-                                           time(14, 53, 8),
-                                           time(16, 37, 1),
-                                           time(18, 20, 54)]})
+            expected = DataFrame.from_dict({"Time": [time(1, 2, 3),
+                                            time(2, 45, 56),
+                                            time(4, 29, 49),
+                                            time(6, 13, 42),
+                                            time(7, 57, 35),
+                                            time(9, 41, 29),
+                                            time(11, 25, 22),
+                                            time(13, 9, 15),
+                                            time(14, 53, 8),
+                                            time(16, 37, 1),
+                                            time(18, 20, 54)]})
 
         actual = self.get_exceldf('times_1900', 'Sheet1')
         tm.assert_frame_equal(actual, expected)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -946,7 +946,7 @@ def test_categorical_order(self, file):
                 cols.append((col, pd.Categorical.from_codes(codes, labels)))
             else:
                 cols.append((col, pd.Series(labels, dtype=np.float32)))
-        expected = DataFrame(dict(cols), columns=OrderedDict(cols).keys())
+        expected = DataFrame.from_dict(OrderedDict(cols))
 
         # Read with and without categoricals, ensure order is identical
         file = getattr(self, file)