From 5995dc36a090c193bca20d18f7089503a8674bbb Mon Sep 17 00:00:00 2001 From: Ross Petchler Date: Mon, 6 Oct 2014 10:26:40 -0400 Subject: [PATCH] ENH: Add orient argument and split option to DataFrame.to_dict. (GH7840) Update documentation with deprecation and enhancement notices. Remove indentation from list in docstring. --- doc/source/v0.15.0.txt | 3 ++- pandas/core/frame.py | 36 +++++++++++++++++++++++------------- pandas/tests/test_frame.py | 7 +++++++ 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 2c1fc9c9a6eef..cec3148a1f9fa 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -767,6 +767,7 @@ Prior Version Deprecations/Changes Deprecations ~~~~~~~~~~~~ +- The ``outtype`` argument to ``pd.DataFrame.to_dict`` has been deprecated in favor of ``orient``. (:issue:`7840`) - The ``convert_dummies`` method has been deprecated in favor of ``get_dummies`` (:issue:`8140`) - The ``infer_dst`` argument in ``tz_localize`` will be deprecated in favor of @@ -849,7 +850,7 @@ Enhancements idx idx + pd.offsets.MonthEnd(3) - +- Added ``split`` as an option to the ``orient`` argument in ``pd.DataFrame.to_dict``. (:issue:`7840`) - The ``get_dummies`` method can now be used on DataFrames. By default only catagorical columns are encoded as 0's and 1's, while other columns are diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8342c587ae4bb..2c172d6fe1af0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -640,19 +640,25 @@ def from_dict(cls, data, orient='columns', dtype=None): return cls(data, index=index, columns=columns, dtype=dtype) - def to_dict(self, outtype='dict'): - """ - Convert DataFrame to dictionary. + @deprecate_kwarg(old_arg_name='outtype', new_arg_name='orient') + def to_dict(self, orient='dict'): + """Convert DataFrame to dictionary. Parameters ---------- - outtype : str {'dict', 'list', 'series', 'records'} - Determines the type of the values of the dictionary. The - default `dict` is a nested dictionary {column -> {index -> value}}. - `list` returns {column -> list(values)}. `series` returns - {column -> Series(values)}. `records` returns [{columns -> value}]. - Abbreviations are allowed. + orient : str {'dict', 'list', 'series', 'split', 'records'} + Determines the type of the values of the dictionary. + + - dict (default) : dict like {column -> {index -> value}} + - list : dict like {column -> [values]} + - series : dict like {column -> Series(values)} + - split : dict like + {index -> [index], columns -> [columns], data -> [values]} + - records : list like + [{column -> value}, ... , {column -> value}] + Abbreviations are allowed. `s` indicates `series` and `sp` + indicates `split`. Returns ------- @@ -661,13 +667,17 @@ def to_dict(self, outtype='dict'): if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " "columns will be omitted.", UserWarning) - if outtype.lower().startswith('d'): + if orient.lower().startswith('d'): return dict((k, v.to_dict()) for k, v in compat.iteritems(self)) - elif outtype.lower().startswith('l'): + elif orient.lower().startswith('l'): return dict((k, v.tolist()) for k, v in compat.iteritems(self)) - elif outtype.lower().startswith('s'): + elif orient.lower().startswith('sp'): + return {'index': self.index.tolist(), + 'columns': self.columns.tolist(), + 'data': self.values.tolist()} + elif orient.lower().startswith('s'): return dict((k, v) for k, v in compat.iteritems(self)) - elif outtype.lower().startswith('r'): + elif orient.lower().startswith('r'): return [dict((k, v) for k, v in zip(self.columns, row)) for row in self.values] else: # pragma: no cover diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0dbca6f122b93..ba81d98510f5c 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4037,6 +4037,13 @@ def test_to_dict(self): for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][k2]) + recons_data = DataFrame(test_data).to_dict("sp") + + expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], + 'data': [[1.0, '1'], [2.0, '2'], [nan, '3']]} + + tm.assert_almost_equal(recons_data, expected_split) + recons_data = DataFrame(test_data).to_dict("r") expected_records = [{'A': 1.0, 'B': '1'},