Skip to content

Commit

Permalink
ENH: Provide dict object for to_dict() pandas-dev#16122
Browse files Browse the repository at this point in the history
  • Loading branch information
dwkenefick committed May 3, 2017
1 parent 02eafaf commit 9d5c8d2
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 60 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -526,9 +526,8 @@ Other Enhancements
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
- Options added to allow one to turn on/off the use of ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`)

- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``; the default is "left", which is backward compatible. You can now also pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`)

- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)

.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

Expand Down
41 changes: 27 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ def from_dict(cls, data, orient='columns', dtype=None):

return cls(data, index=index, columns=columns, dtype=dtype)

def to_dict(self, orient='dict'):
def to_dict(self, orient='dict', into=dict):
"""Convert DataFrame to dictionary.
Parameters
Expand All @@ -882,32 +882,45 @@ def to_dict(self, orient='dict'):
Abbreviations are allowed. `s` indicates `series` and `sp`
indicates `split`.
into : class, default dict
The collections.Mapping subclass used for all Mappings
in the return value.
.. versionadded:: 0.20.0
Returns
-------
result : dict like {column -> {index -> value}}
result : collections.Mapping like {column -> {index -> value}}
If ``into`` is collections.defaultdict, the return
value's default_factory will be None.
"""
# GH16122
if not issubclass(into, collections.Mapping):
raise TypeError('unsupported type: {}'.format(type(into)))
if not self.columns.is_unique:
warnings.warn("DataFrame columns are not unique, some "
"columns will be omitted.", UserWarning)
into_c = (functools.partial(into, None)
if into == collections.defaultdict else into)
if orient.lower().startswith('d'):
return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
return into_c(
(k, v.to_dict(into)) for k, v in compat.iteritems(self))
elif orient.lower().startswith('l'):
return dict((k, v.tolist()) for k, v in compat.iteritems(self))
return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
elif orient.lower().startswith('sp'):
return {'index': self.index.tolist(),
'columns': self.columns.tolist(),
'data': lib.map_infer(self.values.ravel(),
_maybe_box_datetimelike)
.reshape(self.values.shape).tolist()}
return into_c((('index', self.index.tolist()),
('columns', self.columns.tolist()),
('data', lib.map_infer(self.values.ravel(),
_maybe_box_datetimelike)
.reshape(self.values.shape).tolist())))
elif orient.lower().startswith('s'):
return dict((k, _maybe_box_datetimelike(v))
for k, v in compat.iteritems(self))
return into_c((k, _maybe_box_datetimelike(v))
for k, v in compat.iteritems(self))
elif orient.lower().startswith('r'):
return [dict((k, _maybe_box_datetimelike(v))
for k, v in zip(self.columns, row))
return [into_c((k, _maybe_box_datetimelike(v))
for k, v in zip(self.columns, row))
for row in self.values]
elif orient.lower().startswith('i'):
return dict((k, v.to_dict()) for k, v in self.iterrows())
return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
else:
raise ValueError("orient '%s' not understood" % orient)

Expand Down
26 changes: 21 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import types
import warnings
from textwrap import dedent
import collections

from numpy import nan, ndarray
import numpy as np
Expand Down Expand Up @@ -1074,15 +1075,30 @@ def tolist(self):
""" Convert Series to a nested list """
return list(self.asobject)

def to_dict(self, into=dict):
    """
    Convert Series to {label -> value} dict or dict-like object

    Parameters
    ----------
    into : class, default dict
        The collections.Mapping subclass to use as the return
        object.

        .. versionadded:: 0.20.0

    Returns
    -------
    value_dict : collections.Mapping
        If ``into`` is collections.defaultdict, the return
        value's default_factory will be None.

    Raises
    ------
    TypeError
        If ``into`` is not a class, or is a class that is not a
        subclass of collections.Mapping.
    """
    # GH16122
    # The ABCs live in ``collections.abc`` on Python 3 (the aliases in
    # ``collections`` were removed in 3.10); fall back for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    try:
        is_mapping = issubclass(into, Mapping)
    except TypeError:
        # issubclass() itself raises when ``into`` is not a class (e.g. an
        # instance such as ``{}``); report that with our own message below.
        is_mapping = False

    if not is_mapping:
        # Format ``into`` itself: ``type(into)`` is only its metaclass and
        # would print "<class 'type'>" for every rejected class.
        raise TypeError('unsupported type: {}'.format(into))

    if into is collections.defaultdict:
        # defaultdict takes ``default_factory`` as its first positional
        # argument, so the items must be passed second.
        return into(None, compat.iteritems(self))
    return into(compat.iteritems(self))

def to_frame(self, name=None):
"""
Expand Down
81 changes: 43 additions & 38 deletions pandas/tests/frame/test_convert_to.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

import pytest
import collections
import numpy as np

from pandas import compat
Expand All @@ -18,44 +19,48 @@ def test_to_dict(self):
'A': {'1': 1, '2': 2},
'B': {'1': '1', '2': '2', '3': '3'},
}
recons_data = DataFrame(test_data).to_dict()

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][k2]

recons_data = DataFrame(test_data).to_dict("l")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][int(k2) - 1]

recons_data = DataFrame(test_data).to_dict("s")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][k2]

recons_data = DataFrame(test_data).to_dict("sp")
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
tm.assert_dict_equal(recons_data, expected_split)

recons_data = DataFrame(test_data).to_dict("r")
expected_records = [{'A': 1.0, 'B': '1'},
{'A': 2.0, 'B': '2'},
{'A': np.nan, 'B': '3'}]
assert isinstance(recons_data, list)
assert len(recons_data) == 3
for l, r in zip(recons_data, expected_records):
tm.assert_dict_equal(l, r)

# GH10844
recons_data = DataFrame(test_data).to_dict("i")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k2][k]
# GH16122
test_maps = (
dict, collections.defaultdict, collections.OrderedDict)
for mapping in test_maps:
recons_data = DataFrame(test_data).to_dict(into=mapping)

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
self.assertEqual(v2, recons_data[k][k2])

recons_data = DataFrame(test_data).to_dict("l", mapping)

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
self.assertEqual(v2, recons_data[k][int(k2) - 1])

recons_data = DataFrame(test_data).to_dict("s", mapping)

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
self.assertEqual(v2, recons_data[k][k2])

recons_data = DataFrame(test_data).to_dict("sp", mapping)
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
tm.assert_dict_equal(recons_data, expected_split)

recons_data = DataFrame(test_data).to_dict("r", mapping)
expected_records = [{'A': 1.0, 'B': '1'},
{'A': 2.0, 'B': '2'},
{'A': np.nan, 'B': '3'}]
assert isinstance(recons_data, list)
self.assertEqual(len(recons_data), 3)
for l, r in zip(recons_data, expected_records):
tm.assert_dict_equal(l, r)

# GH10844
recons_data = DataFrame(test_data).to_dict("i")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
self.assertEqual(v2, recons_data[k2][k])

def test_to_dict_timestamp(self):

Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/series/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable-msg=E1101,W0612

from datetime import datetime
import collections

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -127,7 +128,15 @@ def test_to_frame(self):
assert_frame_equal(rs, xp)

def test_to_dict(self):
    """Series.to_dict() honours ``into`` and round-trips the data."""
    # GH16122
    test_maps = (
        dict, collections.defaultdict, collections.OrderedDict)
    for mapping in test_maps:
        result = self.ts.to_dict(mapping)
        # Series(...) accepts any mapping, so the round-trip alone would
        # pass even if ``into`` were ignored; check the container type too.
        assert isinstance(result, mapping)
        tm.assert_series_equal(Series(result, name='ts'), self.ts)

    # Counter is compared against the constructor fed with the same items
    # rather than against self.ts.
    from_method = self.ts.to_dict(collections.Counter)
    assert isinstance(from_method, collections.Counter)
    from_constructor = collections.Counter(self.ts.iteritems())
    tm.assert_series_equal(Series(from_method), Series(from_constructor))

def test_timeseries_periodindex(self):
# GH2891
Expand Down

0 comments on commit 9d5c8d2

Please sign in to comment.