Skip to content

Commit

Permalink
ENH: Provide dict object for to_dict() #16122 (#16220)
Browse files Browse the repository at this point in the history
* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122

* ENH: Provide dict object for to_dict() #16122
  • Loading branch information
dwkenefick authored and TomAugspurger committed May 16, 2017
1 parent 42e2a87 commit f040ed2
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 78 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ New features

Other Enhancements
^^^^^^^^^^^^^^^^^^
- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)


Expand Down
38 changes: 38 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import warnings
from datetime import datetime, timedelta
from functools import partial
import inspect
import collections

import numpy as np
from pandas._libs import lib, tslib
Expand Down Expand Up @@ -479,6 +481,42 @@ def _dict_compat(d):
for key, value in iteritems(d))


def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    into : instance or subclass of collections.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.Mapping subclass.

    Returns
    -------
    mapping : a collections.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If ``into`` is neither a Mapping subclass nor an instance of one,
        or if it is the uninitialized ``collections.defaultdict`` class.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    # The Mapping ABC lives in ``collections.abc`` on Python 3; the alias
    # on ``collections`` itself no longer exists in recent versions, so
    # look there first and fall back for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    if not inspect.isclass(into):
        if isinstance(into, collections.defaultdict):
            # An initialized defaultdict carries its default_factory; bind
            # it so the returned constructor accepts just the items.
            return partial(
                collections.defaultdict, into.default_factory)
        # Any other instance is reduced to its class, which is validated
        # below exactly like a class passed in directly.
        into = type(into)
    if not issubclass(into, Mapping):
        raise TypeError('unsupported type: {}'.format(into))
    elif into is collections.defaultdict:
        # The bare class cannot be called with an iterable of pairs (its
        # first positional argument is the default_factory).
        raise TypeError(
            'to_dict() only accepts initialized defaultdicts')
    return into


def sentinel_factory():
class Sentinel(object):
pass
Expand Down
84 changes: 69 additions & 15 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
_default_index,
_values_from_object,
_maybe_box_datetimelike,
_dict_compat)
_dict_compat,
standardize_mapping)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
Expand Down Expand Up @@ -860,7 +861,7 @@ def from_dict(cls, data, orient='columns', dtype=None):

return cls(data, index=index, columns=columns, dtype=dtype)

def to_dict(self, orient='dict'):
def to_dict(self, orient='dict', into=dict):
"""Convert DataFrame to dictionary.
Parameters
Expand All @@ -882,32 +883,85 @@ def to_dict(self, orient='dict'):
Abbreviations are allowed. `s` indicates `series` and `sp`
indicates `split`.
into : class, default dict
The collections.Mapping subclass used for all Mappings
in the return value. Can be the actual class or an empty
instance of the mapping type you want. If you want a
collections.defaultdict, you must pass it initialized.
.. versionadded:: 0.21.0
Returns
-------
result : dict like {column -> {index -> value}}
result : collections.Mapping like {column -> {index -> value}}
Examples
--------
>>> df = pd.DataFrame(
{'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b'])
>>> df
col1 col2
a 1 0.50
b 2 0.75
>>> df.to_dict()
{'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}}
You can specify the return orientation.
>>> df.to_dict('series')
{'col1': a 1
b 2
Name: col1, dtype: int64, 'col2': a 0.50
b 0.75
Name: col2, dtype: float64}
>>> df.to_dict('split')
{'columns': ['col1', 'col2'],
'data': [[1.0, 0.5], [2.0, 0.75]],
'index': ['a', 'b']}
>>> df.to_dict('records')
[{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]
>>> df.to_dict('index')
{'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}}
You can also specify the mapping type.
>>> from collections import OrderedDict, defaultdict
>>> df.to_dict(into=OrderedDict)
OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])),
('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))])
If you want a `defaultdict`, you need to initialize it:
>>> dd = defaultdict(list)
>>> df.to_dict('records', into=dd)
[defaultdict(<type 'list'>, {'col2': 0.5, 'col1': 1.0}),
defaultdict(<type 'list'>, {'col2': 0.75, 'col1': 2.0})]
"""
if not self.columns.is_unique:
warnings.warn("DataFrame columns are not unique, some "
"columns will be omitted.", UserWarning)
# GH16122
into_c = standardize_mapping(into)
if orient.lower().startswith('d'):
return dict((k, v.to_dict()) for k, v in compat.iteritems(self))
return into_c(
(k, v.to_dict(into)) for k, v in compat.iteritems(self))
elif orient.lower().startswith('l'):
return dict((k, v.tolist()) for k, v in compat.iteritems(self))
return into_c((k, v.tolist()) for k, v in compat.iteritems(self))
elif orient.lower().startswith('sp'):
return {'index': self.index.tolist(),
'columns': self.columns.tolist(),
'data': lib.map_infer(self.values.ravel(),
_maybe_box_datetimelike)
.reshape(self.values.shape).tolist()}
return into_c((('index', self.index.tolist()),
('columns', self.columns.tolist()),
('data', lib.map_infer(self.values.ravel(),
_maybe_box_datetimelike)
.reshape(self.values.shape).tolist())))
elif orient.lower().startswith('s'):
return dict((k, _maybe_box_datetimelike(v))
for k, v in compat.iteritems(self))
return into_c((k, _maybe_box_datetimelike(v))
for k, v in compat.iteritems(self))
elif orient.lower().startswith('r'):
return [dict((k, _maybe_box_datetimelike(v))
for k, v in zip(self.columns, row))
return [into_c((k, _maybe_box_datetimelike(v))
for k, v in zip(self.columns, row))
for row in self.values]
elif orient.lower().startswith('i'):
return dict((k, v.to_dict()) for k, v in self.iterrows())
return into_c((k, v.to_dict(into)) for k, v in self.iterrows())
else:
raise ValueError("orient '%s' not understood" % orient)

Expand Down
37 changes: 31 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
_maybe_match_name,
SettingWithCopyError,
_maybe_box_datetimelike,
_dict_compat)
_dict_compat,
standardize_mapping)
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
Float64Index, _ensure_index)
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
Expand Down Expand Up @@ -1074,15 +1075,39 @@ def tolist(self):
""" Convert Series to a nested list """
return list(self.asobject)

def to_dict(self):
def to_dict(self, into=dict):
"""
Convert Series to {label -> value} dict
Convert Series to {label -> value} dict or dict-like object.
Parameters
----------
into : class, default dict
The collections.Mapping subclass to use as the return
object. Can be the actual class or an empty
instance of the mapping type you want. If you want a
collections.defaultdict, you must pass it initialized.
.. versionadded:: 0.21.0
Returns
-------
value_dict : dict
"""
return dict(compat.iteritems(self))
value_dict : collections.Mapping
Examples
--------
>>> s = pd.Series([1, 2, 3, 4])
>>> s.to_dict()
{0: 1, 1: 2, 2: 3, 3: 4}
>>> from collections import OrderedDict, defaultdict
>>> s.to_dict(OrderedDict)
OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
>>> dd = defaultdict(list)
>>> s.to_dict(dd)
defaultdict(<type 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
"""
# GH16122
into_c = standardize_mapping(into)
return into_c(compat.iteritems(self))

def to_frame(self, name=None):
"""
Expand Down
133 changes: 79 additions & 54 deletions pandas/tests/frame/test_convert_to.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

import pytest
import collections
import numpy as np

from pandas import compat
Expand All @@ -13,50 +14,6 @@

class TestDataFrameConvertTo(TestData):

def test_to_dict(self):
test_data = {
'A': {'1': 1, '2': 2},
'B': {'1': '1', '2': '2', '3': '3'},
}
recons_data = DataFrame(test_data).to_dict()

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][k2]

recons_data = DataFrame(test_data).to_dict("l")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][int(k2) - 1]

recons_data = DataFrame(test_data).to_dict("s")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k][k2]

recons_data = DataFrame(test_data).to_dict("sp")
expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
tm.assert_dict_equal(recons_data, expected_split)

recons_data = DataFrame(test_data).to_dict("r")
expected_records = [{'A': 1.0, 'B': '1'},
{'A': 2.0, 'B': '2'},
{'A': np.nan, 'B': '3'}]
assert isinstance(recons_data, list)
assert len(recons_data) == 3
for l, r in zip(recons_data, expected_records):
tm.assert_dict_equal(l, r)

# GH10844
recons_data = DataFrame(test_data).to_dict("i")

for k, v in compat.iteritems(test_data):
for k2, v2 in compat.iteritems(v):
assert v2 == recons_data[k2][k]

def test_to_dict_timestamp(self):

# GH11247
Expand Down Expand Up @@ -190,17 +147,85 @@ def test_to_records_with_unicode_column_names(self):
)
tm.assert_almost_equal(result, expected)

@pytest.mark.parametrize('mapping', [
    dict,
    collections.defaultdict(list),
    collections.OrderedDict])
def test_to_dict(self, mapping):
    """Check ``DataFrame.to_dict`` for every orient with each ``into``.

    ``mapping`` is forwarded as ``into`` on every call, including the
    index orient, so each parametrized mapping type is exercised for
    all orientations.
    """
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }

    # GH16122
    recons_data = DataFrame(test_data).to_dict(into=mapping)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k][k2])

    recons_data = DataFrame(test_data).to_dict("l", mapping)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            # list orient is positional: label '1' sits at position 0
            assert (v2 == recons_data[k][int(k2) - 1])

    recons_data = DataFrame(test_data).to_dict("s", mapping)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k][k2])

    recons_data = DataFrame(test_data).to_dict("sp", mapping)
    expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'],
                      'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
    tm.assert_dict_equal(recons_data, expected_split)

    recons_data = DataFrame(test_data).to_dict("r", mapping)
    expected_records = [{'A': 1.0, 'B': '1'},
                        {'A': 2.0, 'B': '2'},
                        {'A': np.nan, 'B': '3'}]
    assert isinstance(recons_data, list)
    assert (len(recons_data) == 3)
    for left, right in zip(recons_data, expected_records):
        tm.assert_dict_equal(left, right)

    # GH10844
    recons_data = DataFrame(test_data).to_dict("i", mapping)

    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k2][k])

    # Index orient again, with an extra column duplicating the first one.
    df = DataFrame(test_data)
    df['duped'] = df[df.columns[0]]
    recons_data = df.to_dict("i", mapping)
    comp_data = test_data.copy()
    comp_data['duped'] = comp_data[df.columns[0]]
    for k, v in compat.iteritems(comp_data):
        for k2, v2 in compat.iteritems(v):
            assert (v2 == recons_data[k2][k])

@pytest.mark.parametrize(
    'mapping',
    [list, collections.defaultdict, []],
)
def test_to_dict_errors(self, mapping):
    """``to_dict`` must raise TypeError for each parametrized ``into``."""
    # GH16122
    frame = DataFrame(np.random.randn(3, 3))
    pytest.raises(TypeError, frame.to_dict, into=mapping)

@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
def test_to_records_datetimeindex_with_tz(tz):
# GH13937
dr = date_range('2016-01-01', periods=10,
freq='S', tz=tz)
@pytest.mark.parametrize('tz', ['UTC', 'GMT', 'US/Eastern'])
def test_to_records_datetimeindex_with_tz(self, tz):
# GH13937
dr = date_range('2016-01-01', periods=10,
freq='S', tz=tz)

df = DataFrame({'datetime': dr}, index=dr)
df = DataFrame({'datetime': dr}, index=dr)

expected = df.to_records()
result = df.tz_convert("UTC").to_records()
expected = df.to_records()
result = df.tz_convert("UTC").to_records()

# both converted to UTC, so they are equal
tm.assert_numpy_array_equal(result, expected)
# both converted to UTC, so they are equal
tm.assert_numpy_array_equal(result, expected)
Loading

0 comments on commit f040ed2

Please sign in to comment.