Skip to content

Commit

Permalink
ENH: Add support for dataclasses in the DataFrame constructor (pandas…
Browse files Browse the repository at this point in the history
  • Loading branch information
asosnovsky authored Mar 15, 2020
1 parent 2b34275 commit 6620dc6
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 1 deletion.
22 changes: 22 additions & 0 deletions doc/source/user_guide/dsintro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,28 @@ The result will be a DataFrame with the same index as the input Series, and
with one column whose name is the original name of the Series (only if no other
column name provided).

.. _basics.dataframe.from_list_dataclasses:

From a list of dataclasses
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 1.1.0

Data Classes, as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>`__,
can be passed into the DataFrame constructor.
Passing a list of dataclasses is equivalent to passing a list of dictionaries.

Please be aware that all values in the list should be dataclasses; mixing
types in the list will result in a TypeError.

.. ipython:: python

    from dataclasses import make_dataclass

    Point = make_dataclass("Point", [("x", int), ("y", int)])

    pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])

**Missing data**

Much more will be said on this topic in the :ref:`Missing data <missing_data>`
Expand Down
1 change: 1 addition & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
is_array_like,
is_bool,
is_complex,
is_dataclass,
is_decimal,
is_dict_like,
is_file_like,
Expand Down
36 changes: 36 additions & 0 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,39 @@ def is_sequence(obj) -> bool:
return not isinstance(obj, (str, bytes))
except (TypeError, AttributeError):
return False


def is_dataclass(item) -> bool:
    """
    Check if the object is a dataclass instance.

    Parameters
    ----------
    item : object
        The object to check.

    Returns
    -------
    bool
        True if ``item`` is an instance of a dataclass. False for anything
        else, including a dataclass type itself, and False whenever the
        ``dataclasses`` module cannot be imported.

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> @dataclass
    ... class Point:
    ...     x: int
    ...     y: int

    >>> is_dataclass(Point)
    False
    >>> is_dataclass(Point(0, 2))
    True
    """
    try:
        # Import the module (not the function) so the stdlib helper does not
        # shadow this function's own name inside its body.
        import dataclasses
    except ImportError:
        # No ``dataclasses`` module on this interpreter, so nothing can be a
        # dataclass instance.
        return False

    # The stdlib check is also true for dataclass *types*; exclude those so
    # only instances are reported.
    return dataclasses.is_dataclass(item) and not isinstance(item, type)
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
ensure_platform_int,
infer_dtype_from_object,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
is_dict_like,
is_dtype_equal,
Expand Down Expand Up @@ -117,6 +118,7 @@
from pandas.core.internals import BlockManager
from pandas.core.internals.construction import (
arrays_to_mgr,
dataclasses_to_dicts,
get_names_from_index,
init_dict,
init_ndarray,
Expand Down Expand Up @@ -474,6 +476,8 @@ def __init__(
if not isinstance(data, (abc.Sequence, ExtensionArray)):
data = list(data)
if len(data) > 0:
if is_dataclass(data[0]):
data = dataclasses_to_dicts(data)
if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1:
if is_named_tuple(data[0]) and columns is None:
columns = data[0]._fields
Expand Down
27 changes: 27 additions & 0 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,33 @@ def _get_axes(N, K, index, columns):
return index, columns


def dataclasses_to_dicts(data):
    """
    Convert a list of dataclass instances to a list of dictionaries.

    Parameters
    ----------
    data : List[dataclass instance]
        Iterable whose elements are all dataclass *instances* (not
        dataclass types).

    Returns
    -------
    List[dict]
        One dictionary per element of ``data``, in the same order, as
        produced by :func:`dataclasses.asdict`.

    Raises
    ------
    TypeError
        Raised by :func:`dataclasses.asdict` if any element of ``data`` is
        not a dataclass instance.

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> @dataclass
    ... class Point:
    ...     x: int
    ...     y: int

    >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)])
    [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}]
    """
    # Imported lazily: ``dataclasses`` is not available on every interpreter
    # this module may be imported on.
    from dataclasses import asdict

    return [asdict(item) for item in data]


# ---------------------------------------------------------------------
# Conversion of Inputs to Arrays

Expand Down
42 changes: 41 additions & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytest
import pytz

from pandas.compat import is_platform_little_endian
from pandas.compat import PY37, is_platform_little_endian
from pandas.compat.numpy import _is_numpy_dev

from pandas.core.dtypes.common import is_integer_dtype
Expand Down Expand Up @@ -1364,6 +1364,46 @@ def test_constructor_list_of_namedtuples(self):
result = DataFrame(tuples, columns=["y", "z"])
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses(self):
    """A list of same-typed dataclass instances yields one column per field."""
    # GH21910
    # Imported inside the test because the module needs Python >= 3.7.
    from dataclasses import make_dataclass

    point_cls = make_dataclass("Point", [("x", int), ("y", int)])
    points = [point_cls(0, 3), point_cls(1, 3)]

    tm.assert_frame_equal(
        DataFrame(points),
        DataFrame({"x": [0, 1], "y": [3, 3]}),
    )

@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses_with_varying_types(self):
    """Different dataclass types in one list give the union of their fields."""
    # GH21910
    # Imported inside the test because the module needs Python >= 3.7.
    from dataclasses import make_dataclass

    point_cls = make_dataclass("Point", [("x", int), ("y", int)])
    hline_cls = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)])
    mixed = [point_cls(0, 3), hline_cls(1, 3, 3)]

    # Fields missing from a given row are expected to be filled with NaN.
    wanted = {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]}

    tm.assert_frame_equal(DataFrame(mixed), DataFrame(wanted))

@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses_error_thrown(self):
    """Mixing dataclass instances with other types raises TypeError."""
    # GH21910
    # Imported inside the test because the module needs Python >= 3.7.
    from dataclasses import make_dataclass

    Point = make_dataclass("Point", [("x", int), ("y", int)])

    # Pin the error to the one dataclasses.asdict raises for a non-dataclass
    # element, so an unrelated TypeError from the constructor cannot make
    # this test pass by accident.
    msg = "should be called on dataclass instances"
    with pytest.raises(TypeError, match=msg):
        DataFrame([Point(0, 0), {"x": 1, "y": 0}])

def test_constructor_list_of_dict_order(self):
# GH10056
data = [
Expand Down

0 comments on commit 6620dc6

Please sign in to comment.