Skip to content

Commit

Permalink
REF: simplify internals.construction (#38400)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Dec 11, 2020
1 parent f7e1f94 commit 1fa0c4c
Showing 1 changed file with 44 additions and 44 deletions.
88 changes: 44 additions & 44 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,59 +525,49 @@ def to_arrays(
if columns is not None:
return [[]] * len(columns), columns
return [], [] # columns if columns is not None else []
if isinstance(data[0], (list, tuple)):
return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
elif isinstance(data[0], abc.Mapping):
return _list_of_dict_to_arrays(
data, columns, coerce_float=coerce_float, dtype=dtype
)
elif isinstance(data[0], ABCSeries):
return _list_of_series_to_arrays(
data, columns, coerce_float=coerce_float, dtype=dtype
)

elif isinstance(data[0], Categorical):
if columns is None:
columns = ibase.default_index(len(data))
return data, columns
elif (
isinstance(data, (np.ndarray, ABCSeries, Index))
and data.dtype.names is not None
):

elif isinstance(data, np.ndarray) and data.dtype.names is not None:
# e.g. recarray
columns = list(data.dtype.names)
arrays = [data[k] for k in columns]
return arrays, columns

if isinstance(data[0], (list, tuple)):
content, columns = _list_to_arrays(data, columns)
elif isinstance(data[0], abc.Mapping):
content, columns = _list_of_dict_to_arrays(data, columns)
elif isinstance(data[0], ABCSeries):
content, columns = _list_of_series_to_arrays(data, columns)
else:
# last ditch effort
data = [tuple(x) for x in data]
return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
content, columns = _list_to_arrays(data, columns)

content, columns = _finalize_columns_and_data(content, columns, dtype, coerce_float)
return content, columns


def _list_to_arrays(
data: List[Scalar],
columns: Union[Index, List],
coerce_float: bool = False,
dtype: Optional[DtypeObj] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
if len(data) > 0 and isinstance(data[0], tuple):
content = list(lib.to_object_array_tuples(data).T)
# Note: we already check len(data) > 0 before getting here
if isinstance(data[0], tuple):
content = lib.to_object_array_tuples(data)
else:
# list of lists
content = list(lib.to_object_array(data).T)
# gh-26429 do not raise user-facing AssertionError
try:
columns = _validate_or_indexify_columns(content, columns)
result = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
except AssertionError as e:
raise ValueError(e) from e
return result, columns
content = lib.to_object_array(data)
return content, columns


def _list_of_series_to_arrays(
data: List,
columns: Union[Index, List],
coerce_float: bool = False,
dtype: Optional[DtypeObj] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
if columns is None:
# We know pass_data is non-empty because data[0] is a Series
Expand All @@ -600,22 +590,14 @@ def _list_of_series_to_arrays(
values = extract_array(s, extract_numpy=True)
aligned_values.append(algorithms.take_1d(values, indexer))

values = np.vstack(aligned_values)
content = np.vstack(aligned_values)

if values.dtype == np.object_:
content = list(values.T)
columns = _validate_or_indexify_columns(content, columns)
content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
return content, columns
else:
return values.T, columns
return content, columns


def _list_of_dict_to_arrays(
data: List[Dict],
columns: Union[Index, List],
coerce_float: bool = False,
dtype: Optional[DtypeObj] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
"""
Convert list of dicts to numpy arrays
Expand All @@ -630,8 +612,6 @@ def _list_of_dict_to_arrays(
data : iterable
collection of records (OrderedDict, dict)
columns: iterables or None
coerce_float : bool
dtype : np.dtype
Returns
-------
Expand All @@ -647,9 +627,29 @@ def _list_of_dict_to_arrays(
# classes
data = [(type(d) is dict) and d or dict(d) for d in data]

content = list(lib.dicts_to_array(data, list(columns)).T)
columns = _validate_or_indexify_columns(content, columns)
content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
content = lib.dicts_to_array(data, list(columns))
return content, columns


def _finalize_columns_and_data(
    content: np.ndarray,
    columns: Optional[Union[Index, List]],
    dtype: Optional[DtypeObj],
    coerce_float: bool,
) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]:
    """
    Split ``content`` into per-column arrays and settle the column labels.

    Parameters
    ----------
    content : np.ndarray
        Array whose transpose is iterated to obtain one array per column.
    columns : Index, list or None
        Handed to ``_validate_or_indexify_columns`` together with the
        per-column arrays; its return value becomes the final columns.
    dtype : optional
        Forwarded to ``_convert_object_array`` for object-dtype content.
    coerce_float : bool
        Forwarded to ``_convert_object_array`` for object-dtype content.

    Returns
    -------
    tuple
        ``(arrays, columns)``: the list of per-column arrays and the
        validated columns.

    Raises
    ------
    ValueError
        If ``_validate_or_indexify_columns`` raises an AssertionError.
    """
    arrays = list(content.T)

    try:
        columns = _validate_or_indexify_columns(arrays, columns)
    except AssertionError as err:
        # GH#26429 do not raise user-facing AssertionError
        raise ValueError(err) from err

    # Only the first array's dtype is inspected to decide whether the
    # object-array conversion step is needed.
    needs_conversion = bool(arrays) and arrays[0].dtype == np.object_
    if not needs_conversion:
        return arrays, columns

    converted = _convert_object_array(arrays, dtype=dtype, coerce_float=coerce_float)
    return converted, columns


Expand Down

0 comments on commit 1fa0c4c

Please sign in to comment.