Skip to content

Commit

Permalink
preserve dtypes for empty dataframes
Browse files: browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed May 13, 2024
1 parent 7d7914b commit 981c74e
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
5 changes: 4 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3321,8 +3321,11 @@ def _extract_partitions(self):
if self._partitions.size > 0:
return self._partitions
else:
+ dtypes = None
+ if self.has_materialized_dtypes:
+ dtypes = self.dtypes

Check warning on line 3326 in modin/core/dataframe/pandas/dataframe/dataframe.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/pandas/dataframe/dataframe.py#L3324-L3326

Added lines #L3324 - L3326 were not covered by tests
return self._partition_mgr_cls.create_partition_from_metadata(
- index=self.index, columns=self.columns
+ index=self.index, columns=self.columns, dtypes=dtypes
)

@lazy_metadata_decorator(apply_axis="both")
Expand Down
10 changes: 7 additions & 3 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import warnings
from abc import ABC
from functools import wraps
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Optional

import numpy as np
import pandas
Expand Down Expand Up @@ -183,12 +183,16 @@ def preprocess_func(cls, map_func):
# END Abstract Methods

@classmethod
- def create_partition_from_metadata(cls, **metadata):
+ def create_partition_from_metadata(
+ cls, dtypes: Optional[pandas.Series] = None, **metadata
+ ):
"""
Create NumPy array of partitions that holds an empty dataframe with given metadata.
Parameters
----------
+ dtypes : pandas.Series, optional
+ Dtypes that will be used when calling `astype`.
**metadata : dict
Metadata that has to be wrapped in a partition.
Expand All @@ -197,7 +201,7 @@ def create_partition_from_metadata(cls, **metadata):
np.ndarray
A NumPy 2D array of a single partition which contains the data.
"""
- metadata_dataframe = pandas.DataFrame(**metadata)
+ metadata_dataframe = pandas.DataFrame(**metadata).astype(dtypes)

Check warning on line 204 in modin/core/dataframe/pandas/partitioning/partition_manager.py

View check run for this annotation

Codecov / codecov/patch

modin/core/dataframe/pandas/partitioning/partition_manager.py#L204

Added line #L204 was not covered by tests
return np.array([[cls._partition_class.put(metadata_dataframe)]])

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion modin/tests/pandas/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_join_empty(how):
data = np.random.randint(0, 100, size=(64, 64))
eval_general(
*create_test_dfs(data),
- lambda df: df.join(df.iloc[:0], how=how, lsuffix="_caller"),
+ lambda df: df.join(df.iloc[:0], on=1, how=how, lsuffix="_caller"),
)


Expand Down

0 comments on commit 981c74e

Please sign in to comment.