Skip to content

Commit

Permalink
FIX-#5188: Fix getitem_bool when the key is Series with empty partition
Browse files Browse the repository at this point in the history
Signed-off-by: Igoshev, Iaroslav <iaroslav.igoshev@intel.com>
  • Loading branch information
YarShev committed Nov 3, 2022
1 parent ee6c6ae commit f327452
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
4 changes: 3 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2182,7 +2182,9 @@ def applyier(df, internal_indices, other=[], internal_other_indices=[]):
# __getitem__ methods
__getitem_bool = Binary.register(
# r is usually a list, but when r.size == 1, the array is squeezed to a scalar
# NOTE: this page renders the diff without +/- markers. Per the hunk header
# (7 lines before, 9 after; "3 additions & 1 deletion") the single-line lambda
# directly below is the version REMOVED by the commit.
lambda df, r: df[r] if r.size > 1 else df[[r]],
# The three-line lambda below is the version ADDED by the commit.
# `and` binds tighter than `or`, so the condition parses as
#   r.size > 1 or (isinstance(r, (pandas.Series, pandas.DataFrame)) and r.empty)
# i.e. an empty Series/DataFrame key is now used directly as `df[r]` instead of
# falling through to the `df[[r]]` branch.
lambda df, r: df[r]
if r.size > 1 or isinstance(r, (pandas.Series, pandas.DataFrame)) and r.empty
else df[[r]],
join_type="left",
labels="drop",
)
Expand Down
27 changes: 26 additions & 1 deletion modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
extra_test_parameters,
default_to_pandas_ignore_string,
)
from modin.config import NPartitions
from modin.config import NPartitions, MinPartitionSize
from modin.utils import get_current_execution
from modin.test.test_utils import warns_that_defaulting_to_pandas
from modin.pandas.indexing import is_range_like
Expand Down Expand Up @@ -2214,6 +2214,31 @@ def test__getitem_bool_single_row_dataframe():
eval_general(pd, pandas, lambda lib: lib.DataFrame([1])[lib.Series([True])])


def test__getitem_bool_with_empty_partition():
    """Compare boolean-mask ``__getitem__`` between Modin and pandas after ``dropna``.

    A frame with ``MinPartitionSize + 1`` rows is built, its last row is set to
    NaN and then dropped, and the remaining frame is indexed with a boolean
    Series of ``MinPartitionSize`` elements.

    This test case comes from
    https://github.com/modin-project/modin/issues/5188
    """
    n_rows = MinPartitionSize.get()

    # Alternating False/True mask: False at even positions, True at odd ones.
    pandas_series = pandas.Series([bool(i % 2) for i in range(n_rows)])
    modin_series = pd.Series(pandas_series)

    # One more row than the mask has entries; the extra (last) row becomes NaN
    # so that ``dropna`` removes it again below.
    pandas_df = pandas.DataFrame(list(range(n_rows + 1)))
    pandas_df.iloc[n_rows] = np.nan
    modin_df = pd.DataFrame(pandas_df)

    pandas_dropped = pandas_df.dropna()
    modin_dropped = modin_df.dropna()

    def select_by_mask(df):
        # Each library must be indexed with its own Series type.
        if isinstance(df, pd.DataFrame):
            return df[modin_series]
        return df[pandas_series]

    eval_general(modin_dropped, pandas_dropped, select_by_mask)


# This is a very subtle bug that comes from:
# https://github.com/modin-project/modin/issues/4945
def test_lazy_eval_index():
Expand Down

0 comments on commit f327452

Please sign in to comment.