Skip to content

Commit

Permalink
FEAT-#5141: Implement 2D insertion of Modin DFs in .__setitem__ (#5142
Browse files Browse the repository at this point in the history
)

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Oct 21, 2022
1 parent 8eca03d commit 6cc441a
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
21 changes: 21 additions & 0 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2527,6 +2527,27 @@ def __setitem__(self, key, value):
self._update_inplace(new_qc)
# self.loc[:, key] = value
return
elif (
isinstance(key, list)
and isinstance(value, type(self))
# Mixed case is more complicated, it's defaulting to pandas for now
and all((x not in self.columns for x in key))
):
if len(key) != len(value.columns):
raise ValueError("Columns must be same length as key")

# Aligning the value's columns with the key
if not np.array_equal(value.columns, key):
value = value.set_axis(key, axis=1)

new_qc = self._query_compiler.insert_item(
axis=1,
loc=len(self.columns),
value=value._query_compiler,
how="left",
)
self._update_inplace(new_qc)
return

def setitem_unhashable_key(df, value):
df[key] = value
Expand Down
51 changes: 51 additions & 0 deletions modin/pandas/test/dataframe/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2047,6 +2047,57 @@ def _make_copy(df1, df2):
eval_setitem(modin_df, pandas_df, df_value[["value_col1"]], key)


def test_setitem_2d_insertion():
    """Exercise ``df[list_of_new_columns] = other_df`` through ``eval_setitem``.

    Four scenarios are run in sequence on the same pair of test frames:
    the key equals the value's columns, the key holds the same labels in a
    different order, the key holds entirely different labels, and the key
    is longer than the value's column set (the error path).
    """

    def build_value_picker(modin_value, pandas_value):
        """Return a callable that selects the value matching the frame it is given."""

        def pick_value(source_df, *args, **kwargs):
            # NOTE(review): `pd` is presumably the `modin.pandas` alias of the
            # test module, so this branch fires for Modin objects - confirm.
            if isinstance(source_df, (pd.DataFrame, pd.Series)):
                return modin_value
            return pandas_value

        return pick_value

    md_df, native_df = create_test_dfs(test_data["int_data"])

    def check_insertion(md_value, native_value, key):
        """Run ``eval_setitem`` for one (value, key) combination."""
        eval_setitem(
            md_df,
            native_df,
            build_value_picker(md_value, native_value),
            col=key,
        )

    # Scenario 1: the key is exactly the value's column labels.
    row_count = len(md_df)
    md_value, native_value = create_test_dfs(
        {label: np.arange(row_count) for label in ("new_value1", "new_value2")}
    )
    check_insertion(md_value, native_value, ["new_value1", "new_value2"])

    # Scenario 2: same labels on both sides, but the key lists them reversed.
    renamed = ["new_value3", "new_value4"]
    for frame in (md_value, native_value):
        frame.columns = renamed
    check_insertion(md_value, native_value, ["new_value4", "new_value3"])

    # Scenario 3: the key's labels do not appear in the value's columns at all.
    renamed = ["new_value5", "new_value6"]
    for frame in (md_value, native_value):
        frame.columns = renamed
    check_insertion(md_value, native_value, ["__new_value5", "__new_value6"])

    # Scenario 4: one value column against a two-label key; both
    # implementations are expected to raise the same exception.
    check_insertion(
        md_value.iloc[:, [0]],
        native_value.iloc[:, [0]],
        ["new_value7", "new_value8"],
    )


def test___setitem__single_item_in_series():
# Test assigning a single item in a Series for issue
# https://github.com/modin-project/modin/issues/3860
Expand Down

0 comments on commit 6cc441a

Please sign in to comment.