Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REFACTOR-#4796: Introduce constant for __reduced__ column name #4799

Merged
merged 7 commits into from
Aug 10, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release_notes/release_notes-0.16.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Key Features and Updates
* REFACTOR-#4774: remove `_build_treereduce_func` call from `_compute_dtypes` (#4775)
* REFACTOR-#4750: Delete BaseDataframeAxisPartition.shuffle (#4751)
* REFACTOR-#4722: Stop suppressing undefined name lint (#4723)
* REFACTOR-#4796: Introduce constant for __reduced__ column name (#4799)
noloerino marked this conversation as resolved.
Show resolved Hide resolved
* Pandas API implementations and improvements
* FEAT-#4670: Implement convert_dtypes by mapping across partitions (#4671)
* OmniSci enhancements
Expand Down
4 changes: 2 additions & 2 deletions modin/core/dataframe/algebra/default2pandas/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""Module houses default functions builder class."""

from modin.core.dataframe.algebra import Operator
from modin.utils import try_cast_to_pandas
from modin.utils import try_cast_to_pandas, MODIN_UNNAMED_SERIES_LABEL

from pandas.core.dtypes.common import is_list_like
import pandas
Expand Down Expand Up @@ -102,7 +102,7 @@ def applyier(df, *args, **kwargs):
)
if isinstance(result, pandas.Series):
if result.name is None:
result.name = "__reduced__"
result.name = MODIN_UNNAMED_SERIES_LABEL
result = result.to_frame()

inplace_method = kwargs.get("inplace", False)
Expand Down
10 changes: 7 additions & 3 deletions modin/core/dataframe/algebra/default2pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import pandas
from pandas.core.dtypes.common import is_list_like

from modin.utils import MODIN_UNNAMED_SERIES_LABEL


# FIXME: there is no sense in keeping `GroupBy` and `GroupByDefault` logic in different
# classes. They should be combined.
Expand Down Expand Up @@ -56,7 +58,7 @@ def try_cast_series(df):
df = df.squeeze(axis=1)
if not isinstance(df, pandas.Series):
return df
if df.name == "__reduced__":
if df.name == MODIN_UNNAMED_SERIES_LABEL:
df.name = None
return df

Expand Down Expand Up @@ -245,7 +247,7 @@ def fn(
inplace=True,
)

if result.index.name == "__reduced__":
if result.index.name == MODIN_UNNAMED_SERIES_LABEL:
result.index.name = None

return result
Expand Down Expand Up @@ -465,7 +467,9 @@ def handle_as_index(
internal_by_cols = pandas.Index(internal_by_cols)

internal_by_cols = (
internal_by_cols[~internal_by_cols.str.startswith("__reduced__", na=False)]
internal_by_cols[
~internal_by_cols.str.startswith(MODIN_UNNAMED_SERIES_LABEL, na=False)
]
if hasattr(internal_by_cols, "str")
else internal_by_cols
)
Expand Down
4 changes: 2 additions & 2 deletions modin/core/dataframe/algebra/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from .tree_reduce import TreeReduce
from .default2pandas.groupby import GroupBy
from modin.utils import try_cast_to_pandas, hashable
from modin.utils import try_cast_to_pandas, hashable, MODIN_UNNAMED_SERIES_LABEL
from modin.error_message import ErrorMessage


Expand Down Expand Up @@ -350,7 +350,7 @@ def caller(
)

result = query_compiler.__constructor__(new_modin_frame)
if result.index.name == "__reduced__":
if result.index.name == MODIN_UNNAMED_SERIES_LABEL:
result.index.name = None
return result

Expand Down
7 changes: 4 additions & 3 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from modin.pandas.indexing import is_range_like
from modin.pandas.utils import is_full_grab_slice, check_both_not_none
from modin.logging import ClassLogger
from modin.utils import MODIN_UNNAMED_SERIES_LABEL


def lazy_metadata_decorator(apply_axis=None, axis_arg=-1, transpose=False):
Expand Down Expand Up @@ -1417,11 +1418,11 @@ def _tree_reduce_func(df, *args, **kwargs):
# line up with the index of the data based on how pandas creates a
# DataFrame from a Series.
result = pandas.DataFrame(series_result).T
result.index = ["__reduced__"]
result.index = [MODIN_UNNAMED_SERIES_LABEL]
else:
result = pandas.DataFrame(series_result)
if isinstance(series_result, pandas.Series):
result.columns = ["__reduced__"]
result.columns = [MODIN_UNNAMED_SERIES_LABEL]
return result

return _tree_reduce_func
Expand All @@ -1444,7 +1445,7 @@ def _compute_tree_reduce_metadata(self, axis, new_parts):
"""
new_axes, new_axes_lengths = [0, 0], [0, 0]

new_axes[axis] = ["__reduced__"]
new_axes[axis] = [MODIN_UNNAMED_SERIES_LABEL]
new_axes[axis ^ 1] = self.axes[axis ^ 1]

new_axes_lengths[axis] = [1]
Expand Down
3 changes: 2 additions & 1 deletion modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from modin.error_message import ErrorMessage
from . import doc_utils
from modin.logging import ClassLogger
from modin.utils import MODIN_UNNAMED_SERIES_LABEL

from pandas.core.dtypes.common import is_scalar
import pandas.core.resample
Expand Down Expand Up @@ -898,7 +899,7 @@ def columnarize(self):
Transposed new QueryCompiler or self.
"""
if len(self.columns) != 1 or (
len(self.index) == 1 and self.index[0] == "__reduced__"
len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
):
return self.transpose()
return self
Expand Down
36 changes: 23 additions & 13 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
wrap_udf_function,
hashable,
_inherit_docstrings,
MODIN_UNNAMED_SERIES_LABEL,
)
from modin.core.dataframe.algebra import (
Fold,
Expand Down Expand Up @@ -246,7 +247,7 @@ def default_to_pandas(self, pandas_op, *args, **kwargs):
result = pandas_op(self.to_pandas(), *args, **kwargs)
if isinstance(result, pandas.Series):
if result.name is None:
result.name = "__reduced__"
result.name = MODIN_UNNAMED_SERIES_LABEL
result = result.to_frame()
if isinstance(result, pandas.DataFrame):
return self.from_pandas(result, type(self._modin_frame))
Expand Down Expand Up @@ -674,7 +675,7 @@ def transpose(self, *args, **kwargs):

def columnarize(self):
if len(self.columns) != 1 or (
len(self.index) == 1 and self.index[0] == "__reduced__"
len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
):
return self.transpose()
return self
Expand Down Expand Up @@ -1015,7 +1016,9 @@ def resample_prod(self, resample_kwargs, _method, min_count, *args, **kwargs):
)

def resample_size(self, resample_kwargs):
return self._resample_func(resample_kwargs, "size", new_columns=["__reduced__"])
return self._resample_func(
resample_kwargs, "size", new_columns=[MODIN_UNNAMED_SERIES_LABEL]
)

def resample_sem(self, resample_kwargs, _method, *args, **kwargs):
return self._resample_func(
Expand Down Expand Up @@ -1178,7 +1181,7 @@ def unstack(self, level, fill_value):
and len(level) == self.index.nlevels
):
axis = 1
new_columns = ["__reduced__"]
new_columns = [MODIN_UNNAMED_SERIES_LABEL]
need_reindex = True
else:
axis = 0
Expand Down Expand Up @@ -1331,7 +1334,7 @@ def stack(self, level, dropna):
and is_list_like(level)
and len(level) == self.columns.nlevels
):
new_columns = ["__reduced__"]
new_columns = [MODIN_UNNAMED_SERIES_LABEL]
else:
new_columns = None

Expand Down Expand Up @@ -1730,15 +1733,17 @@ def map_func(df, other=other, squeeze_self=squeeze_self):
num_cols = other.shape[1] if len(other.shape) > 1 else 1
if len(self.columns) == 1:
new_index = (
["__reduced__"]
[MODIN_UNNAMED_SERIES_LABEL]
if (len(self.index) == 1 or squeeze_self) and num_cols == 1
else None
)
new_columns = ["__reduced__"] if squeeze_self and num_cols == 1 else None
new_columns = (
[MODIN_UNNAMED_SERIES_LABEL] if squeeze_self and num_cols == 1 else None
)
axis = 0
else:
new_index = self.index
new_columns = ["__reduced__"] if num_cols == 1 else None
new_columns = [MODIN_UNNAMED_SERIES_LABEL] if num_cols == 1 else None
axis = 1

new_modin_frame = self._modin_frame.apply_full_axis(
Expand Down Expand Up @@ -1785,7 +1790,7 @@ def map_func(df, n=n, keep=keep, columns=columns):
)

if columns is None:
new_columns = ["__reduced__"]
new_columns = [MODIN_UNNAMED_SERIES_LABEL]
else:
new_columns = self.columns

Expand All @@ -1810,7 +1815,9 @@ def eval(self, expr, **kwargs):
)
if isinstance(empty_eval, pandas.Series):
new_columns = (
[empty_eval.name] if empty_eval.name is not None else ["__reduced__"]
[empty_eval.name]
if empty_eval.name is not None
else [MODIN_UNNAMED_SERIES_LABEL]
vnlitvinov marked this conversation as resolved.
Show resolved Hide resolved
)
else:
new_columns = empty_eval.columns
Expand Down Expand Up @@ -2584,7 +2591,7 @@ def groupby_size(
default_to_pandas_func=lambda grp: grp.size(),
)
if groupby_kwargs.get("as_index", True):
result.columns = ["__reduced__"]
result.columns = [MODIN_UNNAMED_SERIES_LABEL]
elif isinstance(result.columns, pandas.MultiIndex):
# Dropping one extra-level which was added because of renaming aggregation
result.columns = (
Expand Down Expand Up @@ -2835,7 +2842,9 @@ def compute_groupby(df, drop=False, partition_idx=0):
result = pandas.DataFrame(index=grouped_df.size().index)
if isinstance(result, pandas.Series):
result = result.to_frame(
result.name if result.name is not None else "__reduced__"
result.name
if result.name is not None
else MODIN_UNNAMED_SERIES_LABEL
noloerino marked this conversation as resolved.
Show resolved Hide resolved
)

selection = agg_func.keys() if isinstance(agg_func, dict) else None
Expand Down Expand Up @@ -2866,7 +2875,8 @@ def compute_groupby(df, drop=False, partition_idx=0):
else:
new_index_names = tuple(
None
if isinstance(name, str) and name.startswith("__reduced__")
if isinstance(name, str)
and name.startswith(MODIN_UNNAMED_SERIES_LABEL)
else name
for name in result.index.names
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas.core.dtypes.common import get_dtype, is_list_like, is_bool_dtype
from modin.error_message import ErrorMessage
from modin.pandas.indexing import is_range_like
from modin.utils import MODIN_UNNAMED_SERIES_LABEL
import pandas as pd
from typing import List, Hashable, Optional, Tuple, Union

Expand Down Expand Up @@ -2323,7 +2324,7 @@ def _index_name(self, col):
match = re.search("__index__\\d+_(.*)", col)
if match:
name = match.group(1)
if name in ("__None__", "__reduced__"):
if name in ("__None__", MODIN_UNNAMED_SERIES_LABEL):
return None
return name

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
_get_axis as default_axis_getter,
)
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.utils import _inherit_docstrings
from modin.utils import _inherit_docstrings, MODIN_UNNAMED_SERIES_LABEL
from modin.error_message import ErrorMessage
import pandas

Expand Down Expand Up @@ -315,7 +315,7 @@ def groupby_size(
)
if as_index:
shape_hint = "column"
new_frame = new_frame._set_columns(["__reduced__"])
new_frame = new_frame._set_columns([MODIN_UNNAMED_SERIES_LABEL])
else:
shape_hint = None
new_frame = new_frame._set_columns(["size"]).reset_index(drop=False)
Expand Down Expand Up @@ -445,7 +445,9 @@ def _agg(self, agg, axis=0, level=None, **kwargs):

new_frame = self._modin_frame.agg(agg)
new_frame = new_frame._set_index(
pandas.Index.__new__(pandas.Index, data=["__reduced__"], dtype="O")
pandas.Index.__new__(
pandas.Index, data=[MODIN_UNNAMED_SERIES_LABEL], dtype="O"
)
)
return self.__constructor__(new_frame, shape_hint="row")

Expand Down Expand Up @@ -718,7 +720,7 @@ def columnarize(self):
return self.transpose()

if len(self.columns) != 1 or (
len(self.index) == 1 and self.index[0] == "__reduced__"
len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
):
res = self.transpose()
res._shape_hint = "column"
Expand Down
10 changes: 8 additions & 2 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@

from modin.pandas import Categorical
from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings, to_pandas, hashable, append_to_docstring
from modin.utils import (
_inherit_docstrings,
to_pandas,
hashable,
append_to_docstring,
MODIN_UNNAMED_SERIES_LABEL,
)
from modin.config import Engine, IsExperimental, PersistentPickle
from .utils import (
from_pandas,
Expand Down Expand Up @@ -385,7 +391,7 @@ def _apply(
# the 'else' branch also handles 'result_type == "expand"' since it makes the output type
# depend on the `func` result (Series for a scalar, DataFrame for list-like)
else:
reduced_index = pandas.Index(["__reduced__"])
reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])
if query_compiler.get_axis(axis).equals(
reduced_index
) or query_compiler.get_axis(axis ^ 1).equals(reduced_index):
Expand Down
5 changes: 3 additions & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
wrap_udf_function,
hashable,
wrap_into_list,
MODIN_UNNAMED_SERIES_LABEL,
)
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy
Expand Down Expand Up @@ -678,8 +679,8 @@ def size(self):
if not self._kwargs.get("as_index") and not isinstance(result, Series):
result = result.rename(columns={0: "size"})
result = (
result.rename(columns={"__reduced__": "index"})
if "__reduced__" in result.columns
result.rename(columns={MODIN_UNNAMED_SERIES_LABEL: "index"})
if MODIN_UNNAMED_SERIES_LABEL in result.columns
else result
)
elif isinstance(self._df, Series):
Expand Down
Loading