modin-project · YarShev · Aug 10, 2022 · Aug 9, 2022 · Aug 9, 2022 · Aug 9, 2022
@@ -53,6 +53,7 @@ Key Features and Updates
   * REFACTOR-#4774: remove `_build_treereduce_func` call from `_compute_dtypes` (#4775)
   * REFACTOR-#4750: Delete BaseDataframeAxisPartition.shuffle (#4751)
   * REFACTOR-#4722: Stop suppressing undefined name lint (#4723)
+  * REFACTOR-#4796: Introduce constant for __reduced__ column name (#4799)
 * Pandas API implementations and improvements
   * FEAT-#4670: Implement convert_dtypes by mapping across partitions (#4671)
 * OmniSci enhancements

@@ -14,7 +14,7 @@
 """Module houses default functions builder class."""
 
 from modin.core.dataframe.algebra import Operator
-from modin.utils import try_cast_to_pandas
+from modin.utils import try_cast_to_pandas, MODIN_UNNAMED_SERIES_LABEL
 
 from pandas.core.dtypes.common import is_list_like
 import pandas
@@ -102,7 +102,7 @@ def applyier(df, *args, **kwargs):
                 )
             if isinstance(result, pandas.Series):
                 if result.name is None:
-                    result.name = "__reduced__"
+                    result.name = MODIN_UNNAMED_SERIES_LABEL
                 result = result.to_frame()
 
             inplace_method = kwargs.get("inplace", False)

@@ -18,6 +18,8 @@
 import pandas
 from pandas.core.dtypes.common import is_list_like
 
+from modin.utils import MODIN_UNNAMED_SERIES_LABEL
+
 
 # FIXME: there is no sense in keeping `GroupBy` and `GroupByDefault` logic in different
 # classes. They should be combined.
@@ -56,7 +58,7 @@ def try_cast_series(df):
                 df = df.squeeze(axis=1)
             if not isinstance(df, pandas.Series):
                 return df
-            if df.name == "__reduced__":
+            if df.name == MODIN_UNNAMED_SERIES_LABEL:
                 df.name = None
             return df
 
@@ -245,7 +247,7 @@ def fn(
                     inplace=True,
                 )
 
-            if result.index.name == "__reduced__":
+            if result.index.name == MODIN_UNNAMED_SERIES_LABEL:
                 result.index.name = None
 
             return result
@@ -465,7 +467,9 @@ def handle_as_index(
             internal_by_cols = pandas.Index(internal_by_cols)
 
         internal_by_cols = (
-            internal_by_cols[~internal_by_cols.str.startswith("__reduced__", na=False)]
+            internal_by_cols[
+                ~internal_by_cols.str.startswith(MODIN_UNNAMED_SERIES_LABEL, na=False)
+            ]
             if hasattr(internal_by_cols, "str")
             else internal_by_cols
         )

@@ -18,7 +18,7 @@
 
 from .tree_reduce import TreeReduce
 from .default2pandas.groupby import GroupBy
-from modin.utils import try_cast_to_pandas, hashable
+from modin.utils import try_cast_to_pandas, hashable, MODIN_UNNAMED_SERIES_LABEL
 from modin.error_message import ErrorMessage
 
 
@@ -350,7 +350,7 @@ def caller(
         )
 
         result = query_compiler.__constructor__(new_modin_frame)
-        if result.index.name == "__reduced__":
+        if result.index.name == MODIN_UNNAMED_SERIES_LABEL:
             result.index.name = None
         return result
 

@@ -42,6 +42,7 @@
 from modin.pandas.indexing import is_range_like
 from modin.pandas.utils import is_full_grab_slice, check_both_not_none
 from modin.logging import ClassLogger
+from modin.utils import MODIN_UNNAMED_SERIES_LABEL
 
 
 def lazy_metadata_decorator(apply_axis=None, axis_arg=-1, transpose=False):
@@ -1417,11 +1418,11 @@ def _tree_reduce_func(df, *args, **kwargs):
                 # line up with the index of the data based on how pandas creates a
                 # DataFrame from a Series.
                 result = pandas.DataFrame(series_result).T
-                result.index = ["__reduced__"]
+                result.index = [MODIN_UNNAMED_SERIES_LABEL]
             else:
                 result = pandas.DataFrame(series_result)
                 if isinstance(series_result, pandas.Series):
-                    result.columns = ["__reduced__"]
+                    result.columns = [MODIN_UNNAMED_SERIES_LABEL]
             return result
 
         return _tree_reduce_func
@@ -1444,7 +1445,7 @@ def _compute_tree_reduce_metadata(self, axis, new_parts):
         """
         new_axes, new_axes_lengths = [0, 0], [0, 0]
 
-        new_axes[axis] = ["__reduced__"]
+        new_axes[axis] = [MODIN_UNNAMED_SERIES_LABEL]
         new_axes[axis ^ 1] = self.axes[axis ^ 1]
 
         new_axes_lengths[axis] = [1]

@@ -33,6 +33,7 @@
 from modin.error_message import ErrorMessage
 from . import doc_utils
 from modin.logging import ClassLogger
+from modin.utils import MODIN_UNNAMED_SERIES_LABEL
 
 from pandas.core.dtypes.common import is_scalar
 import pandas.core.resample
@@ -898,7 +899,7 @@ def columnarize(self):
             Transposed new QueryCompiler or self.
         """
         if len(self.columns) != 1 or (
-            len(self.index) == 1 and self.index[0] == "__reduced__"
+            len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
         ):
             return self.transpose()
         return self

@@ -42,6 +42,7 @@
     wrap_udf_function,
     hashable,
     _inherit_docstrings,
+    MODIN_UNNAMED_SERIES_LABEL,
 )
 from modin.core.dataframe.algebra import (
     Fold,
@@ -246,7 +247,7 @@ def default_to_pandas(self, pandas_op, *args, **kwargs):
         result = pandas_op(self.to_pandas(), *args, **kwargs)
         if isinstance(result, pandas.Series):
             if result.name is None:
-                result.name = "__reduced__"
+                result.name = MODIN_UNNAMED_SERIES_LABEL
             result = result.to_frame()
         if isinstance(result, pandas.DataFrame):
             return self.from_pandas(result, type(self._modin_frame))
@@ -674,7 +675,7 @@ def transpose(self, *args, **kwargs):
 
     def columnarize(self):
         if len(self.columns) != 1 or (
-            len(self.index) == 1 and self.index[0] == "__reduced__"
+            len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
         ):
             return self.transpose()
         return self
@@ -1015,7 +1016,9 @@ def resample_prod(self, resample_kwargs, _method, min_count, *args, **kwargs):
         )
 
     def resample_size(self, resample_kwargs):
-        return self._resample_func(resample_kwargs, "size", new_columns=["__reduced__"])
+        return self._resample_func(
+            resample_kwargs, "size", new_columns=[MODIN_UNNAMED_SERIES_LABEL]
+        )
 
     def resample_sem(self, resample_kwargs, _method, *args, **kwargs):
         return self._resample_func(
@@ -1178,7 +1181,7 @@ def unstack(self, level, fill_value):
             and len(level) == self.index.nlevels
         ):
             axis = 1
-            new_columns = ["__reduced__"]
+            new_columns = [MODIN_UNNAMED_SERIES_LABEL]
             need_reindex = True
         else:
             axis = 0
@@ -1331,7 +1334,7 @@ def stack(self, level, dropna):
             and is_list_like(level)
             and len(level) == self.columns.nlevels
         ):
-            new_columns = ["__reduced__"]
+            new_columns = [MODIN_UNNAMED_SERIES_LABEL]
         else:
             new_columns = None
 
@@ -1730,15 +1733,17 @@ def map_func(df, other=other, squeeze_self=squeeze_self):
         num_cols = other.shape[1] if len(other.shape) > 1 else 1
         if len(self.columns) == 1:
             new_index = (
-                ["__reduced__"]
+                [MODIN_UNNAMED_SERIES_LABEL]
                 if (len(self.index) == 1 or squeeze_self) and num_cols == 1
                 else None
             )
-            new_columns = ["__reduced__"] if squeeze_self and num_cols == 1 else None
+            new_columns = (
+                [MODIN_UNNAMED_SERIES_LABEL] if squeeze_self and num_cols == 1 else None
+            )
             axis = 0
         else:
             new_index = self.index
-            new_columns = ["__reduced__"] if num_cols == 1 else None
+            new_columns = [MODIN_UNNAMED_SERIES_LABEL] if num_cols == 1 else None
             axis = 1
 
         new_modin_frame = self._modin_frame.apply_full_axis(
@@ -1785,7 +1790,7 @@ def map_func(df, n=n, keep=keep, columns=columns):
             )
 
         if columns is None:
-            new_columns = ["__reduced__"]
+            new_columns = [MODIN_UNNAMED_SERIES_LABEL]
         else:
             new_columns = self.columns
 
@@ -1810,7 +1815,9 @@ def eval(self, expr, **kwargs):
         )
         if isinstance(empty_eval, pandas.Series):
             new_columns = (
-                [empty_eval.name] if empty_eval.name is not None else ["__reduced__"]
+                [empty_eval.name]
+                if empty_eval.name is not None
+                else [MODIN_UNNAMED_SERIES_LABEL]
             )
         else:
             new_columns = empty_eval.columns
@@ -2584,7 +2591,7 @@ def groupby_size(
             default_to_pandas_func=lambda grp: grp.size(),
         )
         if groupby_kwargs.get("as_index", True):
-            result.columns = ["__reduced__"]
+            result.columns = [MODIN_UNNAMED_SERIES_LABEL]
         elif isinstance(result.columns, pandas.MultiIndex):
             # Dropping one extra-level which was added because of renaming aggregation
             result.columns = (
@@ -2835,7 +2842,9 @@ def compute_groupby(df, drop=False, partition_idx=0):
                     result = pandas.DataFrame(index=grouped_df.size().index)
                 if isinstance(result, pandas.Series):
                     result = result.to_frame(
-                        result.name if result.name is not None else "__reduced__"
+                        result.name
+                        if result.name is not None
+                        else MODIN_UNNAMED_SERIES_LABEL
                     )
 
                 selection = agg_func.keys() if isinstance(agg_func, dict) else None
@@ -2866,7 +2875,8 @@ def compute_groupby(df, drop=False, partition_idx=0):
                 else:
                     new_index_names = tuple(
                         None
-                        if isinstance(name, str) and name.startswith("__reduced__")
+                        if isinstance(name, str)
+                        and name.startswith(MODIN_UNNAMED_SERIES_LABEL)
                         else name
                         for name in result.index.names
                     )

@@ -27,6 +27,7 @@
 from pandas.core.dtypes.common import get_dtype, is_list_like, is_bool_dtype
 from modin.error_message import ErrorMessage
 from modin.pandas.indexing import is_range_like
+from modin.utils import MODIN_UNNAMED_SERIES_LABEL
 import pandas as pd
 from typing import List, Hashable, Optional, Tuple, Union
 
@@ -2323,7 +2324,7 @@ def _index_name(self, col):
         match = re.search("__index__\\d+_(.*)", col)
         if match:
             name = match.group(1)
-            if name in ("__None__", "__reduced__"):
+            if name in ("__None__", MODIN_UNNAMED_SERIES_LABEL):
                 return None
             return name
 

@@ -23,7 +23,7 @@
     _get_axis as default_axis_getter,
 )
 from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
-from modin.utils import _inherit_docstrings
+from modin.utils import _inherit_docstrings, MODIN_UNNAMED_SERIES_LABEL
 from modin.error_message import ErrorMessage
 import pandas
 
@@ -315,7 +315,7 @@ def groupby_size(
         )
         if as_index:
             shape_hint = "column"
-            new_frame = new_frame._set_columns(["__reduced__"])
+            new_frame = new_frame._set_columns([MODIN_UNNAMED_SERIES_LABEL])
         else:
             shape_hint = None
             new_frame = new_frame._set_columns(["size"]).reset_index(drop=False)
@@ -445,7 +445,9 @@ def _agg(self, agg, axis=0, level=None, **kwargs):
 
         new_frame = self._modin_frame.agg(agg)
         new_frame = new_frame._set_index(
-            pandas.Index.__new__(pandas.Index, data=["__reduced__"], dtype="O")
+            pandas.Index.__new__(
+                pandas.Index, data=[MODIN_UNNAMED_SERIES_LABEL], dtype="O"
+            )
         )
         return self.__constructor__(new_frame, shape_hint="row")
 
@@ -718,7 +720,7 @@ def columnarize(self):
             return self.transpose()
 
         if len(self.columns) != 1 or (
-            len(self.index) == 1 and self.index[0] == "__reduced__"
+            len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL
         ):
             res = self.transpose()
             res._shape_hint = "column"

@@ -36,7 +36,13 @@
 
 from modin.pandas import Categorical
 from modin.error_message import ErrorMessage
-from modin.utils import _inherit_docstrings, to_pandas, hashable, append_to_docstring
+from modin.utils import (
+    _inherit_docstrings,
+    to_pandas,
+    hashable,
+    append_to_docstring,
+    MODIN_UNNAMED_SERIES_LABEL,
+)
 from modin.config import Engine, IsExperimental, PersistentPickle
 from .utils import (
     from_pandas,
@@ -385,7 +391,7 @@ def _apply(
         # the 'else' branch also handles 'result_type == "expand"' since it makes the output type
         # depend on the `func` result (Series for a scalar, DataFrame for list-like)
         else:
-            reduced_index = pandas.Index(["__reduced__"])
+            reduced_index = pandas.Index([MODIN_UNNAMED_SERIES_LABEL])
             if query_compiler.get_axis(axis).equals(
                 reduced_index
             ) or query_compiler.get_axis(axis ^ 1).equals(reduced_index):

@@ -29,6 +29,7 @@
     wrap_udf_function,
     hashable,
     wrap_into_list,
+    MODIN_UNNAMED_SERIES_LABEL,
 )
 from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
 from modin.core.dataframe.algebra.default2pandas.groupby import GroupBy
@@ -678,8 +679,8 @@ def size(self):
         if not self._kwargs.get("as_index") and not isinstance(result, Series):
             result = result.rename(columns={0: "size"})
             result = (
-                result.rename(columns={"__reduced__": "index"})
-                if "__reduced__" in result.columns
+                result.rename(columns={MODIN_UNNAMED_SERIES_LABEL: "index"})
+                if MODIN_UNNAMED_SERIES_LABEL in result.columns
                 else result
             )
         elif isinstance(self._df, Series):