PR comments

Signed-off-by: arunjose696 <arunjose696@gmail.com>
modin-project · Jul 8, 2024 · 4f40c12 · 4f40c12
1 parent c748be6
commit 4f40c12
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 23 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -221,9 +221,8 @@ jobs:
       id: filter
       with:
         filters: |
-          test-small-query-compiler:
+          test-native-dataframe-mode:
             - 'modin/core/storage_formats/pandas/native_query_compiler.py'
-            - 'modin/core/storage_formats/pandas/query_compiler.py'
             - 'modin/core/storage_formats/base/query_compiler.py'
           shared: &shared
             - 'modin/core/execution/dispatching/**'
@@ -636,9 +635,9 @@ jobs:
           python-version: ${{matrix.python-version}}
       - run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py
 
-  test-small-query-compiler:
+  test-native-dataframe-mode:
     needs: [ lint-flake8, execution-filter]
-    if: ${{ needs.execution-filter.outputs.test-small-query-compiler == 'true' }}
+    if: ${{ needs.execution-filter.outputs.test-native-dataframe-mode == 'true' }}
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -648,15 +647,13 @@ jobs:
         python-version: ["3.9"]
     env:
       MODIN_NATIVE_DATAFRAME_MODE: "Pandas"
-    name: test-small-query-compiler python ${{matrix.python-version}})
+    name: test-native-dataframe-mode python ${{matrix.python-version}})
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/mamba-env
         with:
           environment-file: environment-dev.yml
           python-version: ${{matrix.python-version}}
-      - run: python -m pytest modin/tests/config/test_envvars.py
-      - run: python -m pytest modin/tests/config/test_parameter.py
       - run: python -m pytest modin/tests/pandas/dataframe/test_binary.py
       - run: python -m pytest modin/tests/pandas/dataframe/test_default.py
       - run: python -m pytest modin/tests/pandas/dataframe/test_indexing.py

diff --git a/modin/config/envvars.py b/modin/config/envvars.py
@@ -915,18 +915,23 @@ def _check_vars() -> None:
 
 class NativeDataframeMode(EnvironmentVariable, type=str):
     """
+    Configures the query compiler to process Modin data.
+
     When this config is set to ``Default``, ``PandasQueryCompiler`` is used,
     which leads to Modin executing dataframes in distributed fashion.
-    When set to a string (e.g., ``Pandas``), ``NativeQueryCompiler`` is used,
+    When set to a string (e.g., ``pandas``), ``NativeQueryCompiler`` is used,
     which handles the dataframes without distributing,
-    falling back to native library functions (e.g., ``Pandas``).
+    falling back to native library functions (e.g., ``pandas``).
 
     This could be beneficial for handling relatively small dataframes
     without involving additional overhead of communication between processes.
     """
 
     varname = "MODIN_NATIVE_DATAFRAME_MODE"
-    choices = ("Default", "Pandas",)
+    choices = (
+        "Default",
+        "Pandas",
+    )
     default = "Default"
 
 

diff --git a/modin/core/storage_formats/pandas/native_query_compiler.py b/modin/core/storage_formats/pandas/native_query_compiler.py
@@ -352,6 +352,20 @@ def binary_operator(df, other, **kwargs):
 
 
 def _register_expanding(func):
+    """
+    Build function that apply specified expanding window functions.
+
+    Parameters
+    ----------
+    func : str
+        Expanding window functionname to apply.
+
+    Returns
+    -------
+    callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
+        Function to be applied to the frame.
+    """
+
     def expanding_operator(df, fold_axis, rolling_args, *args, **kwargs):
         squeeze_self = kwargs.pop("squeeze_self", False)
 
@@ -497,7 +511,6 @@ def _register_default_pandas(
     squeeze_kwargs=False,
     return_raw=False,
     in_place=False,
-    filter_kwargs=[],
 ):
     """
     Build function that apply specified method of the passed frame.
@@ -516,8 +529,6 @@ def _register_default_pandas(
         If True, and the result not DataFrame or Series it is returned as is without wrapping in query compiler.
     in_place : bool, default: False
         If True, the specified function will be applied on the passed frame in place.
-    filter_kwargs : list, default: []
-        List of key word argument names to remove.
 
     Returns
     -------
@@ -529,7 +540,7 @@ def caller(query_compiler, *args, **kwargs):
         df = query_compiler._modin_frame
         if is_series:
             df = df.squeeze(axis=1)
-        exclude_names = ["fold_axis"] + filter_kwargs
+        exclude_names = ["fold_axis", "dtypes"]
         kwargs = kwargs.copy()
         for name in exclude_names:
             kwargs.pop(name, None)
@@ -565,7 +576,9 @@ class NativeQueryCompiler(BaseQueryCompiler):
     Parameters
     ----------
     pandas_frame : pandas.DataFrame
-        Pandas frame to query with the compiled queries.
+        The pandas frame to query with the compiled queries.
+    shape_hint : {"row", "column", None}, default: None
+        Shape hint for frames known to be a column or a row, otherwise None.
     """
 
     _modin_frame: pandas.DataFrame
@@ -767,7 +780,7 @@ def setitem_bool(self, row_loc, col_loc, item):
     dt_weekofyear = _register_default_pandas(_dt_prop_map("weekofyear"))
     dt_year = _register_default_pandas(_dt_prop_map("year"))
     duplicated = _register_default_pandas(pandas.DataFrame.duplicated)
-    eq = _register_default_pandas(_register_binary("eq"), filter_kwargs=["dtypes"])
+    eq = _register_default_pandas(_register_binary("eq"))
     equals = _register_default_pandas(_register_binary("equals"))
     eval = _register_default_pandas(pandas.DataFrame.eval)
     explode = _register_default_pandas(pandas.DataFrame.explode)
@@ -819,7 +832,7 @@ def setitem_bool(self, row_loc, col_loc, item):
         pandas.DataFrame.first_valid_index, return_raw=True
     )
     floordiv = _register_default_pandas(_register_binary("floordiv"))
-    ge = _register_default_pandas(_register_binary("ge"), filter_kwargs=["dtypes"])
+    ge = _register_default_pandas(_register_binary("ge"))
     get_dummies = _register_default_pandas(_get_dummies)
     getitem_array = _register_default_pandas(_getitem_array)
     getitem_row_array = _register_default_pandas(_getitem_row_array)
@@ -846,7 +859,7 @@ def setitem_bool(self, row_loc, col_loc, item):
     groupby_std = _register_default_pandas(_groupby("std"))
     groupby_sum = _register_default_pandas(_groupby("sum"))
     groupby_var = _register_default_pandas(_groupby("var"))
-    gt = _register_default_pandas(_register_binary("gt"), filter_kwargs=["dtypes"])
+    gt = _register_default_pandas(_register_binary("gt"))
     idxmax = _register_default_pandas(pandas.DataFrame.idxmax)
     idxmin = _register_default_pandas(pandas.DataFrame.idxmin)
     infer_objects = _register_default_pandas(
@@ -871,8 +884,8 @@ def setitem_bool(self, row_loc, col_loc, item):
     last_valid_index = _register_default_pandas(
         pandas.DataFrame.last_valid_index, return_raw=True
     )
-    le = _register_default_pandas(_register_binary("le"), filter_kwargs=["dtypes"])
-    lt = _register_default_pandas(_register_binary("lt"), filter_kwargs=["dtypes"])
+    le = _register_default_pandas(_register_binary("le"))
+    lt = _register_default_pandas(_register_binary("lt"))
     # mad = _register_default_pandas(pandas.DataFrame.mad)
     mask = _register_default_pandas(pandas.DataFrame.mask)
     max = _register_default_pandas(pandas.DataFrame.max)
@@ -886,7 +899,7 @@ def setitem_bool(self, row_loc, col_loc, item):
     mod = _register_default_pandas(_register_binary("mod"))
     mode = _register_default_pandas(pandas.DataFrame.mode)
     mul = _register_default_pandas(_register_binary("mul"))
-    ne = _register_default_pandas(_register_binary("ne"), filter_kwargs=["dtypes"])
+    ne = _register_default_pandas(_register_binary("ne"))
     negative = _register_default_pandas(pandas.DataFrame.__neg__)
     nlargest = _register_default_pandas(pandas.DataFrame.nlargest)
     notna = _register_default_pandas(pandas.DataFrame.notna)
@@ -1214,7 +1227,7 @@ def from_pandas(cls, df, data_cls):
 
     @classmethod
     def from_arrow(cls, at, data_cls):
-        return
+        return cls(at.to_pandas())
 
     def free(self):
         return
@@ -1231,7 +1244,7 @@ def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True):
 
     @classmethod
     def from_dataframe(cls, df, data_cls):
-        return cls(data_cls.from_dataframe(df))
+        return cls(pandas.api.interchange.from_dataframe(df))
 
     # END Dataframe exchange protocol
 

diff --git a/modin/tests/pandas/dataframe/test_iter.py b/modin/tests/pandas/dataframe/test_iter.py
@@ -232,6 +232,8 @@ def test___repr__():
 "2016-08-26 09:00:16.413",5,60.193055,24.767427,5,"WALKING",85,"ON_BICYCLE",15,"UNKNOWN",0
 "2016-08-26 09:00:20.578",3,60.152996,24.745216,3.90000009536743,"STILL",69,"IN_VEHICLE",31,"UNKNOWN",0"""
     pandas_df = pandas.read_csv(io.StringIO(string_data))
+    # Using `force` for `NativeDataframeMode` as the warnings are raised at the API layer,
+    # before geting into the Query Compiler layer.
     with warns_that_defaulting_to_pandas(force=True):
         modin_df = pd.read_csv(io.StringIO(string_data))
     assert repr(pandas_df) == repr(modin_df)