Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
Signed-off-by: arunjose696 <arunjose696@gmail.com>
  • Loading branch information
arunjose696 committed Jul 8, 2024
1 parent c748be6 commit 4f40c12
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
11 changes: 4 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,8 @@ jobs:
id: filter
with:
filters: |
test-small-query-compiler:
test-native-dataframe-mode:
- 'modin/core/storage_formats/pandas/native_query_compiler.py'
- 'modin/core/storage_formats/pandas/query_compiler.py'
- 'modin/core/storage_formats/base/query_compiler.py'
shared: &shared
- 'modin/core/execution/dispatching/**'
Expand Down Expand Up @@ -636,9 +635,9 @@ jobs:
python-version: ${{matrix.python-version}}
- run: python -m pytest modin/tests/experimental/spreadsheet/test_general.py

test-small-query-compiler:
test-native-dataframe-mode:
needs: [ lint-flake8, execution-filter]
if: ${{ needs.execution-filter.outputs.test-small-query-compiler == 'true' }}
if: ${{ needs.execution-filter.outputs.test-native-dataframe-mode == 'true' }}
runs-on: ubuntu-latest
defaults:
run:
Expand All @@ -648,15 +647,13 @@ jobs:
python-version: ["3.9"]
env:
MODIN_NATIVE_DATAFRAME_MODE: "Pandas"
name: test-small-query-compiler python ${{matrix.python-version}})
name: test-native-dataframe-mode python ${{matrix.python-version}})
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/mamba-env
with:
environment-file: environment-dev.yml
python-version: ${{matrix.python-version}}
- run: python -m pytest modin/tests/config/test_envvars.py
- run: python -m pytest modin/tests/config/test_parameter.py
- run: python -m pytest modin/tests/pandas/dataframe/test_binary.py
- run: python -m pytest modin/tests/pandas/dataframe/test_default.py
- run: python -m pytest modin/tests/pandas/dataframe/test_indexing.py
Expand Down
11 changes: 8 additions & 3 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,18 +915,23 @@ def _check_vars() -> None:

class NativeDataframeMode(EnvironmentVariable, type=str):
"""
Configures the query compiler to process Modin data.
When this config is set to ``Default``, ``PandasQueryCompiler`` is used,
which leads to Modin executing dataframes in distributed fashion.
When set to a string (e.g., ``Pandas``), ``NativeQueryCompiler`` is used,
When set to a string (e.g., ``pandas``), ``NativeQueryCompiler`` is used,
which handles the dataframes without distributing,
falling back to native library functions (e.g., ``Pandas``).
falling back to native library functions (e.g., ``pandas``).
This could be beneficial for handling relatively small dataframes
without involving additional overhead of communication between processes.
"""

varname = "MODIN_NATIVE_DATAFRAME_MODE"
choices = ("Default", "Pandas",)
choices = (
"Default",
"Pandas",
)
default = "Default"


Expand Down
39 changes: 26 additions & 13 deletions modin/core/storage_formats/pandas/native_query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,20 @@ def binary_operator(df, other, **kwargs):


def _register_expanding(func):
"""
Build function that apply specified expanding window functions.
Parameters
----------
func : str
Expanding window functionname to apply.
Returns
-------
callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
Function to be applied to the frame.
"""

def expanding_operator(df, fold_axis, rolling_args, *args, **kwargs):
squeeze_self = kwargs.pop("squeeze_self", False)

Check warning on line 370 in modin/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/native_query_compiler.py#L370

Added line #L370 was not covered by tests

Expand Down Expand Up @@ -497,7 +511,6 @@ def _register_default_pandas(
squeeze_kwargs=False,
return_raw=False,
in_place=False,
filter_kwargs=[],
):
"""
Build function that apply specified method of the passed frame.
Expand All @@ -516,8 +529,6 @@ def _register_default_pandas(
If True, and the result not DataFrame or Series it is returned as is without wrapping in query compiler.
in_place : bool, default: False
If True, the specified function will be applied on the passed frame in place.
filter_kwargs : list, default: []
List of key word argument names to remove.
Returns
-------
Expand All @@ -529,7 +540,7 @@ def caller(query_compiler, *args, **kwargs):
df = query_compiler._modin_frame
if is_series:
df = df.squeeze(axis=1)
exclude_names = ["fold_axis"] + filter_kwargs
exclude_names = ["fold_axis", "dtypes"]
kwargs = kwargs.copy()
for name in exclude_names:
kwargs.pop(name, None)
Expand Down Expand Up @@ -565,7 +576,9 @@ class NativeQueryCompiler(BaseQueryCompiler):
Parameters
----------
pandas_frame : pandas.DataFrame
Pandas frame to query with the compiled queries.
The pandas frame to query with the compiled queries.
shape_hint : {"row", "column", None}, default: None
Shape hint for frames known to be a column or a row, otherwise None.
"""

_modin_frame: pandas.DataFrame
Expand Down Expand Up @@ -767,7 +780,7 @@ def setitem_bool(self, row_loc, col_loc, item):
dt_weekofyear = _register_default_pandas(_dt_prop_map("weekofyear"))
dt_year = _register_default_pandas(_dt_prop_map("year"))
duplicated = _register_default_pandas(pandas.DataFrame.duplicated)
eq = _register_default_pandas(_register_binary("eq"), filter_kwargs=["dtypes"])
eq = _register_default_pandas(_register_binary("eq"))
equals = _register_default_pandas(_register_binary("equals"))
eval = _register_default_pandas(pandas.DataFrame.eval)
explode = _register_default_pandas(pandas.DataFrame.explode)
Expand Down Expand Up @@ -819,7 +832,7 @@ def setitem_bool(self, row_loc, col_loc, item):
pandas.DataFrame.first_valid_index, return_raw=True
)
floordiv = _register_default_pandas(_register_binary("floordiv"))
ge = _register_default_pandas(_register_binary("ge"), filter_kwargs=["dtypes"])
ge = _register_default_pandas(_register_binary("ge"))
get_dummies = _register_default_pandas(_get_dummies)
getitem_array = _register_default_pandas(_getitem_array)
getitem_row_array = _register_default_pandas(_getitem_row_array)
Expand All @@ -846,7 +859,7 @@ def setitem_bool(self, row_loc, col_loc, item):
groupby_std = _register_default_pandas(_groupby("std"))
groupby_sum = _register_default_pandas(_groupby("sum"))
groupby_var = _register_default_pandas(_groupby("var"))
gt = _register_default_pandas(_register_binary("gt"), filter_kwargs=["dtypes"])
gt = _register_default_pandas(_register_binary("gt"))
idxmax = _register_default_pandas(pandas.DataFrame.idxmax)
idxmin = _register_default_pandas(pandas.DataFrame.idxmin)
infer_objects = _register_default_pandas(
Expand All @@ -871,8 +884,8 @@ def setitem_bool(self, row_loc, col_loc, item):
last_valid_index = _register_default_pandas(
pandas.DataFrame.last_valid_index, return_raw=True
)
le = _register_default_pandas(_register_binary("le"), filter_kwargs=["dtypes"])
lt = _register_default_pandas(_register_binary("lt"), filter_kwargs=["dtypes"])
le = _register_default_pandas(_register_binary("le"))
lt = _register_default_pandas(_register_binary("lt"))
# mad = _register_default_pandas(pandas.DataFrame.mad)
mask = _register_default_pandas(pandas.DataFrame.mask)
max = _register_default_pandas(pandas.DataFrame.max)
Expand All @@ -886,7 +899,7 @@ def setitem_bool(self, row_loc, col_loc, item):
mod = _register_default_pandas(_register_binary("mod"))
mode = _register_default_pandas(pandas.DataFrame.mode)
mul = _register_default_pandas(_register_binary("mul"))
ne = _register_default_pandas(_register_binary("ne"), filter_kwargs=["dtypes"])
ne = _register_default_pandas(_register_binary("ne"))
negative = _register_default_pandas(pandas.DataFrame.__neg__)
nlargest = _register_default_pandas(pandas.DataFrame.nlargest)
notna = _register_default_pandas(pandas.DataFrame.notna)
Expand Down Expand Up @@ -1214,7 +1227,7 @@ def from_pandas(cls, df, data_cls):

@classmethod
def from_arrow(cls, at, data_cls):
return
return cls(at.to_pandas())

Check warning on line 1230 in modin/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/native_query_compiler.py#L1230

Added line #L1230 was not covered by tests

def free(self):
return
Expand All @@ -1231,7 +1244,7 @@ def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True):

@classmethod
def from_dataframe(cls, df, data_cls):
return cls(data_cls.from_dataframe(df))
return cls(pandas.api.interchange.from_dataframe(df))

Check warning on line 1247 in modin/core/storage_formats/pandas/native_query_compiler.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/native_query_compiler.py#L1247

Added line #L1247 was not covered by tests

# END Dataframe exchange protocol

Expand Down
2 changes: 2 additions & 0 deletions modin/tests/pandas/dataframe/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ def test___repr__():
"2016-08-26 09:00:16.413",5,60.193055,24.767427,5,"WALKING",85,"ON_BICYCLE",15,"UNKNOWN",0
"2016-08-26 09:00:20.578",3,60.152996,24.745216,3.90000009536743,"STILL",69,"IN_VEHICLE",31,"UNKNOWN",0"""
pandas_df = pandas.read_csv(io.StringIO(string_data))
# Using `force` for `NativeDataframeMode` as the warnings are raised at the API layer,
# before geting into the Query Compiler layer.
with warns_that_defaulting_to_pandas(force=True):
modin_df = pd.read_csv(io.StringIO(string_data))
assert repr(pandas_df) == repr(modin_df)
Expand Down

0 comments on commit 4f40c12

Please sign in to comment.