fixes

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
modin-project · Feb 26, 2024 · 50fa735 · 50fa735
1 parent 8ab5269
commit 50fa735
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 13 deletions.
diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
@@ -194,7 +194,7 @@ def operation(df):
 
     raising_exceptions = None
     if "int_data" in request.node.callspec.id and other == "a":
-        raising_exceptions = TypeError("Invalid comparison between dtype=int32 and str")
+        raising_exceptions = TypeError("Invalid comparison between dtype=int64 and str")
     elif "float_nan_data" in request.node.callspec.id and other == "a":
         raising_exceptions = TypeError(
             "Invalid comparison between dtype=float64 and str"

diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
@@ -26,8 +26,6 @@
 from modin.pandas.test.utils import (
     axis_keys,
     axis_values,
-    bool_arg_keys,
-    bool_arg_values,
     create_test_dfs,
     default_to_pandas_ignore_string,
     df_equals,
@@ -300,11 +298,15 @@ def test_corr_min_periods(self, min_periods):
                 modin_df, pandas_df, lambda df: df.corr(min_periods=min_periods)
             )
 
-    @pytest.mark.parametrize("numeric_only", [True, False, None])
+    @pytest.mark.parametrize("numeric_only", [True, False])
     def test_corr_non_numeric(self, numeric_only):
         eval_general(
             *create_test_dfs({"a": [1, 2, 3], "b": [3, 2, 5], "c": ["a", "b", "c"]}),
             lambda df: df.corr(numeric_only=numeric_only),
+            # Use False to compare only types:
+            # Modin exc: `ValueError("Unsupported types with 'numeric_only=False'")`
+            # Pandas exc: `ValueError("could not convert string to float: 'a'")`
+            raising_exceptions=False,
         )
 
     @pytest.mark.skipif(
@@ -377,11 +379,15 @@ def comparator1(df1, df2):
     )
 
 
-@pytest.mark.parametrize("numeric_only", [True, False, None])
+@pytest.mark.parametrize("numeric_only", [True, False])
 def test_cov_numeric_only(numeric_only):
     eval_general(
         *create_test_dfs({"a": [1, 2, 3], "b": [3, 2, 5], "c": ["a", "b", "c"]}),
         lambda df: df.cov(numeric_only=numeric_only),
+        # Use False to compare only types:
+        # Modin exc: `ValueError("Unsupported types with 'numeric_only=False'")`
+        # Pandas exc: `ValueError("could not convert string to float: 'a'")`
+        raising_exceptions=False,
     )
 
 
@@ -526,8 +532,8 @@ def test_info(data, verbose, max_cols, memory_usage, show_counts):
 
 
 @pytest.mark.parametrize("axis", axis_values, ids=axis_keys)
-@pytest.mark.parametrize("skipna", bool_arg_values, ids=bool_arg_keys)
-@pytest.mark.parametrize("numeric_only", bool_arg_values, ids=bool_arg_keys)
+@pytest.mark.parametrize("skipna", [False, True])
+@pytest.mark.parametrize("numeric_only", [False, True])
 @pytest.mark.parametrize("method", ["kurtosis", "kurt"])
 def test_kurt_kurtosis(axis, skipna, numeric_only, method):
     data = test_data["float_nan_data"]
@@ -706,7 +712,7 @@ def test_pivot_table_data(data, index, columns, values, aggfunc):
 )
 @pytest.mark.parametrize(
     "margins_name",
-    [pytest.param("Custom name", id="str_name"), pytest.param(None, id="None_name")],
+    [pytest.param("Custom name", id="str_name")],
 )
 @pytest.mark.parametrize("fill_value", [None, 0])
 def test_pivot_table_margins(
@@ -717,7 +723,11 @@ def test_pivot_table_margins(
     aggfunc,
     margins_name,
     fill_value,
+    request,
 ):
+    raising_exceptions = None
+    if "dict_func" in request.node.callspec.id:
+        raising_exceptions = KeyError("Column(s) ['col28', 'col38'] do not exist")
     eval_general(
         *create_test_dfs(data),
         operation=lambda df, *args, **kwargs: df.pivot_table(*args, **kwargs),
@@ -728,6 +738,7 @@ def test_pivot_table_margins(
         margins=True,
         margins_name=margins_name,
         fill_value=fill_value,
+        raising_exceptions=raising_exceptions,
     )
 
 
@@ -834,11 +845,22 @@ def test_resampler_functions(rule, axis, method):
     )
     modin_df = pd.DataFrame(data, index=index)
     pandas_df = pandas.DataFrame(data, index=index)
+    if axis == "columns":
+        columns = pandas.date_range("31/12/2000", periods=len(data), freq="min")
+        modin_df.columns = columns
+        pandas_df.columns = columns
+
+    raising_exceptions = None
+    if method in ("interpolate", "asfreq", "nearest", "bfill", "ffill"):
+        # It looks like pandas is preparing to completely
+        # remove `axis` parameter for `resample` function.
+        raising_exceptions = AssertionError("axis must be 0")
 
     eval_general(
         modin_df,
         pandas_df,
         lambda df: getattr(df.resample(rule, axis=axis), method)(),
+        raising_exceptions=raising_exceptions,
     )
 
 
@@ -1135,10 +1157,13 @@ def test_take():
 
 
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
-def test_to_records(request, data):
+def test_to_records(data):
+    # `to_records` doesn't work when `index` is among column names
     eval_general(
         *create_test_dfs(data),
-        lambda df: df.dropna().to_records(),
+        lambda df: (
+            df.dropna().drop("index", axis=1) if "index" in df.columns else df.dropna()
+        ).to_records(),
     )
 
 
@@ -1307,9 +1332,15 @@ def test___array__(data):
     assert_array_equal(modin_df.__array__(), pandas_df.__array__())
 
 
-@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
+@pytest.mark.parametrize("data", [[False], [True], [1, 2]])
 def test___bool__(data):
-    eval_general(*create_test_dfs(data), lambda df: df.__bool__())
+    eval_general(
+        *create_test_dfs(data),
+        lambda df: df.__bool__(),
+        raising_exceptions=ValueError(
+            "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
+        ),
+    )
 
 
 @pytest.mark.parametrize(

diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
@@ -914,7 +914,7 @@ def execute_callable(fn, inplace=False, md_kwargs={}, pd_kwargs={}):
                         type(modin_exception) is type(raising_exceptions)
                         and modin_exception.args == raising_exceptions.args
                     ), f"not acceptable exception: [{modin_exception}]"
-                else:
+                elif raising_exceptions is not False:
                     raise
         else:
             md_result = fn(modin_df, **md_kwargs)