Use the new comparison helpers (compare_column_with_reference, compare_dataframe_with_reference) in more column test files [part 2]

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
data-apis · Jan 6, 2024 · a88360c · a88360c
1 parent 18f6d95
commit a88360c
Show file tree

Hide file tree

Showing 11 changed files with 87 additions and 117 deletions.
diff --git a/dataframe_api_compat/pandas_standard/column_object.py b/dataframe_api_compat/pandas_standard/column_object.py
@@ -35,6 +35,7 @@
     "UInt16": "uint16",
     "UInt8": "uint8",
     "boolean": "bool",
+    "Float64": "float64",
 }
 
 

diff --git a/tests/column/divmod_test.py b/tests/column/divmod_test.py
@@ -1,41 +1,31 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_column_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_expression_divmod(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = df.col("b")
     result_quotient, result_remainder = ser.__divmod__(other)
     # quotient
     result = df.assign(result_quotient.rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected_quotient = pd.Series([0, 0, 0], name="result")
-    pd.testing.assert_series_equal(result_pd, expected_quotient)
+    compare_column_with_reference(result.col("result"), [0, 0, 0], pdx.Int64)
     # remainder
     result = df.assign(result_remainder.rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected_remainder = pd.Series([1, 2, 3], name="result")
-    pd.testing.assert_series_equal(result_pd, expected_remainder)
+    compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)
 
 
 def test_expression_divmod_with_scalar(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result_quotient, result_remainder = ser.__divmod__(2)
     # quotient
     result = df.assign(result_quotient.rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected_quotient = pd.Series([0, 1, 1], name="result")
-    pd.testing.assert_series_equal(result_pd, expected_quotient)
+    compare_column_with_reference(result.col("result"), [0, 1, 1], pdx.Int64)
     # remainder
     result = df.assign(result_remainder.rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected_remainder = pd.Series([1, 0, 1], name="result")
-    pd.testing.assert_series_equal(result_pd, expected_remainder)
+    compare_column_with_reference(result.col("result"), [1, 0, 1], pdx.Int64)
diff --git a/tests/column/fill_nan_test.py b/tests/column/fill_nan_test.py
@@ -1,27 +1,22 @@
 from __future__ import annotations
 
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_column_with_reference
 from tests.utils import nan_dataframe_1
 
 
 def test_column_fill_nan(library: str) -> None:
     # TODO: test with nullable pandas, check null isn't filled
-    df = nan_dataframe_1(library).persist()
+    df = nan_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign(ser.fill_nan(-1.0).rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([1.0, 2.0, -1.0], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [1.0, 2.0, -1.0], pdx.Float64)
 
 
 def test_column_fill_nan_with_null(library: str) -> None:
     # TODO: test with nullable pandas, check null isn't filled
-    df = nan_dataframe_1(library).persist()
-    ns = df.__dataframe_namespace__()
+    df = nan_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
-    result = df.assign(ser.fill_nan(ns.null).is_null().rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([False, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    result = df.assign(ser.fill_nan(pdx.null).is_null().rename("result"))
+    compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
diff --git a/tests/column/get_rows_by_mask_test.py b/tests/column/get_rows_by_mask_test.py
@@ -2,8 +2,8 @@
 
 import pandas as pd
 
+from tests.utils import compare_column_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_column_filter(library: str) -> None:
@@ -18,11 +18,9 @@ def test_column_filter(library: str) -> None:
 
 def test_column_take_by_mask_noop(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     mask = ser > 0
     ser = ser.filter(mask)
     result = df.assign(ser.rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([1, 2, 3], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)
diff --git a/tests/column/get_rows_test.py b/tests/column/get_rows_test.py
@@ -1,16 +1,13 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_column_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_expression_take(library: str) -> None:
     df = integer_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     indices = df.col("a") - 1
     result = df.assign(ser.take(indices).rename("result")).select("result")
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([1, 2, 3], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)
diff --git a/tests/column/invert_test.py b/tests/column/invert_test.py
@@ -1,25 +1,20 @@
 from __future__ import annotations
 
-import pandas as pd
-
 from tests.utils import bool_dataframe_1
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_column_with_reference
 
 
 def test_expression_invert(library: str) -> None:
     df = bool_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign((~ser).rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([False, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
 
 
 def test_column_invert(library: str) -> None:
-    df = bool_dataframe_1(library).persist()
+    df = bool_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign((~ser).rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([False, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
diff --git a/tests/column/is_in_test.py b/tests/column/is_in_test.py
@@ -3,13 +3,12 @@
 from typing import TYPE_CHECKING
 from typing import Any
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_column_with_reference
 from tests.utils import float_dataframe_1
 from tests.utils import float_dataframe_2
 from tests.utils import float_dataframe_3
-from tests.utils import interchange_to_pandas
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -29,13 +28,12 @@ def test_is_in(
     df_factory: Callable[[str], Any],
     expected_values: list[bool],
 ) -> None:
-    df = df_factory(library).persist()
+    df = df_factory(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = ser + 1
     result = df.assign(ser.is_in(other).rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series(expected_values, name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), expected_values, pdx.Bool)
 
 
 @pytest.mark.parametrize(
@@ -53,10 +51,9 @@ def test_expr_is_in(
     expected_values: list[bool],
 ) -> None:
     df = df_factory(library)
+    pdx = df.__dataframe_namespace__()
     col = df.col
     ser = col("a")
     other = ser + 1
     result = df.assign(ser.is_in(other).rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series(expected_values, name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), expected_values, pdx.Bool)
diff --git a/tests/column/is_nan_test.py b/tests/column/is_nan_test.py
@@ -1,15 +1,12 @@
 from __future__ import annotations
 
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_column_with_reference
 from tests.utils import nan_dataframe_1
 
 
 def test_column_is_nan(library: str) -> None:
-    df = nan_dataframe_1(library).persist()
+    df = nan_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign(ser.is_nan().rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([False, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
diff --git a/tests/column/is_null_test.py b/tests/column/is_null_test.py
@@ -1,28 +1,25 @@
 from __future__ import annotations
 
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_column_with_reference
 from tests.utils import nan_dataframe_1
 from tests.utils import null_dataframe_1
 
 
 def test_column_is_null_1(library: str) -> None:
-    df = nan_dataframe_1(library).persist()
+    df = nan_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign(ser.is_null().rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
     if library == "pandas-numpy":
-        expected = pd.Series([False, False, True], name="result")
+        expected = [False, False, True]
     else:
-        expected = pd.Series([False, False, False], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+        expected = [False, False, False]
+    compare_column_with_reference(result.col("result"), expected, pdx.Bool)
 
 
 def test_column_is_null_2(library: str) -> None:
-    df = null_dataframe_1(library).persist()
+    df = null_dataframe_1(library)
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     result = df.assign(ser.is_null().rename("result"))
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([False, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
diff --git a/tests/column/pow_test.py b/tests/column/pow_test.py
@@ -1,56 +1,60 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_float_powers_column(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = df.col("b") * 1.0
     result = df.assign(ser.__pow__(other).rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
+    compare_dataframe_with_reference(
+        result,
         {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 32.0, 729.0]},
+        {"a": pdx.Int64, "b": pdx.Int64, "result": pdx.Float64},
     )
-    pd.testing.assert_frame_equal(result_pd, expected)
 
 
 def test_float_powers_scalar_column(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = 1.0
     result = df.assign(ser.__pow__(other).rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    compare_dataframe_with_reference(
+        result,
+        {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]},
+        {"a": pdx.Int64, "b": pdx.Int64, "result": pdx.Float64},
+    )
 
 
 def test_int_powers_column(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = df.col("b") * 1
     result = df.assign(ser.__pow__(other).rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]})
     if library in ("polars", "polars-lazy"):
-        result_pd = result_pd.astype("int64")
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({name: pdx.Int64() for name in ("a", "b", "result")})
+    compare_dataframe_with_reference(
+        result,
+        {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]},
+        {name: pdx.Int64 for name in ("a", "b", "result")},
+    )
 
 
 def test_int_powers_scalar_column(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    pdx = df.__dataframe_namespace__()
     ser = df.col("a")
     other = 1
     result = df.assign(ser.__pow__(other).rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]})
     if library in ("polars", "polars-lazy"):
-        result_pd = result_pd.astype("int64")
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({name: pdx.Int64() for name in ("a", "b", "result")})
+    compare_dataframe_with_reference(
+        result,
+        {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]},
+        {name: pdx.Int64 for name in ("a", "b", "result")},
+    )