Skip to content

Commit

Permalink
use new functions for more files [part2]
Browse files (browse the repository at this point in the history)
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Jan 6, 2024
1 parent 18f6d95 commit a88360c
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 117 deletions.
1 change: 1 addition & 0 deletions dataframe_api_compat/pandas_standard/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"UInt16": "uint16",
"UInt8": "uint8",
"boolean": "bool",
"Float64": "float64",
}


Expand Down
24 changes: 7 additions & 17 deletions tests/column/divmod_test.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,31 @@
from __future__ import annotations

import pandas as pd

from tests.utils import compare_column_with_reference
from tests.utils import integer_dataframe_1
from tests.utils import interchange_to_pandas


def test_expression_divmod(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = df.col("b")
result_quotient, result_remainder = ser.__divmod__(other)
# quotient
result = df.assign(result_quotient.rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected_quotient = pd.Series([0, 0, 0], name="result")
pd.testing.assert_series_equal(result_pd, expected_quotient)
compare_column_with_reference(result.col("result"), [0, 0, 0], pdx.Int64)
# remainder
result = df.assign(result_remainder.rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected_remainder = pd.Series([1, 2, 3], name="result")
pd.testing.assert_series_equal(result_pd, expected_remainder)
compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)


def test_expression_divmod_with_scalar(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result_quotient, result_remainder = ser.__divmod__(2)
# quotient
result = df.assign(result_quotient.rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected_quotient = pd.Series([0, 1, 1], name="result")
pd.testing.assert_series_equal(result_pd, expected_quotient)
compare_column_with_reference(result.col("result"), [0, 1, 1], pdx.Int64)
# remainder
result = df.assign(result_remainder.rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected_remainder = pd.Series([1, 0, 1], name="result")
pd.testing.assert_series_equal(result_pd, expected_remainder)
compare_column_with_reference(result.col("result"), [1, 0, 1], pdx.Int64)
21 changes: 8 additions & 13 deletions tests/column/fill_nan_test.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,22 @@
from __future__ import annotations

import pandas as pd

from tests.utils import interchange_to_pandas
from tests.utils import compare_column_with_reference
from tests.utils import nan_dataframe_1


def test_column_fill_nan(library: str) -> None:
# TODO: test with nullable pandas, check null isn't filled
df = nan_dataframe_1(library).persist()
df = nan_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign(ser.fill_nan(-1.0).rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([1.0, 2.0, -1.0], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [1.0, 2.0, -1.0], pdx.Float64)


def test_column_fill_nan_with_null(library: str) -> None:
# TODO: test with nullable pandas, check null isn't filled
df = nan_dataframe_1(library).persist()
ns = df.__dataframe_namespace__()
df = nan_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign(ser.fill_nan(ns.null).is_null().rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([False, False, True], name="result")
pd.testing.assert_series_equal(result_pd, expected)
result = df.assign(ser.fill_nan(pdx.null).is_null().rename("result"))
compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
8 changes: 3 additions & 5 deletions tests/column/get_rows_by_mask_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import pandas as pd

from tests.utils import compare_column_with_reference
from tests.utils import integer_dataframe_1
from tests.utils import interchange_to_pandas


def test_column_filter(library: str) -> None:
Expand All @@ -18,11 +18,9 @@ def test_column_filter(library: str) -> None:

def test_column_take_by_mask_noop(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
mask = ser > 0
ser = ser.filter(mask)
result = df.assign(ser.rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([1, 2, 3], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)
9 changes: 3 additions & 6 deletions tests/column/get_rows_test.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
from __future__ import annotations

import pandas as pd

from tests.utils import compare_column_with_reference
from tests.utils import integer_dataframe_1
from tests.utils import interchange_to_pandas


def test_expression_take(library: str) -> None:
df = integer_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
indices = df.col("a") - 1
result = df.assign(ser.take(indices).rename("result")).select("result")
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([1, 2, 3], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [1, 2, 3], pdx.Int64)
17 changes: 6 additions & 11 deletions tests/column/invert_test.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,20 @@
from __future__ import annotations

import pandas as pd

from tests.utils import bool_dataframe_1
from tests.utils import interchange_to_pandas
from tests.utils import compare_column_with_reference


def test_expression_invert(library: str) -> None:
df = bool_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign((~ser).rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([False, False, True], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)


def test_column_invert(library: str) -> None:
df = bool_dataframe_1(library).persist()
df = bool_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign((~ser).rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([False, False, True], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
15 changes: 6 additions & 9 deletions tests/column/is_in_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
from typing import TYPE_CHECKING
from typing import Any

import pandas as pd
import pytest

from tests.utils import compare_column_with_reference
from tests.utils import float_dataframe_1
from tests.utils import float_dataframe_2
from tests.utils import float_dataframe_3
from tests.utils import interchange_to_pandas

if TYPE_CHECKING:
from collections.abc import Callable
Expand All @@ -29,13 +28,12 @@ def test_is_in(
df_factory: Callable[[str], Any],
expected_values: list[bool],
) -> None:
df = df_factory(library).persist()
df = df_factory(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = ser + 1
result = df.assign(ser.is_in(other).rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series(expected_values, name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), expected_values, pdx.Bool)


@pytest.mark.parametrize(
Expand All @@ -53,10 +51,9 @@ def test_expr_is_in(
expected_values: list[bool],
) -> None:
df = df_factory(library)
pdx = df.__dataframe_namespace__()
col = df.col
ser = col("a")
other = ser + 1
result = df.assign(ser.is_in(other).rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series(expected_values, name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), expected_values, pdx.Bool)
11 changes: 4 additions & 7 deletions tests/column/is_nan_test.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from __future__ import annotations

import pandas as pd

from tests.utils import interchange_to_pandas
from tests.utils import compare_column_with_reference
from tests.utils import nan_dataframe_1


def test_column_is_nan(library: str) -> None:
df = nan_dataframe_1(library).persist()
df = nan_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign(ser.is_nan().rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([False, False, True], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
21 changes: 9 additions & 12 deletions tests/column/is_null_test.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,25 @@
from __future__ import annotations

import pandas as pd

from tests.utils import interchange_to_pandas
from tests.utils import compare_column_with_reference
from tests.utils import nan_dataframe_1
from tests.utils import null_dataframe_1


def test_column_is_null_1(library: str) -> None:
df = nan_dataframe_1(library).persist()
df = nan_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign(ser.is_null().rename("result"))
result_pd = interchange_to_pandas(result)["result"]
if library == "pandas-numpy":
expected = pd.Series([False, False, True], name="result")
expected = [False, False, True]
else:
expected = pd.Series([False, False, False], name="result")
pd.testing.assert_series_equal(result_pd, expected)
expected = [False, False, False]
compare_column_with_reference(result.col("result"), expected, pdx.Bool)


def test_column_is_null_2(library: str) -> None:
df = null_dataframe_1(library).persist()
df = null_dataframe_1(library)
pdx = df.__dataframe_namespace__()
ser = df.col("a")
result = df.assign(ser.is_null().rename("result"))
result_pd = interchange_to_pandas(result)["result"]
expected = pd.Series([False, False, True], name="result")
pd.testing.assert_series_equal(result_pd, expected)
compare_column_with_reference(result.col("result"), [False, False, True], pdx.Bool)
46 changes: 25 additions & 21 deletions tests/column/pow_test.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,60 @@
from __future__ import annotations

import pandas as pd

from tests.utils import compare_dataframe_with_reference
from tests.utils import integer_dataframe_1
from tests.utils import interchange_to_pandas


def test_float_powers_column(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = df.col("b") * 1.0
result = df.assign(ser.__pow__(other).rename("result"))
result_pd = interchange_to_pandas(result)
expected = pd.DataFrame(
compare_dataframe_with_reference(
result,
{"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 32.0, 729.0]},
{"a": pdx.Int64, "b": pdx.Int64, "result": pdx.Float64},
)
pd.testing.assert_frame_equal(result_pd, expected)


def test_float_powers_scalar_column(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = 1.0
result = df.assign(ser.__pow__(other).rename("result"))
result_pd = interchange_to_pandas(result)
expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]})
pd.testing.assert_frame_equal(result_pd, expected)
compare_dataframe_with_reference(
result,
{"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]},
{"a": pdx.Int64, "b": pdx.Int64, "result": pdx.Float64},
)


def test_int_powers_column(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = df.col("b") * 1
result = df.assign(ser.__pow__(other).rename("result"))
result_pd = interchange_to_pandas(result)
expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]})
if library in ("polars", "polars-lazy"):
result_pd = result_pd.astype("int64")
pd.testing.assert_frame_equal(result_pd, expected)
result = result.cast({name: pdx.Int64() for name in ("a", "b", "result")})
compare_dataframe_with_reference(
result,
{"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]},
{name: pdx.Int64 for name in ("a", "b", "result")},
)


def test_int_powers_scalar_column(library: str) -> None:
df = integer_dataframe_1(library)
df.__dataframe_namespace__()
pdx = df.__dataframe_namespace__()
ser = df.col("a")
other = 1
result = df.assign(ser.__pow__(other).rename("result"))
result_pd = interchange_to_pandas(result)
expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]})
if library in ("polars", "polars-lazy"):
result_pd = result_pd.astype("int64")
pd.testing.assert_frame_equal(result_pd, expected)
result = result.cast({name: pdx.Int64() for name in ("a", "b", "result")})
compare_dataframe_with_reference(
result,
{"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]},
{name: pdx.Int64 for name in ("a", "b", "result")},
)
Loading

0 comments on commit a88360c

Please sign in to comment.