Skip to content

Commit

Permalink
refactor(rust): Minor new-streaming test fixes (#18891)
Browse files: browse the repository at this point in the history
  • Loading branch information
orlp authored Sep 24, 2024
1 parent d3f8a5c commit fa84194
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 108 deletions.
2 changes: 1 addition & 1 deletion crates/polars-python/src/series/buffers.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -342,7 +342,7 @@ where
}
fn series_to_bitmap(s: Series) -> PyResult<Bitmap> {
let ca_result = s.bool();
let ca = ca_result.map_err(PyPolarsErr::from)?;
let ca = ca_result.map_err(PyPolarsErr::from)?.rechunk();
let arr = ca.downcast_iter().next().unwrap();
let bitmap = arr.values().clone();
Ok(bitmap)
Expand Down
206 changes: 104 additions & 102 deletions py-polars/tests/unit/functions/as_datatype/test_struct.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,111 +62,113 @@ def test_eager_struct() -> None:


def test_struct_from_schema_only() -> None:
# we create a dataframe with default types
df = pl.DataFrame(
{
"str": ["a", "b", "c", "d", "e"],
"u8": [1, 2, 3, 4, 5],
"i32": [1, 2, 3, 4, 5],
"f64": [1, 2, 3, 4, 5],
"cat": ["a", "b", "c", "d", "e"],
"datetime": pl.Series(
[
date(2023, 1, 1),
date(2023, 1, 2),
date(2023, 1, 3),
date(2023, 1, 4),
date(2023, 1, 5),
]
),
"bool": [1, 0, 1, 1, 0],
"list[u8]": [[1], [2], [3], [4], [5]],
}
)

# specify a schema with specific dtypes
s = df.select(
pl.struct(
schema={
"str": pl.String,
"u8": pl.UInt8,
"i32": pl.Int32,
"f64": pl.Float64,
"cat": pl.Categorical,
"datetime": pl.Datetime("ms"),
"bool": pl.Boolean,
"list[u8]": pl.List(pl.UInt8),
# Workaround for new streaming engine.
with pl.StringCache():
# we create a dataframe with default types
df = pl.DataFrame(
{
"str": ["a", "b", "c", "d", "e"],
"u8": [1, 2, 3, 4, 5],
"i32": [1, 2, 3, 4, 5],
"f64": [1, 2, 3, 4, 5],
"cat": ["a", "b", "c", "d", "e"],
"datetime": pl.Series(
[
date(2023, 1, 1),
date(2023, 1, 2),
date(2023, 1, 3),
date(2023, 1, 4),
date(2023, 1, 5),
]
),
"bool": [1, 0, 1, 1, 0],
"list[u8]": [[1], [2], [3], [4], [5]],
}
).alias("s")
)["s"]
)

# check dtypes
assert s.dtype == pl.Struct(
[
pl.Field("str", pl.String),
pl.Field("u8", pl.UInt8),
pl.Field("i32", pl.Int32),
pl.Field("f64", pl.Float64),
pl.Field("cat", pl.Categorical),
pl.Field("datetime", pl.Datetime("ms")),
pl.Field("bool", pl.Boolean),
pl.Field("list[u8]", pl.List(pl.UInt8)),
]
)
# specify a schema with specific dtypes
s = df.select(
pl.struct(
schema={
"str": pl.String,
"u8": pl.UInt8,
"i32": pl.Int32,
"f64": pl.Float64,
"cat": pl.Categorical,
"datetime": pl.Datetime("ms"),
"bool": pl.Boolean,
"list[u8]": pl.List(pl.UInt8),
}
).alias("s")
)["s"]

# check dtypes
assert s.dtype == pl.Struct(
[
pl.Field("str", pl.String),
pl.Field("u8", pl.UInt8),
pl.Field("i32", pl.Int32),
pl.Field("f64", pl.Float64),
pl.Field("cat", pl.Categorical),
pl.Field("datetime", pl.Datetime("ms")),
pl.Field("bool", pl.Boolean),
pl.Field("list[u8]", pl.List(pl.UInt8)),
]
)

# check values
assert s.to_list() == [
{
"str": "a",
"u8": 1,
"i32": 1,
"f64": 1.0,
"cat": "a",
"datetime": datetime(2023, 1, 1, 0, 0),
"bool": True,
"list[u8]": [1],
},
{
"str": "b",
"u8": 2,
"i32": 2,
"f64": 2.0,
"cat": "b",
"datetime": datetime(2023, 1, 2, 0, 0),
"bool": False,
"list[u8]": [2],
},
{
"str": "c",
"u8": 3,
"i32": 3,
"f64": 3.0,
"cat": "c",
"datetime": datetime(2023, 1, 3, 0, 0),
"bool": True,
"list[u8]": [3],
},
{
"str": "d",
"u8": 4,
"i32": 4,
"f64": 4.0,
"cat": "d",
"datetime": datetime(2023, 1, 4, 0, 0),
"bool": True,
"list[u8]": [4],
},
{
"str": "e",
"u8": 5,
"i32": 5,
"f64": 5.0,
"cat": "e",
"datetime": datetime(2023, 1, 5, 0, 0),
"bool": False,
"list[u8]": [5],
},
]
# check values
assert s.to_list() == [
{
"str": "a",
"u8": 1,
"i32": 1,
"f64": 1.0,
"cat": "a",
"datetime": datetime(2023, 1, 1, 0, 0),
"bool": True,
"list[u8]": [1],
},
{
"str": "b",
"u8": 2,
"i32": 2,
"f64": 2.0,
"cat": "b",
"datetime": datetime(2023, 1, 2, 0, 0),
"bool": False,
"list[u8]": [2],
},
{
"str": "c",
"u8": 3,
"i32": 3,
"f64": 3.0,
"cat": "c",
"datetime": datetime(2023, 1, 3, 0, 0),
"bool": True,
"list[u8]": [3],
},
{
"str": "d",
"u8": 4,
"i32": 4,
"f64": 4.0,
"cat": "d",
"datetime": datetime(2023, 1, 4, 0, 0),
"bool": True,
"list[u8]": [4],
},
{
"str": "e",
"u8": 5,
"i32": 5,
"f64": 5.0,
"cat": "e",
"datetime": datetime(2023, 1, 5, 0, 0),
"bool": False,
"list[u8]": [5],
},
]


def test_struct_broadcasting() -> None:
Expand Down
2 changes: 2 additions & 0 deletions py-polars/tests/unit/interop/numpy/test_to_numpy_df.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,6 +36,7 @@ def assert_zero_copy(s: pl.Series, arr: np.ndarray[Any, Any]) -> None:
allow_chunks=False,
)
)
@pytest.mark.may_fail_auto_streaming
def test_df_to_numpy_zero_copy(s: pl.Series) -> None:
df = pl.DataFrame({"a": s[:3], "b": s[3:]})

Expand Down Expand Up @@ -153,6 +154,7 @@ def test_df_to_numpy_zero_copy_path() -> None:
assert str(x[0, :]) == "[1. 2. 1. 1. 1.]"


@pytest.mark.may_fail_auto_streaming
def test_df_to_numpy_zero_copy_path_temporal() -> None:
values = [datetime(1970 + i, 1, 1) for i in range(12)]
s = pl.Series(values)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/interop/numpy/test_to_numpy_series.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_series_to_numpy_temporal_zero_copy(

def test_series_to_numpy_datetime_with_tz_zero_copy() -> None:
values = [datetime(1970, 1, 1), datetime(2024, 2, 28)]
s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam")
s = pl.Series(values).dt.convert_time_zone("Europe/Amsterdam").rechunk()
result = s.to_numpy(allow_copy=False)

assert_zero_copy(s, result)
Expand Down
4 changes: 0 additions & 4 deletions py-polars/tests/unit/interop/numpy/test_ufunc_expr.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,13 +33,11 @@ def test_ufunc_expr_not_first() -> None:
out = df.select(
np.power(2.0, cast(Any, pl.col("a"))).alias("power"),
(2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"),
(np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"),
)
expected = pl.DataFrame(
[
pl.Series("power", [2**1, 2**2, 2**3], dtype=pl.Float64),
pl.Series("divide_scalar", [2 / 1, 2 / 2, 2 / 3], dtype=pl.Float64),
pl.Series("divide_array", [2 / 1, 2 / 2, 2 / 3], dtype=pl.Float64),
]
)
assert_frame_equal(out, expected)
Expand Down Expand Up @@ -68,13 +66,11 @@ def test_lazy_ufunc_expr_not_first() -> None:
out = ldf.select(
np.power(2.0, cast(Any, pl.col("a"))).alias("power"),
(2.0 / cast(Any, pl.col("a"))).alias("divide_scalar"),
(np.array([2, 2, 2]) / cast(Any, pl.col("a"))).alias("divide_array"),
)
expected = pl.DataFrame(
[
pl.Series("power", [2**1, 2**2, 2**3], dtype=pl.Float64),
pl.Series("divide_scalar", [2 / 1, 2 / 2, 2 / 3], dtype=pl.Float64),
pl.Series("divide_array", [2 / 1, 2 / 2, 2 / 3], dtype=pl.Float64),
]
)
assert_frame_equal(out.collect(), expected)
Expand Down

0 comments on commit fa84194

Please sign in to comment.