Skip to content

Commit

Permalink
fix(rust, python): fix false positive in parquet stats evaluation (po…
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jul 26, 2023
1 parent d7308f3 commit 83da1e8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
8 changes: 1 addition & 7 deletions polars/polars-core/src/series/comparison.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
//! Comparison operations on Series.

#[cfg(any(
feature = "dtype-duration",
feature = "dtype-datetime",
feature = "dtype-date",
feature = "dtype-time",
feature = "dtype-struct"
))]
#[cfg(feature = "dtype-struct")]
use std::ops::Deref;

use super::Series;
Expand Down
5 changes: 5 additions & 0 deletions polars/polars-lazy/src/physical_plan/expressions/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,11 @@ impl ApplyExpr {
} => (function, input),
_ => return Ok(true),
};
// Ensure the input of the function is only a `col(..)`.
// If the input does any arithmetic, the code below is flawed.
if !matches!(input[0], Expr::Column(_)) {
return Ok(true);
}

match function {
FunctionExpr::Boolean(BooleanFunction::IsNull) => {
Expand Down
13 changes: 13 additions & 0 deletions py-polars/tests/unit/io/test_lazy_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,16 @@ def test_glob_n_rows(io_files_path: Path) -> None:
"fats_g": [0.5, 6.0],
"sugars_g": [2, 2],
}


@pytest.mark.write_disk()
def test_parquet_statistics_filter_9925(tmp_path: Path) -> None:
    """Filtering on an arithmetic expression must not drop matching rows.

    A parquet file is written with statistics enabled, then scanned lazily
    with a predicate that floor-divides the column before calling ``is_in``.
    Every row whose floor-divided value is 0 or 3 has to be returned.
    """
    tmp_path.mkdir(exist_ok=True)
    parquet_file = tmp_path / "codes.parquet"

    # Write the fixture with column statistics so the scan can consult them.
    codes = [300964, 300972, 500_000, 26]
    pl.DataFrame({"code": codes}).write_parquet(parquet_file, statistics=True)

    # 300964 // 100_000 == 3, 300972 // 100_000 == 3,
    # 500_000 // 100_000 == 5, 26 // 100_000 == 0.
    bucket = pl.col("code").floordiv(100_000)
    predicate = bucket.is_in([0, 3])
    result = pl.scan_parquet(parquet_file).filter(predicate).collect()

    assert result.to_dict(False) == {"code": [300964, 300972, 26]}

0 comments on commit 83da1e8

Please sign in to comment.