Skip to content

Commit

Permalink
fix(rust, python): Clear window cache after evaluating predicate expression
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa committed Aug 15, 2023
1 parent 6634621 commit 745c8a4
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
3 changes: 3 additions & 0 deletions crates/polars-lazy/src/physical_plan/executors/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ impl Executor for FilterExec {
state.insert_has_window_function_flag()
}
let s = self.predicate.evaluate(&df, state)?;
if state.cache_window() {
state.clear_window_expr_cache()
}
let mask = s.bool().map_err(|_| {
polars_err!(
ComputeError: "filter predicate must be of type `Boolean`, got `{}`", s.dtype()
Expand Down
6 changes: 6 additions & 0 deletions crates/polars-lazy/src/physical_plan/executors/scan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ impl Executor for DataFrameExec {
state.insert_has_window_function_flag()
}
let s = selection.evaluate(&df, state)?;
if state.cache_window() {
state.clear_window_expr_cache()
}
let mask = s.bool().map_err(
|_| polars_err!(ComputeError: "filter predicate was not of type boolean"),
)?;
Expand Down Expand Up @@ -114,6 +117,9 @@ impl Executor for AnonymousScanExec {
(false, Some(predicate)) => {
let mut df = self.function.scan(self.options.clone())?;
let s = predicate.evaluate(&df, state)?;
if state.cache_window() {
state.clear_window_expr_cache()
}
let mask = s.bool().map_err(
|_| polars_err!(ComputeError: "filter predicate was not of type boolean"),
)?;
Expand Down
13 changes: 13 additions & 0 deletions py-polars/tests/unit/operations/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,16 @@ def test_categorical_string_comparison_6283() -> None:
"funding": ["yes", "yes", "no"],
"score": [78, 39, 76],
}


def test_clear_window_cache_after_filter_10499() -> None:
    """Check two chained lazy filters that each use a window expression.

    Both predicates are evaluated ``over("b")``; the collected frame must
    contain exactly the expected rows. The numeric suffix in the test name
    presumably refers to issue #10499 -- confirm against the tracker.
    """
    frame = pl.from_dict(
        {
            "a": [None, None, 3, None, 5, 0, 0, 0, 9, 10],
            "b": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
        }
    )

    # True for groups of "b" in which "a" is not entirely null.
    not_all_null = (pl.col("a").null_count() < pl.count()).over("b")
    # True for groups of "b" in which "a" is not entirely zero.
    not_all_zero = ((pl.col("a") == 0).sum() < pl.count()).over("b")

    expected = {"a": [3, None, 5, 0, 9, 10], "b": [2, 2, 3, 3, 5, 5]}

    result = frame.lazy().filter(not_all_null).filter(not_all_zero).collect()
    assert result.to_dict(False) == expected

0 comments on commit 745c8a4

Please sign in to comment.