Skip to content

Commit

Permalink
fix: Fix invalid len due to projection pushdown selection of scalar (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Nov 28, 2024
1 parent 49b2e7b commit 99c7f4d
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,25 +67,66 @@ pub(super) fn process_projection(
let expr = if input_schema.is_empty() {
// If the input schema is empty, we should just project
// ourselves
exprs[0].node()
Some(exprs[0].node())
} else {
// simply select the last column
// NOTE: the first can be the inserted index column, so that might not work
let (first_name, _) = input_schema.try_get_at_index(input_schema.len() - 1)?;
let expr = expr_arena.add(AExpr::Column(first_name.clone()));
if !acc_projections.is_empty() {
check_double_projection(
&exprs[0],
expr_arena,
&mut acc_projections,
&mut projected_names,
);
// Select the last column projection.
let mut name = None;
for (_, plan) in (&*lp_arena).iter(input) {
match plan {
IR::Select { expr: exprs, .. } | IR::HStack { exprs, .. } => {
for e in exprs {
if !e.is_scalar(expr_arena) {
name = Some(e.output_name());
break;
}
}
},
IR::Scan {
file_info,
output_schema,
..
} => {
let schema = output_schema.as_ref().unwrap_or(&file_info.schema);
// NOTE: the first can be the inserted index column, so that might not work
let (last_name, _) = schema.try_get_at_index(schema.len() - 1)?;
name = Some(last_name);
break;
},
IR::DataFrameScan {
schema,
output_schema,
..
} => {
// NOTE: the first can be the inserted index column, so that might not work
let schema = output_schema.as_ref().unwrap_or(schema);
let (last_name, _) = schema.try_get_at_index(schema.len() - 1)?;
name = Some(last_name);
break;
},
_ => {},
}
}

if let Some(name) = name {
let expr = expr_arena.add(AExpr::Column(name.clone()));
if !acc_projections.is_empty() {
check_double_projection(
&exprs[0],
expr_arena,
&mut acc_projections,
&mut projected_names,
);
}
Some(expr)
} else {
None
}
expr
};
add_expr_to_accumulated(expr, &mut acc_projections, &mut projected_names, expr_arena);
local_projection.push(exprs.pop().unwrap());
proj_pd.is_count_star = true;
if let Some(expr) = expr {
add_expr_to_accumulated(expr, &mut acc_projections, &mut projected_names, expr_arena);
local_projection.push(exprs.pop().unwrap());
proj_pd.is_count_star = true;
}
} else {
// A projection can consist of a chain of expressions followed by an alias.
// We want to do the chain locally because it can have complicated side effects.
Expand Down
16 changes: 16 additions & 0 deletions py-polars/tests/unit/test_scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,19 @@ def test_scalar_19957() -> None:
"foo": [1, 1, 1],
"bar": [1, 1, 1],
}


def test_scalar_len_20046() -> None:
df = pl.DataFrame({"a": [1, 2, 3]})

assert (
df.lazy()
.select(
pl.col("a"),
pl.lit(1),
)
.select(pl.len())
.collect()
.item()
== 3
)

0 comments on commit 99c7f4d

Please sign in to comment.