-
Notifications
You must be signed in to change notification settings - Fork 750
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature: Integrate Databend PushDown with Iceberg Rust's Scan Planning #16415
Comments
I found some similar issues and discussions that might be relevant to integrating Databend PushDown with Iceberg Rust's scan planning:
To integrate Databend PushDown with Iceberg Rust's scan planning, you need to utilize the `PushDownInfo` structure.
Here is a code snippet that demonstrates how to integrate Databend PushDown with Iceberg Rust's scan planning:
use databend_common_catalog::plan::PushDownInfo;
use databend_common_expression::TableSchema;
use crate::plans::Scan;
use crate::PhysicalPlanBuilder;
impl PhysicalPlanBuilder {
fn push_downs(
&self,
scan: &Scan,
table_schema: &TableSchema,
has_inner_column: bool,
has_virtual_column: bool,
) -> Result<PushDownInfo> {
let metadata = self.metadata.read().clone();
let projection = Self::build_projection(
&metadata,
table_schema,
scan.columns.iter(),
has_inner_column,
true,
true,
true,
);
let output_columns = if has_virtual_column {
Some(Self::build_projection(
&metadata,
table_schema,
scan.columns.iter(),
has_inner_column,
true,
false,
true,
))
} else {
None
};
let mut is_deterministic = true;
let push_down_filter = scan
.push_down_predicates
.as_ref()
.filter(|p| !p.is_empty())
.map(|predicates: &Vec<ScalarExpr>| -> Result<Filters> {
let predicates = predicates
.iter()
.map(|p| {
Ok(p.as_raw_expr()
.type_check(&metadata)?
.project_column_ref(|col| col.column_name.clone()))
})
.collect::<Result<Vec<_>>>()?;
let expr = predicates
.into_iter()
.try_reduce(|lhs, rhs| {
check_function(None, "and_filters", &[], &[lhs, rhs], &BUILTIN_FUNCTIONS)
})?
.unwrap();
let expr = cast_expr_to_non_null_boolean(expr)?;
let (expr, _) = ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS);
is_deterministic = expr.is_deterministic(&BUILTIN_FUNCTIONS);
let inverted_filter =
check_function(None, "not", &[], &[expr.clone()], &BUILTIN_FUNCTIONS)?;
Ok(Filters {
filter: expr.as_remote_expr(),
inverted_filter: inverted_filter.as_remote_expr(),
})
})
.transpose()?;
let prewhere_info = scan
.prewhere
.as_ref()
.map(|prewhere| -> Result<PrewhereInfo> {
let remain_columns = scan
.columns
.difference(&prewhere.prewhere_columns)
.copied()
.collect::<HashSet<usize>>();
let output_columns = Self::build_projection(
&metadata,
table_schema,
prewhere.output_columns.iter(),
has_inner_column,
true,
false,
true,
);
let prewhere_columns = Self::build_projection(
&metadata,
table_schema,
prewhere.prewhere_columns.iter(),
has_inner_column,
true,
true,
true,
);
let remain_columns = Self::build_projection(
&metadata,
table_schema,
remain_columns.iter(),
has_inner_column,
true,
true,
true,
);
let predicate = prewhere
.predicates
.iter()
.cloned()
.reduce(|lhs, rhs| {
ScalarExpr::FunctionCall(FunctionCall {
span: None,
func_name: "and_filters".to_string(),
params: vec![],
arguments: vec![lhs, rhs],
})
})
.expect("there should be at least one predicate in prewhere");
let filter = cast_expr_to_non_null_boolean(
predicate
.as_raw_expr()
.type_check(&metadata)?
.project_column_ref(|col| col.column_name.clone()),
)?;
let filter = filter.as_remote_expr();
let virtual_columns = self.build_virtual_columns(&prewhere.prewhere_columns);
Ok::<PrewhereInfo, ErrorCode>(PrewhereInfo {
output_columns,
prewhere_columns,
remain_columns,
filter,
virtual_columns,
})
})
.transpose()?;
let order_by = scan
.order_by
.clone()
.map(|items| {
items
.into_iter()
.map(|item| {
let metadata = self.metadata.read();
let column = metadata.column(item.index);
let (name, data_type) = match column {
ColumnEntry::BaseTableColumn(BaseTableColumn {
column_name,
data_type,
..
}) => (column_name.clone(), DataType::from(data_type)),
ColumnEntry::DerivedColumn(DerivedColumn {
alias, data_type, ..
}) => (alias.clone(), data_type.clone()),
ColumnEntry::InternalColumn(TableInternalColumn {
internal_column,
..
}) => (
internal_column.column_name().to_owned(),
internal_column.data_type(),
),
ColumnEntry::VirtualColumn(VirtualColumn {
column_name,
data_type,
..
}) => (column_name.clone(), DataType::from(data_type)),
};
// sort item is already a column
let scalar = RemoteExpr::ColumnRef {
span: None,
id: name.clone(),
data_type,
display_name: name,
};
Ok((scalar, item.asc, item.nulls_first))
})
.collect::<Result<Vec<_>>>()
})
.transpose()?;
Ok(PushDownInfo {
projection,
output_columns,
push_down_filter,
prewhere_info,
order_by,
is_deterministic,
})
}
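}
This code snippet shows how to build the `PushDownInfo` (projection, push-down filter, prewhere information, and ordering) for a scan. Additionally, you can refer to the similar issues and discussions mentioned above.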
Summary
Iceberg Rust has implemented scan planning, let's integrate with it directly instead.
Tasks
The text was updated successfully, but these errors were encountered: