From 43eb318172eab11f2c557ba6ff5398df42e99477 Mon Sep 17 00:00:00 2001 From: MianChen <283559115@qq.com> Date: Tue, 12 Mar 2024 11:34:42 +0800 Subject: [PATCH] feat: optimize minor issues (#1496) ## Rationale Related with https://github.com/apache/incubator-horaedb/issues/1466 ## Detailed Changes 1. Make execution_props an arguments to logical2physical. 2. Make scan_batch_size NonZeroUsize ## Test Plan Manual test --- src/analytic_engine/src/manifest/details.rs | 7 +++---- .../parquet_ext/src/prune/min_max.rs | 18 +++++++++++------- src/components/tracing_util/src/logging.rs | 4 ---- src/table_engine/src/partition/rule/key.rs | 2 -- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/analytic_engine/src/manifest/details.rs b/src/analytic_engine/src/manifest/details.rs index 1d1c27fcb5..f9218c56a6 100644 --- a/src/analytic_engine/src/manifest/details.rs +++ b/src/analytic_engine/src/manifest/details.rs @@ -376,8 +376,7 @@ pub struct Options { pub scan_timeout: ReadableDuration, /// Batch size to read manifest entries - // TODO: use NonZeroUsize - pub scan_batch_size: usize, + pub scan_batch_size: NonZeroUsize, /// Timeout to store manifest entries pub store_timeout: ReadableDuration, @@ -388,7 +387,7 @@ impl Default for Options { Self { snapshot_every_n_updates: NonZeroUsize::new(100).unwrap(), scan_timeout: ReadableDuration::secs(5), - scan_batch_size: 100, + scan_batch_size: NonZeroUsize::new(100).unwrap(), store_timeout: ReadableDuration::secs(5), } } @@ -690,7 +689,7 @@ impl MetaUpdateLogStore for WalBasedLogStore { async fn scan(&self, start: ReadBoundary) -> Result { let ctx = ReadContext { timeout: self.opts.scan_timeout.0, - batch_size: self.opts.scan_batch_size, + batch_size: self.opts.scan_batch_size.into(), }; let read_req = ReadRequest { diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs index 0a717021a1..25e774b2d8 100644 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ b/src/components/parquet_ext/src/prune/min_max.rs @@ -59,8 +59,9 @@ fn filter_row_groups_inner( row_groups: &[RowGroupMetaData], ) -> Vec { let mut results = vec![true; row_groups.len()]; + let execution_props = ExecutionProps::new(); for expr in exprs { - match logical2physical(expr, &schema) + match logical2physical(expr, &schema, &execution_props) .and_then(|physical_expr| PruningPredicate::try_new(physical_expr, schema.clone())) { Ok(pruning_predicate) => { @@ -86,12 +87,15 @@ fn filter_row_groups_inner( results } -fn logical2physical(expr: &Expr, schema: &ArrowSchema) -> DataFusionResult> { - schema.clone().to_dfschema().and_then(|df_schema| { - // TODO: props should be an argument - let execution_props = ExecutionProps::new(); - create_physical_expr(expr, &df_schema, schema, &execution_props) - }) +fn logical2physical( + expr: &Expr, + schema: &ArrowSchema, + execution_props: &ExecutionProps, +) -> DataFusionResult> { + schema + .clone() + .to_dfschema() + .and_then(|df_schema| create_physical_expr(expr, &df_schema, schema, execution_props)) } fn build_row_group_predicate( diff --git a/src/components/tracing_util/src/logging.rs b/src/components/tracing_util/src/logging.rs index 07b57bbfc2..9770569ba9 100644 --- a/src/components/tracing_util/src/logging.rs +++ b/src/components/tracing_util/src/logging.rs @@ -126,10 +126,6 @@ pub fn init_tracing_with_file(config: &Config, node_addr: &str, rotation: Rotati .with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE); let subscriber = Registry::default().with(f_layer); - // TODO: subscriber.with(layer1) has the different type with - // subscriber.with(layer1).with(layer2)... - // So left some duplicated codes here. Maybe we can use marco to simplify - // it. match &config.console { Some(console) => { let console_addr = format!("{}:{}", node_addr, console.port); diff --git a/src/table_engine/src/partition/rule/key.rs b/src/table_engine/src/partition/rule/key.rs index a05ef011ed..70a3038f6e 100644 --- a/src/table_engine/src/partition/rule/key.rs +++ b/src/table_engine/src/partition/rule/key.rs @@ -237,8 +237,6 @@ fn expand_partition_keys_group<'a>( for filter_idx in group { let filter = &filters[*filter_idx]; let datums = match &filter.condition { - // Only `Eq` is supported now. - // TODO: to support `In`'s extracting. PartitionCondition::Eq(datum) => vec![datum.as_view()], PartitionCondition::In(datums) => datums.iter().map(Datum::as_view).collect_vec(), _ => {