From 8955e7b57a083bb0d21f4d9984307255e23ac053 Mon Sep 17 00:00:00 2001 From: Vivek Khatri Date: Fri, 28 Jun 2024 12:05:36 +0530 Subject: [PATCH] Remove #[allow(dead_code)] from the codebase (#421) * Remove #[allow(dead_code)] from the codebase * Remove: dead_code, move: avroschema fn to test * Fix checks and code style, remove unused code * Change function name --- crates/iceberg/src/avro/mod.rs | 1 - crates/iceberg/src/avro/schema.rs | 115 +++++++++--------- .../src/expr/visitors/expression_evaluator.rs | 11 +- .../visitors/inclusive_metrics_evaluator.rs | 69 ----------- .../src/expr/visitors/manifest_evaluator.rs | 10 +- crates/iceberg/src/lib.rs | 2 - crates/iceberg/src/scan.rs | 6 +- 7 files changed, 70 insertions(+), 144 deletions(-) diff --git a/crates/iceberg/src/avro/mod.rs b/crates/iceberg/src/avro/mod.rs index bdccb2ff4..f2a9310e7 100644 --- a/crates/iceberg/src/avro/mod.rs +++ b/crates/iceberg/src/avro/mod.rs @@ -16,6 +16,5 @@ // under the License. //! Avro related codes. -#[allow(dead_code)] mod schema; pub(crate) use schema::*; diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 636f1283c..d83cdc36b 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -293,39 +293,6 @@ pub(crate) trait AvroSchemaVisitor { fn primitive(&mut self, schema: &AvroSchema) -> Result; } -/// Visit avro schema in post order visitor. -pub(crate) fn visit(schema: &AvroSchema, visitor: &mut V) -> Result { - match schema { - AvroSchema::Record(record) => { - let field_results = record - .fields - .iter() - .map(|f| visit(&f.schema, visitor)) - .collect::>>()?; - - visitor.record(record, field_results) - } - AvroSchema::Union(union) => { - let option_results = union - .variants() - .iter() - .map(|f| visit(f, visitor)) - .collect::>>()?; - - visitor.union(union, option_results) - } - AvroSchema::Array(item) => { - let item_result = visit(item, visitor)?; - visitor.array(schema, item_result) - } - AvroSchema::Map(inner) => { - let item_result = visit(inner, visitor)?; - visitor.map(schema, item_result) - } - schema => visitor.primitive(schema), - } -} - struct AvroSchemaToSchema { next_id: i32, } @@ -496,29 +463,6 @@ impl AvroSchemaVisitor for AvroSchemaToSchema { } } -/// Converts avro schema to iceberg schema. -pub(crate) fn avro_schema_to_schema(avro_schema: &AvroSchema) -> Result { - if let AvroSchema::Record(_) = avro_schema { - let mut converter = AvroSchemaToSchema { next_id: 0 }; - let typ = visit(avro_schema, &mut converter)?.expect("Iceberg schema should not be none."); - if let Type::Struct(s) = typ { - Schema::builder() - .with_fields(s.fields().iter().cloned()) - .build() - } else { - Err(Error::new( - ErrorKind::Unexpected, - format!("Expected to convert avro record schema to struct type, but {typ}"), - )) - } - } else { - Err(Error::new( - ErrorKind::DataInvalid, - "Can't convert non record avro schema to iceberg schema: {avro_schema}", - )) - } -} - #[cfg(test)] mod tests { use super::*; @@ -528,6 +472,65 @@ mod tests { use apache_avro::Schema as AvroSchema; use std::fs::read_to_string; + /// Visit avro schema in post order visitor. + pub(crate) fn visit( + schema: &AvroSchema, + visitor: &mut V, + ) -> Result { + match schema { + AvroSchema::Record(record) => { + let field_results = record + .fields + .iter() + .map(|f| visit(&f.schema, visitor)) + .collect::>>()?; + + visitor.record(record, field_results) + } + AvroSchema::Union(union) => { + let option_results = union + .variants() + .iter() + .map(|f| visit(f, visitor)) + .collect::>>()?; + + visitor.union(union, option_results) + } + AvroSchema::Array(item) => { + let item_result = visit(item, visitor)?; + visitor.array(schema, item_result) + } + AvroSchema::Map(inner) => { + let item_result = visit(inner, visitor)?; + visitor.map(schema, item_result) + } + schema => visitor.primitive(schema), + } + } + /// Converts avro schema to iceberg schema. + pub(crate) fn avro_schema_to_schema(avro_schema: &AvroSchema) -> Result { + if let AvroSchema::Record(_) = avro_schema { + let mut converter = AvroSchemaToSchema { next_id: 0 }; + let typ = + visit(avro_schema, &mut converter)?.expect("Iceberg schema should not be none."); + if let Type::Struct(s) = typ { + Schema::builder() + .with_fields(s.fields().iter().cloned()) + .build() + } else { + Err(Error::new( + ErrorKind::Unexpected, + format!("Expected to convert avro record schema to struct type, but {typ}"), + )) + } + } else { + Err(Error::new( + ErrorKind::DataInvalid, + "Can't convert non record avro schema to iceberg schema: {avro_schema}", + )) + } + } + fn read_test_data_file_to_avro_schema(filename: &str) -> AvroSchema { let input = read_to_string(format!( "{}/testdata/{}", diff --git a/crates/iceberg/src/expr/visitors/expression_evaluator.rs b/crates/iceberg/src/expr/visitors/expression_evaluator.rs index b69d093bd..81e91f3ee 100644 --- a/crates/iceberg/src/expr/visitors/expression_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/expression_evaluator.rs @@ -47,7 +47,7 @@ impl ExpressionEvaluator { /// to see if this [`DataFile`] could possibly contain data that matches /// the scan's filter. pub(crate) fn eval(&self, data_file: &DataFile) -> Result { - let mut visitor = ExpressionEvaluatorVisitor::new(self, data_file.partition()); + let mut visitor = ExpressionEvaluatorVisitor::new(data_file.partition()); visit(&mut visitor, &self.partition_filter) } @@ -58,19 +58,14 @@ impl ExpressionEvaluator { /// specifically for data file partitions. #[derive(Debug)] struct ExpressionEvaluatorVisitor<'a> { - /// Reference to an [`ExpressionEvaluator`]. - expression_evaluator: &'a ExpressionEvaluator, /// Reference to a [`DataFile`]'s partition [`Struct`]. partition: &'a Struct, } impl<'a> ExpressionEvaluatorVisitor<'a> { /// Creates a new [`ExpressionEvaluatorVisitor`]. - fn new(expression_evaluator: &'a ExpressionEvaluator, partition: &'a Struct) -> Self { - Self { - expression_evaluator, - partition, - } + fn new(partition: &'a Struct) -> Self { + Self { partition } } } diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs index 5f73f2f84..8d45fa29d 100644 --- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs @@ -1206,64 +1206,6 @@ mod test { assert!(result, "Should read: id above upper bound"); } - fn test_case_insensitive_integer_not_eq_rewritten() { - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MIN_VALUE - 25), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id below lower bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MIN_VALUE - 1), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id below lower bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MIN_VALUE), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id equal to lower bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MAX_VALUE - 4), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id between lower and upper bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MAX_VALUE), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id equal to upper bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MAX_VALUE + 1), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id above upper bound"); - - let result = InclusiveMetricsEvaluator::eval( - &equal_int_not_case_insensitive("ID", INT_MAX_VALUE + 6), - &get_test_file_1(), - true, - ) - .unwrap(); - assert!(result, "Should read: id above upper bound"); - } - #[test] #[should_panic] fn test_case_sensitive_integer_not_eq_rewritten() { @@ -1882,17 +1824,6 @@ mod test { filter.bind(schema.clone(), true).unwrap() } - fn equal_int_not_case_insensitive(reference: &str, int_literal: i32) -> BoundPredicate { - let schema = create_test_schema(); - let filter = Predicate::Binary(BinaryExpression::new( - Eq, - Reference::new(reference), - Datum::int(int_literal), - )) - .not(); - filter.bind(schema.clone(), false).unwrap() - } - fn not_equal_int(reference: &str, int_literal: i32) -> BoundPredicate { let schema = create_test_schema(); let filter = Predicate::Binary(BinaryExpression::new( diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index 30ae58f3b..eb770ea2c 100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -46,23 +46,19 @@ impl ManifestEvaluator { return Ok(true); } - let mut evaluator = ManifestFilterVisitor::new(self, &manifest_file.partitions); + let mut evaluator = ManifestFilterVisitor::new(&manifest_file.partitions); visit(&mut evaluator, &self.partition_filter) } } struct ManifestFilterVisitor<'a> { - manifest_evaluator: &'a ManifestEvaluator, partitions: &'a Vec, } impl<'a> ManifestFilterVisitor<'a> { - fn new(manifest_evaluator: &'a ManifestEvaluator, partitions: &'a Vec) -> Self { - ManifestFilterVisitor { - manifest_evaluator, - partitions, - } + fn new(partitions: &'a Vec) -> Self { + ManifestFilterVisitor { partitions } } } diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index b7e0b3be0..475a0584a 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -38,7 +38,6 @@ pub use catalog::TableIdent; pub use catalog::TableRequirement; pub use catalog::TableUpdate; -#[allow(dead_code)] pub mod table; mod avro; @@ -47,7 +46,6 @@ pub mod spec; pub mod scan; -#[allow(dead_code)] pub mod expr; pub mod transaction; pub mod transform; diff --git a/crates/iceberg/src/scan.rs b/crates/iceberg/src/scan.rs index 8d3ef10b4..730e3dadf 100644 --- a/crates/iceberg/src/scan.rs +++ b/crates/iceberg/src/scan.rs @@ -214,7 +214,6 @@ impl<'a> TableScanBuilder<'a> { /// Table scan. #[derive(Debug)] -#[allow(dead_code)] pub struct TableScan { snapshot: SnapshotRef, table_metadata: TableMetadataRef, @@ -346,6 +345,11 @@ impl TableScan { } false } + + /// Returns a reference to the column names of the table scan. + pub fn column_names(&self) -> &[String] { + &self.column_names + } } /// Holds the context necessary for file scanning operations