From 920eabbfa080017f00b957ee6a6e2aeb2079b084 Mon Sep 17 00:00:00 2001
From: Oussama Saoudi
Date: Mon, 23 Sep 2024 21:15:53 -0700
Subject: [PATCH 1/4] Change OnceLock to LazyLock, update MSRV to 1.80

---
 Cargo.toml                                    |  2 +-
 datafusion-cli/src/main.rs                    | 34 +++++-----
 .../physical_plan/parquet/access_plan.rs      | 44 ++++++-------
 datafusion/core/tests/expr_api/mod.rs         | 66 +++++++++----------
 datafusion/core/tests/memory_limit/mod.rs     |  6 +-
 .../tests/parquet/external_access_plan.rs     | 64 +++++++++---------
 datafusion/expr/src/test/function_stub.rs     | 12 ++--
 datafusion/functions-aggregate/src/macros.rs  | 12 ++--
 datafusion/functions-nested/src/macros.rs     | 16 ++---
 datafusion/functions-window/src/row_number.rs | 12 ++--
 datafusion/functions/src/macros.rs            | 16 ++---
 .../functions/src/regex/regexpreplace.rs      |  7 +-
 .../physical-expr/src/utils/guarantee.rs      | 16 ++---
 .../engines/datafusion_engine/normalize.rs    | 47 ++++++-------
 14 files changed, 172 insertions(+), 182 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 629992177913..45e8f7dde915 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,7 +58,7 @@ homepage = "https://datafusion.apache.org"
 license = "Apache-2.0"
 readme = "README.md"
 repository = "https://github.com/apache/datafusion"
-rust-version = "1.78"
+rust-version = "1.80"
 version = "42.0.0"
 
 [workspace.dependencies]
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 4c6c352ff339..779e52226554 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -19,7 +19,7 @@ use std::collections::HashMap;
 use std::env;
 use std::path::Path;
 use std::process::ExitCode;
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 
 use datafusion::error::{DataFusionError, Result};
 use datafusion::execution::context::SessionConfig;
@@ -289,25 +289,25 @@ impl ByteUnit {
 
 fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
     fn byte_suffixes() -> &'static HashMap<&'static str, ByteUnit> {
-        static BYTE_SUFFIXES: OnceLock<HashMap<&'static str, ByteUnit>> = OnceLock::new();
-        BYTE_SUFFIXES.get_or_init(|| {
-            let mut m = HashMap::new();
-            m.insert("b", ByteUnit::Byte);
-            m.insert("k", ByteUnit::KiB);
-            m.insert("kb", ByteUnit::KiB);
-            m.insert("m", ByteUnit::MiB);
-            m.insert("mb", ByteUnit::MiB);
-            m.insert("g", ByteUnit::GiB);
-            m.insert("gb", ByteUnit::GiB);
-            m.insert("t", ByteUnit::TiB);
-            m.insert("tb", ByteUnit::TiB);
-            m
-        })
+        static BYTE_SUFFIXES: LazyLock<HashMap<&'static str, ByteUnit>> =
+            LazyLock::new(|| {
+                let mut m = HashMap::new();
+                m.insert("b", ByteUnit::Byte);
+                m.insert("k", ByteUnit::KiB);
+                m.insert("kb", ByteUnit::KiB);
+                m.insert("m", ByteUnit::MiB);
+                m.insert("mb", ByteUnit::MiB);
+                m.insert("g", ByteUnit::GiB);
+                m.insert("gb", ByteUnit::GiB);
+                m.insert("t", ByteUnit::TiB);
+                m.insert("tb", ByteUnit::TiB);
+                m
+            });
     }
 
     fn suffix_re() -> &'static regex::Regex {
-        static SUFFIX_REGEX: OnceLock<regex::Regex> = OnceLock::new();
-        SUFFIX_REGEX.get_or_init(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap())
+        static SUFFIX_REGEX: LazyLock<regex::Regex> =
+            LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap());
     }
 
     let lower = size.to_lowercase();
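Note: every hunk in this patch is the same mechanical rewrite, so it is worth seeing once in isolation. A minimal standalone sketch of the before/after shape (the `suffixes_*` names here are illustrative, not DataFusion APIs):

```rust
use std::collections::HashMap;
use std::sync::{LazyLock, OnceLock};

// Before: the static holds an empty cell, and every access site must go
// through `get_or_init`, supplying the initializer closure.
fn suffixes_once() -> &'static HashMap<&'static str, u64> {
    static CELL: OnceLock<HashMap<&'static str, u64>> = OnceLock::new();
    CELL.get_or_init(|| HashMap::from([("kb", 1u64 << 10), ("mb", 1 << 20)]))
}

// After: the initializer lives with the declaration and runs on first deref.
static SUFFIXES: LazyLock<HashMap<&'static str, u64>> =
    LazyLock::new(|| HashMap::from([("kb", 1u64 << 10), ("mb", 1 << 20)]));

fn suffixes_lazy() -> &'static HashMap<&'static str, u64> {
    &SUFFIXES // &LazyLock<T> deref-coerces to &T, and the static makes it 'static
}

fn main() {
    assert_eq!(suffixes_once().get("kb"), suffixes_lazy().get("kb"));
}
```

The win is locality: with `OnceLock` the cell and its initializer are separated, while `LazyLock` ties them together and removes the accessor boilerplate.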
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
index ea3030664b7b..6275b70d0790 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/access_plan.rs
@@ -345,7 +345,7 @@ mod test {
     use parquet::basic::LogicalType;
     use parquet::file::metadata::ColumnChunkMetaData;
     use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
-    use std::sync::{Arc, OnceLock};
+    use std::sync::{Arc, LazyLock};
 
     #[test]
     fn test_only_scans() {
@@ -511,31 +511,31 @@ mod test {
         assert_contains!(err, "Invalid ParquetAccessPlan Selection. Row group 1 has 20 rows but selection only specifies 22 rows");
     }
 
-    static ROW_GROUP_METADATA: OnceLock<Vec<RowGroupMetaData>> = OnceLock::new();
+    static ROW_GROUP_METADATA: LazyLock<Vec<RowGroupMetaData>> = LazyLock::new(|| {
+        let schema_descr = get_test_schema_descr();
+        let row_counts = [10, 20, 30, 40];
+
+        row_counts
+            .into_iter()
+            .map(|num_rows| {
+                let column = ColumnChunkMetaData::builder(schema_descr.column(0))
+                    .set_num_values(num_rows)
+                    .build()
+                    .unwrap();
+
+                RowGroupMetaData::builder(schema_descr.clone())
+                    .set_num_rows(num_rows)
+                    .set_column_metadata(vec![column])
+                    .build()
+                    .unwrap()
+            })
+            .collect()
+    });
 
     /// [`RowGroupMetaData`] that returns 4 row groups with 10, 20, 30, 40 rows
     /// respectively
     fn row_group_metadata() -> &'static [RowGroupMetaData] {
-        ROW_GROUP_METADATA.get_or_init(|| {
-            let schema_descr = get_test_schema_descr();
-            let row_counts = [10, 20, 30, 40];
-
-            row_counts
-                .into_iter()
-                .map(|num_rows| {
-                    let column = ColumnChunkMetaData::builder(schema_descr.column(0))
-                        .set_num_values(num_rows)
-                        .build()
-                        .unwrap();
-
-                    RowGroupMetaData::builder(schema_descr.clone())
-                        .set_num_rows(num_rows)
-                        .set_column_metadata(vec![column])
-                        .build()
-                        .unwrap()
-                })
-                .collect()
-        })
+        &ROW_GROUP_METADATA
    }
 
     /// Single column schema with a single column named "a" of type `BYTE_ARRAY`/`String`
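Note: `row_group_metadata` above returns `&'static [RowGroupMetaData]` even though the static now holds a `LazyLock<Vec<RowGroupMetaData>>`. That compiles because deref coercion chains: `&LazyLock<Vec<T>>` derefs to `&Vec<T>`, which coerces to `&[T]`, and borrowing a `static` yields a `'static` lifetime. The same shape in miniature (illustrative names, not the test code itself):

```rust
use std::sync::LazyLock;

static NUMS: LazyLock<Vec<i32>> = LazyLock::new(|| (1..=4).map(|i| i * 10).collect());

// &LazyLock<Vec<i32>> -> &Vec<i32> -> &[i32], all at the coercion site.
fn nums() -> &'static [i32] {
    &NUMS
}

fn main() {
    assert_eq!(nums(), &[10, 20, 30, 40]);
}
```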
a: "2021-02-03" } - let struct_array: ArrayRef = Arc::from(StructArray::from(vec![( - Arc::new(Field::new("a", DataType::Utf8, false)), - Arc::new(StringArray::from(vec![ - "2021-02-01", - "2021-02-02", - "2021-02-03", - ])) as _, - )])); - - // ["one"] ["two", "three", "four"] ["five"] - let mut builder = ListBuilder::new(StringBuilder::new()); - builder.append_value([Some("one")]); - builder.append_value([Some("two"), Some("three"), Some("four")]); - builder.append_value([Some("five")]); - let list_array: ArrayRef = Arc::new(builder.finish()); - - RecordBatch::try_from_iter(vec![ - ("id", string_array), - ("i", int_array), - ("props", struct_array), - ("list", list_array), - ]) - .unwrap() - }) - .clone() + TEST_BATCH.clone() } diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index 69ef6058a2f6..8cebf2731a2d 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -34,7 +34,7 @@ use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; use futures::StreamExt; use std::any::Any; use std::num::NonZeroUsize; -use std::sync::{Arc, OnceLock}; +use std::sync::{Arc, LazyLock}; use tokio::fs::File; use datafusion::datasource::streaming::StreamingTable; @@ -725,7 +725,7 @@ fn maybe_split_batches( .collect() } -static DICT_BATCHES: OnceLock> = OnceLock::new(); +static DICT_BATCHES: LazyLock> = LazyLock::new(make_dict_batches); /// Returns 5 sorted string dictionary batches each with 50 rows with /// this schema. @@ -733,7 +733,7 @@ static DICT_BATCHES: OnceLock> = OnceLock::new(); /// a: Dictionary, /// b: Dictionary, fn dict_batches() -> Vec { - DICT_BATCHES.get_or_init(make_dict_batches).clone() + DICT_BATCHES.clone() } fn make_dict_batches() -> Vec { diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 03afc858dfca..aecc289edc91 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -33,7 +33,7 @@ use datafusion_physical_plan::ExecutionPlan; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; -use std::sync::{Arc, OnceLock}; +use std::sync::{Arc, LazyLock}; use tempfile::NamedTempFile; #[tokio::test] @@ -369,45 +369,45 @@ struct TestData { file_size: u64, } -static TEST_DATA: OnceLock = OnceLock::new(); +static TEST_DATA: LazyLock = LazyLock::new(|| { + let scenario = Scenario::UTF8; + let row_per_group = 5; -/// Return a parquet file with 2 row groups each with 5 rows -fn get_test_data() -> &'static TestData { - TEST_DATA.get_or_init(|| { - let scenario = Scenario::UTF8; - let row_per_group = 5; + let mut temp_file = tempfile::Builder::new() + .prefix("user_access_plan") + .suffix(".parquet") + .tempfile() + .expect("tempfile creation"); - let mut temp_file = tempfile::Builder::new() - .prefix("user_access_plan") - .suffix(".parquet") - .tempfile() - .expect("tempfile creation"); + let props = WriterProperties::builder() + .set_max_row_group_size(row_per_group) + .build(); - let props = WriterProperties::builder() - .set_max_row_group_size(row_per_group) - .build(); + let batches = create_data_batch(scenario); + let schema = batches[0].schema(); - let batches = create_data_batch(scenario); - let schema = batches[0].schema(); + let mut writer = + ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap(); - let mut writer = - 
diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs
index 03afc858dfca..aecc289edc91 100644
--- a/datafusion/core/tests/parquet/external_access_plan.rs
+++ b/datafusion/core/tests/parquet/external_access_plan.rs
@@ -33,7 +33,7 @@ use datafusion_physical_plan::ExecutionPlan;
 use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
 use parquet::arrow::ArrowWriter;
 use parquet::file::properties::WriterProperties;
-use std::sync::{Arc, OnceLock};
+use std::sync::{Arc, LazyLock};
 use tempfile::NamedTempFile;
 
 #[tokio::test]
@@ -369,45 +369,45 @@ struct TestData {
     file_size: u64,
 }
 
-static TEST_DATA: OnceLock<TestData> = OnceLock::new();
+static TEST_DATA: LazyLock<TestData> = LazyLock::new(|| {
+    let scenario = Scenario::UTF8;
+    let row_per_group = 5;
 
-/// Return a parquet file with 2 row groups each with 5 rows
-fn get_test_data() -> &'static TestData {
-    TEST_DATA.get_or_init(|| {
-        let scenario = Scenario::UTF8;
-        let row_per_group = 5;
+    let mut temp_file = tempfile::Builder::new()
+        .prefix("user_access_plan")
+        .suffix(".parquet")
+        .tempfile()
+        .expect("tempfile creation");
 
-        let mut temp_file = tempfile::Builder::new()
-            .prefix("user_access_plan")
-            .suffix(".parquet")
-            .tempfile()
-            .expect("tempfile creation");
+    let props = WriterProperties::builder()
+        .set_max_row_group_size(row_per_group)
+        .build();
 
-        let props = WriterProperties::builder()
-            .set_max_row_group_size(row_per_group)
-            .build();
+    let batches = create_data_batch(scenario);
+    let schema = batches[0].schema();
 
-        let batches = create_data_batch(scenario);
-        let schema = batches[0].schema();
+    let mut writer =
+        ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
 
-        let mut writer =
-            ArrowWriter::try_new(&mut temp_file, schema.clone(), Some(props)).unwrap();
+    for batch in batches {
+        writer.write(&batch).expect("writing batch");
+    }
+    writer.close().unwrap();
 
-        for batch in batches {
-            writer.write(&batch).expect("writing batch");
-        }
-        writer.close().unwrap();
+    let file_name = temp_file.path().to_string_lossy().to_string();
+    let file_size = temp_file.path().metadata().unwrap().len();
 
-        let file_name = temp_file.path().to_string_lossy().to_string();
-        let file_size = temp_file.path().metadata().unwrap().len();
+    TestData {
+        temp_file,
+        schema,
+        file_name,
+        file_size,
+    }
+});
 
-        TestData {
-            temp_file,
-            schema,
-            file_name,
-            file_size,
-        }
-    })
+/// Return a parquet file with 2 row groups each with 5 rows
+fn get_test_data() -> &'static TestData {
+    &TEST_DATA
 }
 
 /// Return the total value of the specified metric name
diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs
index b4f768085fcc..ad0a72afa546 100644
--- a/datafusion/expr/src/test/function_stub.rs
+++ b/datafusion/expr/src/test/function_stub.rs
@@ -43,16 +43,14 @@ macro_rules! create_func {
             /// Singleton instance of [$UDAF], ensures the UDAF is only created once
             /// named STATIC_$(UDAF). For example `STATIC_FirstValue`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<crate::AggregateUDF>> =
-                std::sync::OnceLock::new();
+            static [< STATIC_ $UDAF >]: std::sync::LazyLock<std::sync::Arc<crate::AggregateUDF>> =
+                std::sync::LazyLock::new(|| {
+                    std::sync::Arc::new(crate::AggregateUDF::from(<$UDAF>::default()))
+                });
 
             #[doc = concat!("AggregateFunction that returns a [AggregateUDF](crate::AggregateUDF) for [`", stringify!($UDAF), "`]")]
             pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<crate::AggregateUDF> {
-                [< STATIC_ $UDAF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(crate::AggregateUDF::from(<$UDAF>::default()))
-                    })
-                    .clone()
+                [< STATIC_ $UDAF >].clone()
             }
         }
     }
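Note: the `create_func!` hunks (here and in the two macro files that follow) are easier to read with the macro expanded. Roughly what one expansion reduces to after this patch — a compilable stand-in, since the real `AggregateUDF` lives in `datafusion_expr` and `FirstValue` here is only a placeholder:

```rust
use std::sync::{Arc, LazyLock};

#[derive(Debug, Default)]
struct FirstValue; // stand-in for an actual UDAF implementation

// The Arc is built exactly once; every accessor call just clones the handle.
#[allow(non_upper_case_globals)]
static STATIC_FirstValue: LazyLock<Arc<FirstValue>> =
    LazyLock::new(|| Arc::new(FirstValue::default()));

fn first_value_udaf() -> Arc<FirstValue> {
    // LazyLock has no Clone impl, so `.clone()` auto-derefs and resolves
    // to Arc::clone on the cached value.
    STATIC_FirstValue.clone()
}

fn main() {
    let a = first_value_udaf();
    let b = first_value_udaf();
    assert!(Arc::ptr_eq(&a, &b)); // same shared singleton every time
}
```

The same deref trick is what makes one-liners like `TEST_BATCH.clone()` and `[< STATIC_ $UDAF >].clone()` work throughout this patch.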
diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs
index 573b9fd5bdb2..aacebb692bcd 100644
--- a/datafusion/functions-aggregate/src/macros.rs
+++ b/datafusion/functions-aggregate/src/macros.rs
@@ -85,16 +85,14 @@ macro_rules! create_func {
             /// Singleton instance of [$UDAF], ensures the UDAF is only created once
             /// named STATIC_$(UDAF). For example `STATIC_FirstValue`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::AggregateUDF>> =
-                std::sync::OnceLock::new();
+            static [< STATIC_ $UDAF >]: std::sync::LazyLock<std::sync::Arc<datafusion_expr::AggregateUDF>> =
+                std::sync::LazyLock::new(|| {
+                    std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE))
+                });
 
             #[doc = concat!("AggregateFunction that returns a [`AggregateUDF`](datafusion_expr::AggregateUDF) for [`", stringify!($UDAF), "`]")]
             pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<datafusion_expr::AggregateUDF> {
-                [< STATIC_ $UDAF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE))
-                    })
-                    .clone()
+                [< STATIC_ $UDAF >].clone()
             }
         }
     }
diff --git a/datafusion/functions-nested/src/macros.rs b/datafusion/functions-nested/src/macros.rs
index 00247f39ac10..a2c60feed473 100644
--- a/datafusion/functions-nested/src/macros.rs
+++ b/datafusion/functions-nested/src/macros.rs
@@ -88,19 +88,17 @@ macro_rules! create_func {
             /// Singleton instance of [`$UDF`], ensures the UDF is only created once
             /// named STATIC_$(UDF). For example `STATIC_ArrayToString`
             #[allow(non_upper_case_globals)]
-            static [< STATIC_ $UDF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
-                std::sync::OnceLock::new();
+            static [< STATIC_ $UDF >]: std::sync::LazyLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
+                std::sync::LazyLock::new(|| {
+                    std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
+                        <$UDF>::new(),
+                    ))
+                });
 
             #[doc = concat!("ScalarFunction that returns a [`ScalarUDF`](datafusion_expr::ScalarUDF) for ")]
             #[doc = stringify!($UDF)]
             pub fn $SCALAR_UDF_FN() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
-                [< STATIC_ $UDF >]
-                    .get_or_init(|| {
-                        std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
-                            <$UDF>::new(),
-                        ))
-                    })
-                    .clone()
+                [< STATIC_ $UDF >].clone()
             }
         }
     };
diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs
index 7f348bf9d2a0..28dd80717f76 100644
--- a/datafusion/functions-window/src/row_number.rs
+++ b/datafusion/functions-window/src/row_number.rs
@@ -40,17 +40,15 @@ pub fn row_number() -> Expr {
 
 /// Singleton instance of `row_number`, ensures the UDWF is only created once.
 #[allow(non_upper_case_globals)]
-static STATIC_RowNumber: std::sync::OnceLock<std::sync::Arc<datafusion_expr::WindowUDF>> =
-    std::sync::OnceLock::new();
+static STATIC_RowNumber: std::sync::LazyLock<std::sync::Arc<datafusion_expr::WindowUDF>> =
+    std::sync::LazyLock::new(|| {
+        std::sync::Arc::new(datafusion_expr::WindowUDF::from(RowNumber::default()))
+    });
 
 /// Returns a [`WindowUDF`](datafusion_expr::WindowUDF) for `row_number`
 /// user-defined window function.
 pub fn row_number_udwf() -> std::sync::Arc<datafusion_expr::WindowUDF> {
-    STATIC_RowNumber
-        .get_or_init(|| {
-            std::sync::Arc::new(datafusion_expr::WindowUDF::from(RowNumber::default()))
-        })
-        .clone()
+    STATIC_RowNumber.clone()
 }
 
 /// row_number expression
diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs
index e47818bc86a4..671d85170f75 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -72,19 +72,17 @@ macro_rules! export_functions {
 macro_rules! make_udf_function {
     ($UDF:ty, $GNAME:ident, $NAME:ident) => {
         /// Singleton instance of the function
-        static $GNAME: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
-            std::sync::OnceLock::new();
+        static $GNAME: std::sync::LazyLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
+            std::sync::LazyLock::new(|| {
+                std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
+                    <$UDF>::new(),
+                ))
+            });
 
         #[doc = "Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation "]
         #[doc = stringify!($UDF)]
         pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
-            $GNAME
-                .get_or_init(|| {
-                    std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
-                        <$UDF>::new(),
-                    ))
-                })
-                .clone()
+            $GNAME.clone()
         }
     };
 }
diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs
index 3eb72a1fb5f5..0495e7d5ec96 100644
--- a/datafusion/functions/src/regex/regexpreplace.rs
+++ b/datafusion/functions/src/regex/regexpreplace.rs
@@ -39,7 +39,7 @@ use regex::Regex;
 use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 
 #[derive(Debug)]
 pub struct RegexpReplaceFunc {
     signature: Signature,
@@ -140,8 +140,9 @@ fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
 /// used by regexp_replace
 fn regex_replace_posix_groups(replacement: &str) -> String {
     fn capture_groups_re() -> &'static Regex {
-        static CAPTURE_GROUPS_RE_LOCK: OnceLock<Regex> = OnceLock::new();
-        CAPTURE_GROUPS_RE_LOCK.get_or_init(|| Regex::new(r"(\\)(\d*)").unwrap())
+        static CAPTURE_GROUPS_RE_LOCK: LazyLock<Regex> =
+            LazyLock::new(|| Regex::new(r"(\\)(\d*)").unwrap());
+        &CAPTURE_GROUPS_RE_LOCK
     }
     capture_groups_re()
         .replace_all(replacement, "$${$2}")
diff --git a/datafusion/physical-expr/src/utils/guarantee.rs b/datafusion/physical-expr/src/utils/guarantee.rs
index cd1597217c83..9e76c30a3c05 100644
--- a/datafusion/physical-expr/src/utils/guarantee.rs
+++ b/datafusion/physical-expr/src/utils/guarantee.rs
@@ -415,7 +415,7 @@ impl<'a> ColOpLit<'a> {
 
 #[cfg(test)]
 mod test {
-    use std::sync::OnceLock;
+    use std::sync::LazyLock;
 
     use super::*;
     use crate::planner::logical2physical;
@@ -866,13 +866,13 @@ mod test {
 
     // Schema for testing
     fn schema() -> SchemaRef {
-        Arc::clone(SCHEMA.get_or_init(|| {
-            Arc::new(Schema::new(vec![
-                Field::new("a", DataType::Utf8, false),
-                Field::new("b", DataType::Int32, false),
-            ]))
-        }))
+        SCHEMA.clone()
     }
 
-    static SCHEMA: OnceLock<SchemaRef> = OnceLock::new();
+    static SCHEMA: LazyLock<SchemaRef> = LazyLock::new(|| {
+        Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Utf8, false),
+            Field::new("b", DataType::Int32, false),
+        ]))
+    });
 }
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index b6b583b9fbdb..e5a3503afc0d 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -21,7 +21,7 @@ use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
 use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
 use datafusion_common::DataFusionError;
 use std::path::PathBuf;
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 
 use crate::engines::output::DFColumnType;
 
@@ -140,31 +140,32 @@ fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
 
 /// return the location of the datafusion checkout
 fn workspace_root() -> &'static object_store::path::Path {
-    static WORKSPACE_ROOT_LOCK: OnceLock<object_store::path::Path> = OnceLock::new();
-    WORKSPACE_ROOT_LOCK.get_or_init(|| {
-        // e.g. /Software/datafusion/datafusion/core
-        let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    static WORKSPACE_ROOT_LOCK: LazyLock<object_store::path::Path> =
+        LazyLock::new(|| {
+            // e.g. /Software/datafusion/datafusion/core
+            let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
 
-        // e.g. /Software/datafusion/datafusion
-        let workspace_root = dir
-            .parent()
-            .expect("Can not find parent of datafusion/core")
-            // e.g. /Software/datafusion
-            .parent()
-            .expect("parent of datafusion")
-            .to_string_lossy();
+            // e.g. /Software/datafusion/datafusion
+            let workspace_root = dir
+                .parent()
+                .expect("Can not find parent of datafusion/core")
+                // e.g. /Software/datafusion
+                .parent()
+                .expect("parent of datafusion")
+                .to_string_lossy();
 
-        let sanitized_workplace_root = if cfg!(windows) {
-            // Object store paths are delimited with `/`, e.g. `/datafusion/datafusion/testing/data/csv/aggregate_test_100.csv`.
-            // The default windows delimiter is `\`, so the workplace path is `datafusion\datafusion`.
-            workspace_root
-                .replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
-        } else {
-            workspace_root.to_string()
-        };
+            let sanitized_workplace_root = if cfg!(windows) {
+                // Object store paths are delimited with `/`, e.g. `/datafusion/datafusion/testing/data/csv/aggregate_test_100.csv`.
+                // The default windows delimiter is `\`, so the workplace path is `datafusion\datafusion`.
+                workspace_root
+                    .replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
+            } else {
+                workspace_root.to_string()
+            };
 
-        object_store::path::Path::parse(sanitized_workplace_root).unwrap()
-    })
+            object_store::path::Path::parse(sanitized_workplace_root).unwrap()
+        });
+    &WORKSPACE_ROOT_LOCK
 }
 
 /// Convert a single batch to a `Vec<Vec<String>>` for comparison
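Note: as committed, PATCH 1/4 does not build — `byte_suffixes()` and `suffix_re()` in `datafusion-cli/src/main.rs` now end at the `static` item, so each function body evaluates to `()` and rustc reports a type mismatch against the declared `&'static ...` return type (roughly "expected `&Regex`, found `()`"). PATCH 2/4 below restores the missing tail expressions. The fixed shape of one helper, assuming the `regex` crate is on the dependency list as it already is for datafusion-cli:

```rust
use std::sync::LazyLock;

fn suffix_re() -> &'static regex::Regex {
    static SUFFIX_REGEX: LazyLock<regex::Regex> =
        LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap());
    &SUFFIX_REGEX // the tail expression PATCH 1 dropped
}

fn main() {
    assert!(suffix_re().is_match("128mb"));
}
```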
From 2e1cb36bfbc244500f9dfe3fd79ce23ae445864a Mon Sep 17 00:00:00 2001
From: Oussama Saoudi
Date: Mon, 23 Sep 2024 21:45:41 -0700
Subject: [PATCH 2/4] Fix failing datafusion-cli

---
 datafusion-cli/Cargo.toml  | 2 +-
 datafusion-cli/src/main.rs | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index b86dbd2a3802..e29964786769 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -26,7 +26,7 @@ license = "Apache-2.0"
 homepage = "https://datafusion.apache.org"
 repository = "https://github.com/apache/datafusion"
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.78"
+rust-version = "1.80"
 readme = "README.md"
 
 [dependencies]
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 779e52226554..58d237fd8272 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -303,11 +303,13 @@ fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
                 m.insert("tb", ByteUnit::TiB);
                 m
             });
+        &BYTE_SUFFIXES
     }
 
     fn suffix_re() -> &'static regex::Regex {
         static SUFFIX_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap());
+        &SUFFIX_REGEX
     }
 
     let lower = size.to_lowercase();
# "Unable to find key 'package.rust-version' (or 'package.metadata.msrv') in 'arrow-datafusion/Cargo.toml'" # https://github.com/foresterre/cargo-msrv/issues/590 -rust-version = "1.78" +rust-version = "1.80" [lints] workspace = true From ec7101013f4162c088c304a11567a1cf769b9a72 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 23 Sep 2024 22:10:52 -0700 Subject: [PATCH 4/4] Fix MSRV issues in substrait and proto --- datafusion/proto/Cargo.toml | 2 +- datafusion/substrait/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index d65c6ccaa660..ae4d7b5736e0 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -27,7 +27,7 @@ repository = { workspace = true } license = { workspace = true } authors = { workspace = true } # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.78" +rust-version = "1.80" # Exclude proto files so crates.io consumers don't need protoc exclude = ["*.proto"] diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 6f8f81401f3b..8570195248e4 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -26,7 +26,7 @@ repository = { workspace = true } license = { workspace = true } authors = { workspace = true } # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.78" +rust-version = "1.80" [lints] workspace = true