Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid boxing config keys #4762

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 25 additions & 20 deletions datafusion/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use itertools::Itertools;
use log::warn;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::env;
use std::fmt::{Debug, Formatter};
Expand Down Expand Up @@ -134,9 +135,9 @@ pub const OPT_HASH_JOIN_SINGLE_PARTITION_THRESHOLD: &str =
/// Definition of a configuration option
pub struct ConfigDefinition {
/// key used to identifier this configuration option
key: String,
key: Cow<'static, str>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would you think about using Arc here instead?

Arc might would be more consistent with the rest of the DataFusion codebase as well as help systems like Ballista where the configuration was serialized / deserialized.

I believe with a Cow and without any extra work, the deserialized ConfigOptions will still be owned (and copied) Strings

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... You raise a good point r.e. serialization, perhaps we should just make ConfigOptions a struct... I'll leave this open if anyone else has any thoughts

/// Description to be used in generated documentation
description: String,
description: Cow<'static, str>,
/// Data type of this option
data_type: DataType,
/// Default value
Expand All @@ -162,8 +163,8 @@ macro_rules! get_conf_value {
impl ConfigDefinition {
/// Create a configuration option definition
pub fn new(
name: impl Into<String>,
description: impl Into<String>,
name: impl Into<Cow<'static, str>>,
description: impl Into<Cow<'static, str>>,
data_type: DataType,
default_value: ScalarValue,
) -> Self {
Expand All @@ -177,8 +178,8 @@ impl ConfigDefinition {

/// Create a configuration option definition with a boolean value
pub fn new_bool(
key: impl Into<String>,
description: impl Into<String>,
key: impl Into<Cow<'static, str>>,
description: impl Into<Cow<'static, str>>,
default_value: bool,
) -> Self {
Self::new(
Expand All @@ -191,8 +192,8 @@ impl ConfigDefinition {

/// Create a configuration option definition with a u64 value
pub fn new_u64(
key: impl Into<String>,
description: impl Into<String>,
key: impl Into<Cow<'static, str>>,
description: impl Into<Cow<'static, str>>,
default_value: u64,
) -> Self {
Self::new(
Expand All @@ -205,8 +206,8 @@ impl ConfigDefinition {

/// Create a configuration option definition with a string value
pub fn new_string(
key: impl Into<String>,
description: impl Into<String>,
key: impl Into<Cow<'static, str>>,
description: impl Into<Cow<'static, str>>,
default_value: Option<String>,
) -> Self {
Self::new(
Expand Down Expand Up @@ -426,7 +427,7 @@ impl BuiltInConfigs {
.map(normalize_for_display)
.collect();

for config in config_definitions.iter().sorted_by_key(|c| c.key.as_str()) {
for config in config_definitions.iter().sorted_by_key(|c| c.key.as_ref()) {
let _ = writeln!(
&mut docs,
"| {} | {} | {} | {} |",
Expand All @@ -450,7 +451,7 @@ fn normalize_for_display(mut v: ConfigDefinition) -> ConfigDefinition {
/// Configuration options struct. This can contain values for built-in and custom options
#[derive(Clone)]
pub struct ConfigOptions {
options: HashMap<String, ScalarValue>,
options: HashMap<Cow<'static, str>, ScalarValue>,
}

/// Print the configurations in an ordered way so that we can directly compare the equality of two ConfigOptions by their debug strings
Expand Down Expand Up @@ -514,28 +515,32 @@ impl ConfigOptions {
}

/// set a configuration option
pub fn set(&mut self, key: &str, value: ScalarValue) {
self.options.insert(key.to_string(), value);
pub fn set(&mut self, key: impl Into<Cow<'static, str>>, value: ScalarValue) {
self.options.insert(key.into(), value);
}

/// set a boolean configuration option
pub fn set_bool(&mut self, key: &str, value: bool) {
pub fn set_bool(&mut self, key: impl Into<Cow<'static, str>>, value: bool) {
self.set(key, ScalarValue::Boolean(Some(value)))
}

/// set a `u64` configuration option
pub fn set_u64(&mut self, key: &str, value: u64) {
pub fn set_u64(&mut self, key: impl Into<Cow<'static, str>>, value: u64) {
self.set(key, ScalarValue::UInt64(Some(value)))
}

/// set a `usize` configuration option
pub fn set_usize(&mut self, key: &str, value: usize) {
pub fn set_usize(&mut self, key: impl Into<Cow<'static, str>>, value: usize) {
let value: u64 = value.try_into().expect("convert u64 to usize");
self.set(key, ScalarValue::UInt64(Some(value)))
}

/// set a `String` configuration option
pub fn set_string(&mut self, key: &str, value: impl Into<String>) {
pub fn set_string(
&mut self,
key: impl Into<Cow<'static, str>>,
value: impl Into<String>,
) {
self.set(key, ScalarValue::Utf8(Some(value.into())))
}

Expand Down Expand Up @@ -566,7 +571,7 @@ impl ConfigOptions {
}

/// Access the underlying hashmap
pub fn options(&self) -> &HashMap<String, ScalarValue> {
pub fn options(&self) -> &HashMap<Cow<'static, str>, ScalarValue> {
&self.options
}

Expand All @@ -590,7 +595,7 @@ mod test {
);
let configs = BuiltInConfigs::default();
for config in configs.config_definitions {
assert!(docs.contains(&config.key));
assert!(docs.contains(config.key.as_ref()));
}
}

Expand Down
17 changes: 9 additions & 8 deletions datafusion/core/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::{
pub use datafusion_physical_expr::execution_props::ExecutionProps;
use datafusion_physical_expr::var_provider::is_system_variables;
use parking_lot::RwLock;
use std::borrow::Cow;
use std::str::FromStr;
use std::sync::Arc;
use std::{
Expand Down Expand Up @@ -391,7 +392,7 @@ impl SessionContext {
value,
))
})?;
config_options.set_bool(&variable, new_value);
config_options.set_bool(variable, new_value);
}

ScalarValue::UInt64(_) => {
Expand All @@ -401,7 +402,7 @@ impl SessionContext {
value,
))
})?;
config_options.set_u64(&variable, new_value);
config_options.set_u64(variable, new_value);
}

ScalarValue::Utf8(_) => {
Expand All @@ -411,7 +412,7 @@ impl SessionContext {
value,
))
})?;
config_options.set_string(&variable, new_value);
config_options.set_string(variable, new_value);
}

_ => {
Expand Down Expand Up @@ -1192,29 +1193,29 @@ impl SessionConfig {
}

/// Set a configuration option
pub fn set(mut self, key: &str, value: ScalarValue) -> Self {
pub fn set(mut self, key: impl Into<Cow<'static, str>>, value: ScalarValue) -> Self {
self.config_options.set(key, value);
self
}

/// Set a boolean configuration option
pub fn set_bool(self, key: &str, value: bool) -> Self {
pub fn set_bool(self, key: impl Into<Cow<'static, str>>, value: bool) -> Self {
self.set(key, ScalarValue::Boolean(Some(value)))
}

/// Set a generic `u64` configuration option
pub fn set_u64(self, key: &str, value: u64) -> Self {
pub fn set_u64(self, key: impl Into<Cow<'static, str>>, value: u64) -> Self {
self.set(key, ScalarValue::UInt64(Some(value)))
}

/// Set a generic `usize` configuration option
pub fn set_usize(self, key: &str, value: usize) -> Self {
pub fn set_usize(self, key: impl Into<Cow<'static, str>>, value: usize) -> Self {
let value: u64 = value.try_into().expect("convert usize to u64");
self.set(key, ScalarValue::UInt64(Some(value)))
}

/// Set a generic `str` configuration option
pub fn set_str(self, key: &str, value: &str) -> Self {
pub fn set_str(self, key: impl Into<Cow<'static, str>>, value: &str) -> Self {
self.set(key, ScalarValue::Utf8(Some(value.to_string())))
}

Expand Down