Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/update_arrow_53
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Aug 22, 2024
2 parents 82641d8 + b8b76bc commit c61b499
Show file tree
Hide file tree
Showing 35 changed files with 1,419 additions and 438 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ futures = "0.3"
half = { version = "2.2.1", default-features = false }
hashbrown = { version = "0.14.5", features = ["raw"] }
indexmap = "2.0.0"
itertools = "0.12"
itertools = "0.13"
log = "^0.4"
num_cpus = "1.13.0"
object_store = { version = "0.11.0", default-features = false }
Expand Down
25 changes: 8 additions & 17 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ impl FileGroupPartitioner {
},
)
.flatten()
.group_by(|(partition_idx, _)| *partition_idx)
.chunk_by(|(partition_idx, _)| *partition_idx)
.into_iter()
.map(|(_, group)| group.map(|(_, vals)| vals).collect_vec())
.collect_vec();
Expand Down
85 changes: 53 additions & 32 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,16 +481,22 @@ fn type_union_resolution_coercion(
}
}

/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
/// Unlike `coerced_from`, usually the coerced type is for comparison only.
/// For example, compare with Dictionary and Dictionary, only value type is what we care about
/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
/// comparison operation
///
/// Example comparison operations are `lhs = rhs` and `lhs > rhs`
///
/// Binary comparison kernels require the two arguments to be the (exact) same
/// data type. However, users can write queries where the two arguments are
/// different data types. In such cases, the data types are automatically cast
/// (coerced) to a single data type to pass to the kernels.
pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
if lhs_type == rhs_type {
// same type => equality is possible
return Some(lhs_type.clone());
}
binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
.or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, true))
.or_else(|| temporal_coercion_nonstrict_timezone(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| list_coercion(lhs_type, rhs_type))
Expand All @@ -501,7 +507,11 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
.or_else(|| struct_coercion(lhs_type, rhs_type))
}

/// Coerce `lhs_type` and `rhs_type` to a common type for value exprs
/// Coerce `lhs_type` and `rhs_type` to a common type for `VALUES` expression
///
/// For example `VALUES (1, 2), (3.0, 4.0)` where the first row is `Int32` and
/// the second row is `Float64` will coerce to `Float64`
///
pub fn values_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
if lhs_type == rhs_type {
// same type => equality is possible
Expand Down Expand Up @@ -883,7 +893,7 @@ fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) ->
///
/// Not all operators support dictionaries. If `preserve_dictionaries` is true,
/// dictionaries will be preserved where possible.
fn dictionary_coercion(
fn dictionary_comparison_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
preserve_dictionaries: bool,
Expand Down Expand Up @@ -912,26 +922,22 @@ fn dictionary_coercion(

/// Coercion rules for string concat.
/// This is a union of string coercion rules and specified rules:
/// 1. At lease one side of lhs and rhs should be string type (Utf8 / LargeUtf8)
/// 1. At least one side of lhs and rhs should be string type (Utf8 / LargeUtf8)
/// 2. Data type of the other side should be able to cast to string type
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
// If Utf8View is in any side, we coerce to Utf8.
// Ref: https://github.com/apache/datafusion/pull/11796
(Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => {
Some(Utf8)
string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
(Utf8View, from_type) | (from_type, Utf8View) => {
string_concat_internal_coercion(from_type, &Utf8View)
}
_ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
(Utf8, from_type) | (from_type, Utf8) => {
string_concat_internal_coercion(from_type, &Utf8)
}
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
string_concat_internal_coercion(from_type, &LargeUtf8)
}
_ => None,
}),
}
(Utf8, from_type) | (from_type, Utf8) => {
string_concat_internal_coercion(from_type, &Utf8)
}
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
string_concat_internal_coercion(from_type, &LargeUtf8)
}
_ => None,
})
}

fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
Expand All @@ -942,6 +948,8 @@ fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
}
}

/// If `from_type` can be cast to `to_type`, return `to_type`; otherwise
/// return `None`.
fn string_concat_internal_coercion(
from_type: &DataType,
to_type: &DataType,
Expand All @@ -967,6 +975,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
}
// Then, if LargeUtf8 is in any side, we coerce to LargeUtf8.
(LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8),
// Utf8 coerces to Utf8
(Utf8, Utf8) => Some(Utf8),
_ => None,
}
Expand Down Expand Up @@ -1044,7 +1053,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataTyp
string_coercion(lhs_type, rhs_type)
.or_else(|| list_coercion(lhs_type, rhs_type))
.or_else(|| binary_to_string_coercion(lhs_type, rhs_type))
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
.or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, false))
.or_else(|| regex_null_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
}
Expand All @@ -1064,7 +1073,7 @@ fn regex_null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataT
/// This is a union of string coercion rules and dictionary coercion rules
pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
.or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, false))
.or_else(|| regex_null_coercion(lhs_type, rhs_type))
}

Expand Down Expand Up @@ -1324,38 +1333,50 @@ mod tests {

let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Int32));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, false),
dictionary_comparison_coercion(&lhs_type, &rhs_type, true),
Some(Int32)
);
assert_eq!(
dictionary_comparison_coercion(&lhs_type, &rhs_type, false),
Some(Int32)
);

// Since we can coerce values of Int16 to Utf8, we can support this
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8));
assert_eq!(
dictionary_comparison_coercion(&lhs_type, &rhs_type, true),
Some(Utf8)
);

// Since we can coerce values of Utf8 to Binary, we can support this
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
dictionary_comparison_coercion(&lhs_type, &rhs_type, true),
Some(Binary)
);

let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Utf8;
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
dictionary_comparison_coercion(&lhs_type, &rhs_type, false),
Some(Utf8)
);
assert_eq!(
dictionary_comparison_coercion(&lhs_type, &rhs_type, true),
Some(lhs_type.clone())
);

let lhs_type = Utf8;
let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
dictionary_comparison_coercion(&lhs_type, &rhs_type, false),
Some(Utf8)
);
assert_eq!(
dictionary_comparison_coercion(&lhs_type, &rhs_type, true),
Some(rhs_type.clone())
);
}
Expand Down
Loading

0 comments on commit c61b499

Please sign in to comment.