Skip to content

Commit

Permalink
Fix Dictionary logical nulls
Browse files Browse the repository at this point in the history
For `DictionaryArray`, methods `logical_nulls` and `logical_null_count`
would return incorrect result if the underlying values had different
physical and logical nullability.
  • Loading branch information
findepi committed Nov 16, 2024
1 parent 008b5fe commit 75faa9d
Showing 1 changed file with 51 additions and 2 deletions.
53 changes: 51 additions & 2 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
}

fn logical_nulls(&self) -> Option<NullBuffer> {
match self.values.nulls() {
match self.values.logical_nulls() {
None => self.nulls().cloned(),
Some(value_nulls) => {
let mut builder = BooleanBufferBuilder::new(self.len());
Expand Down Expand Up @@ -1020,8 +1020,9 @@ impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
mod tests {
use super::*;
use crate::cast::as_dictionary_array;
use crate::{Int16Array, Int32Array, Int8Array};
use crate::{Int16Array, Int32Array, Int8Array, RunArray};
use arrow_buffer::{Buffer, ToByteSlice};
use arrow_schema::DataType::RunEndEncoded;

#[test]
fn test_dictionary_array() {
Expand Down Expand Up @@ -1445,6 +1446,54 @@ mod tests {
assert_eq!(values, &[Some(50), None, None, Some(2)])
}

#[test]
fn test_logical_nulls() -> Result<(), ArrowError> {
let values = Arc::new(RunArray::try_new(
&Int32Array::from(vec![1, 3, 7]),
&Int32Array::from(vec![Some(1), None, Some(3)]),
)?) as ArrayRef;

// For this test to be meaningful, the values array need to have different nulls and logical nulls
assert_eq!(values.null_count(), 0);
assert_eq!(values.logical_null_count(), 2);

// Construct a trivial dictionary with 1-1 mapping to underlying array
let dictionary = DictionaryArray::<Int8Type>::try_new(
Int8Array::from((0..values.len()).map(|i| i as i8).collect::<Vec<_>>()),
Arc::clone(&values),
)?;

// No keys are null
assert_eq!(dictionary.null_count(), 0);
// Dictionary array values are logically nullable
assert_eq!(dictionary.logical_null_count(), values.logical_null_count());
assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
assert_eq!(dictionary.is_nullable(), true);

// Construct a trivial dictionary with 1-1 mapping to underlying array except that key 0 is nulled out
let dictionary = DictionaryArray::<Int8Type>::try_new(
Int8Array::from(
(0..values.len())
.map(|i| i as i8)
.map(|i| if i == 0 { None } else { Some(i) })
.collect::<Vec<_>>(),
),
Arc::clone(&values),
)?;

// One key is null
assert_eq!(dictionary.null_count(), 1);

// Dictionary array values are logically nullable
assert_eq!(
dictionary.logical_null_count(),
values.logical_null_count() + 1
);
assert_eq!(dictionary.is_nullable(), true);

Ok(())
}

#[test]
fn test_normalized_keys() {
let values = vec![132, 0, 1].into();
Expand Down

0 comments on commit 75faa9d

Please sign in to comment.