Skip to content

Commit

Permalink
Support Duration in JSON Reader (#6683)
Browse files Browse the repository at this point in the history
* Support Duration in JSON Reader

* fix clippy
  • Loading branch information
simonvandel authored Nov 16, 2024
1 parent 1f19412 commit 008b5fe
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
4 changes: 4 additions & 0 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,10 @@ parser_primitive!(Int64Type);
// Integer types registered through `parser_primitive!`.
// NOTE(review): the macro definition is not visible in this hunk; presumably it
// generates a `Parser` impl for each listed type — confirm against the macro.
parser_primitive!(Int32Type);
parser_primitive!(Int16Type);
parser_primitive!(Int8Type);
// The four duration types (nanosecond, microsecond, millisecond, second) are
// registered with the same macro as the integer types above.
parser_primitive!(DurationNanosecondType);
parser_primitive!(DurationMicrosecondType);
parser_primitive!(DurationMillisecondType);
parser_primitive!(DurationSecondType);

impl Parser for TimestampNanosecondType {
fn parse(string: &str) -> Option<i64> {
Expand Down
35 changes: 35 additions & 0 deletions arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,10 @@ fn make_decoder(
DataType::Time32(TimeUnit::Millisecond) => primitive_decoder!(Time32MillisecondType, data_type),
DataType::Time64(TimeUnit::Microsecond) => primitive_decoder!(Time64MicrosecondType, data_type),
DataType::Time64(TimeUnit::Nanosecond) => primitive_decoder!(Time64NanosecondType, data_type),
DataType::Duration(TimeUnit::Nanosecond) => primitive_decoder!(DurationNanosecondType, data_type),
DataType::Duration(TimeUnit::Microsecond) => primitive_decoder!(DurationMicrosecondType, data_type),
DataType::Duration(TimeUnit::Millisecond) => primitive_decoder!(DurationMillisecondType, data_type),
DataType::Duration(TimeUnit::Second) => primitive_decoder!(DurationSecondType, data_type),
DataType::Decimal128(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal128Type>::new(p, s))),
DataType::Decimal256(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal256Type>::new(p, s))),
DataType::Boolean => Ok(Box::<BooleanArrayDecoder>::default()),
Expand Down Expand Up @@ -1330,6 +1334,37 @@ mod tests {
test_time::<Time64NanosecondType>();
}

/// Decodes two newline-delimited JSON rows into columns `a` and `b`, both of
/// type `T`, and checks the resulting values and null positions.
///
/// Column `a` is given as JSON numbers (`1`, `3`). Column `b` is given as the
/// string `"2"` followed by a JSON `null`, so it exercises both the string
/// input path and null handling.
///
/// NOTE(review): the positional arguments passed to `do_read` (`1024, true,
/// false`) are defined by that helper, which is not visible here — confirm
/// their meaning against its signature before changing them.
fn test_duration<T: ArrowTemporalType>() {
    let buf = r#"
{"a": 1, "b": "2"}
{"a": 3, "b": null}
"#;

    // Both fields are declared nullable and share the duration type under test.
    let schema = Arc::new(Schema::new(vec![
        Field::new("a", T::DATA_TYPE, true),
        Field::new("b", T::DATA_TYPE, true),
    ]));

    let batches = do_read(buf, 1024, true, false, schema);
    assert_eq!(batches.len(), 1);

    let col_a = batches[0].column_by_name("a").unwrap().as_primitive::<T>();
    assert_eq!(col_a.null_count(), 0);
    assert!(col_a.is_valid(0));
    assert!(col_a.is_valid(1));
    assert_eq!(col_a.values(), &[1, 3].map(T::Native::usize_as));

    let col_b = batches[0].column_by_name("b").unwrap().as_primitive::<T>();
    assert_eq!(col_b.null_count(), 1);
    // Pin *which* row is null: a count alone would not catch a decoder that
    // nulled the wrong row.
    assert!(col_b.is_valid(0));
    assert!(col_b.is_null(1));
    // The slot under the null is expected to hold the zero value.
    assert_eq!(col_b.values(), &[2, 0].map(T::Native::usize_as));
}

/// Runs [`test_duration`] once for each duration type: nanosecond,
/// microsecond, millisecond, then second.
#[test]
fn test_durations() {
    // One instantiation of the generic helper per duration type, executed in
    // declaration order.
    let per_unit: [fn(); 4] = [
        test_duration::<DurationNanosecondType>,
        test_duration::<DurationMicrosecondType>,
        test_duration::<DurationMillisecondType>,
        test_duration::<DurationSecondType>,
    ];

    for check in &per_unit {
        check();
    }
}

#[test]
fn test_delta_checkpoint() {
let json = "{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}";
Expand Down

0 comments on commit 008b5fe

Please sign in to comment.