Skip to content

Commit

Permalink
ARROW-11269: [Rust] [Parquet] Preserve timezone in int96 reader
Browse files Browse the repository at this point in the history
The Int96 timestamp was not using the specialised timestamp builder that takes the timezone as a parameter.
This changes that to use the builder that preserves timezones.

I tested this change with the test file provided in the JIRA.
It looks like we don't have a way of writing Int96 from the arrow writer, so there isn't an easy way to add a test case.

Closes apache#9253 from nevi-me/ARROW-11269

Authored-by: Neville Dipale <nevilledips@gmail.com>
Signed-off-by: Neville Dipale <nevilledips@gmail.com>
  • Loading branch information
nevi-me authored and michalursa committed Jun 13, 2021
1 parent 41186f0 commit 060af3e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
13 changes: 12 additions & 1 deletion rust/parquet/src/arrow/array_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1498,7 +1498,18 @@ impl<'a> ArrayReaderBuilder {
arrow_type,
)?)),
PhysicalType::INT96 => {
let converter = Int96Converter::new(Int96ArrayConverter {});
// get the optional timezone information from arrow type
let timezone = arrow_type
.as_ref()
.map(|data_type| {
if let ArrowType::Timestamp(_, tz) = data_type {
tz.clone()
} else {
None
}
})
.flatten();
let converter = Int96Converter::new(Int96ArrayConverter { timezone });
Ok(Box::new(ComplexObjectArrayReader::<
Int96Type,
Int96Converter,
Expand Down
21 changes: 10 additions & 11 deletions rust/parquet/src/arrow/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use arrow::array::{
IntervalDayTimeArray, IntervalDayTimeBuilder, IntervalYearMonthArray,
IntervalYearMonthBuilder, LargeBinaryBuilder, LargeStringBuilder, PrimitiveBuilder,
PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder,
TimestampNanosecondBuilder,
};
use arrow::compute::cast;
use std::convert::{From, TryInto};
Expand Down Expand Up @@ -168,19 +167,19 @@ impl Converter<Vec<Option<FixedLenByteArray>>, IntervalDayTimeArray>
}
}

// NOTE(review): flattened diff hunk — the first line is the definition removed by
// this commit (a converter with no fields); the three lines after it are its replacement.
pub struct Int96ArrayConverter {}
// Converter for Parquet INT96 values that carries the optional timezone to attach
// to the timestamp array it produces (`None` means no timezone is attached).
pub struct Int96ArrayConverter {
pub timezone: Option<String>,
}

// Converts a vector of optional `Int96` values into a `TimestampNanosecondArray`.
// NOTE(review): this is a flattened diff hunk, so the body of `convert` below shows
// both the removed builder-based implementation and the added timezone-aware one.
impl Converter<Vec<Option<Int96>>, TimestampNanosecondArray> for Int96ArrayConverter {
fn convert(&self, source: Vec<Option<Int96>>) -> Result<TimestampNanosecondArray> {
// Removed by this commit: builder-based version, which is given no timezone.
let mut builder = TimestampNanosecondBuilder::new(source.len());
for v in source {
match v {
// Each value is scaled by 1_000_000 — presumably `to_i64()` yields milliseconds
// and this converts to nanoseconds; confirm against `Int96::to_i64`.
Some(array) => builder.append_value(array.to_i64() * 1000000),
None => builder.append_null(),
}?
}

Ok(builder.finish())
// Added by this commit: the same per-value scaling with missing entries kept as
// `None`, and the converter's `timezone` passed alongside the collected values.
Ok(TimestampNanosecondArray::from_opt_vec(
source
.into_iter()
.map(|int96| int96.map(|val| val.to_i64() * 1_000_000))
.collect(),
self.timezone.clone(),
))
}
}

Expand Down

0 comments on commit 060af3e

Please sign in to comment.