Skip to content

Commit

Permalink
Result into error in case of endianness mismatches (#5301)
Browse files Browse the repository at this point in the history
This commit implements the Byte Order (Endianness) recommendations we
could read from the Apache Arrow official specification (quoted here):

> _"At first we will return an error when trying to read a Schema
> with an endianness that does not match the underlying system."_

Resolves: #3459
  • Loading branch information
pangiole authored Jan 15, 2024
1 parent 07fa7f6 commit 8345991
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 23 deletions.
31 changes: 8 additions & 23 deletions arrow-integration-testing/tests/ipc_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! Tests for reading the content of [`FileReader`] and [`StreamReader`]
//! in `testing/arrow-ipc-stream/integration/...`

use arrow::error::ArrowError;
use arrow::ipc::reader::{FileReader, StreamReader};
use arrow::util::test_util::arrow_test_data;
use arrow_integration_testing::read_gzip_json;
Expand Down Expand Up @@ -55,26 +56,12 @@ fn read_0_1_7() {
});
}

#[test]
#[should_panic(expected = "Big Endian is not supported for Decimal!")]
fn read_1_0_0_bigendian_decimal_should_panic() {
let testdata = arrow_test_data();
verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_decimal");
}

#[test]
#[should_panic(expected = "Last offset 687865856 of Utf8 is larger than values length 41")]
fn read_1_0_0_bigendian_dictionary_should_panic() {
// The offsets are not translated for big-endian files
// https://github.com/apache/arrow-rs/issues/859
let testdata = arrow_test_data();
verify_arrow_file(&testdata, "1.0.0-bigendian", "generated_dictionary");
}

#[test]
fn read_1_0_0_bigendian() {
let testdata = arrow_test_data();
let paths = [
"generated_decimal",
"generated_dictionary",
"generated_interval",
"generated_datetime",
"generated_map",
Expand All @@ -91,14 +78,12 @@ fn read_1_0_0_bigendian() {
))
.unwrap();

FileReader::try_new(file, None).unwrap();
let reader = FileReader::try_new(file, None);

// While the the reader doesn't error but the values are not
// read correctly on little endian platforms so verifying the
// contents fails
//
// https://github.com/apache/arrow-rs/issues/3459
//verify_arrow_file(&testdata, "1.0.0-bigendian", path);
assert!(reader.is_err());
let err = reader.err().unwrap();
assert!(matches!(err, ArrowError::IpcError(_)));
assert_eq!(err.to_string(), "Ipc error: the endianness of the source system does not match the endianness of the target system.");
});
}

Expand Down
9 changes: 9 additions & 0 deletions arrow-ipc/src/gen/Schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1039,6 +1039,15 @@ impl Endianness {
_ => None,
}
}

/// Returns true if the endianness of the source system matches the endianness of the target system.
pub fn equals_to_target_endianness(self) -> bool {
match self {
Self::Little => cfg!(target_endian = "little"),
Self::Big => cfg!(target_endian = "big"),
_ => false,
}
}
}
impl core::fmt::Debug for Endianness {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
Expand Down
6 changes: 6 additions & 0 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,12 @@ impl FileReaderBuilder {
let total_blocks = blocks.len();

let ipc_schema = footer.schema().unwrap();
if !ipc_schema.endianness().equals_to_target_endianness() {
return Err(ArrowError::IpcError(
"the endianness of the source system does not match the endianness of the target system.".to_owned()
));
}

let schema = crate::convert::fb_to_schema(ipc_schema);

let mut custom_metadata = HashMap::new();
Expand Down

0 comments on commit 8345991

Please sign in to comment.