Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for Arrow Flight support for StringViewArray and BinaryViewArray #5601

Merged
merged 2 commits into from
Apr 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions arrow-flight/tests/encode_decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
use std::{collections::HashMap, sync::Arc};

use arrow_array::types::Int32Type;
use arrow_array::{ArrayRef, DictionaryArray, Float64Array, RecordBatch, UInt8Array};
use arrow_array::{
ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, RecordBatch, StringViewArray,
UInt8Array,
};
use arrow_cast::pretty::pretty_format_batches;
use arrow_flight::flight_descriptor::DescriptorType;
use arrow_flight::FlightDescriptor;
Expand Down Expand Up @@ -111,6 +114,22 @@ async fn test_dictionary_many() {
.await;
}

/// Round-trips a single five-row batch produced by `make_view_batches`.
#[tokio::test]
async fn test_view_types_one() {
    let batches = vec![make_view_batches(5)];
    roundtrip(batches).await;
}

#[tokio::test]
async fn test_view_types_many() {
roundtrip(vec![
make_view_batches(5),
make_view_batches(9),
make_view_batches(5),
make_view_batches(5),
])
.await;
}

#[tokio::test]
async fn test_zero_batches_no_schema() {
let stream = FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![]));
Expand Down Expand Up @@ -450,16 +469,51 @@ fn make_dictionary_batch(num_rows: usize) -> RecordBatch {
RecordBatch::try_from_iter(vec![("a", Arc::new(a) as ArrayRef)]).unwrap()
}

/// Builds a two-column [`RecordBatch`] with `num_rows` rows for exercising
/// view-typed arrays.
///
/// * `field1` is a nullable `BinaryView` column.
/// * `field2` is a nullable `Utf8View` column.
///
/// Rows cycle through three cases selected by `row % 3`: a null, a short
/// value (`"bar"` / `"foo"`), and the long test string.
///
/// # Panics
///
/// Panics if `RecordBatch::try_new` rejects the schema/column combination.
fn make_view_batches(num_rows: usize) -> RecordBatch {
    const LONG_TEST_STRING: &str =
        "This is a long string to make sure binary view array handles it";

    let schema = Schema::new(vec![
        Field::new("field1", DataType::BinaryView, true),
        Field::new("field2", DataType::Utf8View, true),
    ]);

    // The arrays are built straight from the row iterators; no intermediate
    // `Vec` of values is needed.
    let binary_array = BinaryViewArray::from_iter((0..num_rows).map(|row| match row % 3 {
        0 => None,
        1 => Some("bar".as_bytes()),
        // row % 3 == 2
        _ => Some(LONG_TEST_STRING.as_bytes()),
    }));

    let utf8_array = StringViewArray::from_iter((0..num_rows).map(|row| match row % 3 {
        0 => None,
        1 => Some("foo"),
        // row % 3 == 2
        _ => Some(LONG_TEST_STRING),
    }));

    // `schema` is not used again, so it is moved into the `Arc` rather than cloned.
    RecordBatch::try_new(
        Arc::new(schema),
        vec![Arc::new(binary_array), Arc::new(utf8_array)],
    )
    .unwrap()
}

/// Encodes input as a FlightData stream, and then decodes it using
/// FlightRecordBatchStream and valides the decoded record batches
/// FlightRecordBatchStream and validates the decoded record batches
/// match the input.
async fn roundtrip(input: Vec<RecordBatch>) {
    // With the default encoder settings the expected output is simply a copy
    // of the input batches.
    let encoder = FlightDataEncoderBuilder::default();
    let expected = input.clone();
    roundtrip_with_encoder(encoder, input, expected).await
}

/// Encodes input as a FlightData stream, and then decodes it using
/// FlightRecordBatchStream and valides the decoded record batches
/// FlightRecordBatchStream and validates the decoded record batches
/// match the expected input.
///
/// When <https://github.com/apache/arrow-rs/issues/3389> is resolved,
Expand Down
Loading