Skip to content

Commit

Permalink
Treat end-of-stream as a potential unstarted, incomplete value in StreamingRawReader
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt committed Oct 30, 2024
1 parent f1f4435 commit 771ff07
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/lazy/streaming_raw_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ impl<Encoding: Decoder, Input: IonInput> StreamingRawReader<Encoding, Input> {
let bytes_read = end_position - starting_position;
let input = unsafe { &mut *self.input.get() };
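// If we ran out of data before we could get a result, or we hit an apparent end of stream
// (which may really be a value that has not started arriving yet)...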
if matches!(result, Err(IonError::Incomplete(_))) {
if matches!(result, Err(IonError::Incomplete(_)) | Ok(LazyRawStreamItem::<Encoding>::EndOfStream(_))) {
//if matches!(result, Err(IonError::Incomplete(_))) {
// ...try to pull more data from the data source. It's ok to modify the buffer in
// this case because `result` (which holds a reference to the buffer) will be
// discarded.
Expand Down
27 changes: 23 additions & 4 deletions tests/detect_incomplete_text.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![cfg(feature = "experimental-reader-writer")]

use crate::ion_tests::{DataStraw, ELEMENT_GLOBAL_SKIP_LIST};
use ion_rs::{AnyEncoding, ElementReader, IonError, IonResult, IonStream, Reader};
use crate::ion_tests::{DataStraw, ELEMENT_GLOBAL_SKIP_LIST, SkipList};
use ion_rs::{AnyEncoding, Element, ElementReader, IonData, IonError, IonResult, IonStream, Reader};
use std::collections::HashSet;
use std::fs;
use std::io::BufReader;
Expand All @@ -11,12 +11,29 @@ use test_generator::test_resources;

mod ion_tests;

// Test files that the incomplete-text detection tests skip in addition to the entries in
// `ELEMENT_GLOBAL_SKIP_LIST`.
//
// These tests are all failing because multipart long strings are not handled correctly when the
// "part" boundary happens to also fall on a point where the reader needs to refill the input buffer.
const INCOMPLETE_LONG_STRING_SKIP_LIST: SkipList = &[
"ion-tests/iontestdata/good/equivs/localSymbolTableAppend.ion",
"ion-tests/iontestdata/good/equivs/localSymbolTableNullSlots.ion",
"ion-tests/iontestdata/good/equivs/longStringsWithComments.ion",
"ion-tests/iontestdata/good/equivs/strings.ion",
"ion-tests/iontestdata/good/lists.ion",
"ion-tests/iontestdata/good/strings.ion",
"ion-tests/iontestdata/good/stringsWithWhitespace.ion",
"ion-tests/iontestdata/good/strings_cr_nl.ion",
"ion-tests/iontestdata/good/strings2.ion",
"ion-tests/iontestdata/good/structs.ion",
"ion-tests/iontestdata/good/strings_nl.ion",
];

// A combined copy of the `ELEMENT_GLOBAL_SKIP_LIST` and `INCOMPLETE_LONG_STRING_SKIP_LIST` in
// which each file name has been canonicalized for the current host machine. This makes it
// possible to compare names in these lists with names of files on the host without worrying
// about differences in (for example) path separators.
static CANONICAL_FILE_NAMES: LazyLock<Vec<String>> = LazyLock::new(|| {
ELEMENT_GLOBAL_SKIP_LIST
.iter()
.chain(INCOMPLETE_LONG_STRING_SKIP_LIST.iter())
.filter_map(|filename| {
// Canonicalize the skip list file names so they're in the host OS' preferred format.
// This involves looking up the actual file; if canonicalization fails, the file could
Expand Down Expand Up @@ -56,14 +73,16 @@ fn incomplete_text_detection_test(skip_list: &HashSet<String>, file_name: &str)
return Ok(());
}
println!("testing {file_name}");
let file = fs::File::open(file_name)?;
let file = fs::File::open(&file_name)?;
let buf_reader = BufReader::new(file);
let input = DataStraw::new(buf_reader);
let ion_stream = IonStream::new(input);
let mut reader = Reader::new(AnyEncoding, ion_stream)?;
// Manually destructure to allow for pretty-printing of errors
match reader.read_all_elements() {
Ok(_) => {}
Ok(elements) => {
assert_eq!(IonData::from(elements), IonData::from(Element::read_all(fs::read(&file_name)?)?))
}
Err(IonError::Decoding(e)) => {
panic!("{:?}: {}", e.position(), e);
}
Expand Down

0 comments on commit 771ff07

Please sign in to comment.