Skip to content

Commit

Permalink
Fix size_hint for EncodeUtf16
Browse files Browse the repository at this point in the history
  • Loading branch information
ajtribick committed Jul 20, 2023
1 parent 06a53dd commit e6fa5c1
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
22 changes: 22 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1738,6 +1738,28 @@ fn test_utf16_code_units() {
assert_eq!(\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9])
}

#[test]
fn test_utf16_size_hint() {
    // Hints reported by a freshly created iterator, keyed by input string.
    let fresh_cases = [
        ("", (0, Some(0))),
        ("123", (1, Some(3))),
        ("1234", (2, Some(4))),
        ("12345678", (3, Some(8))),
    ];
    for (src, expected) in fresh_cases {
        assert_eq!(src.encode_utf16().size_hint(), expected);
    }

    // Records the hint before the first call to `next()` and again after
    // every call that yields a code unit, so the last entry is the hint of
    // the iterator once its final code unit has been produced.
    fn hint_vec(src: &str) -> Vec<(usize, Option<usize>)> {
        let mut units = src.encode_utf16();
        let initial = units.size_hint();
        std::iter::once(initial)
            .chain(std::iter::from_fn(|| units.next().map(|_| units.size_hint())))
            .collect()
    }

    // Two ASCII characters: one code unit each.
    assert_eq!(hint_vec("12"), [(1, Some(2)), (1, Some(1)), (0, Some(0))]);
    // A single supplementary-plane character: the second entry is the hint
    // taken between the two halves of its surrogate pair.
    assert_eq!(hint_vec("\u{101234}"), [(2, Some(4)), (1, Some(1)), (0, Some(0))]);
    // The same character followed by ASCII: the pending surrogate must be
    // counted on top of the code unit for the remaining byte.
    assert_eq!(hint_vec("\u{101234}a"), [(2, Some(5)), (2, Some(2)), (1, Some(1)), (0, Some(0))]);
}

#[test]
fn starts_with_in_unicode() {
assert!(!"├── Cargo.toml".starts_with("# "));
Expand Down
19 changes: 14 additions & 5 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1439,11 +1439,20 @@ impl<'a> Iterator for EncodeUtf16<'a> {

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let (low, high) = self.chars.size_hint();
// every char gets either one u16 or two u16,
// so this iterator is between 1 or 2 times as
// long as the underlying iterator.
(low, high.and_then(|n| n.checked_mul(2)))
let len = self.chars.iter.len();
// The highest bytes:code units ratio occurs for 3-byte sequences, so
// use this to determine the lower bound for the hint. The lowest
// ratio is for 1-byte sequences, so use this for the upper bound.
// `(len + 2)` can't overflow, because we know that the `slice::Iter`
// belongs to a slice in memory which has a maximum length of
// `isize::MAX` (that's well below `usize::MAX`)
if self.extra == 0 {
((len + 2) / 3, Some(len))
} else {
// We're in the middle of a surrogate pair, so add the remaining
// surrogate to the bounds.
((len + 2) / 3 + 1, Some(len + 1))
}
}
}

Expand Down

0 comments on commit e6fa5c1

Please sign in to comment.