From cadcd70775cf42b2add2526026a0a06c1ced411c Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Tue, 12 Jan 2016 23:04:46 +0100 Subject: [PATCH] UTF-8 validation: Add missing if conditional for short input We need to guard that `len` is large enough for the fast skip loop. --- src/libcore/str/mod.rs | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 64c21836b000..d85212d25e79 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1158,24 +1158,27 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { offset += 1; } else { // Ascii case, try to skip forward quickly. + // When the pointer is aligned, read 2 words of data per iteration + // until we find a word containing a non-ascii byte. + const BYTES_PER_ITERATION: usize = 2 * usize::BYTES; let ptr = v.as_ptr(); let align = (ptr as usize + offset) & (usize::BYTES - 1); if align == 0 { - // When the pointer is aligned, read 2 words of data per iteration - // until we find a word containing a non-ascii byte. - while offset <= len - 2 * usize::BYTES { - unsafe { - let u = *(ptr.offset(offset as isize) as *const usize); - let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize); - - // break if there is a nonascii byte - let zu = contains_nonascii(u); - let zv = contains_nonascii(v); - if zu || zv { - break; + if len >= BYTES_PER_ITERATION { + while offset <= len - BYTES_PER_ITERATION { + unsafe { + let u = *(ptr.offset(offset as isize) as *const usize); + let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize); + + // break if there is a nonascii byte + let zu = contains_nonascii(u); + let zv = contains_nonascii(v); + if zu || zv { + break; + } } + offset += BYTES_PER_ITERATION; } - offset += usize::BYTES * 2; } // step from the point where the wordwise loop stopped while offset < len && v[offset] < 128 {