Skip to content

Commit

Permalink
Auto merge of #31253 - ranma42:improve-unicode-iter-offset, r=brson
Browse files Browse the repository at this point in the history
Improve computation of offset in `EscapeUnicode`

Unify the computation of `offset` and use `leading_zeros` instead of manually scanning the bits.
This PR removes some duplicated code and makes the implementation a little simpler.
The computation of `offset` is also faster, but it is unlikely to have an impact on actual code.

(split from #31049)
  • Loading branch information
bors committed Apr 20, 2016
2 parents c2aaad4 + 8984242 commit 9cf6fba
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,20 @@ impl CharExt for char {

#[inline]
fn escape_unicode(self) -> EscapeUnicode {
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
let c = self as u32;

// or-ing 1 ensures that for c==0 the code computes that one
// digit should be printed and (which is the same) avoids the
// (31 - 32) underflow
let msb = 31 - (c | 1).leading_zeros();

// the index of the most significant hex digit
let ms_hex_digit = msb / 4;
EscapeUnicode {
c: self,
state: EscapeUnicodeState::Backslash,
hex_digit_idx: ms_hex_digit as usize,
}
}

#[inline]
Expand Down Expand Up @@ -392,15 +405,20 @@ impl CharExt for char {
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeUnicode {
c: char,
state: EscapeUnicodeState
state: EscapeUnicodeState,

// The index of the next hex digit to be printed (0 if none),
// i.e. the number of remaining hex digits to be printed;
// increasing from the least significant digit: 0x543210
hex_digit_idx: usize,
}

#[derive(Clone, Debug)]
enum EscapeUnicodeState {
Backslash,
Type,
LeftBrace,
Value(usize),
Value,
RightBrace,
Done,
}
Expand All @@ -420,19 +438,16 @@ impl Iterator for EscapeUnicode {
Some('u')
}
EscapeUnicodeState::LeftBrace => {
let mut n = 0;
while (self.c as u32) >> (4 * (n + 1)) != 0 {
n += 1;
}
self.state = EscapeUnicodeState::Value(n);
self.state = EscapeUnicodeState::Value;
Some('{')
}
EscapeUnicodeState::Value(offset) => {
let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap();
if offset == 0 {
EscapeUnicodeState::Value => {
let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
let c = from_digit(hex_digit, 16).unwrap();
if self.hex_digit_idx == 0 {
self.state = EscapeUnicodeState::RightBrace;
} else {
self.state = EscapeUnicodeState::Value(offset - 1);
self.hex_digit_idx -= 1;
}
Some(c)
}
Expand All @@ -445,18 +460,15 @@ impl Iterator for EscapeUnicode {
}

fn size_hint(&self) -> (usize, Option<usize>) {
let mut n = 0;
while (self.c as usize) >> (4 * (n + 1)) != 0 {
n += 1;
}
let n = match self.state {
EscapeUnicodeState::Backslash => n + 5,
EscapeUnicodeState::Type => n + 4,
EscapeUnicodeState::LeftBrace => n + 3,
EscapeUnicodeState::Value(offset) => offset + 2,
EscapeUnicodeState::Backslash => 5,
EscapeUnicodeState::Type => 4,
EscapeUnicodeState::LeftBrace => 3,
EscapeUnicodeState::Value => 2,
EscapeUnicodeState::RightBrace => 1,
EscapeUnicodeState::Done => 0,
};
let n = n + self.hex_digit_idx;
(n, Some(n))
}
}
Expand Down

0 comments on commit 9cf6fba

Please sign in to comment.