Skip to content

Commit

Permalink
Merge pull request #831 from serde-rs/from_u32
Browse files (browse the repository at this point in the history)
Skip error codepath on hex escape outside the surrogate range
  • Loading branch information
dtolnay authored Nov 25, 2021
2 parents 76e376c + 4a0924c commit fc0ca07
Showing 1 changed file with 26 additions and 33 deletions.
59 changes: 26 additions & 33 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -926,12 +926,9 @@ fn parse_escape<'de, R: Read<'de>>(
}
}

n => match char::from_u32(n as u32) {
Some(c) => c,
None => {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
}
},
// Every u16 outside of the surrogate ranges above is guaranteed
// to be a legal char.
n => char::from_u32(n as u32).unwrap(),
};

scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
Expand All @@ -954,38 +951,34 @@ where

match ch {
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
b'u' => {
let n = match tri!(read.decode_hex_escape()) {
0xDC00..=0xDFFF => {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

// Non-BMP characters are encoded as a sequence of
// two hex escapes, representing UTF-16 surrogates.
n1 @ 0xD800..=0xDBFF => {
if tri!(next_or_eof(read)) != b'\\' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}
if tri!(next_or_eof(read)) != b'u' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}

let n2 = tri!(read.decode_hex_escape());

if n2 < 0xDC00 || n2 > 0xDFFF {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}
b'u' => match tri!(read.decode_hex_escape()) {
0xDC00..=0xDFFF => {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

(((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000
// Non-BMP characters are encoded as a sequence of
// two hex escapes, representing UTF-16 surrogates.
n1 @ 0xD800..=0xDBFF => {
if tri!(next_or_eof(read)) != b'\\' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}
if tri!(next_or_eof(read)) != b'u' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}

n => n as u32,
};
let n2 = tri!(read.decode_hex_escape());
if n2 < 0xDC00 || n2 > 0xDFFF {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

if char::from_u32(n).is_none() {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
if char::from_u32(n).is_none() {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
}
}
}

_ => {}
},
_ => {
return error(read, ErrorCode::InvalidEscape);
}
Expand Down

0 comments on commit fc0ca07

Please sign in to comment.