Skip to content

Commit

Permalink
Use standard methods to parse character reference to a number
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jun 28, 2024
1 parent 55a537a commit 04bddd6
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 37 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@

- [#771]: `EscapeError::UnrecognizedSymbol` renamed to `EscapeError::UnrecognizedEntity`.
- [#771]: Implemented `PartialEq` for `EscapeError`.
- [#771]: Replaced the following variants of `EscapeError` with a single `InvalidCharRef`
variant that wraps the standard `ParseIntError`:
- `InvalidDecimal`
- `InvalidHexadecimal`

[#771]: https://github.com/tafia/quick-xml/pull/771
[#772]: https://github.com/tafia/quick-xml/pull/772
Expand Down
55 changes: 18 additions & 37 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use memchr::memchr2_iter;
use std::borrow::Cow;
use std::num::ParseIntError;
use std::ops::Range;

/// Error for XML escape / unescape.
Expand All @@ -13,10 +14,9 @@ pub enum EscapeError {
UnrecognizedEntity(Range<usize>, String),
/// Cannot find `;` after `&`
UnterminatedEntity(Range<usize>),
/// Character is not a valid hexadecimal value
InvalidHexadecimal(char),
/// Character is not a valid decimal value
InvalidDecimal(char),
/// Attempt to parse a character reference (`&#<dec-number>;` or `&#x<hex-number>;`)
/// was unsuccessful: the digits are not a valid decimal or hexadecimal number, or the
/// number does not fit into `u32`.
InvalidCharRef(ParseIntError),
/// Not a valid unicode codepoint
InvalidCodepoint(u32),
}
Expand All @@ -37,16 +37,22 @@ impl std::fmt::Display for EscapeError {
"Error while escaping character at range {:?}: Cannot find ';' after '&'",
e
),
EscapeError::InvalidHexadecimal(e) => {
write!(f, "'{}' is not a valid hexadecimal character", e)
EscapeError::InvalidCharRef(e) => {
write!(f, "invalid character reference: {}", e)
}
EscapeError::InvalidDecimal(e) => write!(f, "'{}' is not a valid decimal character", e),
EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n),
}
}
}

impl std::error::Error for EscapeError {}
impl std::error::Error for EscapeError {
    /// Returns the lower-level cause of this error, if there is one.
    ///
    /// Only [`EscapeError::InvalidCharRef`] wraps another error (the
    /// `ParseIntError` it carries); every other variant reports no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::InvalidCharRef(cause) = self {
            Some(cause)
        } else {
            None
        }
    }
}

/// Escapes an `&str` and replaces all xml special characters (`<`, `>`, `&`, `'`, `"`)
/// with their corresponding xml escaped value.
Expand Down Expand Up @@ -1787,10 +1793,11 @@ pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> {

fn parse_number(bytes: &str, range: Range<usize>) -> Result<char, EscapeError> {
let code = if let Some(hex_digits) = bytes.strip_prefix('x') {
parse_hexadecimal(hex_digits)
u32::from_str_radix(hex_digits, 16)
} else {
parse_decimal(bytes)
}?;
u32::from_str_radix(bytes, 10)
}
.map_err(EscapeError::InvalidCharRef)?;
if code == 0 {
return Err(EscapeError::EntityWithNull(range));
}
Expand All @@ -1799,29 +1806,3 @@ fn parse_number(bytes: &str, range: Range<usize>) -> Result<char, EscapeError> {
None => Err(EscapeError::InvalidCodepoint(code)),
}
}

/// Parses the hexadecimal digits of a character reference (the text that
/// follows the `x` prefix) into a number.
///
/// Digits `0-9`, `a-f` and `A-F` are accepted, and any number of leading zeros
/// is allowed. An empty input yields `Ok(0)`.
///
/// A value that does not fit into `u32` saturates to `u32::MAX` instead of
/// silently losing its high bits. Previously an over-long reference such as
/// `&#x100000030;` was truncated to `0x30` and resolved to a valid character.
/// `u32::MAX` is not a Unicode scalar value, so the caller's code point
/// validation rejects it.
///
/// # Errors
///
/// Returns [`EscapeError::InvalidHexadecimal`] carrying the first byte (as a
/// `char`) that is not a hexadecimal digit. All input is validated, including
/// digits that follow a saturated value.
fn parse_hexadecimal(bytes: &str) -> Result<u32, EscapeError> {
    // Largest accumulator value that can take one more hex digit without
    // shifting significant bits out of the `u32`.
    const MAX_BEFORE_SHIFT: u32 = u32::MAX >> 4;

    let mut code: u32 = 0;
    for b in bytes.bytes() {
        let digit = match b {
            b'0'..=b'9' => b - b'0',
            b'a'..=b'f' => b - b'a' + 10,
            b'A'..=b'F' => b - b'A' + 10,
            b => return Err(EscapeError::InvalidHexadecimal(b as char)),
        };
        code = if code > MAX_BEFORE_SHIFT {
            // Sticky saturation: `u32::MAX` is itself above the threshold,
            // so every following digit keeps the value at `u32::MAX`.
            u32::MAX
        } else {
            (code << 4) | u32::from(digit)
        };
    }
    Ok(code)
}

/// Parses the decimal digits of a character reference into a number.
///
/// Only ASCII digits `0-9` are accepted, and any number of leading zeros is
/// allowed. An empty input yields `Ok(0)`.
///
/// A value that does not fit into `u32` saturates to `u32::MAX`. The previous
/// unchecked `*=` / `+=` arithmetic panicked on overflow in builds with
/// overflow checks enabled and wrapped around to an unrelated number
/// otherwise. `u32::MAX` is not a Unicode scalar value, so the caller's code
/// point validation rejects it.
///
/// # Errors
///
/// Returns [`EscapeError::InvalidDecimal`] carrying the first byte (as a
/// `char`) that is not a decimal digit. All input is validated, including
/// digits that follow a saturated value.
fn parse_decimal(bytes: &str) -> Result<u32, EscapeError> {
    let mut code: u32 = 0;
    for b in bytes.bytes() {
        let digit = match b {
            b'0'..=b'9' => u32::from(b - b'0'),
            b => return Err(EscapeError::InvalidDecimal(b as char)),
        };
        // Saturation is sticky: once at `u32::MAX` both operations keep it there.
        code = code.saturating_mul(10).saturating_add(digit);
    }
    Ok(code)
}
21 changes: 21 additions & 0 deletions tests/escape.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use pretty_assertions::assert_eq;
use quick_xml::escape::{self, EscapeError};
use std::borrow::Cow;
use std::num::IntErrorKind;

#[test]
fn escape() {
Expand Down Expand Up @@ -89,6 +90,16 @@ fn unescape_long() {
escape::unescape("&#x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000030;"),
Ok("0".into()),
);

// Too big numbers for u32 should produce errors
match escape::unescape(&format!("&#{};", u32::MAX as u64 + 1)) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
}
match escape::unescape(&format!("&#x{:x};", u32::MAX as u64 + 1)) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
}
}

#[test]
Expand Down Expand Up @@ -142,4 +153,14 @@ fn unescape_with_long() {
escape::unescape_with("&#x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000030;", |_| None),
Ok("0".into()),
);

// Too big numbers for u32 should produce errors
match escape::unescape_with(&format!("&#{};", u32::MAX as u64 + 1), |_| None) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
}
match escape::unescape_with(&format!("&#x{:x};", u32::MAX as u64 + 1), |_| None) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
}
}

0 comments on commit 04bddd6

Please sign in to comment.