Skip to content

Commit

Permalink
Do not allow (positive) sign in character references
Browse files Browse the repository at this point in the history
Negative sing will return ParseIntError(InvalidDigit) because we parse into unsigned number,
but `+` sign is allowed by the parse method. To return the same error for both `+` and `-`
we check for sign itself before parse
  • Loading branch information
Mingun committed Jun 28, 2024
1 parent 04bddd6 commit 42d7123
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 30 deletions.
4 changes: 3 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
- [#771]: `EscapeError::UnrecognizedSymbol` renamed to `EscapeError::UnrecognizedEntity`.
- [#771]: Implemented `PartialEq` for `EscapeError`.
- [#771]: Replace the following variants of `EscapeError` by `InvalidCharRef` variant
with a standard `ParseIntError` inside:
with a new `ParseCharRefError` inside:
- `EntityWithNull`
- `InvalidDecimal`
- `InvalidHexadecimal`
- `InvalidCodepoint`

[#771]: https://github.com/tafia/quick-xml/pull/771
[#772]: https://github.com/tafia/quick-xml/pull/772
Expand Down
74 changes: 54 additions & 20 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,56 @@ use std::borrow::Cow;
use std::num::ParseIntError;
use std::ops::Range;

/// Error of parsing character reference (`&#<dec-number>;` or `&#x<hex-number>;`).
#[derive(Clone, Debug, PartialEq)]
pub enum ParseCharRefError {
/// Number contains sign character (`+` or `-`) which is not allowed.
UnexpectedSign,
/// Number cannot be parsed due to non-number characters or a numeric overflow.
InvalidNumber(ParseIntError),
/// Character reference represents not a valid unicode codepoint.
InvalidCodepoint(u32),
/// Character reference expanded to a not permitted character for an XML.
///
/// Currently, only `0x0` character produces this error.
IllegalCharacter(u32),
}

impl std::fmt::Display for ParseCharRefError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::UnexpectedSign => f.write_str("unexpected number sign"),
Self::InvalidNumber(e) => e.fmt(f),
Self::InvalidCodepoint(n) => write!(f, "`{}` is not a valid codepoint", n),
Self::IllegalCharacter(n) => write!(f, "0x{:x} character is not permitted in XML", n),
}
}
}

impl std::error::Error for ParseCharRefError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::InvalidNumber(e) => Some(e),
_ => None,
}
}
}

/// Error for XML escape / unescape.
#[derive(Clone, Debug, PartialEq)]
pub enum EscapeError {
/// Entity with Null character
EntityWithNull(Range<usize>),
/// Referenced entity in unknown to the parser.
UnrecognizedEntity(Range<usize>, String),
/// Cannot find `;` after `&`
UnterminatedEntity(Range<usize>),
/// Attempt to parse character reference (`&#<dec-number>;` or `&#x<hex-number>;`)
/// was unsuccessful, not all characters are decimal or hexadecimal numbers.
InvalidCharRef(ParseIntError),
/// Not a valid unicode codepoint
InvalidCodepoint(u32),
InvalidCharRef(ParseCharRefError),
}

impl std::fmt::Display for EscapeError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
EscapeError::EntityWithNull(e) => write!(
f,
"Error while escaping character at range {:?}: Null character entity not allowed",
e
),
EscapeError::UnrecognizedEntity(rge, res) => {
write!(f, "at {:?}: unrecognized entity `{}`", rge, res)
}
Expand All @@ -40,7 +66,6 @@ impl std::fmt::Display for EscapeError {
EscapeError::InvalidCharRef(e) => {
write!(f, "invalid character reference: {}", e)
}
EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n),
}
}
}
Expand Down Expand Up @@ -246,7 +271,7 @@ where
// search for character correctness
let pat = &raw[start + 1..end];
if let Some(entity) = pat.strip_prefix('#') {
let codepoint = parse_number(entity, start..end)?;
let codepoint = parse_number(entity).map_err(EscapeError::InvalidCharRef)?;
unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
} else if let Some(value) = resolve_entity(pat) {
unescaped.push_str(value);
Expand Down Expand Up @@ -1791,18 +1816,27 @@ pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> {
Some(s)
}

fn parse_number(bytes: &str, range: Range<usize>) -> Result<char, EscapeError> {
let code = if let Some(hex_digits) = bytes.strip_prefix('x') {
u32::from_str_radix(hex_digits, 16)
fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
let code = if let Some(hex) = num.strip_prefix('x') {
from_str_radix(hex, 16)?
} else {
u32::from_str_radix(bytes, 10)
}
.map_err(EscapeError::InvalidCharRef)?;
from_str_radix(num, 10)?
};
if code == 0 {
return Err(EscapeError::EntityWithNull(range));
return Err(ParseCharRefError::IllegalCharacter(code));
}
match std::char::from_u32(code) {
Some(c) => Ok(c),
None => Err(EscapeError::InvalidCodepoint(code)),
None => Err(ParseCharRefError::InvalidCodepoint(code)),
}
}

#[inline]
fn from_str_radix(src: &str, radix: u32) -> Result<u32, ParseCharRefError> {
match src.as_bytes().first().copied() {
// We should not allow sign numbers, but u32::from_str_radix will accept `+`.
// We also handle `-` to be consistent in returned errors
Some(b'+') | Some(b'-') => Err(ParseCharRefError::UnexpectedSign),
_ => u32::from_str_radix(src, radix).map_err(ParseCharRefError::InvalidNumber),
}
}
96 changes: 87 additions & 9 deletions tests/escape.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use pretty_assertions::assert_eq;
use quick_xml::escape::{self, EscapeError};
use quick_xml::escape::{self, EscapeError, ParseCharRefError};
use std::borrow::Cow;
use std::num::IntErrorKind;

Expand Down Expand Up @@ -93,15 +93,54 @@ fn unescape_long() {

// Too big numbers for u32 should produce errors
match escape::unescape(&format!("&#{};", u32::MAX as u64 + 1)) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
}
x => panic!(
"expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}",
x
),
}
match escape::unescape(&format!("&#x{:x};", u32::MAX as u64 + 1)) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
}
x => panic!(
"expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}",
x
),
}
}

#[test]
fn unescape_sign() {
assert_eq!(
escape::unescape("&#+48;"),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
assert_eq!(
escape::unescape("&#x+30;"),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);

assert_eq!(
escape::unescape("&#-48;"),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
assert_eq!(
escape::unescape("&#x-30;"),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
}

#[test]
fn unescape_with() {
let custom_entities = |ent: &str| match ent {
Expand Down Expand Up @@ -156,11 +195,50 @@ fn unescape_with_long() {

// Too big numbers for u32 should produce errors
match escape::unescape_with(&format!("&#{};", u32::MAX as u64 + 1), |_| None) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
}
x => panic!(
"expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}",
x
),
}
match escape::unescape_with(&format!("&#x{:x};", u32::MAX as u64 + 1), |_| None) {
Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow),
x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x),
Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => {
assert_eq!(err.kind(), &IntErrorKind::PosOverflow)
}
x => panic!(
"expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}",
x
),
}
}

#[test]
fn unescape_with_sign() {
assert_eq!(
escape::unescape_with("&#+48;", |_| None),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
assert_eq!(
escape::unescape_with("&#x+30;", |_| None),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);

assert_eq!(
escape::unescape_with("&#-48;", |_| None),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
assert_eq!(
escape::unescape_with("&#x-30;", |_| None),
Err(EscapeError::InvalidCharRef(
ParseCharRefError::UnexpectedSign
)),
);
}

0 comments on commit 42d7123

Please sign in to comment.