Skip to content

Commit

Permalink
Handle invalid unicode in source
Browse files Browse the repository at this point in the history
  • Loading branch information
Porges committed Aug 5, 2024
1 parent bf5aa37 commit eb6d721
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/handlers/graphical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ impl GraphicalReportHandler {
let context_data = source
.read_span(context_span, self.context_lines, self.context_lines)
.map_err(|_| fmt::Error)?;
let context = std::str::from_utf8(context_data.data()).expect("Bad utf8 detected");
let context = String::from_utf8_lossy(context_data.data());
let mut line = context_data.line();
let mut column = context_data.column();
let mut offset = context_data.span().offset();
Expand Down
157 changes: 157 additions & 0 deletions tests/graphical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2349,3 +2349,160 @@ Error: oops::my::inner
assert_eq!(expected, &out);
Ok(())
}

/// A label placed on an ASCII character that follows multi-byte characters
/// on the same line must be rendered against the expected output below.
#[test]
fn after_unicode_width() -> Result<(), MietteError> {
    // Minimal diagnostic carrying a single labelled span.
    #[derive(Debug, Diagnostic, Error)]
    #[error("pointing")]
    #[diagnostic(code(pointing_at))]
    struct E {
        #[label("something of interest")]
        src: SourceSpan,
    }

    let unicode_source = "höööt!";

    // Byte offset (not char index) of the `t`, since spans are built from
    // byte positions here.
    let t_offset = unicode_source
        .bytes()
        .position(|byte| byte == b't')
        .unwrap();

    // One-byte span covering just the `t`.
    let diagnostic = E {
        src: SourceSpan::from((t_offset, 1)),
    };

    let report = Report::new(diagnostic).with_source_code(String::from(unicode_source));
    let rendered = fmt_report(report);

    let want = "pointing_at
× pointing
╭────
1 │ höööt!
· ┬
· ╰── something of interest
╰────
";

    assert_eq!(want, rendered);

    Ok(())
}

/// A label whose span covers one multi-byte character must be rendered
/// against the expected output below.
#[test]
fn at_unicode_width() -> Result<(), MietteError> {
    // Minimal diagnostic carrying a single labelled span.
    #[derive(Debug, Diagnostic, Error)]
    #[error("pointing")]
    #[diagnostic(code(pointing_at))]
    struct E {
        #[label("something of interest")]
        src: SourceSpan,
    }

    let unicode_source = "höööt!";

    // The span starts at byte offset 1 and is as long as the encoded `ö`.
    let umlaut_byte_len = "ö".len();
    let diagnostic = E {
        src: SourceSpan::from((1, umlaut_byte_len)),
    };

    // Precondition: the span really is two bytes long. The rendered pointer
    // is nevertheless expected to be one character wide, not two.
    assert!(diagnostic.src.len() == 2);

    let report = Report::new(diagnostic).with_source_code(String::from(unicode_source));
    let rendered = fmt_report(report);

    let want = "pointing_at
× pointing
╭────
1 │ höööt!
· ┬
· ╰── something of interest
╰────
";

    assert_eq!(want, rendered);

    Ok(())
}

/// A label placed directly on an invalid UTF-8 sequence must be rendered
/// against the expected output below.
#[test]
fn at_invalid_unicode() -> Result<(), MietteError> {
    // Minimal diagnostic carrying a single labelled span.
    #[derive(Debug, Diagnostic, Error)]
    #[error("decoding error")]
    #[diagnostic(code(decode_err))]
    struct E {
        #[label("invalid data here")]
        src: SourceSpan,
    }

    // The three malformed bytes become a single replacement character when
    // rendered, so the label's pointer line should be 1 char wide rather
    // than 3 chars wide.
    let invalid_source = b"malformed h\xf0\x93\x8aXYZ";

    // The decode is expected to fail; the error tells us where the bad bytes are.
    #[allow(invalid_from_utf8)]
    let utf8_err = std::str::from_utf8(invalid_source).unwrap_err();

    // Span over the invalid part: it starts where the valid prefix ends and
    // covers the reported error length (falling back to 1 if none is given).
    let bad_start = utf8_err.valid_up_to();
    let bad_len = utf8_err.error_len().unwrap_or(1);
    let diagnostic = E {
        src: SourceSpan::from((bad_start, bad_len)),
    };

    let report = Report::new(diagnostic).with_source_code(Vec::from(invalid_source));
    let rendered = fmt_report(report);

    let want = "decode_err
× decoding error
╭────
1 │ malformed h�XYZ
· ┬
· ╰── invalid data here
╰────
";

    assert_eq!(want, rendered);

    Ok(())
}

/// A label placed on a valid byte that follows an invalid UTF-8 sequence
/// must be rendered against the expected output below.
#[test]
fn after_invalid_unicode() -> Result<(), MietteError> {
    // Minimal diagnostic carrying a single labelled span.
    #[derive(Debug, Diagnostic, Error)]
    #[error("decoding error")]
    #[diagnostic(code(decode_err))]
    struct E {
        #[label("valid data here")]
        src: SourceSpan,
    }

    let invalid_source: &[u8] = b"malformed h\xf0\x93\x8aXYZ";

    // Byte offset of the first `X`, which comes after the malformed bytes.
    let x_offset = invalid_source
        .iter()
        .position(|&byte| byte == b'X')
        .unwrap();

    // One-byte span covering just the `X`.
    let diagnostic = E {
        src: SourceSpan::from((x_offset, 1)),
    };

    let report = Report::new(diagnostic).with_source_code(invalid_source);
    let rendered = fmt_report(report);

    let want = "decode_err
× decoding error
╭────
1 │ malformed h�XYZ
· ┬
· ╰── valid data here
╰────
";

    assert_eq!(want, rendered);

    Ok(())
}

0 comments on commit eb6d721

Please sign in to comment.