Skip to content

Commit

Permalink
Fix infinite loop on unsupported escape sequences
Browse files Browse the repository at this point in the history
Despite the previous fix for screen modes, any other escape sequence not
matched by the parser would still cause an infinite loop. This is a more
general solution which attempts to parse most common escape codes the
way a terminal would, but it is not exhaustive. At the very least it
guarantees the escape character will be consumed which should prevent an
infinite loop even if some unexpected garbage could end up in the output
text.

---

I chose to eat unsupported escape codes as I believe this to be the
behavior most likely expected by consumers of this library, and there
are potential security ramifications of emitting control characters
where developers may not expect them.

However, I do think there is an argument to be made for allowing
unsupported escape sequences through for a downstream client to handle.
This implementation should facilitate implementing that as an option at
a later date.
  • Loading branch information
gandalf3 authored and uttarayan21 committed Mar 16, 2023
1 parent 8a59123 commit 225669f
Showing 1 changed file with 56 additions and 34 deletions.
90 changes: 56 additions & 34 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ use nom::{
branch::alt,
bytes::complete::*,
character::complete::*,
character::is_alphabetic,
combinator::{map_res, opt, recognize, value},
error,
error::FromExternalError,
multi::*,
sequence::tuple,
sequence::{delimited, preceded, tuple},
IResult, Parser,
};
use std::str::FromStr;
Expand Down Expand Up @@ -94,10 +95,12 @@ fn spans(style: Style) -> impl Fn(&[u8]) -> IResult<&[u8], (Spans<'static>, Styl
let (s, _) = opt(tag("\n"))(s)?;
let mut spans = Vec::new();
let mut last = style;
loop {
let (s, span) = span(last)(text)?;
last = span.style;
spans.push(span);
while let Ok((s, span)) = span(last)(text) {
// don't include empty spans, provided we have at least one
if spans.len() <= 0 || span.content != "" {
last = span.style;
spans.push(span);
}
text = s;
if text.is_empty() {
break;
Expand All @@ -113,6 +116,7 @@ fn span(last: Style) -> impl Fn(&[u8]) -> IResult<&[u8], Span<'static>, nom::err
move |s: &[u8]| -> IResult<&[u8], Span<'static>> {
let mut last = last;
let (s, style) = opt(style(last))(s)?;

#[cfg(feature = "simd")]
let (s, text) = map_res(take_while(|c| c != b'\x1b' | b'\n'), |t| {
simdutf8::basic::from_utf8(t)
Expand All @@ -134,41 +138,59 @@ fn span(last: Style) -> impl Fn(&[u8]) -> IResult<&[u8], Span<'static>, nom::err
Ok((s, Span::styled(text.to_owned(), last)))
}
}

/// Returns a parser closure that consumes escape-sequence bytes from the front of the
/// input and yields a `Style` built from the captured `style` plus the parsed items.
///
/// NOTE(review): this listing appears to interleave the removed and the added lines of a
/// diff hunk — the `separated_list0` / `alt` statements and the `match opt(ansi_sgr_code)`
/// statement look like the old and the new version of the same step. Confirm the real
/// statement order against the actual `src/parser.rs` before relying on it.
fn style(style: Style) -> impl Fn(&[u8]) -> IResult<&[u8], Style, nom::error::Error<&[u8]>> {
move |s: &[u8]| -> IResult<&[u8], Style> {
// Require the two-byte introducer `ESC [`.
let (s, _) = tag("\x1b[")(s)?;
// Zero or more `;`-separated items.
let (s, r) = separated_list0(tag(";"), ansi_item)(s)?;
// the ones ending with m are styles and the ones ending with h are screen mode escapes
// (`l` is accepted as a terminator as well)
let (s, _) = alt((char('m'), alt((char('h'), char('l')))))(s)?;
// Try a complete SGR code first. When it does not match, consume whatever escape
// sequence is present and carry on with an empty item list; an error from
// `any_escape_sequence` is propagated to the caller.
let (s, r) = match opt(ansi_sgr_code)(s)? {
(s, Some(r)) => (s, r),
(s, None) => {
let (s, _) = any_escape_sequence(s)?;
(s, Vec::new())
}
};
// Combine the incoming style with the parsed items.
Ok((s, Style::from(AnsiStates { style, items: r })))
}
}

/// An ansi item is a code with a possible color.
fn ansi_item(s: &[u8]) -> IResult<&[u8], AnsiItem> {
// Screen escape modes start with '?' or '=' or non-number
let (s, nc) = opt(alt((char('?'), char('='))))(s)?;
let (mut s, c) = i64(s)?;
if let Some(nc) = nc {
return Ok((
s,
AnsiItem {
code: AnsiCode::Code(vec![nc as u8]),
color: None,
},
));
}
let code = AnsiCode::from(c as u8);
let color = if matches!(
code,
AnsiCode::SetBackgroundColor | AnsiCode::SetForegroundColor
) {
let (_s, _) = opt(tag(";"))(s)?;
let (_s, color) = color(_s)?;
s = _s;
Some(color)
} else {
None
/// Parses one whole SGR sequence: the `ESC [` introducer, one or more
/// `;`-separated SGR items, and the closing `m`.
///
/// Returns the parsed items in order; fails if any of the three parts is missing.
fn ansi_sgr_code(s: &[u8]) -> IResult<&[u8], Vec<AnsiItem>, nom::error::Error<&[u8]>> {
    let (rest, _) = tag("\x1b[")(s)?;
    let (rest, items) = separated_list1(tag(";"), ansi_sgr_item)(rest)?;
    let (rest, _) = char('m')(rest)?;
    Ok((rest, items))
}

fn any_escape_sequence(s: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
// Attempt to consume most escape codes, including a single escape char.
//
// Most escape codes begin with ESC[ and are terminated by an alphabetic character,
// but OSC codes begin with ESC] and are terminated by an ascii bell (\x07)
// and a truncated/invalid code may just be a standalone ESC or not be terminated.
//
// We should try to consume as much of it as possible to match behavior of most terminals;
// where we fail at that we should at least consume the escape char to avoid infinitely looping

preceded(
char('\x1b'),
opt(alt((
delimited(char('['), take_till(|c| is_alphabetic(c)), opt(take(1u8))),
delimited(char(']'), take_till(|c| c == b'\x07'), opt(take(1u8))),
))),
)(s)
}

/// Parses a single SGR attribute: a numeric code, plus a color specification when the
/// code is a set-foreground or set-background code.
///
/// For the two color-setting codes an optional `;` is skipped and a color is then
/// required; every other code yields an item without a color.
fn ansi_sgr_item(s: &[u8]) -> IResult<&[u8], AnsiItem> {
    let (rest, raw) = u8(s)?;
    let code = AnsiCode::from(raw);

    // Codes that carry no color payload are complete as soon as the number is read.
    if !matches!(
        code,
        AnsiCode::SetForegroundColor | AnsiCode::SetBackgroundColor
    ) {
        return Ok((rest, AnsiItem { code, color: None }));
    }

    let (rest, _) = opt(tag(";"))(rest)?;
    let (rest, parsed) = color(rest)?;
    Ok((
        rest,
        AnsiItem {
            code,
            color: Some(parsed),
        },
    ))
}
Expand Down

0 comments on commit 225669f

Please sign in to comment.