From 8cc5a9248a3f756ff443206d3cf5ce81ed4c140a Mon Sep 17 00:00:00 2001 From: d0rianb Date: Mon, 18 Mar 2024 12:04:41 +0100 Subject: [PATCH] Fix stylesheet not being detected --- src/header.rs | 4 +++ src/parser.rs | 25 ++++++++++++++---- src/tokens.rs | 70 ++++++++++++++++++++++++++------------------------- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/src/header.rs b/src/header.rs index 3fc1dd6..1c90535 100644 --- a/src/header.rs +++ b/src/header.rs @@ -9,6 +9,7 @@ pub type FontTable = HashMap; pub struct RtfHeader { pub character_set: CharacterSet, pub font_table: FontTable, + pub stylesheet: StyleSheet, } #[derive(Hash, Default, Clone, Debug, PartialEq)] @@ -64,3 +65,6 @@ impl FontFamily { } } } + +#[derive(Default, Debug, Clone, PartialEq)] +pub struct StyleSheet; diff --git a/src/parser.rs b/src/parser.rs index fdc1dd4..07d774a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,7 +4,7 @@ use std::{fmt, mem}; use derivative::Derivative; use crate::document::RtfDocument; -use crate::header::{CharacterSet, Font, FontFamily, FontRef, FontTable, RtfHeader}; +use crate::header::{CharacterSet, Font, FontFamily, FontRef, FontTable, RtfHeader, StyleSheet}; use crate::paragraph::{Alignment, Paragraph, SpaceBetweenLine}; use crate::tokens::{ControlWord, Property, Token}; @@ -57,7 +57,7 @@ impl fmt::Display for ParserError { let _ = match self { ParserError::InvalidToken(msg) => write!(f, "{}", msg), ParserError::IgnorableDestinationParsingError => write!(f, "No ignorable destination should be left"), - ParserError::MalformedPainterStack => write!(f, "Malformed painter stack"), + ParserError::MalformedPainterStack => write!(f, "Malformed painter stack : Unbalanced number of brackets"), ParserError::InvalidFontIdentifier(property) => write!(f, "Invalid font identifier : {:?}", property), ParserError::NoMoreToken => write!(f, "No more token to parse"), }; @@ -238,6 +238,14 @@ impl<'a> Parser<'a> { break; } } + (Some(Token::OpeningBracket), Some(&header_control_word!(StyleSheet, None))) => { + let stylesheet_tokens = self.consume_tokens_until_matching_bracket(); + header.stylesheet = Self::parse_stylesheet(&stylesheet_tokens)?; + // After the stylesheet, check if next token is plain text without consuming it. If so, break + if let Some(&Token::PlainText(_text)) = self.get_next_token() { + break; + } + } // Break on par, pard, sectd, or plain - We no longer are in the header (Some(header_control_word!(Pard) | header_control_word!(Sectd) | header_control_word!(Plain) | header_control_word!(Par)), _) => break, // Break if it declares a font after the font table --> no more in the header @@ -306,6 +314,11 @@ impl<'a> Parser<'a> { return Ok(table); } + fn parse_stylesheet(stylesheet_tokens: &Vec>) -> Result { + // TODO + return Ok(StyleSheet {}); + } + // Traverse all the tokens and consume the ignore groups fn parse_ignore_groups(&mut self) { self.cursor = 0; // Reset the cursor @@ -313,7 +326,7 @@ impl<'a> Parser<'a> { let mut i = 0; // Manage the case where there is CRLF between { and ignore_group // {\n /*/ignoregroup } - while next_token == &Token::CRLF { + while *next_token == Token::CRLF { if let Some(next_token_not_crlf) = self.get_token_at(self.cursor + 1 + i) { next_token = next_token_not_crlf; i += 1; @@ -344,7 +357,7 @@ pub mod tests { use crate::lexer::Lexer; #[test] - fn parser_simple_test() { + fn parser_header() { let tokens = Lexer::scan(r#"{ \rtf1\ansi{\fonttbl\f0\fswiss Helvetica;}\f0\pard Voici du texte en {\b gras}.\par }"#).unwrap(); let doc = Parser::new(tokens).parse().unwrap(); assert_eq!( @@ -358,7 +371,8 @@ pub mod tests { character_set: 0, font_family: Swiss } - )]) + )]), + ..RtfHeader::default() } ); assert_eq!( @@ -427,6 +441,7 @@ pub mod tests { } ) ]), + ..RtfHeader::default() } ); } diff --git a/src/tokens.rs b/src/tokens.rs index 832871d..28fe328 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -74,6 +74,7 @@ pub enum ControlWord<'a> { ColorTable, FileTable, + StyleSheet, Italic, Bold, @@ -139,48 +140,49 @@ impl<'a> ControlWord<'a> { #[rustfmt::skip] let control_word = match prefix { - r"\rtf" => ControlWord::Rtf, - r"\ansi" => ControlWord::Ansi, + r"\rtf" => ControlWord::Rtf, + r"\ansi" => ControlWord::Ansi, // Header - r"\fonttbl" => ControlWord::FontTable, - r"\colortabl" => ControlWord::ColorTable, - r"\filetbl" => ControlWord::FileTable, + r"\fonttbl" => ControlWord::FontTable, + r"\colortabl" => ControlWord::ColorTable, + r"\filetbl" => ControlWord::FileTable, + r"\stylesheet" => ControlWord::StyleSheet, // Font - r"\fcharset" => ControlWord::FontCharset, - r"\f" => ControlWord::FontNumber, - r"\fs" => ControlWord::FontSize, + r"\fcharset" => ControlWord::FontCharset, + r"\f" => ControlWord::FontNumber, + r"\fs" => ControlWord::FontSize, // Format - r"\i" => ControlWord::Italic, - r"\b" => ControlWord::Bold, - r"\u" => ControlWord::Underline, - r"\ul" => ControlWord::Underline, - r"\super" => ControlWord::Superscript, - r"\sub" => ControlWord::Subscript, - r"\scaps" => ControlWord::Smallcaps, - r"\strike" => ControlWord::Strikethrough, + r"\i" => ControlWord::Italic, + r"\b" => ControlWord::Bold, + r"\u" => ControlWord::Underline, + r"\ul" => ControlWord::Underline, + r"\super" => ControlWord::Superscript, + r"\sub" => ControlWord::Subscript, + r"\scaps" => ControlWord::Smallcaps, + r"\strike" => ControlWord::Strikethrough, // Paragraph - r"\par" => ControlWord::Par, - r"\pard" => ControlWord::Pard, - r"\sectd" => ControlWord::Sectd, - r"\plain" => ControlWord::Plain, - r"\s" => ControlWord::ParStyle, - r"\pardeftab" => ControlWord::ParDefTab, + r"\par" => ControlWord::Par, + r"\pard" => ControlWord::Pard, + r"\sectd" => ControlWord::Sectd, + r"\plain" => ControlWord::Plain, + r"\s" => ControlWord::ParStyle, + r"\pardeftab" => ControlWord::ParDefTab, // Paragraph alignment - r"\ql" => ControlWord::LeftAligned, - r"\qr" => ControlWord::RightAligned, - r"\qj" => ControlWord::Justify, - r"\qc" => ControlWord::Center, + r"\ql" => ControlWord::LeftAligned, + r"\qr" => ControlWord::RightAligned, + r"\qj" => ControlWord::Justify, + r"\qc" => ControlWord::Center, // Paragraph indent - r"\fi" => ControlWord::FirstLineIdent, - r"\ri" => ControlWord::RightIndent, - r"\li" => ControlWord::LeftIndent, + r"\fi" => ControlWord::FirstLineIdent, + r"\ri" => ControlWord::RightIndent, + r"\li" => ControlWord::LeftIndent, // Paragraph Spacing - r"\sb" => ControlWord::SpaceBefore, - r"\sa" => ControlWord::SpaceAfter, - r"\sl" => ControlWord::SpaceBetweenLine, - r"\slmul" => ControlWord::SpaceLineMul, + r"\sb" => ControlWord::SpaceBefore, + r"\sa" => ControlWord::SpaceAfter, + r"\sl" => ControlWord::SpaceBetweenLine, + r"\slmul" => ControlWord::SpaceLineMul, // Unknown - _ => ControlWord::Unknown(prefix), + _ => ControlWord::Unknown(prefix), }; return Ok((control_word, property)); }