From 3e2e0b5f1d86569cf1aba0e56ae2a257ede318d5 Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Fri, 9 Oct 2015 21:11:24 +0200
Subject: [PATCH] Treat url() as a normal function, per spec change.

Only unquoted URLs are special tokens now. Use `Parser::expect_url`.

This is a [breaking-change]. The version number was incremented accordingly.

This change will help with https://github.com/servo/servo/issues/7767
---
 Cargo.toml        |  2 +-
 src/parser.rs     | 20 ++++++++-----
 src/serializer.rs | 26 +++++++++++++++--
 src/tests.rs      | 71 ++++++++++++++++++++++++++---------------------
 src/tokenizer.rs  | 43 ++++++++++++++--------------
 5 files changed, 98 insertions(+), 64 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ea0f06c4..58d812c5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "cssparser"
-version = "0.3.9"
+version = "0.4.0"
 authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]
 description = "Rust implementation of CSS Syntax Level 3"
diff --git a/src/parser.rs b/src/parser.rs
index bef0fff5..df827173 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -331,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> {
     /// This can help tell e.g. `color: green;` from `color: green 4px;`
     #[inline]
     pub fn parse_entirely<F, T>(&mut self, parse: F) -> Result<T, ()>
-    where F: FnOnce(&mut Parser) -> Result<T, ()> {
+    where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ()> {
         let result = parse(self);
         try!(self.expect_exhausted());
         result
@@ -374,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> {
     /// The result is overridden to `Err(())` if the closure leaves some input before that point.
     #[inline]
     pub fn parse_nested_block<F, T>(&mut self, parse: F) -> Result<T, ()>
-    where F: FnOnce(&mut Parser) -> Result<T, ()> {
+    where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
         let block_type = self.at_start_of.take().expect("\
             A nested parser can only be created when a Function, \
             ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
@@ -412,7 +412,7 @@ impl<'i, 't> Parser<'i, 't> {
     #[inline]
     pub fn parse_until_before<F, T>(&mut self, delimiters: Delimiters, parse: F)
                                     -> Result<T, ()>
-    where F: FnOnce(&mut Parser) -> Result<T, ()> {
+    where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
         let delimiters = self.stop_before | delimiters;
         let result;
         // Introduce a new scope to limit duration of nested_parser’s borrow
@@ -451,7 +451,7 @@ impl<'i, 't> Parser<'i, 't> {
     #[inline]
     pub fn parse_until_after<F, T>(&mut self, delimiters: Delimiters, parse: F)
                                    -> Result<T, ()>
-    where F: FnOnce(&mut Parser) -> Result<T, ()> {
+    where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
         let result = self.parse_until_before(delimiters, parse);
         let next_byte = self.tokenizer.next_byte();
         if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) {
@@ -481,7 +481,7 @@ impl<'i, 't> Parser<'i, 't> {
     /// Parse a <ident> whose unescaped value is an ASCII-insensitive match for the given value.
     #[inline]
-    pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> {
+    pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), ()> {
         match try!(self.next()) {
             Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
             _ => Err(())
         }
@@ -511,7 +511,10 @@ impl<'i, 't> Parser<'i, 't> {
     #[inline]
     pub fn expect_url(&mut self) -> Result<Cow<'i, str>, ()> {
         match try!(self.next()) {
-            Token::Url(value) => Ok(value),
+            Token::UnquotedUrl(value) => Ok(value),
+            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+                self.parse_nested_block(|input| input.expect_string())
+            },
             _ => Err(())
         }
     }
@@ -520,8 +523,11 @@ impl<'i, 't> Parser<'i, 't> {
     #[inline]
     pub fn expect_url_or_string(&mut self) -> Result<Cow<'i, str>, ()> {
         match try!(self.next()) {
-            Token::Url(value) => Ok(value),
+            Token::UnquotedUrl(value) => Ok(value),
             Token::QuotedString(value) => Ok(value),
+            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+                self.parse_nested_block(|input| input.expect_string())
+            },
             _ => Err(())
         }
     }
diff --git a/src/serializer.rs b/src/serializer.rs
index f94903e6..b58e5ec3 100644
--- a/src/serializer.rs
+++ b/src/serializer.rs
@@ -81,9 +81,9 @@ impl<'a> ToCss for Token<'a> {
                 try!(serialize_identifier(&**value, dest));
             }
             Token::QuotedString(ref value) => try!(serialize_string(&**value, dest)),
-            Token::Url(ref value) => {
+            Token::UnquotedUrl(ref value) => {
                 try!(dest.write_str("url("));
-                try!(serialize_string(&**value, dest));
+                try!(serialize_unquoted_url(&**value, dest));
                 try!(dest.write_str(")"));
             },
             Token::Delim(value) => try!(write!(dest, "{}", value)),
@@ -213,6 +213,26 @@ fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
 }
 
 
+fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
+    let mut chunk_start = 0;
+    for (i, b) in value.bytes().enumerate() {
+        let hex = match b {
+            b'\0' ... b' ' | b'\x7F' => true,
+            b'(' | b')' | b'"' | b'\'' | b'\\' => false,
+            _ => continue
+        };
+        try!(dest.write_str(&value[chunk_start..i]));
+        if hex {
+            try!(write!(dest, "\\{:X} ", b));
+        } else {
+            try!(write!(dest, "\\{}", b as char));
+        }
+        chunk_start = i + 1;
+    }
+    dest.write_str(&value[chunk_start..])
+}
+
+
 /// Write a double-quoted CSS string token, escaping content as necessary.
 pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result where W: fmt::Write {
     try!(dest.write_str("\""));
@@ -382,7 +402,7 @@ impl<'a> Token<'a> {
         TokenSerializationType(match *self {
             Token::Ident(_) => Ident,
             Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
-            Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
+            Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
             Token::Delim('#') => DelimHash,
             Token::Delim('@') => DelimAt,
             Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
diff --git a/src/tests.rs b/src/tests.rs
index 3e2f70e1..13d03c2a 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -2,12 +2,11 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use std::borrow::Cow::Borrowed;
+use std::borrow::Cow::{self, Borrowed};
 use std::fs::File;
 use std::io::{self, Write};
 use std::path::Path;
 use std::process::Command;
-use std::mem;
 
 use rustc_serialize::json::{self, Json, ToJson};
 use tempdir::TempDir;
@@ -74,14 +73,8 @@ fn almost_equals(a: &Json, b: &Json) -> bool {
 fn normalize(json: &mut Json) {
     match *json {
         Json::Array(ref mut list) => {
-            match find_url(list) {
-                Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)],
-                Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()],
-                None => {
-                    for item in list.iter_mut() {
-                        normalize(item)
-                    }
-                }
+            for item in list.iter_mut() {
+                normalize(item)
             }
         }
         Json::String(ref mut s) => {
@@ -93,26 +86,6 @@ fn normalize(json: &mut Json) {
     }
 }
 
-fn find_url(list: &mut [Json]) -> Option<Result<String, ()>> {
-    if list.len() < 2 ||
-            list[0].as_string() != Some("function") ||
-            list[1].as_string() != Some("url") {
-        return None
-    }
-
-    let mut args = list[2..].iter_mut().filter(|a| a.as_string() != Some(" "));
-    if let (Some(&mut Json::Array(ref mut arg)), None) = (args.next(), args.next()) {
-        if arg.len() == 2 && arg[0].as_string() == Some("string") {
-            if let &mut Json::String(ref mut value) = &mut arg[1] {
-                return Some(Ok(mem::replace(value, String::new())))
-            }
-        }
-    }
-
-    Some(Err(()))
-}
-
-
 fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) {
     normalize(&mut expected);
     if !almost_equals(&results, &expected) {
@@ -281,6 +254,42 @@ fn outer_block_end_consumed() {
     assert_eq!(input.next(), Err(()));
 }
 
+#[test]
+fn unquoted_url_escaping() {
+    let token = Token::UnquotedUrl("\
+        \x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
+        \x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
+        !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
+        ^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
+    ".into());
+    let serialized = token.to_css_string();
+    assert_eq!(serialized, "\
+        url(\
+            \\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\A \\B \\C \\D \\E \\F \\10 \
+            \\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1A \\1B \\1C \\1D \\1E \\1F \\20 \
+            !\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
+            ^_`abcdefghijklmnopqrstuvwxyz{|}~\\7F é\
+        )\
+    ");
+    assert_eq!(Parser::new(&serialized).next(), Ok(token))
+}
+
+#[test]
+fn test_expect_url() {
+    fn parse(s: &str) -> Result<Cow<str>, ()> {
+        Parser::new(s).expect_url()
+    }
+    assert_eq!(parse("url()").unwrap(), "");
+    assert_eq!(parse("url( ").unwrap(), "");
+    assert_eq!(parse("url( abc").unwrap(), "abc");
+    assert_eq!(parse("url( abc \t)").unwrap(), "abc");
+    assert_eq!(parse("url( 'abc' \t)").unwrap(), "abc");
+    assert_eq!(parse("url(abc more stuff)"), Err(()));
+    // The grammar at https://drafts.csswg.org/css-values/#urls plans for `*`
+    // at the position of "more stuff", but no such modifier is defined yet.
+ assert_eq!(parse("url('abc' more stuff)"), Err(())); +} + fn run_color_tests) -> Json>(json_data: &str, to_json: F) { run_json_tests(json_data, |input| { @@ -606,7 +615,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::Hash(value) => JArray!["hash", value, "unrestricted"], Token::IDHash(value) => JArray!["hash", value, "id"], Token::QuotedString(value) => JArray!["string", value], - Token::Url(value) => JArray!["url", value], + Token::UnquotedUrl(value) => JArray!["url", value], Token::Delim('\\') => "\\".to_json(), Token::Delim(value) => value.to_string().to_json(), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54c78d63..a2ba721d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -48,7 +48,7 @@ pub enum Token<'a> { /// A [``](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( )` function /// /// The value does not include the `url(` `)` markers or the quotes. - Url(Cow<'a, str>), + UnquotedUrl(Cow<'a, str>), /// A `` Delim(char), @@ -628,7 +628,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { if !tokenizer.is_eof() && tokenizer.next_char() == '(' { tokenizer.advance(1); if value.eq_ignore_ascii_case("url") { - consume_url(tokenizer) + consume_unquoted_url(tokenizer).unwrap_or(Function(value)) } else { if tokenizer.var_functions == VarFunctions::LookingForThem && value.eq_ignore_ascii_case("var") { @@ -791,31 +791,30 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } -fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { - while !tokenizer.is_eof() { - match tokenizer.next_char() { - ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1), - '"' => return consume_quoted_url(tokenizer, false), - '\'' => return consume_quoted_url(tokenizer, true), - ')' => { tokenizer.advance(1); break }, - _ => return consume_unquoted_url(tokenizer), - } - } - return Url(Borrowed("")); - - fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> { - match consume_quoted_string(tokenizer, single_quote) { - Ok(value) => consume_url_end(tokenizer, value), - Err(()) => consume_bad_url(tokenizer), +fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, ()> { + for (offset, c) in tokenizer.input[tokenizer.position..].char_indices() { + match c { + ' ' | '\t' | '\n' | '\r' | '\x0C' => {}, + '"' | '\'' => return Err(()), // Do not advance + ')' => { + tokenizer.advance(offset + 1); + return Ok(UnquotedUrl(Borrowed(""))); + } + _ => { + tokenizer.advance(offset); + return Ok(consume_unquoted_url(tokenizer)) + } } } + tokenizer.position = tokenizer.input.len(); + return Ok(UnquotedUrl(Borrowed(""))); fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { let start_pos = tokenizer.position(); let mut string; loop { if tokenizer.is_eof() { - return Url(Borrowed(tokenizer.slice_from(start_pos))) + return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos))) } match tokenizer.next_char() { ' ' | '\t' | '\n' | '\r' | '\x0C' => { @@ -826,7 +825,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { ')' => { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); - return Url(Borrowed(value)) + return UnquotedUrl(Borrowed(value)) } '\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable | '"' | '\'' | '(' => { @@ -861,7 +860,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { }; string.push(next_char) } - Url(Owned(string)) + UnquotedUrl(Owned(string)) } fn 
     fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> {
@@ -872,7 +871,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
                 _ => return consume_bad_url(tokenizer)
             }
         }
-        Url(string)
+        UnquotedUrl(string)
     }
 
     fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
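
Note, for illustration only (not part of the patch): a minimal sketch of how the changed API behaves from a caller's point of view, modeled directly on the tests added in src/tests.rs above. Both URL forms now go through `Parser::expect_url`: an unquoted `url(...)` is still tokenized as a single `UnquotedUrl` token, while a quoted `url("...")` is now an ordinary `Function` token named "url" whose nested block must contain exactly one string. The `main` wrapper and the "foo.png" values are hypothetical.

    extern crate cssparser;
    use cssparser::Parser;

    fn main() {
        // Unquoted form: a single UnquotedUrl token.
        assert_eq!(Parser::new("url(foo.png)").expect_url().unwrap(), "foo.png");
        // Quoted form: a plain Function token named "url"; expect_url
        // parses its nested block and expects a lone string inside.
        assert_eq!(Parser::new("url('foo.png')").expect_url().unwrap(), "foo.png");
        // Extra component values after the string make the function invalid.
        assert!(Parser::new("url('foo.png' extra)").expect_url().is_err());
    }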