From da2c62dac9f75756fae4d1fc4fff950b1cb2e30d Mon Sep 17 00:00:00 2001 From: Kornel Date: Fri, 4 May 2018 01:32:10 +0100 Subject: [PATCH] Add url_filter_map() #95 --- src/lib.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 098e333..b0063ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,17 +241,21 @@ pub struct Builder<'a> { generic_attributes: HashSet<&'a str>, url_schemes: HashSet<&'a str>, url_relative: UrlRelative, + url_filter_map_callback: Option>, link_rel: Option<&'a str>, allowed_classes: HashMap<&'a str, HashSet<&'a str>>, strip_comments: bool, id_prefix: Option<&'a str>, } +type UrlFilterCallback = for<'a> Fn(&str, &str, &'a str) -> Option> + Send + Sync + 'static; + impl<'a> fmt::Debug for Builder<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, r"Builder {{tags: {tags:?}, clean_content_tags: {clean_content_tags:?}, \ tag_attributes: {tag_attributes:?}, generic_attributes: {generic_attributes:?}, \ url_schemes: {url_schemes:?}, url_relative: {url_relative:?}, \ +url_filter_map_callback: {url_filter_map_callback:?}, \ link_rel: {link_rel:?}, allowed_classes: {allowed_classes:?}, \ strip_comments: {strip_comments:?}, id_prefix: {id_prefix:?}}}", tags = self.tags, @@ -260,6 +264,7 @@ strip_comments: {strip_comments:?}, id_prefix: {id_prefix:?}}}", generic_attributes = self.generic_attributes, url_schemes = self.url_schemes, url_relative = self.url_relative, + url_filter_map_callback = self.url_filter_map_callback.is_some(), link_rel = self.link_rel, allowed_classes = self.allowed_classes, strip_comments = self.strip_comments, @@ -357,6 +362,7 @@ impl<'a> Default for Builder<'a> { generic_attributes: generic_attributes, url_schemes: url_schemes, url_relative: UrlRelative::PassThrough, + url_filter_map_callback: None, link_rel: Some("noopener noreferrer"), allowed_classes: allowed_classes, strip_comments: true, @@ -826,6 +832,42 @@ impl<'a> 
Builder<'a> { self } + /// Allows rewriting of all URLs using a callback. + /// + /// The callback takes the name of the element, the name of the attribute, and the attribute's value. + /// Returns `None` to remove the attribute, or a URL to use. + /// If the returned URL is relative, it will be filtered according to `url_relative()`. + /// + /// # Panics + /// + /// If more than one callback is set. + /// + /// # Examples + /// + /// ```rust + /// use ammonia::Builder; + /// let a = Builder::new() + /// .url_filter_map(|element, attribute, value| { + /// if element == "a" { + /// Some(value.into()) + /// } else { + /// None + /// } + /// }) + /// .link_rel(None) + /// .clean("Home") + /// .to_string(); + /// assert_eq!(a, + /// r#"Home"#); + /// ``` + pub fn url_filter_map<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self + where CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option> + Send + Sync + 'static + { + assert!(self.url_filter_map_callback.is_none(), "url_filter_map can be set only once"); + self.url_filter_map_callback = Some(Box::new(callback)); + self + } + /// Returns `true` if the relative URL resolver is set to `Deny`. 
/// /// # Examples @@ -1331,6 +1373,31 @@ impl<'a> Builder<'a> { } } } + if let Some(ref filter_map) = self.url_filter_map_callback { + let mut drop_attrs = Vec::new(); + let mut attrs = attrs.borrow_mut(); + for (i, attr) in &mut attrs.iter_mut().enumerate() { + if !is_url_attr(&*name.local, &*attr.name.local) { + continue; + } + let replace_with = if let Some(new) = filter_map(&*name.local, &*attr.name.local, &*attr.value) { + if *new != *attr.value { + Some(format_tendril!("{}", new)) + } else { + None // no need to replace the attr if filter_map() returned the same value + } + } else { + drop_attrs.push(i); + None + }; + if let Some(replace_with) = replace_with { + attr.value = replace_with; + } + } + for i in drop_attrs.into_iter().rev() { + attrs.swap_remove(i); + } + } if let Some(ref base) = url_base { for attr in &mut *attrs.borrow_mut() { if is_url_attr(&*name.local, &*attr.name.local) { @@ -1739,6 +1806,80 @@ mod test { "Test" ); } + #[test] + fn url_filter_map_nop() { + let fragment = "Test"; + let result = Builder::new() + .url_filter_map(|elem, attr, value| { + assert_eq!("a", elem); + assert_eq!("href", attr); + assert_eq!("test", value); + Some(value.into()) + }) + .clean(fragment) + .to_string(); + assert_eq!( + result, + "Test" + ); + } + + #[test] + fn url_filter_map_drop() { + let fragment = "Testtest"; + let result = Builder::new() + .url_filter_map(|elem, attr, value| { + assert_eq!("img", elem); + assert_eq!("src", attr); + assert_eq!("imgtest", value); + None + }) + .clean(fragment) + .to_string(); + assert_eq!( + result, + r#"Testtest"# + ); + } + + #[test] + fn url_filter_absolute() { + let fragment = "Testtest"; + let result = Builder::new() + .url_filter_map(|elem, attr, value| { + assert_eq!("img", elem); + assert_eq!("src", attr); + assert_eq!("imgtest", value); + Some(format!("https://example.com/images/{}", value).into()) + }) + .url_relative(UrlRelative::RewriteWithBase(Url::parse("http://wrong.invalid/").unwrap())) + 
.clean(fragment) + .to_string(); + assert_eq!( + result, + r#"Testtest"# + ); + } + + #[test] + fn url_filter_relative() { + let fragment = "Testtest"; + let result = Builder::new() + .url_filter_map(|elem, attr, value| { + assert_eq!("img", elem); + assert_eq!("src", attr); + assert_eq!("imgtest", value); + Some("rewrite".into()) + }) + .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://example.com/base/#").unwrap())) + .clean(fragment) + .to_string(); + assert_eq!( + result, + r#"Testtest"# + ); + } + #[test] fn rewrite_url_relative_no_rel() { let fragment = "Test";