Skip to content

Commit

Permalink
fix(css_remove): prevent removing wrong content
Browse files Browse the repository at this point in the history
the same string in multiple places could have been removed wrongly
  • Loading branch information
EdJoPaTo committed Feb 1, 2022
1 parent 3cd98c7 commit e459bc6
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions src/editor/css_remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ impl CssRemover {
fn parse(&self) -> anyhow::Result<scraper::Selector> {
let scrape_selector = scraper::Selector::parse(&self.0)
.map_err(|err| anyhow::anyhow!("css remover ({}) parse error: {:?}", self.0, err))?;

Ok(scrape_selector)
}

Expand All @@ -17,16 +16,17 @@ impl CssRemover {
}

pub fn apply(&self, html: &str) -> anyhow::Result<String> {
let parsed_html = scraper::Html::parse_document(html);
let mut parsed_html = scraper::Html::parse_document(html);
let selected = parsed_html
.select(&self.parse()?)
.map(|o| o.html())
.map(|o| o.id())
.collect::<Vec<_>>();

let mut html = parsed_html.root_element().html();
for s in selected {
html = html.replace(&s, "");
for selected in selected {
if let Some(mut selected_mut) = parsed_html.tree.get_mut(selected) {
selected_mut.detach();
}
}
let html = parsed_html.root_element().html();
Ok(html)
}
}
Expand Down Expand Up @@ -96,3 +96,12 @@ fn multiple_selectors_inside_each_other_work() {
r#"<html><head></head><body><div class="b">B</div></body></html>"#
);
}

/// The same `<p>TEST</p>` markup appears under both `.a` and `.b`; only the
/// occurrence matched by the `.a p` selector may be removed.
#[test]
fn multiple_hits_only_remove_exact() {
    let input = r#"<html><head></head><body><div class="a"><p>TEST</p></div><div class="b"><p>TEST</p></div></body></html>"#;
    let expected = r#"<html><head></head><body><div class="a"></div><div class="b"><p>TEST</p></div></body></html>"#;

    let remover = CssRemover(".a p".to_string());
    let output = remover.apply(input).unwrap();

    assert_eq!(output, expected);
}

0 comments on commit e459bc6

Please sign in to comment.