Skip to content

Commit

Permalink
Don't check prefix attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
xlai89 committed Oct 19, 2024
1 parent 3a2533f commit 18a20a2
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
16 changes: 16 additions & 0 deletions lychee-lib/src/extract/html/html5ever.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ impl TokenSink for LinkExtractor {
}
}

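// Skip any element that carries a `prefix` attribute. The attribute declares
// vocabulary prefix mappings (e.g. `og: https://ogp.me/ns#`); the URLs in its
// value identify namespaces and are not links to a resource. Note that
// returning here skips every attribute on the element, not only `prefix`.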
if let Some(_prefix) = attrs.iter().find(|attr| &attr.name.local == "prefix") {
return TokenSinkResult::Continue;
}

for attr in attrs {
let urls = LinkExtractor::extract_urls_from_elem_attr(
&attr.name.local,
Expand Down Expand Up @@ -416,6 +422,16 @@ mod tests {
assert!(uris.is_empty());
}

/// An element with a `prefix` attribute must not yield any URIs from the
/// html5ever-based extractor, even though the attribute value contains a URL.
#[test]
fn test_skip_prefix() {
    // The raw string is kept flush-left so its contents stay unchanged.
    let html = r#"
<html lang="en-EN" prefix="og: https://ogp.me/ns#">
"#;

    // `false` is forwarded unchanged as the second argument of `extract_html`.
    let uris = extract_html(html, false);

    assert!(uris.is_empty());
}

#[test]
fn test_ignore_text_content_links() {
let input = r#"
Expand Down
15 changes: 15 additions & 0 deletions lychee-lib/src/extract/html/html5gum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ impl LinkExtractor {
return;
}

if self.current_attributes.contains_key("prefix") {
self.current_attributes.clear();
return;
}

let new_urls = self
.extract_urls_from_elem_attr()
.into_iter()
Expand Down Expand Up @@ -613,6 +618,16 @@ mod tests {
assert!(uris.is_empty());
}

/// An element with a `prefix` attribute must not yield any URIs from the
/// html5gum-based extractor, even though the attribute value contains a URL.
#[test]
fn test_skip_prefix() {
    // The raw string is kept flush-left so its contents stay unchanged.
    let markup = r#"
<html lang="en-EN" prefix="og: https://ogp.me/ns#">
"#;

    // `false` is forwarded unchanged as the second argument of `extract_html`.
    let uris = extract_html(markup, false);

    assert!(uris.is_empty());
}

#[test]
fn test_ignore_text_content_links() {
let input = r#"
Expand Down

0 comments on commit 18a20a2

Please sign in to comment.