diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs index d10ac4e00e..3a601287ae 100644 --- a/lychee-lib/src/extract/html/html5ever.rs +++ b/lychee-lib/src/extract/html/html5ever.rs @@ -76,10 +76,10 @@ impl TokenSink for LinkExtractor { } } - // Check and exclude rel=preconnect. Other than prefetch and preload, - // preconnect only does DNS lookups and might not be a link to a resource + // Check and exclude `rel=preconnect` and `rel=dns-prefetch`. Unlike `prefetch` and `preload`, + // these hints only warm up a connection to an origin (a DNS lookup, plus TCP/TLS setup for `preconnect`) and do not necessarily link to a resource if let Some(rel) = attrs.iter().find(|attr| &attr.name.local == "rel") { - if rel.value.contains("preconnect") { + if rel.value.contains("preconnect") || rel.value.contains("dns-prefetch") { return TokenSinkResult::Continue; } } @@ -413,4 +413,24 @@ mod tests { let uris = extract_html(input, false); assert!(uris.is_empty()); } + + #[test] + fn test_skip_dns_prefetch() { + let input = r#" + + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } + + #[test] + fn test_skip_dns_prefetch_reverse_order() { + let input = r#" + + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } } diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs index 276d2e0e86..fa5db3f9b3 100644 --- a/lychee-lib/src/extract/html/html5gum.rs +++ b/lychee-lib/src/extract/html/html5gum.rs @@ -151,8 +151,8 @@ impl LinkExtractor { /// Here are the rules for extracting links: /// - If the current element has a `rel=nofollow` attribute, the current attribute /// value is ignored. - /// - If the current element has a `rel=preconnect` attribute, the current attribute - /// value is ignored. + /// - If the current element has a `rel=preconnect` or `rel=dns-prefetch` + /// attribute, the current attribute value is ignored. 
/// - If the current attribute value is not a URL, it is treated as plain text and /// added to the links vector. /// - If the current attribute name is `id`, the current attribute value is added @@ -170,8 +170,9 @@ impl LinkExtractor { } if self.current_attributes.get("rel").map_or(false, |rel| { - rel.split(',') - .any(|r| r.trim() == "nofollow" || r.trim() == "preconnect") + rel.split(',').any(|r| { + r.trim() == "nofollow" || r.trim() == "preconnect" || r.trim() == "dns-prefetch" + }) }) { self.current_attributes.clear(); return; @@ -607,4 +608,24 @@ mod tests { let uris = extract_html(input, false); assert!(uris.is_empty()); } + + #[test] + fn test_skip_dns_prefetch() { + let input = r#" + + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } + + #[test] + fn test_skip_dns_prefetch_reverse_order() { + let input = r#" + + "#; + + let uris = extract_html(input, false); + assert!(uris.is_empty()); + } }