diff --git a/.changeset/thirty-garlics-tan.md b/.changeset/thirty-garlics-tan.md new file mode 100644 index 000000000000..d31c3a55b005 --- /dev/null +++ b/.changeset/thirty-garlics-tan.md @@ -0,0 +1,5 @@ +--- +'@sveltejs/kit': minor +--- + +feat: crawl URLs in `<meta>` tags diff --git a/packages/kit/src/core/postbuild/crawl.js b/packages/kit/src/core/postbuild/crawl.js index 4d96d3cce7fc..caffef583c55 100644 --- a/packages/kit/src/core/postbuild/crawl.js +++ b/packages/kit/src/core/postbuild/crawl.js @@ -13,6 +13,20 @@ const ATTRIBUTE_NAME = /[^\t\n\f />"'=]/; const WHITESPACE = /[\s\n\r]/; +const CRAWLABLE_META_NAME_ATTRS = new Set([ + 'og:url', + 'og:image', + 'og:image:url', + 'og:image:secure_url', + 'og:video', + 'og:video:url', + 'og:video:secure_url', + 'og:audio', + 'og:audio:url', + 'og:audio:secure_url', + 'twitter:image' +]); + /** * @param {string} html * @param {string} base @@ -81,6 +95,9 @@ export function crawl(html, base) { const tag = html.slice(start, i).toUpperCase(); + /** @type {Record<string, string>} */ + const attributes = {}; + if (tag === 'SCRIPT' || tag === 'STYLE') { while (i < html.length) { if ( @@ -95,9 +112,6 @@ export function crawl(html, base) { } } - let href = ''; - let rel = ''; - while (i < html.length) { const start = i; @@ -159,44 +173,7 @@ export function crawl(html, base) { } value = decode(value); - - if (name === 'href') { - if (tag === 'BASE') { - base = resolve(base, value); - } else { - href = resolve(base, value); - } - } else if (name === 'id') { - ids.push(value); - } else if (name === 'name') { - if (tag === 'A') ids.push(value); - } else if (name === 'rel') { - rel = value; - } else if (name === 'src') { - if (value) hrefs.push(resolve(base, value)); - } else if (name === 'srcset') { - const candidates = []; - let insideURL = true; - value = value.trim(); - for (let i = 0; i < value.length; i++) { - if ( - value[i] === ',' && - (!insideURL || (insideURL && WHITESPACE.test(value[i + 1]))) - ) { - candidates.push(value.slice(0, i)); - 
value = value.substring(i + 1).trim(); - i = 0; - insideURL = true; - } else if (WHITESPACE.test(value[i])) { - insideURL = false; - } - } - candidates.push(value); - for (const candidate of candidates) { - const src = candidate.split(WHITESPACE)[0]; - if (src) hrefs.push(resolve(base, src)); - } - } + attributes[name] = value; } else { i -= 1; } @@ -205,8 +182,56 @@ export function crawl(html, base) { i += 1; } - if (href && !/\bexternal\b/i.test(rel)) { - hrefs.push(resolve(base, href)); + const { href, id, name, property, rel, src, srcset, content } = attributes; + + if (href) { + if (tag === 'BASE') { + base = resolve(base, href); + } else if (!rel || !/\bexternal\b/i.test(rel)) { + hrefs.push(resolve(base, href)); + } + } + + if (id) { + ids.push(id); + } + + if (name && tag === 'A') { + ids.push(name); + } + + if (src) { + hrefs.push(resolve(base, src)); + } + + if (srcset) { + let value = srcset; + const candidates = []; + let insideURL = true; + value = value.trim(); + for (let i = 0; i < value.length; i++) { + if (value[i] === ',' && (!insideURL || (insideURL && WHITESPACE.test(value[i + 1])))) { + candidates.push(value.slice(0, i)); + value = value.substring(i + 1).trim(); + i = 0; + insideURL = true; + } else if (WHITESPACE.test(value[i])) { + insideURL = false; + } + } + candidates.push(value); + for (const candidate of candidates) { + const src = candidate.split(WHITESPACE)[0]; + if (src) hrefs.push(resolve(base, src)); + } + } + + if (tag === 'META' && content) { + const attr = name ?? 
property; + + if (attr && CRAWLABLE_META_NAME_ATTRS.has(attr)) { + hrefs.push(resolve(base, content)); + } } } } diff --git a/packages/kit/src/core/postbuild/fixtures/meta/input.html b/packages/kit/src/core/postbuild/fixtures/meta/input.html new file mode 100644 index 000000000000..72c02f90961a --- /dev/null +++ b/packages/kit/src/core/postbuild/fixtures/meta/input.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/packages/kit/src/core/postbuild/fixtures/meta/output.json b/packages/kit/src/core/postbuild/fixtures/meta/output.json new file mode 100644 index 000000000000..76cfe72ada91 --- /dev/null +++ b/packages/kit/src/core/postbuild/fixtures/meta/output.json @@ -0,0 +1,4 @@ +{ + "hrefs": ["https://external.com", "/og-image.jpg", "https://example.com/audio.mp3", "/video.mp4"], + "ids": [] +}