diff --git a/.changeset/thirty-garlics-tan.md b/.changeset/thirty-garlics-tan.md
new file mode 100644
index 000000000000..d31c3a55b005
--- /dev/null
+++ b/.changeset/thirty-garlics-tan.md
@@ -0,0 +1,5 @@
+---
+'@sveltejs/kit': minor
+---
+
+feat: crawl URLs in `<meta>` tags
diff --git a/packages/kit/src/core/postbuild/crawl.js b/packages/kit/src/core/postbuild/crawl.js
index 4d96d3cce7fc..caffef583c55 100644
--- a/packages/kit/src/core/postbuild/crawl.js
+++ b/packages/kit/src/core/postbuild/crawl.js
@@ -13,6 +13,20 @@ const ATTRIBUTE_NAME = /[^\t\n\f />"'=]/;
const WHITESPACE = /[\s\n\r]/;
+const CRAWLABLE_META_NAME_ATTRS = new Set([ // <meta> `name`/`property` values whose `content` is crawled as a URL
+ 'og:url',
+ 'og:image',
+ 'og:image:url',
+ 'og:image:secure_url',
+ 'og:video',
+ 'og:video:url',
+ 'og:video:secure_url',
+ 'og:audio',
+ 'og:audio:url',
+ 'og:audio:secure_url',
+ 'twitter:image'
+]); // crawl() checks `name ?? property` against this set and, on a match, resolves `content` against `base` into `hrefs`
+
/**
* @param {string} html
* @param {string} base
@@ -81,6 +95,9 @@ export function crawl(html, base) {
const tag = html.slice(start, i).toUpperCase();
+ /** @type {Record<string, string>} */
+ const attributes = {};
+
if (tag === 'SCRIPT' || tag === 'STYLE') {
while (i < html.length) {
if (
@@ -95,9 +112,6 @@ export function crawl(html, base) {
}
}
- let href = '';
- let rel = '';
-
while (i < html.length) {
const start = i;
@@ -159,44 +173,7 @@ export function crawl(html, base) {
}
value = decode(value);
-
- if (name === 'href') {
- if (tag === 'BASE') {
- base = resolve(base, value);
- } else {
- href = resolve(base, value);
- }
- } else if (name === 'id') {
- ids.push(value);
- } else if (name === 'name') {
- if (tag === 'A') ids.push(value);
- } else if (name === 'rel') {
- rel = value;
- } else if (name === 'src') {
- if (value) hrefs.push(resolve(base, value));
- } else if (name === 'srcset') {
- const candidates = [];
- let insideURL = true;
- value = value.trim();
- for (let i = 0; i < value.length; i++) {
- if (
- value[i] === ',' &&
- (!insideURL || (insideURL && WHITESPACE.test(value[i + 1])))
- ) {
- candidates.push(value.slice(0, i));
- value = value.substring(i + 1).trim();
- i = 0;
- insideURL = true;
- } else if (WHITESPACE.test(value[i])) {
- insideURL = false;
- }
- }
- candidates.push(value);
- for (const candidate of candidates) {
- const src = candidate.split(WHITESPACE)[0];
- if (src) hrefs.push(resolve(base, src));
- }
- }
+ attributes[name] = value;
} else {
i -= 1;
}
@@ -205,8 +182,56 @@ export function crawl(html, base) {
i += 1;
}
- if (href && !/\bexternal\b/i.test(rel)) {
- hrefs.push(resolve(base, href));
+ const { href, id, name, property, rel, src, srcset, content } = attributes;
+
+ if (href) {
+ if (tag === 'BASE') {
+ base = resolve(base, href);
+ } else if (!rel || !/\bexternal\b/i.test(rel)) {
+ hrefs.push(resolve(base, href));
+ }
+ }
+
+ if (id) {
+ ids.push(id);
+ }
+
+ if (name && tag === 'A') {
+ ids.push(name);
+ }
+
+ if (src) {
+ hrefs.push(resolve(base, src));
+ }
+
+ if (srcset) {
+ let value = srcset;
+ const candidates = [];
+ let insideURL = true;
+ value = value.trim();
+ for (let i = 0; i < value.length; i++) {
+ if (value[i] === ',' && (!insideURL || (insideURL && WHITESPACE.test(value[i + 1])))) {
+ candidates.push(value.slice(0, i));
+ value = value.substring(i + 1).trim();
+ i = 0;
+ insideURL = true;
+ } else if (WHITESPACE.test(value[i])) {
+ insideURL = false;
+ }
+ }
+ candidates.push(value);
+ for (const candidate of candidates) {
+ const src = candidate.split(WHITESPACE)[0];
+ if (src) hrefs.push(resolve(base, src));
+ }
+ }
+
+ if (tag === 'META' && content) {
+ const attr = name ?? property;
+
+ if (attr && CRAWLABLE_META_NAME_ATTRS.has(attr)) {
+ hrefs.push(resolve(base, content));
+ }
}
}
}
diff --git a/packages/kit/src/core/postbuild/fixtures/meta/input.html b/packages/kit/src/core/postbuild/fixtures/meta/input.html
new file mode 100644
index 000000000000..72c02f90961a
--- /dev/null
+++ b/packages/kit/src/core/postbuild/fixtures/meta/input.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/packages/kit/src/core/postbuild/fixtures/meta/output.json b/packages/kit/src/core/postbuild/fixtures/meta/output.json
new file mode 100644
index 000000000000..76cfe72ada91
--- /dev/null
+++ b/packages/kit/src/core/postbuild/fixtures/meta/output.json
@@ -0,0 +1,4 @@
+{
+ "hrefs": ["https://external.com", "/og-image.jpg", "https://example.com/audio.mp3", "/video.mp4"],
+ "ids": []
+}