diff --git a/.changeset/chatty-pigs-film.md b/.changeset/chatty-pigs-film.md
new file mode 100644
index 000000000000..08aabe12ba02
--- /dev/null
+++ b/.changeset/chatty-pigs-film.md
@@ -0,0 +1,5 @@
+---
+'@astrojs/sitemap': minor
+---
+
+Ensure sitemap only excludes numerical pages matching `/404` and `/500` exactly
diff --git a/packages/integrations/sitemap/src/generate-sitemap.ts b/packages/integrations/sitemap/src/generate-sitemap.ts
index 03985f08d0e2..b10771ce481a 100644
--- a/packages/integrations/sitemap/src/generate-sitemap.ts
+++ b/packages/integrations/sitemap/src/generate-sitemap.ts
@@ -2,13 +2,11 @@ import type { EnumChangefreq } from 'sitemap';
import type { SitemapItem, SitemapOptions } from './index.js';
import { parseUrl } from './utils/parse-url.js';
-const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;
-
/** Construct sitemap.xml given a set of URLs */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
// TODO: find way to respect URLs here
- const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
+ const urls = [...pages];
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
const lastmod = lastmodSrc?.toISOString();
diff --git a/packages/integrations/sitemap/src/index.ts b/packages/integrations/sitemap/src/index.ts
index 950646247467..ffa593a92e40 100644
--- a/packages/integrations/sitemap/src/index.ts
+++ b/packages/integrations/sitemap/src/index.ts
@@ -22,24 +22,24 @@ export type LinkItem = LinkItemBase;
export type SitemapOptions =
| {
- filter?(page: string): boolean;
- customPages?: string[];
-
- i18n?: {
- defaultLocale: string;
- locales: Record;
- };
- // number of entries per sitemap file
- entryLimit?: number;
-
- // sitemap specific
- changefreq?: ChangeFreq;
- lastmod?: Date;
- priority?: number;
-
- // called for each sitemap item just before to save them on disk, sync or async
- serialize?(item: SitemapItem): SitemapItem | Promise | undefined;
- }
+ filter?(page: string): boolean;
+ customPages?: string[];
+
+ i18n?: {
+ defaultLocale: string;
+ locales: Record;
+ };
+ // number of entries per sitemap file
+ entryLimit?: number;
+
+ // sitemap specific
+ changefreq?: ChangeFreq;
+ lastmod?: Date;
+ priority?: number;
+
+ // called for each sitemap item just before to save them on disk, sync or async
+ serialize?(item: SitemapItem): SitemapItem | Promise | undefined;
+ }
| undefined;
function formatConfigErrorMessage(err: ZodError) {
@@ -49,6 +49,7 @@ function formatConfigErrorMessage(err: ZodError) {
const PKG_NAME = '@astrojs/sitemap';
const OUTFILE = 'sitemap-index.xml';
+const STATUS_CODE_PAGES = new Set(['/404', '/500']);
const createPlugin = (options?: SitemapOptions): AstroIntegration => {
let config: AstroConfig;
@@ -85,7 +86,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
return;
}
- let pageUrls = pages.map((p) => {
+ let pageUrls = pages.filter((p) => !STATUS_CODE_PAGES.has('/' + p.pathname.slice(0, -1))).map((p) => {
if (p.pathname !== '' && !finalSiteUrl.pathname.endsWith('/'))
finalSiteUrl.pathname += '/';
const path = finalSiteUrl.pathname + p.pathname;
@@ -97,6 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
* Dynamic URLs have entries with `undefined` pathnames
*/
if (r.pathname) {
+ if (STATUS_CODE_PAGES.has(r.pathname)) return urls;
/**
* remove the initial slash from relative pathname
* because `finalSiteUrl` always has trailing slash
diff --git a/packages/integrations/sitemap/test/filter.test.js b/packages/integrations/sitemap/test/filter.test.js
index 50a34007dd5d..b2623248170a 100644
--- a/packages/integrations/sitemap/test/filter.test.js
+++ b/packages/integrations/sitemap/test/filter.test.js
@@ -12,7 +12,7 @@ describe('Filter support', () => {
root: './fixtures/static/',
integrations: [
sitemap({
- filter: (page) => page !== 'http://example.com/two/',
+ filter: (page) => page === 'http://example.com/one/',
}),
],
});
@@ -32,7 +32,7 @@ describe('Filter support', () => {
root: './fixtures/ssr/',
integrations: [
sitemap({
- filter: (page) => page !== 'http://example.com/two/',
+ filter: (page) => page === 'http://example.com/one/',
}),
],
});
diff --git a/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro b/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro
new file mode 100644
index 000000000000..115292de96e0
--- /dev/null
+++ b/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro
@@ -0,0 +1,8 @@
+
+
+ 123
+
+
+ 123
+
+
diff --git a/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro b/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro
new file mode 100644
index 000000000000..9e307c5c292c
--- /dev/null
+++ b/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro
@@ -0,0 +1,8 @@
+
+
+ 404
+
+
+ 404
+
+
diff --git a/packages/integrations/sitemap/test/staticPaths.test.js b/packages/integrations/sitemap/test/staticPaths.test.js
index bb818e7cd0fd..6fddbb193076 100644
--- a/packages/integrations/sitemap/test/staticPaths.test.js
+++ b/packages/integrations/sitemap/test/staticPaths.test.js
@@ -4,19 +4,29 @@ import { expect } from 'chai';
describe('getStaticPaths support', () => {
/** @type {import('./test-utils.js').Fixture} */
let fixture;
+ /** @type {string[]} */
+ let urls;
before(async () => {
fixture = await loadFixture({
root: './fixtures/static/',
});
await fixture.build();
- });
- it('getStaticPath pages require zero config', async () => {
const data = await readXML(fixture.readFile('/sitemap-0.xml'));
- const urls = data.urlset.url;
+ urls = data.urlset.url.map(url => url.loc[0]);
+ });
- expect(urls[0].loc[0]).to.equal('http://example.com/one/');
- expect(urls[1].loc[0]).to.equal('http://example.com/two/');
+ it('requires zero config for getStaticPaths', async () => {
+ expect(urls).to.include('http://example.com/one/');
+ expect(urls).to.include('http://example.com/two/');
});
+
+ it('does not include 404 pages', () => {
+ expect(urls).to.not.include('http://example.com/404/');
+ });
+
+ it('includes numerical pages', () => {
+ expect(urls).to.include('http://example.com/123/');
+ })
});