diff --git a/.changeset/chatty-pigs-film.md b/.changeset/chatty-pigs-film.md new file mode 100644 index 000000000000..08aabe12ba02 --- /dev/null +++ b/.changeset/chatty-pigs-film.md @@ -0,0 +1,5 @@ +--- +'@astrojs/sitemap': minor +--- + +Ensure sitemap only excludes numerical pages matching `/404` and `/500` exactly diff --git a/packages/integrations/sitemap/src/generate-sitemap.ts b/packages/integrations/sitemap/src/generate-sitemap.ts index 03985f08d0e2..b10771ce481a 100644 --- a/packages/integrations/sitemap/src/generate-sitemap.ts +++ b/packages/integrations/sitemap/src/generate-sitemap.ts @@ -2,13 +2,11 @@ import type { EnumChangefreq } from 'sitemap'; import type { SitemapItem, SitemapOptions } from './index.js'; import { parseUrl } from './utils/parse-url.js'; -const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/; - /** Construct sitemap.xml given a set of URLs */ export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) { const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!; // TODO: find way to respect URLs here - const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url)); + const urls = [...pages]; urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time const lastmod = lastmodSrc?.toISOString(); diff --git a/packages/integrations/sitemap/src/index.ts b/packages/integrations/sitemap/src/index.ts index 950646247467..ffa593a92e40 100644 --- a/packages/integrations/sitemap/src/index.ts +++ b/packages/integrations/sitemap/src/index.ts @@ -22,24 +22,24 @@ export type LinkItem = LinkItemBase; export type SitemapOptions = | { - filter?(page: string): boolean; - customPages?: string[]; - - i18n?: { - defaultLocale: string; - locales: Record; - }; - // number of entries per sitemap file - entryLimit?: number; - - // sitemap specific - changefreq?: ChangeFreq; - lastmod?: Date; - priority?: number; - - // called for each sitemap item just before 
to save them on disk, sync or async - serialize?(item: SitemapItem): SitemapItem | Promise | undefined; - } + filter?(page: string): boolean; + customPages?: string[]; + + i18n?: { + defaultLocale: string; + locales: Record; + }; + // number of entries per sitemap file + entryLimit?: number; + + // sitemap specific + changefreq?: ChangeFreq; + lastmod?: Date; + priority?: number; + + // called for each sitemap item just before to save them on disk, sync or async + serialize?(item: SitemapItem): SitemapItem | Promise | undefined; + } | undefined; function formatConfigErrorMessage(err: ZodError) { @@ -49,6 +49,7 @@ function formatConfigErrorMessage(err: ZodError) { const PKG_NAME = '@astrojs/sitemap'; const OUTFILE = 'sitemap-index.xml'; +const STATUS_CODE_PAGES = new Set(['/404', '/500']); const createPlugin = (options?: SitemapOptions): AstroIntegration => { let config: AstroConfig; @@ -85,7 +86,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => { return; } - let pageUrls = pages.map((p) => { + let pageUrls = pages.filter((p) => !STATUS_CODE_PAGES.has('/' + p.pathname.replace(/\/$/, ''))).map((p) => { if (p.pathname !== '' && !finalSiteUrl.pathname.endsWith('/')) finalSiteUrl.pathname += '/'; const path = finalSiteUrl.pathname + p.pathname; @@ -97,6 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => { * Dynamic URLs have entries with `undefined` pathnames */ if (r.pathname) { + if (STATUS_CODE_PAGES.has(r.pathname)) return urls; /** * remove the initial slash from relative pathname * because `finalSiteUrl` always has trailing slash diff --git a/packages/integrations/sitemap/test/filter.test.js b/packages/integrations/sitemap/test/filter.test.js index 50a34007dd5d..b2623248170a 100644 --- a/packages/integrations/sitemap/test/filter.test.js +++ b/packages/integrations/sitemap/test/filter.test.js @@ -12,7 +12,7 @@ describe('Filter support', () => { root: './fixtures/static/', integrations: [ sitemap({ - filter: (page) => 
page !== 'http://example.com/two/', + filter: (page) => page === 'http://example.com/one/', }), ], }); @@ -32,7 +32,7 @@ describe('Filter support', () => { root: './fixtures/ssr/', integrations: [ sitemap({ - filter: (page) => page !== 'http://example.com/two/', + filter: (page) => page === 'http://example.com/one/', }), ], }); diff --git a/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro b/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro new file mode 100644 index 000000000000..115292de96e0 --- /dev/null +++ b/packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro @@ -0,0 +1,8 @@ + + + 123 + + +

123

+ + diff --git a/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro b/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro new file mode 100644 index 000000000000..9e307c5c292c --- /dev/null +++ b/packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro @@ -0,0 +1,8 @@ + + + 404 + + +

404

+ + diff --git a/packages/integrations/sitemap/test/staticPaths.test.js b/packages/integrations/sitemap/test/staticPaths.test.js index bb818e7cd0fd..6fddbb193076 100644 --- a/packages/integrations/sitemap/test/staticPaths.test.js +++ b/packages/integrations/sitemap/test/staticPaths.test.js @@ -4,19 +4,29 @@ import { expect } from 'chai'; describe('getStaticPaths support', () => { /** @type {import('./test-utils.js').Fixture} */ let fixture; + /** @type {string[]} */ + let urls; before(async () => { fixture = await loadFixture({ root: './fixtures/static/', }); await fixture.build(); - }); - it('getStaticPath pages require zero config', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); - const urls = data.urlset.url; + urls = data.urlset.url.map(url => url.loc[0]); + }); - expect(urls[0].loc[0]).to.equal('http://example.com/one/'); - expect(urls[1].loc[0]).to.equal('http://example.com/two/'); + it('requires zero config for getStaticPaths', async () => { + expect(urls).to.include('http://example.com/one/'); + expect(urls).to.include('http://example.com/two/'); }); + + it('does not include 404 pages', () => { + expect(urls).to.not.include('http://example.com/404/'); + }); + + it('includes numerical pages', () => { + expect(urls).to.include('http://example.com/123/'); + }) });