From 67a1fb1f97361c46b1a851d39ad1cb11c319578f Mon Sep 17 00:00:00 2001 From: Scott Nath Date: Fri, 26 Apr 2024 12:15:54 -0400 Subject: [PATCH] :bug: make 404 a sitemap; block AI in robots.txt --- workspaces/website/404.html | 12 ---- workspaces/website/src/consts.ts | 9 +++ workspaces/website/src/pages/404.astro | 38 +++++++++++ workspaces/website/src/pages/robots.txt.ts | 79 +++++++++++++++++++++- 4 files changed, 125 insertions(+), 13 deletions(-) delete mode 100644 workspaces/website/404.html create mode 100644 workspaces/website/src/pages/404.astro diff --git a/workspaces/website/404.html b/workspaces/website/404.html deleted file mode 100644 index a08c02a..0000000 --- a/workspaces/website/404.html +++ /dev/null @@ -1,12 +0,0 @@ - - - Redirecting to Scott Nath's fantastic website landing page that is awesome - - - - - - -

This page is fail, go to scottnath.com.

- - \ No newline at end of file diff --git a/workspaces/website/src/consts.ts b/workspaces/website/src/consts.ts index a0071ec..874aa77 100644 --- a/workspaces/website/src/consts.ts +++ b/workspaces/website/src/consts.ts @@ -29,6 +29,15 @@ export const RESUME_DEFUALTS = { }, }; +export const Four04 = { + title: `WHOOPS! (404)`, + description: `Looks like you got lost on this website by Scott Nath`, + pageTitle: '', + pageDesc: '' +} +Four04.pageTitle = `${Four04.title}: scottnath.com wayward visitor page with full site map` +Four04.pageDesc = `This page is the 404 landing page for scottnath.com, it includes links to all pages so you can get un-lost` + export const BLAHG = { title: `Blah blah blahg by Scott Nath`, description: `Articles and rambling-on by Scott Nath`, diff --git a/workspaces/website/src/pages/404.astro b/workspaces/website/src/pages/404.astro new file mode 100644 index 0000000..158ff59 --- /dev/null +++ b/workspaces/website/src/pages/404.astro @@ -0,0 +1,38 @@ +--- +import { getCollection } from 'astro:content'; +import { Image } from "astro:assets"; + +import SiteWide from '~layouts/SiteWide.astro'; +import { Four04, BLAHG, SITE_SUBTITLE } from '~/consts'; + +const posts = (await getCollection('blahg')).sort( + (a, b) => b.data.pubDate.valueOf() - a.data.pubDate.valueOf() +); +--- + + +
+

{Four04.title}

+

{Four04.description}

+ +
+
+ diff --git a/workspaces/website/src/pages/robots.txt.ts b/workspaces/website/src/pages/robots.txt.ts index 9649a44..498f01d 100644 --- a/workspaces/website/src/pages/robots.txt.ts +++ b/workspaces/website/src/pages/robots.txt.ts @@ -1,8 +1,85 @@ import type { APIRoute } from 'astro'; +/** + * @todo - try again to automate darkvisitors, on 4/26/24 api was status 500 + * @see https://darkvisitors.com/docs/set-up-a-robots-txt + * @see https://coryd.dev/posts/2024/go-ahead-and-block-ai-web-crawlers/ + */ const robotsTxt = ` User-agent: * Allow: / +# Disallow Rules + +User-agent: Amazonbot +Disallow: / + +User-agent: anthropic-ai +Disallow: / + +User-agent: AwarioRssBot +User-agent: AwarioSmartBot +Disallow: / + +User-agent: Bytespider +Disallow: / + +User-agent: CCBot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: ClaudeBot +Disallow: / + +User-agent: Claude-Web +Disallow: / + +User-agent: cohere-ai +Disallow: / + +User-agent: DataForSeoBot +Disallow: / + +User-agent: Diffbot +Disallow: / + +User-agent: FacebookBot +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: GPTBot +Disallow: / + +User-agent: magpie-crawler +Disallow: / + +User-agent: NewsNow +Disallow: / + +User-agent: news-please +Disallow: / + +User-agent: omgili +Disallow: / + +User-agent: omgilibot +Disallow: / + +User-agent: peer39_crawler +User-agent: peer39_crawler/1.0 +Disallow: / + +User-agent: PerplexityBot +Disallow: / + +User-agent: Scrapy +Disallow: / + +User-agent: TurnitinBot +Disallow: / Sitemap: ${new URL('sitemap-index.xml', import.meta.env.SITE).href} `.trim(); @@ -13,4 +90,4 @@ export const GET: APIRoute = () => { 'Content-Type': 'text/plain; charset=utf-8', }, }); -}; \ No newline at end of file +};