Skip to content

Commit

Permalink
🐛 make 404 a sitemap; block AI in robots.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
scottnath committed Apr 26, 2024
1 parent 05a603a commit 67a1fb1
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 13 deletions.
12 changes: 0 additions & 12 deletions workspaces/website/404.html

This file was deleted.

9 changes: 9 additions & 0 deletions workspaces/website/src/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ export const RESUME_DEFUALTS = {
},
};

/**
 * Text content for the 404 ("not found") page.
 *
 * - `title` / `description`: short heading and blurb.
 * - `pageTitle` / `pageDesc`: longer variants; `pageTitle` starts with `title`.
 */
export const Four04 = (() => {
  const title = `WHOOPS! (404)`;
  return {
    title,
    description: `Looks like you got lost on this website by Scott Nath`,
    // Derived from `title` so the two strings cannot drift apart.
    pageTitle: `${title}: scottnath.com wayward visitor page with full site map`,
    pageDesc: `This page is the 404 landing page for scottnath.com, it includes links to all pages so you can get un-lost`,
  };
})();

export const BLAHG = {
title: `Blah blah blahg by Scott Nath`,
description: `Articles and rambling-on by Scott Nath`,
Expand Down
38 changes: 38 additions & 0 deletions workspaces/website/src/pages/404.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
// 404 page: shows the "not found" copy from `Four04` followed by a
// hand-built list of links to the main pages, the XML sitemap, and every
// entry in the 'blahg' content collection.
import { getCollection } from 'astro:content';
// NOTE(review): `Image` is not referenced anywhere in this page.
import { Image } from "astro:assets";
import SiteWide from '~layouts/SiteWide.astro';
import { Four04, BLAHG, SITE_SUBTITLE } from '~/consts';
// Blahg entries ordered by `pubDate`, newest first.
const posts = (await getCollection('blahg')).sort(
(a, b) => b.data.pubDate.valueOf() - a.data.pubDate.valueOf()
);
---

<SiteWide title={Four04.pageTitle} description={Four04.pageDesc}>
<section itemscope itemtype="https://schema.org/WebPage" class="readable">
<h1 itemprop="name">{Four04.title}</h1>
<p itemprop="description">{Four04.description}</p>
<ul class="page-list">
<li><a href="/" itemprop="relatedLink">Home page of scottnath.com</a></li>
<li><a href="/whoami/" itemprop="relatedLink">The Scott Nath About page</a></li>
<li><a href="/resume/" itemprop="relatedLink">Resume for Scott Nath, {SITE_SUBTITLE}</a></li>
<li><a href="/sitemap-index.xml" target="_blank" itemprop="significantLink">Sitemap: scottnath.com/sitemap-index.xml</a></li>
<li><a href="/blahg/" itemprop="relatedLink">{BLAHG.title}</a><ul>
{
posts.map((post) => (
<li itemprop="significantLink" itemscope itemtype="https://schema.org/BlogPosting">
<a href={`/blahg/${post.slug}/`} itemprop="name">{post.data.title}</a>
</li>
))
}
</ul></li>
</ul>
</section>
</SiteWide>
<style>
a {
text-decoration: none;
}
</style>
79 changes: 78 additions & 1 deletion workspaces/website/src/pages/robots.txt.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,85 @@
import type { APIRoute } from 'astro';

/**
 * Crawler user-agents that are denied the whole site.
 *
 * Each inner array is one robots.txt group: every name in it gets its own
 * `User-agent:` line and the group shares a single `Disallow: /` rule.
 *
 * @todo - try again to automate darkvisitors, on 4/26/24 api was status 500
 * @see https://darkvisitors.com/docs/set-up-a-robots-txt
 * @see https://coryd.dev/posts/2024/go-ahead-and-block-ai-web-crawlers/
 */
const BLOCKED_AGENT_GROUPS: readonly (readonly string[])[] = [
  ['Amazonbot'],
  ['anthropic-ai'],
  ['AwarioRssBot', 'AwarioSmartBot'],
  ['Bytespider'],
  ['CCBot'],
  ['ChatGPT-User'],
  ['ClaudeBot'],
  ['Claude-Web'],
  ['cohere-ai'],
  ['DataForSeoBot'],
  ['Diffbot'],
  ['FacebookBot'],
  ['Google-Extended'],
  ['GPTBot'],
  ['magpie-crawler'],
  ['NewsNow'],
  ['news-please'],
  ['omgili'],
  ['omgilibot'],
  ['peer39_crawler', 'peer39_crawler/1.0'],
  ['PerplexityBot'],
  ['Scrapy'],
  ['TurnitinBot'],
];

/**
 * Full robots.txt body: allow everyone by default, then one disallow group
 * per entry in `BLOCKED_AGENT_GROUPS`, then the absolute sitemap URL resolved
 * against `import.meta.env.SITE`.
 */
const robotsTxt = [
  'User-agent: *',
  'Allow: /',
  '# Disallow Rules',
  ...BLOCKED_AGENT_GROUPS.flatMap((agents) => [
    ...agents.map((agent) => `User-agent: ${agent}`),
    'Disallow: /',
  ]),
  `Sitemap: ${new URL('sitemap-index.xml', import.meta.env.SITE).href}`,
].join('\n');
Expand All @@ -13,4 +90,4 @@ export const GET: APIRoute = () => {
'Content-Type': 'text/plain; charset=utf-8',
},
});
};
};

0 comments on commit 67a1fb1

Please sign in to comment.