diff --git a/bun.lockb b/bun.lockb
index f5a6b33..3e97801 100755
Binary files a/bun.lockb and b/bun.lockb differ
diff --git a/package.json b/package.json
index 0a7f2a4..ac16773 100644
--- a/package.json
+++ b/package.json
@@ -47,6 +47,7 @@
     "nanoid": "^5.0.9",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-slug": "^6.0.0",
+    "robots-parser": "^3.0.1",
     "steamgriddb": "^2.2.0",
     "typescript-svelte-plugin": "^0.3.43"
   },
diff --git a/src/lib/lastfm.ts b/src/lib/lastfm.ts
index dabceb2..25425e5 100644
--- a/src/lib/lastfm.ts
+++ b/src/lib/lastfm.ts
@@ -15,7 +15,7 @@ export const lastFmGetNowPlaying: () => Promise = async () =>
     try {
         var resp = await (await fetch(GET_RECENT_TRACKS_ENDPOINT)).json()
         var track = resp.recenttracks.track[0] ?? null
-        if (!(track['@attr'].nowplaying ?? null)) {
+        if (!((track['@attr'] ?? {}).nowplaying ?? null)) {
             throw "no nowplaying track found"
         }
         var data = {
diff --git a/src/lib/robots.ts b/src/lib/robots.ts
new file mode 100644
index 0000000..4578f01
--- /dev/null
+++ b/src/lib/robots.ts
@@ -0,0 +1,53 @@
+import { env } from '$env/dynamic/private'
+import { get, writable } from 'svelte/store'
+import { type Robot } from 'robots-parser'
+import robotsParser from 'robots-parser'
+import { PUBLIC_BASE_URL } from '$env/static/public'
+
+const cachedParsedRobots = writable<Robot | null>(null)
+const cachedRobots = writable("")
+const lastFetched = writable(Date.now())
+
+const fetchRobotsTxt = async () => {
+    const robotsTxtResp = await fetch(
+        "https://api.darkvisitors.com/robots-txts",
+        {
+            method: "POST",
+            headers: {
+                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
+                "Content-Type": "application/json"
+            },
+            body: JSON.stringify({
+                agent_types: [
+                    "AI Assistant",
+                    "AI Data Scraper",
+                    "AI Search Crawler",
+                    "Undocumented AI Agent",
+                ],
+                disallow: "/"
+            })
+        }
+    )
+    const robotsTxt = await robotsTxtResp.text()
+    lastFetched.set(Date.now())
+    return robotsTxt
+}
+
+export const getRobotsTxt = async () => {
+    let robotsTxt = get(cachedRobots)
+    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
+        robotsTxt = await fetchRobotsTxt()
+        cachedRobots.set(robotsTxt)
+        cachedParsedRobots.set(robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, robotsTxt))
+    }
+    return robotsTxt
+}
+
+export const testUa = async (url: string, ua: string) => {
+    let parsedRobots = get(cachedParsedRobots)
+    if (parsedRobots === null) {
+        parsedRobots = robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, await getRobotsTxt())
+        cachedParsedRobots.set(parsedRobots)
+    }
+    return parsedRobots.isAllowed(url, ua)
+}
\ No newline at end of file
diff --git a/src/routes/+layout.server.ts b/src/routes/+layout.server.ts
index bef57e6..c868567 100644
--- a/src/routes/+layout.server.ts
+++ b/src/routes/+layout.server.ts
@@ -1,13 +1,20 @@
+import { testUa } from '$lib/robots.js';
 import { incrementVisitCount, notifyDarkVisitors } from '$lib/visits.js';
+import { error } from '@sveltejs/kit';
 
 export const csr = true;
 export const ssr = true;
 export const prerender = false;
 export const trailingSlash = 'always';
 
-export async function load({ request, cookies, url, setHeaders }) {
+export async function load({ request, cookies, url }) {
     notifyDarkVisitors(url, request) // no await so it doesnt block load
 
+    // block any requests if the user agent is disallowed by our robots txt
+    if (await testUa(url.toString(), request.headers.get('user-agent') ?? "unknown user agent") === false) {
+        throw error(403, "get a better user agent silly")
+    }
+
     return {
         route: url.pathname,
         visitCount: incrementVisitCount(request, cookies),
diff --git a/src/routes/robots.txt/+server.ts b/src/routes/robots.txt/+server.ts
index 2a18f2d..b98a0a4 100644
--- a/src/routes/robots.txt/+server.ts
+++ b/src/routes/robots.txt/+server.ts
@@ -1,39 +1,5 @@
-import { env } from '$env/dynamic/private';
-import { get, writable } from 'svelte/store';
-
-const cachedRobots = writable("")
-const lastFetched = writable(Date.now())
-
-const fetchRobotsTxt = async () => {
-    const robotsTxtResp = await fetch(
-        "https://api.darkvisitors.com/robots-txts",
-        {
-            method: "POST",
-            headers: {
-                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
-                "Content-Type": "application/json"
-            },
-            body: JSON.stringify({
-                agent_types: [
-                    "AI Assistant",
-                    "AI Data Scraper",
-                    "AI Search Crawler",
-                    "Undocumented AI Agent",
-                ],
-                disallow: "/"
-            })
-        }
-    )
-    const robotsTxt = await robotsTxtResp.text()
-    lastFetched.set(Date.now())
-    return robotsTxt
-}
+import { getRobotsTxt } from "$lib/robots"
 
 export const GET = async ({ }) => {
-    let robotsTxt = get(cachedRobots)
-    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
-        robotsTxt = await fetchRobotsTxt()
-        cachedRobots.set(robotsTxt)
-    }
-    return new Response(robotsTxt)
+    return new Response(await getRobotsTxt())
 }
\ No newline at end of file