feat: block any requests that are disallowed by robots txt
parent c9765abf6c
commit a67eb0a376
@@ -47,6 +47,7 @@
     "nanoid": "^5.0.9",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-slug": "^6.0.0",
+    "robots-parser": "^3.0.1",
     "steamgriddb": "^2.2.0",
     "typescript-svelte-plugin": "^0.3.43"
   },
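The new robots-parser dependency supplies the parser used in src/lib/robots.ts below. For context, a minimal sketch of its documented API (the robots.txt body and user agents here are made-up examples, not from the commit):

    import robotsParser from 'robots-parser'

    // parse a robots.txt body against the URL it would be served from
    const robots = robotsParser('https://example.com/robots.txt', 'User-agent: GPTBot\nDisallow: /')
    robots.isAllowed('https://example.com/page/', 'GPTBot')       // false: matched by the Disallow rule
    robots.isAllowed('https://example.com/page/', 'SomeOtherBot') // true: no matching user-agent group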
@@ -15,7 +15,7 @@ export const lastFmGetNowPlaying: () => Promise<LastTrack | null> = async () =>
     try {
         var resp = await (await fetch(GET_RECENT_TRACKS_ENDPOINT)).json()
         var track = resp.recenttracks.track[0] ?? null
-        if (!(track['@attr'].nowplaying ?? null)) {
+        if (!((track['@attr'] ?? {}).nowplaying ?? null)) {
             throw "no nowplaying track found"
         }
         var data = {
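The changed guard keeps the now-playing lookup from throwing a TypeError when the latest track has no '@attr' object. A tighter alternative using optional chaining (a sketch against the track variable above, not what the commit ships) would also cover the case where track itself is null:

    // hypothetical alternative to the (track['@attr'] ?? {}) guard
    if (!track?.['@attr']?.nowplaying) {
        throw "no nowplaying track found"
    }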
src/lib/robots.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
+import { env } from '$env/dynamic/private'
+import { get, writable } from 'svelte/store'
+import { type Robot } from 'robots-parser'
+import robotsParser from 'robots-parser'
+import { PUBLIC_BASE_URL } from '$env/static/public'
+
+const cachedParsedRobots = writable<Robot | null>(null)
+const cachedRobots = writable<string>("")
+const lastFetched = writable<number>(Date.now())
+
+const fetchRobotsTxt = async () => {
+    const robotsTxtResp = await fetch(
+        "https://api.darkvisitors.com/robots-txts",
+        {
+            method: "POST",
+            headers: {
+                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
+                "Content-Type": "application/json"
+            },
+            body: JSON.stringify({
+                agent_types: [
+                    "AI Assistant",
+                    "AI Data Scraper",
+                    "AI Search Crawler",
+                    "Undocumented AI Agent",
+                ],
+                disallow: "/"
+            })
+        }
+    )
+    const robotsTxt = await robotsTxtResp.text()
+    lastFetched.set(Date.now())
+    return robotsTxt
+}
+
+export const getRobotsTxt = async () => {
+    let robotsTxt = get(cachedRobots)
+    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
+        robotsTxt = await fetchRobotsTxt()
+        cachedRobots.set(robotsTxt)
+        cachedParsedRobots.set(robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, robotsTxt))
+    }
+    return robotsTxt
+}
+
+export const testUa = async (url: string, ua: string) => {
+    let parsedRobots = get(cachedParsedRobots)
+    if (parsedRobots === null) {
+        parsedRobots = robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, await getRobotsTxt())
+        cachedParsedRobots.set(parsedRobots)
+    }
+    return parsedRobots.isAllowed(url, ua)
+}
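src/lib/robots.ts caches both the raw Dark Visitors robots.txt and the parsed Robot object in module-level stores, refetching after 24 hours. A usage sketch of the exported helpers (the page URL and user agent are hypothetical):

    import { getRobotsTxt, testUa } from '$lib/robots'

    const txt = await getRobotsTxt()   // raw robots.txt text, also served by the /robots.txt route
    const allowed = await testUa('https://example.com/posts/', 'GPTBot')
    if (allowed === false) {
        // the caller is expected to reject the request, e.g. with a 403
    }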
@@ -1,13 +1,20 @@
+import { testUa } from '$lib/robots.js';
 import { incrementVisitCount, notifyDarkVisitors } from '$lib/visits.js';
+import { error } from '@sveltejs/kit';
 
 export const csr = true;
 export const ssr = true;
 export const prerender = false;
 export const trailingSlash = 'always';
 
-export async function load({ request, cookies, url, setHeaders }) {
+export async function load({ request, cookies, url }) {
     notifyDarkVisitors(url, request) // no await so it doesnt block load
 
+    // block any requests if the user agent is disallowed by our robots txt
+    if (await testUa(url.toString(), request.headers.get('user-agent') ?? "unknown user agent") === false) {
+        throw error(403, "get a better user agent silly")
+    }
+
     return {
         route: url.pathname,
         visitCount: incrementVisitCount(request, cookies),
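With this guard in the server load function, any user agent disallowed by the generated robots.txt gets a 403 from SvelteKit's error() before the page renders. A quick manual smoke test (the dev-server URL and user agent are assumptions, not part of the commit):

    // hypothetical check against a local dev server
    const resp = await fetch('http://localhost:5173/', {
        headers: { 'user-agent': 'GPTBot' }
    })
    console.log(resp.status)   // expect 403 if GPTBot is disallowed by the generated robots.txt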
@@ -1,39 +1,5 @@
-import { env } from '$env/dynamic/private';
-import { get, writable } from 'svelte/store';
-
-const cachedRobots = writable<string>("")
-const lastFetched = writable<number>(Date.now())
-
-const fetchRobotsTxt = async () => {
-    const robotsTxtResp = await fetch(
-        "https://api.darkvisitors.com/robots-txts",
-        {
-            method: "POST",
-            headers: {
-                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
-                "Content-Type": "application/json"
-            },
-            body: JSON.stringify({
-                agent_types: [
-                    "AI Assistant",
-                    "AI Data Scraper",
-                    "AI Search Crawler",
-                    "Undocumented AI Agent",
-                ],
-                disallow: "/"
-            })
-        }
-    )
-    const robotsTxt = await robotsTxtResp.text()
-    lastFetched.set(Date.now())
-    return robotsTxt
-}
+import { getRobotsTxt } from "$lib/robots"
 
 export const GET = async ({ }) => {
-    let robotsTxt = get(cachedRobots)
-    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
-        robotsTxt = await fetchRobotsTxt()
-        cachedRobots.set(robotsTxt)
-    }
-    return new Response(robotsTxt)
+    return new Response(await getRobotsTxt())
 }
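The robots.txt endpoint now delegates to the shared getRobotsTxt helper, so the route and the per-request user-agent guard read from the same cache instead of duplicating the Dark Visitors fetch. If an explicit content type is wanted on the response, a small variation (not part of the commit) would be:

    import { getRobotsTxt } from '$lib/robots'

    export const GET = async () => {
        return new Response(await getRobotsTxt(), {
            headers: { 'Content-Type': 'text/plain; charset=utf-8' }
        })
    }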