feat: block any requests that are disallowed by robots txt

parent c9765abf6c
commit a67eb0a376
package.json
@@ -47,6 +47,7 @@
     "nanoid": "^5.0.9",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-slug": "^6.0.0",
+    "robots-parser": "^3.0.1",
     "steamgriddb": "^2.2.0",
     "typescript-svelte-plugin": "^0.3.43"
   },
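The only new dependency is robots-parser, which turns a robots.txt body into an object that answers per-URL, per-agent queries. A minimal sketch of the calls this commit leans on; the rules and URLs here are invented for illustration:

import robotsParser from 'robots-parser'

// a made-up robots.txt body; the real one comes from the Dark Visitors API below
const robots = robotsParser('https://example.com/robots.txt', 'User-agent: GPTBot\nDisallow: /')
robots.isAllowed('https://example.com/posts/', 'GPTBot')      // false
robots.isAllowed('https://example.com/posts/', 'Mozilla/5.0') // true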
@@ -15,7 +15,7 @@ export const lastFmGetNowPlaying: () => Promise<LastTrack | null> = async () =>
     try {
         var resp = await (await fetch(GET_RECENT_TRACKS_ENDPOINT)).json()
         var track = resp.recenttracks.track[0] ?? null
-        if (!(track['@attr'].nowplaying ?? null)) {
+        if (!((track['@attr'] ?? {}).nowplaying ?? null)) {
             throw "no nowplaying track found"
         }
         var data = {
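The lastFmGetNowPlaying tweak is defensive: Last.fm only attaches '@attr' to the track that is currently playing, so the old code threw a TypeError, rather than the intended string, whenever nothing was playing. Coalescing to an empty object first keeps the lookup safe; a tiny sketch with an illustrative track shape:

const finished: any = { name: "some song" }        // finished tracks carry no '@attr' key
// finished['@attr'].nowplaying                    // TypeError: cannot read properties of undefined
console.log((finished['@attr'] ?? {}).nowplaying)  // undefined, so the guard falls through to the throw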
src/lib/robots.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
+import { env } from '$env/dynamic/private'
+import { get, writable } from 'svelte/store'
+import { type Robot } from 'robots-parser'
+import robotsParser from 'robots-parser'
+import { PUBLIC_BASE_URL } from '$env/static/public'
+
+const cachedParsedRobots = writable<Robot | null>(null)
+const cachedRobots = writable<string>("")
+const lastFetched = writable<number>(Date.now())
+
+const fetchRobotsTxt = async () => {
+    const robotsTxtResp = await fetch(
+        "https://api.darkvisitors.com/robots-txts",
+        {
+            method: "POST",
+            headers: {
+                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
+                "Content-Type": "application/json"
+            },
+            body: JSON.stringify({
+                agent_types: [
+                    "AI Assistant",
+                    "AI Data Scraper",
+                    "AI Search Crawler",
+                    "Undocumented AI Agent",
+                ],
+                disallow: "/"
+            })
+        }
+    )
+    const robotsTxt = await robotsTxtResp.text()
+    lastFetched.set(Date.now())
+    return robotsTxt
+}
+
+export const getRobotsTxt = async () => {
+    let robotsTxt = get(cachedRobots)
+    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
+        robotsTxt = await fetchRobotsTxt()
+        cachedRobots.set(robotsTxt)
+        cachedParsedRobots.set(robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, robotsTxt))
+    }
+    return robotsTxt
+}
+
+export const testUa = async (url: string, ua: string) => {
+    let parsedRobots = get(cachedParsedRobots)
+    if (parsedRobots === null) {
+        parsedRobots = robotsParser(`${PUBLIC_BASE_URL}/robots.txt`, await getRobotsTxt())
+        cachedParsedRobots.set(parsedRobots)
+    }
+    return parsedRobots.isAllowed(url, ua)
+}
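Everything other modules need from src/lib/robots.ts is the two exports; the stores above are private cache state. A sketch of the intended call pattern, with a placeholder URL standing in for whatever PUBLIC_BASE_URL serves:

import { getRobotsTxt, testUa } from '$lib/robots'

const body = await getRobotsTxt()   // fetched at most once per 24h, then served from the store
if (await testUa('https://example.com/posts/', 'GPTBot') === false) {
    // disallowed agent; callers pick the response (the layout below answers with a 403)
}

Note that testUa forwards robots-parser's isAllowed verdict, which can be undefined when the URL is outside the robots.txt host, so the explicit comparison against false in the layout change below is deliberate rather than redundant.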
@@ -1,13 +1,20 @@
+import { testUa } from '$lib/robots.js';
 import { incrementVisitCount, notifyDarkVisitors } from '$lib/visits.js';
+import { error } from '@sveltejs/kit';
 
 export const csr = true;
 export const ssr = true;
 export const prerender = false;
 export const trailingSlash = 'always';
 
-export async function load({ request, cookies, url, setHeaders }) {
+export async function load({ request, cookies, url }) {
     notifyDarkVisitors(url, request) // no await so it doesn't block load
 
+    // block any requests if the user agent is disallowed by our robots.txt
+    if (await testUa(url.toString(), request.headers.get('user-agent') ?? "unknown user agent") === false) {
+        throw error(403, "get a better user agent silly")
+    }
+
     return {
         route: url.pathname,
         visitCount: incrementVisitCount(request, cookies),
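A quick manual check of the new guard, assuming a local dev server on SvelteKit's default port; both the port and the agent string are assumptions, not part of the commit:

// with the dev server running (e.g. `npm run dev`)
const resp = await fetch('http://localhost:5173/', {
    headers: { 'user-agent': 'GPTBot' }   // an agent the Dark Visitors list plausibly disallows
})
console.log(resp.status)                  // 403 if the UA is disallowed, 200 otherwise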
@@ -1,39 +1,5 @@
-import { env } from '$env/dynamic/private';
-import { get, writable } from 'svelte/store';
+import { getRobotsTxt } from "$lib/robots"
 
-const cachedRobots = writable<string>("")
-const lastFetched = writable<number>(Date.now())
-
-const fetchRobotsTxt = async () => {
-    const robotsTxtResp = await fetch(
-        "https://api.darkvisitors.com/robots-txts",
-        {
-            method: "POST",
-            headers: {
-                "Authorization": `Bearer ${env.DARK_VISITORS_TOKEN}`,
-                "Content-Type": "application/json"
-            },
-            body: JSON.stringify({
-                agent_types: [
-                    "AI Assistant",
-                    "AI Data Scraper",
-                    "AI Search Crawler",
-                    "Undocumented AI Agent",
-                ],
-                disallow: "/"
-            })
-        }
-    )
-    const robotsTxt = await robotsTxtResp.text()
-    lastFetched.set(Date.now())
-    return robotsTxt
-}
-
 export const GET = async ({ }) => {
-    let robotsTxt = get(cachedRobots)
-    if (robotsTxt.length === 0 || Date.now() - get(lastFetched) > 1000 * 60 * 60 * 24) {
-        robotsTxt = await fetchRobotsTxt()
-        cachedRobots.set(robotsTxt)
-    }
-    return new Response(robotsTxt)
+    return new Response(await getRobotsTxt())
 }