mirror of
https://github.com/nkoehring/250kb-club.git
synced 2025-04-19 21:59:04 +02:00
85 lines
2.6 KiB
TypeScript
85 lines
2.6 KiB
TypeScript
import './index.d.ts'
|
|
import { url2title, getPageRecord, writeRecord } from './analyser/toolkit.ts'
|
|
import { requestMetricsRun, checkStatus, retrieveMetrics } from './analyser/metrics.ts'
|
|
|
|
const INPUT_FILE = Deno.args[0] ?? './pages.txt'
|
|
const OUTPUT_PATH = Deno.args[1] ?? './content' // results are written here
|
|
const RECHECK_THRESHOLD = 60*60*24*7*1000 // recheck pages older than 1 week
|
|
const REJECT_THRESHOLD = 262144 // 256kb (duh)
|
|
|
|
async function getPageList (): Promise<string[]> {
|
|
const inputContent = await Deno.readTextFile(INPUT_FILE)
|
|
return inputContent.split('\n').filter(line => line.startsWith('http'))
|
|
}
|
|
|
|
const now = Date.now()
|
|
const pages = await getPageList()
|
|
const pagesToUpdate: string[] = []
|
|
|
|
pages.forEach(async (url) => {
|
|
const record = await getPageRecord(url, OUTPUT_PATH)
|
|
const lastUpdated = Date.parse(record?.updated || '')
|
|
const needsCheck = !record || (now - lastUpdated) > RECHECK_THRESHOLD
|
|
|
|
if (!needsCheck) {
|
|
console.log(url, 'is up-to-date')
|
|
return
|
|
}
|
|
|
|
const runId = await requestMetricsRun(url)
|
|
if (runId) {
|
|
console.log(url, 'new or outdated, runId is', runId)
|
|
pagesToUpdate.push(runId)
|
|
}
|
|
|
|
})
|
|
|
|
async function updateRecords () {
|
|
if (pagesToUpdate.length === 0) return // done, yeah!
|
|
|
|
const runId = pagesToUpdate.at(-1) || '' // make tsc happy
|
|
const { url, status } = await checkStatus(runId)
|
|
|
|
// TODO: handle failures more gracefully
|
|
if (status === 'failed') {
|
|
pagesToUpdate.pop()
|
|
console.log(url, 'analysis failed')
|
|
} else if (status === 'complete') {
|
|
pagesToUpdate.pop()
|
|
const oldRecord = await getPageRecord(url, OUTPUT_PATH)
|
|
const metrics = await retrieveMetrics(runId)
|
|
|
|
if (metrics) {
|
|
// poor mans toISODateString
|
|
const now = (new Date()).toISOString().split('T')[0]
|
|
|
|
const weight = metrics.metrics.contentLength
|
|
const ratio = Math.round((metrics.metrics.htmlSize / weight) * 100)
|
|
|
|
if (weight > REJECT_THRESHOLD) {
|
|
console.log(url, 'is not allowed in, weighs', Math.round(weight / 1024), 'kb')
|
|
}
|
|
|
|
const record: PageRecord = {
|
|
title: url2title(url),
|
|
date: oldRecord === null ? now : oldRecord.date,
|
|
updated: now,
|
|
weight,
|
|
extra: {
|
|
source: url,
|
|
ratio,
|
|
size: Math.round(weight / 1024)
|
|
}
|
|
}
|
|
|
|
// TODO: check success
|
|
await writeRecord(record, url, OUTPUT_PATH)
|
|
console.log(url, 'updated')
|
|
} else {
|
|
console.error('failed to retrieve results for', url, runId)
|
|
}
|
|
}
|
|
setTimeout(() => updateRecords(), 500) // run again until the list is empty
|
|
}
|
|
|
|
setTimeout(() => updateRecords(), 1000)
|