better output

Shows statistics and uses ANSI codes for coloured, well-formatted output
This commit is contained in:
Norman Köhring 2025-01-03 22:14:33 +01:00
parent c4b4e3a387
commit 911d12ca77
4 changed files with 181 additions and 61 deletions

7
deno.json Normal file
View file

@ -0,0 +1,7 @@
{
"imports": {
"@cliffy/ansi": "jsr:@cliffy/ansi@^1.0.0-rc.7",
"@cliffy/table": "jsr:@cliffy/table@^1.0.0-rc.7",
"@std/log": "jsr:@std/log@^0.224.12"
}
}

59
deno.lock generated
View file

@ -1,9 +1,68 @@
{
"version": "3",
"packages": {
"specifiers": {
"jsr:@cliffy/ansi@^1.0.0-rc.7": "jsr:@cliffy/ansi@1.0.0-rc.7",
"jsr:@cliffy/internal@1.0.0-rc.7": "jsr:@cliffy/internal@1.0.0-rc.7",
"jsr:@cliffy/table@^1.0.0-rc.7": "jsr:@cliffy/table@1.0.0-rc.7",
"jsr:@std/encoding@~1.0.5": "jsr:@std/encoding@1.0.6",
"jsr:@std/fmt@^1.0.3": "jsr:@std/fmt@1.0.3",
"jsr:@std/fmt@~1.0.2": "jsr:@std/fmt@1.0.3",
"jsr:@std/fs@^1.0.7": "jsr:@std/fs@1.0.8",
"jsr:@std/io@^0.225.0": "jsr:@std/io@0.225.0",
"jsr:@std/log@^0.224.12": "jsr:@std/log@0.224.12"
},
"jsr": {
"@cliffy/ansi@1.0.0-rc.7": {
"integrity": "f71c921cce224c13d322e5cedba4f38e8f7354c7d855c9cb22729362a53f25aa",
"dependencies": [
"jsr:@cliffy/internal@1.0.0-rc.7",
"jsr:@std/encoding@~1.0.5",
"jsr:@std/fmt@~1.0.2"
]
},
"@cliffy/internal@1.0.0-rc.7": {
"integrity": "10412636ab3e67517d448be9eaab1b70c88eba9be22617b5d146257a11cc9b17"
},
"@cliffy/table@1.0.0-rc.7": {
"integrity": "9fdd9776eda28a0b397981c400eeb1aa36da2371b43eefe12e6ff555290e3180",
"dependencies": [
"jsr:@std/fmt@~1.0.2"
]
},
"@std/encoding@1.0.6": {
"integrity": "ca87122c196e8831737d9547acf001766618e78cd8c33920776c7f5885546069"
},
"@std/fmt@1.0.3": {
"integrity": "97765c16aa32245ff4e2204ecf7d8562496a3cb8592340a80e7e554e0bb9149f"
},
"@std/fs@1.0.8": {
"integrity": "161c721b6f9400b8100a851b6f4061431c538b204bb76c501d02c508995cffe0"
},
"@std/io@0.225.0": {
"integrity": "c1db7c5e5a231629b32d64b9a53139445b2ca640d828c26bf23e1c55f8c079b3"
},
"@std/log@0.224.12": {
"integrity": "d0f002f1340a11f28d482a7a9e1c904c26b8ff2c7dd4fe32175b3ece3e0b18c5",
"dependencies": [
"jsr:@std/fmt@^1.0.3",
"jsr:@std/fs@^1.0.7",
"jsr:@std/io@^0.225.0"
]
}
}
},
"remote": {
"https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74",
"https://deno.land/std@0.130.0/_util/deep_assign.ts": "52d4ed44314c5c22e9346264d1ef6c204debf3289be9f5c3c8cf3e8668595113",
"https://deno.land/std@0.130.0/encoding/_toml/parser.ts": "70e459891b514906db15dca739d567d59cdcf97e14d630ea79f619a088a187ae",
"https://deno.land/std@0.130.0/encoding/toml.ts": "368aef2dbc32dd17911f391e0f5a5eecfe51457ed56a5bc752632f8d6a1723ce"
},
"workspace": {
"dependencies": [
"jsr:@cliffy/ansi@^1.0.0-rc.7",
"jsr:@cliffy/table@^1.0.0-rc.7",
"jsr:@std/log@^0.224.12"
]
}
}

4
index.d.ts vendored
View file

@ -10,10 +10,6 @@ type PageRecord = {
};
}
type StringMap = {
[key: string]: string;
}
type Status = {
status: 'awaiting' | 'running' | 'complete' | 'failed';
url: string;

172
index.ts
View file

@ -1,54 +1,67 @@
import "./index.d.ts";
import { Table } from '@cliffy/table'
import { tty } from '@cliffy/ansi/tty'
import { colors } from '@cliffy/ansi/colors'
import './index.d.ts'
import {
url2title,
getPageRecord,
writeRecord,
removeRecord,
} from "./analyser/toolkit.ts";
} from './analyser/toolkit.ts'
import {
requestMetricsRun,
checkStatus,
retrieveMetrics,
} from "./analyser/metrics.ts";
} from './analyser/metrics.ts'
const INPUT_FILE = Deno.args[0] ?? "./pages.txt";
const OUTPUT_PATH = Deno.args[1] ?? "./content"; // results are written here
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000; // recheck pages older than 1 week
const REJECT_THRESHOLD = 262144; // 256KB (duh)
const PARALLEL_JOBS = 3; // max YLT jobs
const INPUT_FILE = Deno.args[0] ?? './pages.txt' // first CLI argument: text file listing the page URLs, one per line
const OUTPUT_PATH = Deno.args[1] ?? './content' // second CLI argument: results are written here
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000 // one week in milliseconds; records older than this are rechecked
const REJECT_THRESHOLD = 262144 // 256 KiB in bytes; pages whose contentLength exceeds this are rejected
const PARALLEL_JOBS = 3 // number of pages taken from the list per batch (max YLT jobs)
const now = Date.now();
const pages = await getPageList(); // all pages
const pagesUpdating: string[] = []; // currently running ylt jobs
const now = Date.now() // timestamp in ms, taken once at startup and used to judge record age
const pages = await getPageList() // all pages
// Counters and lists collected during the run; read by the status table and the final report.
const statistics = {
total: pages.length, // number of URLs read from the input file, captured before batches are spliced off
checked: 0, // incremented for each page that is found to be up-to-date
updated: [] as { url: string, weight: number }[], // pages whose record was written; NOTE(review): weight is pushed as the raw contentLength, while `rejected` stores rounded kb - confirm the intended unit
rejected: [] as { url: string, weight: number }[], // pages above REJECT_THRESHOLD; weight is stored in kb (rounded)
errors: [] as string[], // human-readable failure messages
}
/**
 * Reads INPUT_FILE as text, splits it on newlines and returns only the
 * lines that start with "http"; every other line is dropped.
 *
 * @returns the list of page URLs to process
 */
async function getPageList(): Promise<string[]> {
const inputContent = await Deno.readTextFile(INPUT_FILE);
return inputContent.split("\n").filter((line) => line.startsWith("http"));
const inputContent = await Deno.readTextFile(INPUT_FILE)
return inputContent.split('\n').filter((line) => line.startsWith('http'))
}
async function updateRecord(runId: string, url: string): Promise<boolean> {
const oldRecord = await getPageRecord(url, OUTPUT_PATH);
const metrics = await retrieveMetrics(runId);
const oldRecord = await getPageRecord(url, OUTPUT_PATH)
const metrics = await retrieveMetrics(runId)
if (!metrics) {
console.error("failed to retrieve results for", url, runId);
return false;
statistics.errors.push(`Failed to retrieve results for ${url} (run id: ${runId})`)
console.debug("failed to retrieve results for", url, runId)
return false
}
// poor mans toISODateString
const now = new Date().toISOString().split("T")[0];
const weight = metrics.metrics.contentLength;
const now = new Date().toISOString().split("T")[0]
const weight = metrics.metrics.contentLength
if (weight > REJECT_THRESHOLD) {
console.log(url, "rejected! Weighs", Math.round(weight / 1024), "kb");
statistics.rejected.push({ url, weight: Math.round(weight / 1024) })
console.debug(url, "rejected! Weighs", Math.round(weight / 1024), "kb")
if (oldRecord) {
console.log("Removing record at", OUTPUT_PATH)
console.debug("Removing record at", OUTPUT_PATH)
removeRecord(url, OUTPUT_PATH).catch(() => {
console.error("Failed to remove old record of rejected url", url);
});
statistics.errors.push('Failed to remove', OUTPUT_PATH)
console.debug("Failed to remove old record of rejected url", url)
})
}
return false;
return false
}
const { htmlSize, imageSize, videoSize } = metrics.metrics
const contentSize = htmlSize + imageSize + videoSize
@ -63,76 +76,121 @@ async function updateRecord(runId: string, url: string): Promise<boolean> {
ratio: Math.round(contentSize / weight * 100),
size: Math.round(weight / 1024),
},
};
}
const success = await writeRecord(record, url, OUTPUT_PATH);
const success = await writeRecord(record, url, OUTPUT_PATH)
if (success) {
console.log(url, "successfully updated");
statistics.updated.push({ url, weight })
console.debug(url, "successfully updated")
} else {
console.error(url, "record could not be written!");
statistics.errors.push(`Failed to write record for ${url}`)
console.debug(url, "record could not be written!")
}
return true
}
/**
 * Decides whether `url` needs a fresh metrics run and, if so, requests one.
 *
 * @param url page to check
 * @returns `true` when the stored record is still within RECHECK_THRESHOLD,
 *   `false` when requestMetricsRun yields no run id, otherwise the run id.
 */
async function checkPage(url: string) {
// NOTE: when a record exists but `updated` is empty or unparseable, Date.parse
// yields NaN, the age comparison is false and the page is treated as up-to-date.
const record = await getPageRecord(url, OUTPUT_PATH);
const lastUpdated = Date.parse(record?.updated || "");
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD;
const record = await getPageRecord(url, OUTPUT_PATH)
const lastUpdated = Date.parse(record?.updated || "")
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD
if (!needsCheck) {
console.log(url, "is up-to-date");
return true;
statistics.checked++
console.debug(url, "is up-to-date")
return true
}
// a missing or stale record: ask for a new metrics run
const runId = await requestMetricsRun(url);
const runId = await requestMetricsRun(url)
if (!runId) {
console.error(url, "updating failed!");
return false;
statistics.errors.push(`Failed to run metric for ${url}`)
console.debug(url, "updating failed!")
return false
}
console.log(url, "new or outdated, runId is", runId);
return runId;
console.debug(url, "new or outdated, runId is", runId)
return runId
}
/**
 * Returns a promise that resolves (with no value) once a timer of
 * `duration` milliseconds has fired.
 *
 * @param duration delay passed to setTimeout
 */
function sleep(duration: number) {
return new Promise((resolve) => {
setTimeout(() => resolve(), duration);
});
return new Promise<void>((resolve) => {
setTimeout(() => resolve(), duration)
})
}
/** Surrounds a value with single spaces so every coloured cell gets horizontal padding. */
function padCell(output: string | number): string {
  return ` ${output} `
}

// Plain variants colour the text only; the `Hd` variants render header cells
// with a coloured background and bold black text.

function white(output: string | number): string {
  return colors.white(padCell(output))
}

function whiteHd(output: string | number): string {
  return colors.bgWhite.bold.black(padCell(output))
}

function red(output: string | number): string {
  return colors.red(padCell(output))
}

function redHd(output: string | number): string {
  return colors.bgRed.bold.black(padCell(output))
}

function yellow(output: string | number): string {
  return colors.yellow(padCell(output))
}

function yellowHd(output: string | number): string {
  return colors.bgYellow.bold.black(padCell(output))
}

function blue(output: string | number): string {
  return colors.blue(padCell(output))
}

function blueHd(output: string | number): string {
  return colors.bgBlue.bold.black(padCell(output))
}
/**
 * Redraws the summary table (one header row, one row of counts) built from
 * the current `statistics`.
 *
 * Before printing, the cursor is moved up and the line erased twice, so the
 * new output overwrites the two terminal lines above the cursor (presumably
 * the two lines written by the previous call).
 */
function updateStatusScreen() {
  const headerRow = [
    whiteHd('total'),
    whiteHd('checked'),
    blueHd('added/updated'),
    yellowHd('rejected'),
    redHd('errors'),
  ]
  const countRow = [
    white(statistics.total),
    white(statistics.checked),
    blue(statistics.updated.length),
    yellow(statistics.rejected.length),
    red(statistics.errors.length),
  ]
  const summary = new Table(headerRow, countRow)

  // step up over two lines, clearing each one on the way
  for (let line = 0; line < 2; line++) {
    tty.cursorLeft.cursorUp.eraseLine()
  }

  console.log(summary.toString())
}
/**
 * Prints the final report: one table listing every rejected page with its
 * weight, followed by one table listing every collected error message.
 */
function showStatistics() {
  const rejectedRows = statistics.rejected.map(({ url, weight }) => [
    yellowHd('Rejected'),
    url,
    `${red(weight)}kb`,
  ])
  const errorRows = statistics.errors.map((message) => [redHd('Error'), message])

  // rejected pages first, errors second; each list is logged as its own table
  for (const rows of [rejectedRows, errorRows]) {
    console.log(new Table(...rows).toString())
  }
}
/**
 * Processes the remaining pages one batch at a time.
 *
 * Up to PARALLEL_JOBS URLs are removed from `pages` and passed to checkPage,
 * which starts all of their checks at once. The results are then consumed in
 * order: entries that yield a run id are polled with checkStatus until the
 * status is "failed" or "complete", and completed runs go to updateRecord.
 * When the batch is drained the function calls itself for the next batch;
 * it returns once `pages` is empty.
 */
async function handleBatch() {
if (!pages.length) return; // done, yeah!
updateStatusScreen()
if (!pages.length) return showStatistics() // done, yeah!
const batch = pages.splice(0, PARALLEL_JOBS);
const jobs = batch.map((url) => checkPage(url));
const batch = pages.splice(0, PARALLEL_JOBS)
const jobs = batch.map((url) => checkPage(url))
while (jobs.length) {
// take the first job and check
// if the run is not finished yet, it will be added back to the end of the list
const runId = await jobs.shift();
const job = jobs.shift()
const runId = await job
// page is up-to-date or YLT has an error
if (runId === true || runId === false) continue;
if (!job || runId === undefined || runId === true || runId === false) continue
// TODO: handle failures more gracefully
const { url, status } = await checkStatus(runId);
const { url, status } = await checkStatus(runId)
if (status === "failed") {
console.error(url, "YLT analysis failed");
continue;
statistics.errors.push(`YLT analysis failed for ${url} (run id: ${runId})`)
console.debug(url, "YLT analysis failed")
continue
} else if (status === "complete") {
console.log(url, "updating record...");
await updateRecord(runId, url);
continue;
console.debug(url, "updating record...")
await updateRecord(runId, url)
continue
} else {
// not done yet, add it back
jobs.push(runId);
jobs.push(job)
// wait a bit before checking again
await sleep(1000);
await sleep(1000)
}
}
// NOTE(review): the recursive call is not awaited, so the promise returned by
// this invocation settles before the following batches have finished.
handleBatch();
handleBatch()
}
handleBatch();
// Debug output is shown only when the DEBUG environment variable is set (to any value).
const debug = Deno.env.get('DEBUG') !== undefined
if (!debug) console.debug = () => {} // suppress debug messages
console.log('Starting...')
// NOTE(review): the promise returned here is neither awaited nor given a catch handler.
handleBatch()