From 911d12ca7739abcfc0d5d084c06c4e28229aca38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Norman=20K=C3=B6hring?= Date: Fri, 3 Jan 2025 22:14:33 +0100 Subject: [PATCH] better output Shows statistics and uses ANSI codes for coloured and well formatted output --- deno.json | 7 +++ deno.lock | 59 ++++++++++++++++++ index.d.ts | 4 -- index.ts | 172 +++++++++++++++++++++++++++++++++++------------------ 4 files changed, 181 insertions(+), 61 deletions(-) create mode 100644 deno.json diff --git a/deno.json b/deno.json new file mode 100644 index 00000000..8f0ee5db --- /dev/null +++ b/deno.json @@ -0,0 +1,7 @@ +{ + "imports": { + "@cliffy/ansi": "jsr:@cliffy/ansi@^1.0.0-rc.7", + "@cliffy/table": "jsr:@cliffy/table@^1.0.0-rc.7", + "@std/log": "jsr:@std/log@^0.224.12" + } +} diff --git a/deno.lock b/deno.lock index 1edabb44..b09b8ec2 100644 --- a/deno.lock +++ b/deno.lock @@ -1,9 +1,68 @@ { "version": "3", + "packages": { + "specifiers": { + "jsr:@cliffy/ansi@^1.0.0-rc.7": "jsr:@cliffy/ansi@1.0.0-rc.7", + "jsr:@cliffy/internal@1.0.0-rc.7": "jsr:@cliffy/internal@1.0.0-rc.7", + "jsr:@cliffy/table@^1.0.0-rc.7": "jsr:@cliffy/table@1.0.0-rc.7", + "jsr:@std/encoding@~1.0.5": "jsr:@std/encoding@1.0.6", + "jsr:@std/fmt@^1.0.3": "jsr:@std/fmt@1.0.3", + "jsr:@std/fmt@~1.0.2": "jsr:@std/fmt@1.0.3", + "jsr:@std/fs@^1.0.7": "jsr:@std/fs@1.0.8", + "jsr:@std/io@^0.225.0": "jsr:@std/io@0.225.0", + "jsr:@std/log@^0.224.12": "jsr:@std/log@0.224.12" + }, + "jsr": { + "@cliffy/ansi@1.0.0-rc.7": { + "integrity": "f71c921cce224c13d322e5cedba4f38e8f7354c7d855c9cb22729362a53f25aa", + "dependencies": [ + "jsr:@cliffy/internal@1.0.0-rc.7", + "jsr:@std/encoding@~1.0.5", + "jsr:@std/fmt@~1.0.2" + ] + }, + "@cliffy/internal@1.0.0-rc.7": { + "integrity": "10412636ab3e67517d448be9eaab1b70c88eba9be22617b5d146257a11cc9b17" + }, + "@cliffy/table@1.0.0-rc.7": { + "integrity": "9fdd9776eda28a0b397981c400eeb1aa36da2371b43eefe12e6ff555290e3180", + "dependencies": [ + "jsr:@std/fmt@~1.0.2" + ] + }, + 
"@std/encoding@1.0.6": { + "integrity": "ca87122c196e8831737d9547acf001766618e78cd8c33920776c7f5885546069" + }, + "@std/fmt@1.0.3": { + "integrity": "97765c16aa32245ff4e2204ecf7d8562496a3cb8592340a80e7e554e0bb9149f" + }, + "@std/fs@1.0.8": { + "integrity": "161c721b6f9400b8100a851b6f4061431c538b204bb76c501d02c508995cffe0" + }, + "@std/io@0.225.0": { + "integrity": "c1db7c5e5a231629b32d64b9a53139445b2ca640d828c26bf23e1c55f8c079b3" + }, + "@std/log@0.224.12": { + "integrity": "d0f002f1340a11f28d482a7a9e1c904c26b8ff2c7dd4fe32175b3ece3e0b18c5", + "dependencies": [ + "jsr:@std/fmt@^1.0.3", + "jsr:@std/fs@^1.0.7", + "jsr:@std/io@^0.225.0" + ] + } + } + }, "remote": { "https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74", "https://deno.land/std@0.130.0/_util/deep_assign.ts": "52d4ed44314c5c22e9346264d1ef6c204debf3289be9f5c3c8cf3e8668595113", "https://deno.land/std@0.130.0/encoding/_toml/parser.ts": "70e459891b514906db15dca739d567d59cdcf97e14d630ea79f619a088a187ae", "https://deno.land/std@0.130.0/encoding/toml.ts": "368aef2dbc32dd17911f391e0f5a5eecfe51457ed56a5bc752632f8d6a1723ce" + }, + "workspace": { + "dependencies": [ + "jsr:@cliffy/ansi@^1.0.0-rc.7", + "jsr:@cliffy/table@^1.0.0-rc.7", + "jsr:@std/log@^0.224.12" + ] } } diff --git a/index.d.ts b/index.d.ts index ca79559a..21995a8e 100644 --- a/index.d.ts +++ b/index.d.ts @@ -10,10 +10,6 @@ type PageRecord = { }; } -type StringMap = { - [key: string]: string; -} - type Status = { status: 'awaiting' | 'running' | 'complete' | 'failed'; url: string; diff --git a/index.ts b/index.ts index 6feecdf2..aa5fb1bd 100644 --- a/index.ts +++ b/index.ts @@ -1,54 +1,67 @@ -import "./index.d.ts"; +import { Table } from '@cliffy/table' +import { tty } from '@cliffy/ansi/tty' +import { colors } from '@cliffy/ansi/colors' + +import './index.d.ts' import { url2title, getPageRecord, writeRecord, removeRecord, -} from "./analyser/toolkit.ts"; +} from './analyser/toolkit.ts' 
import { requestMetricsRun, checkStatus, retrieveMetrics, -} from "./analyser/metrics.ts"; +} from './analyser/metrics.ts' -const INPUT_FILE = Deno.args[0] ?? "./pages.txt"; -const OUTPUT_PATH = Deno.args[1] ?? "./content"; // results are written here -const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000; // recheck pages older than 1 week -const REJECT_THRESHOLD = 262144; // 256KB (duh) -const PARALLEL_JOBS = 3; // max YLT jobs +const INPUT_FILE = Deno.args[0] ?? './pages.txt' +const OUTPUT_PATH = Deno.args[1] ?? './content' // results are written here +const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000 // recheck pages older than 1 week +const REJECT_THRESHOLD = 262144 // 256KB (duh) +const PARALLEL_JOBS = 3 // max YLT jobs -const now = Date.now(); -const pages = await getPageList(); // all pages -const pagesUpdating: string[] = []; // currently running ylt jobs +const now = Date.now() +const pages = await getPageList() // all pages + +const statistics = { + total: pages.length, + checked: 0, + updated: [] as { url: string, weight: number }[], + rejected: [] as { url: string, weight: number }[], + errors: [] as string[], +} async function getPageList(): Promise { - const inputContent = await Deno.readTextFile(INPUT_FILE); - return inputContent.split("\n").filter((line) => line.startsWith("http")); + const inputContent = await Deno.readTextFile(INPUT_FILE) + return inputContent.split('\n').filter((line) => line.startsWith('http')) } async function updateRecord(runId: string, url: string): Promise { - const oldRecord = await getPageRecord(url, OUTPUT_PATH); - const metrics = await retrieveMetrics(runId); + const oldRecord = await getPageRecord(url, OUTPUT_PATH) + const metrics = await retrieveMetrics(runId) if (!metrics) { - console.error("failed to retrieve results for", url, runId); - return false; + statistics.errors.push(`Failed to retrieve results for ${url} (run id: ${runId})`) + console.debug("failed to retrieve results for", url, runId) + return false } // poor 
mans toISODateString - const now = new Date().toISOString().split("T")[0]; - - const weight = metrics.metrics.contentLength; + const now = new Date().toISOString().split("T")[0] + const weight = metrics.metrics.contentLength if (weight > REJECT_THRESHOLD) { - console.log(url, "rejected! Weighs", Math.round(weight / 1024), "kb"); + statistics.rejected.push({ url, weight: Math.round(weight / 1024) }) + console.debug(url, "rejected! Weighs", Math.round(weight / 1024), "kb") if (oldRecord) { - console.log("Removing record at", OUTPUT_PATH) + console.debug("Removing record at", OUTPUT_PATH) removeRecord(url, OUTPUT_PATH).catch(() => { - console.error("Failed to remove old record of rejected url", url); - }); + statistics.errors.push('Failed to remove', OUTPUT_PATH) + console.debug("Failed to remove old record of rejected url", url) + }) } - return false; + return false } const { htmlSize, imageSize, videoSize } = metrics.metrics const contentSize = htmlSize + imageSize + videoSize @@ -63,76 +76,121 @@ async function updateRecord(runId: string, url: string): Promise { ratio: Math.round(contentSize / weight * 100), size: Math.round(weight / 1024), }, - }; + } - const success = await writeRecord(record, url, OUTPUT_PATH); + const success = await writeRecord(record, url, OUTPUT_PATH) if (success) { - console.log(url, "successfully updated"); + statistics.updated.push({ url, weight }) + console.debug(url, "successfully updated") } else { - console.error(url, "record could not be written!"); + statistics.errors.push(`Failed to write record for ${url}`) + console.debug(url, "record could not be written!") } + + return true } async function checkPage(url: string) { - const record = await getPageRecord(url, OUTPUT_PATH); - const lastUpdated = Date.parse(record?.updated || ""); - const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD; + const record = await getPageRecord(url, OUTPUT_PATH) + const lastUpdated = Date.parse(record?.updated || "") + const needsCheck = 
!record || now - lastUpdated > RECHECK_THRESHOLD if (!needsCheck) { - console.log(url, "is up-to-date"); - return true; + statistics.checked++ + console.debug(url, "is up-to-date") + return true } - const runId = await requestMetricsRun(url); + const runId = await requestMetricsRun(url) if (!runId) { - console.error(url, "updating failed!"); - return false; + statistics.errors.push(`Failed to run metric for ${url}`) + console.debug(url, "updating failed!") + return false } - console.log(url, "new or outdated, runId is", runId); - return runId; + console.debug(url, "new or outdated, runId is", runId) + return runId } function sleep(duration: number) { - return new Promise((resolve) => { - setTimeout(() => resolve(), duration); - }); + return new Promise((resolve) => { + setTimeout(() => resolve(), duration) + }) +} + +const white = (output: string | number) => colors.white(` ${output} `) +const whiteHd = (output: string | number) => colors.bgWhite.bold.black(` ${output} `) +const red = (output: string | number) => colors.red(` ${output} `) +const redHd = (output: string | number) => colors.bgRed.bold.black(` ${output} `) +const yellow = (output: string | number) => colors.yellow(` ${output} `) +const yellowHd = (output: string | number) => colors.bgYellow.bold.black(` ${output} `) +const blue = (output: string | number) => colors.blue(` ${output} `) +const blueHd = (output: string | number) => colors.bgBlue.bold.black(` ${output} `) + +function updateStatusScreen() { + const { total, checked, updated, rejected, errors } = statistics + + const tableOutput = new Table( + [whiteHd('total'), whiteHd('checked'), blueHd('added/updated'), yellowHd('rejected'), redHd('errors')], + [white(total), white(checked), blue(updated.length), yellow(rejected.length), red(errors.length)], + ) + + tty.cursorLeft.cursorUp.eraseLine() + tty.cursorLeft.cursorUp.eraseLine() + console.log(tableOutput.toString()) +} + +function showStatistics() { + console.log(new Table( + 
...statistics.rejected.map((page) => [yellowHd('Rejected'), page.url, `${red(page.weight)}kb`]), + ).toString()) + + console.log(new Table( + ...statistics.errors.map((err) => [redHd('Error'), err]), + ).toString()) } async function handleBatch() { - if (!pages.length) return; // done, yeah! + updateStatusScreen() + if (!pages.length) return showStatistics() // done, yeah! - const batch = pages.splice(0, PARALLEL_JOBS); - const jobs = batch.map((url) => checkPage(url)); + const batch = pages.splice(0, PARALLEL_JOBS) + const jobs = batch.map((url) => checkPage(url)) while (jobs.length) { // take the first job and check // if the check fails, it will be added back to the end of the list - const runId = await jobs.shift(); + const job = jobs.shift() + const runId = await job // page is up-to-date or YLT has an error - if (runId === true || runId === false) continue; + if (!job || runId === undefined || runId === true || runId === false) continue // TODO: handle failures more gracefully - const { url, status } = await checkStatus(runId); + const { url, status } = await checkStatus(runId) if (status === "failed") { - console.error(url, "YLT analysis failed"); - continue; + statistics.errors.push(`YLT analysis failed for ${url} (run id: ${runId})`) + console.debug(url, "YLT analysis failed") + continue } else if (status === "complete") { - console.log(url, "updating record..."); - await updateRecord(runId, url); - continue; + console.debug(url, "updating record...") + await updateRecord(runId, url) + continue } else { // not done yet, add it back - jobs.push(runId); + jobs.push(job) // wait a bit before checking again - await sleep(1000); + await sleep(1000) } } - handleBatch(); + handleBatch() } -handleBatch(); +const debug = Deno.env.get('DEBUG') !== undefined +if (!debug) console.debug = () => {} // suppress debug messages + +console.log('Starting...') +handleBatch()