mirror of
https://github.com/nkoehring/250kb-club.git
synced 2025-04-30 02:29:05 +02:00
better output
Shows statistics and uses ANSI codes for coloured, well-formatted output
This commit is contained in:
parent
c4b4e3a387
commit
911d12ca77
4 changed files with 181 additions and 61 deletions
7
deno.json
Normal file
7
deno.json
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"imports": {
|
||||||
|
"@cliffy/ansi": "jsr:@cliffy/ansi@^1.0.0-rc.7",
|
||||||
|
"@cliffy/table": "jsr:@cliffy/table@^1.0.0-rc.7",
|
||||||
|
"@std/log": "jsr:@std/log@^0.224.12"
|
||||||
|
}
|
||||||
|
}
|
59
deno.lock
generated
59
deno.lock
generated
|
@ -1,9 +1,68 @@
|
||||||
{
|
{
|
||||||
"version": "3",
|
"version": "3",
|
||||||
|
"packages": {
|
||||||
|
"specifiers": {
|
||||||
|
"jsr:@cliffy/ansi@^1.0.0-rc.7": "jsr:@cliffy/ansi@1.0.0-rc.7",
|
||||||
|
"jsr:@cliffy/internal@1.0.0-rc.7": "jsr:@cliffy/internal@1.0.0-rc.7",
|
||||||
|
"jsr:@cliffy/table@^1.0.0-rc.7": "jsr:@cliffy/table@1.0.0-rc.7",
|
||||||
|
"jsr:@std/encoding@~1.0.5": "jsr:@std/encoding@1.0.6",
|
||||||
|
"jsr:@std/fmt@^1.0.3": "jsr:@std/fmt@1.0.3",
|
||||||
|
"jsr:@std/fmt@~1.0.2": "jsr:@std/fmt@1.0.3",
|
||||||
|
"jsr:@std/fs@^1.0.7": "jsr:@std/fs@1.0.8",
|
||||||
|
"jsr:@std/io@^0.225.0": "jsr:@std/io@0.225.0",
|
||||||
|
"jsr:@std/log@^0.224.12": "jsr:@std/log@0.224.12"
|
||||||
|
},
|
||||||
|
"jsr": {
|
||||||
|
"@cliffy/ansi@1.0.0-rc.7": {
|
||||||
|
"integrity": "f71c921cce224c13d322e5cedba4f38e8f7354c7d855c9cb22729362a53f25aa",
|
||||||
|
"dependencies": [
|
||||||
|
"jsr:@cliffy/internal@1.0.0-rc.7",
|
||||||
|
"jsr:@std/encoding@~1.0.5",
|
||||||
|
"jsr:@std/fmt@~1.0.2"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"@cliffy/internal@1.0.0-rc.7": {
|
||||||
|
"integrity": "10412636ab3e67517d448be9eaab1b70c88eba9be22617b5d146257a11cc9b17"
|
||||||
|
},
|
||||||
|
"@cliffy/table@1.0.0-rc.7": {
|
||||||
|
"integrity": "9fdd9776eda28a0b397981c400eeb1aa36da2371b43eefe12e6ff555290e3180",
|
||||||
|
"dependencies": [
|
||||||
|
"jsr:@std/fmt@~1.0.2"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"@std/encoding@1.0.6": {
|
||||||
|
"integrity": "ca87122c196e8831737d9547acf001766618e78cd8c33920776c7f5885546069"
|
||||||
|
},
|
||||||
|
"@std/fmt@1.0.3": {
|
||||||
|
"integrity": "97765c16aa32245ff4e2204ecf7d8562496a3cb8592340a80e7e554e0bb9149f"
|
||||||
|
},
|
||||||
|
"@std/fs@1.0.8": {
|
||||||
|
"integrity": "161c721b6f9400b8100a851b6f4061431c538b204bb76c501d02c508995cffe0"
|
||||||
|
},
|
||||||
|
"@std/io@0.225.0": {
|
||||||
|
"integrity": "c1db7c5e5a231629b32d64b9a53139445b2ca640d828c26bf23e1c55f8c079b3"
|
||||||
|
},
|
||||||
|
"@std/log@0.224.12": {
|
||||||
|
"integrity": "d0f002f1340a11f28d482a7a9e1c904c26b8ff2c7dd4fe32175b3ece3e0b18c5",
|
||||||
|
"dependencies": [
|
||||||
|
"jsr:@std/fmt@^1.0.3",
|
||||||
|
"jsr:@std/fs@^1.0.7",
|
||||||
|
"jsr:@std/io@^0.225.0"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"remote": {
|
"remote": {
|
||||||
"https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74",
|
"https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74",
|
||||||
"https://deno.land/std@0.130.0/_util/deep_assign.ts": "52d4ed44314c5c22e9346264d1ef6c204debf3289be9f5c3c8cf3e8668595113",
|
"https://deno.land/std@0.130.0/_util/deep_assign.ts": "52d4ed44314c5c22e9346264d1ef6c204debf3289be9f5c3c8cf3e8668595113",
|
||||||
"https://deno.land/std@0.130.0/encoding/_toml/parser.ts": "70e459891b514906db15dca739d567d59cdcf97e14d630ea79f619a088a187ae",
|
"https://deno.land/std@0.130.0/encoding/_toml/parser.ts": "70e459891b514906db15dca739d567d59cdcf97e14d630ea79f619a088a187ae",
|
||||||
"https://deno.land/std@0.130.0/encoding/toml.ts": "368aef2dbc32dd17911f391e0f5a5eecfe51457ed56a5bc752632f8d6a1723ce"
|
"https://deno.land/std@0.130.0/encoding/toml.ts": "368aef2dbc32dd17911f391e0f5a5eecfe51457ed56a5bc752632f8d6a1723ce"
|
||||||
|
},
|
||||||
|
"workspace": {
|
||||||
|
"dependencies": [
|
||||||
|
"jsr:@cliffy/ansi@^1.0.0-rc.7",
|
||||||
|
"jsr:@cliffy/table@^1.0.0-rc.7",
|
||||||
|
"jsr:@std/log@^0.224.12"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
4
index.d.ts
vendored
4
index.d.ts
vendored
|
@ -10,10 +10,6 @@ type PageRecord = {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
type StringMap = {
|
|
||||||
[key: string]: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
type Status = {
|
type Status = {
|
||||||
status: 'awaiting' | 'running' | 'complete' | 'failed';
|
status: 'awaiting' | 'running' | 'complete' | 'failed';
|
||||||
url: string;
|
url: string;
|
||||||
|
|
172
index.ts
172
index.ts
|
@ -1,54 +1,67 @@
|
||||||
import "./index.d.ts";
|
import { Table } from '@cliffy/table'
|
||||||
|
import { tty } from '@cliffy/ansi/tty'
|
||||||
|
import { colors } from '@cliffy/ansi/colors'
|
||||||
|
|
||||||
|
import './index.d.ts'
|
||||||
import {
|
import {
|
||||||
url2title,
|
url2title,
|
||||||
getPageRecord,
|
getPageRecord,
|
||||||
writeRecord,
|
writeRecord,
|
||||||
removeRecord,
|
removeRecord,
|
||||||
} from "./analyser/toolkit.ts";
|
} from './analyser/toolkit.ts'
|
||||||
import {
|
import {
|
||||||
requestMetricsRun,
|
requestMetricsRun,
|
||||||
checkStatus,
|
checkStatus,
|
||||||
retrieveMetrics,
|
retrieveMetrics,
|
||||||
} from "./analyser/metrics.ts";
|
} from './analyser/metrics.ts'
|
||||||
|
|
||||||
const INPUT_FILE = Deno.args[0] ?? "./pages.txt";
|
const INPUT_FILE = Deno.args[0] ?? './pages.txt'
|
||||||
const OUTPUT_PATH = Deno.args[1] ?? "./content"; // results are written here
|
const OUTPUT_PATH = Deno.args[1] ?? './content' // results are written here
|
||||||
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000; // recheck pages older than 1 week
|
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000 // recheck pages older than 1 week
|
||||||
const REJECT_THRESHOLD = 262144; // 256KB (duh)
|
const REJECT_THRESHOLD = 262144 // 256KB (duh)
|
||||||
const PARALLEL_JOBS = 3; // max YLT jobs
|
const PARALLEL_JOBS = 3 // max YLT jobs
|
||||||
|
|
||||||
const now = Date.now();
|
const now = Date.now()
|
||||||
const pages = await getPageList(); // all pages
|
const pages = await getPageList() // all pages
|
||||||
const pagesUpdating: string[] = []; // currently running ylt jobs
|
|
||||||
|
const statistics = {
|
||||||
|
total: pages.length,
|
||||||
|
checked: 0,
|
||||||
|
updated: [] as { url: string, weight: number }[],
|
||||||
|
rejected: [] as { url: string, weight: number }[],
|
||||||
|
errors: [] as string[],
|
||||||
|
}
|
||||||
|
|
||||||
async function getPageList(): Promise<string[]> {
|
async function getPageList(): Promise<string[]> {
|
||||||
const inputContent = await Deno.readTextFile(INPUT_FILE);
|
const inputContent = await Deno.readTextFile(INPUT_FILE)
|
||||||
return inputContent.split("\n").filter((line) => line.startsWith("http"));
|
return inputContent.split('\n').filter((line) => line.startsWith('http'))
|
||||||
}
|
}
|
||||||
|
|
||||||
async function updateRecord(runId: string, url: string): Promise<boolean> {
|
async function updateRecord(runId: string, url: string): Promise<boolean> {
|
||||||
const oldRecord = await getPageRecord(url, OUTPUT_PATH);
|
const oldRecord = await getPageRecord(url, OUTPUT_PATH)
|
||||||
const metrics = await retrieveMetrics(runId);
|
const metrics = await retrieveMetrics(runId)
|
||||||
|
|
||||||
if (!metrics) {
|
if (!metrics) {
|
||||||
console.error("failed to retrieve results for", url, runId);
|
statistics.errors.push(`Failed to retrieve results for ${url} (run id: ${runId})`)
|
||||||
return false;
|
console.debug("failed to retrieve results for", url, runId)
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// poor man's toISODateString
|
// poor man's toISODateString
|
||||||
const now = new Date().toISOString().split("T")[0];
|
const now = new Date().toISOString().split("T")[0]
|
||||||
|
const weight = metrics.metrics.contentLength
|
||||||
const weight = metrics.metrics.contentLength;
|
|
||||||
|
|
||||||
if (weight > REJECT_THRESHOLD) {
|
if (weight > REJECT_THRESHOLD) {
|
||||||
console.log(url, "rejected! Weighs", Math.round(weight / 1024), "kb");
|
statistics.rejected.push({ url, weight: Math.round(weight / 1024) })
|
||||||
|
console.debug(url, "rejected! Weighs", Math.round(weight / 1024), "kb")
|
||||||
if (oldRecord) {
|
if (oldRecord) {
|
||||||
console.log("Removing record at", OUTPUT_PATH)
|
console.debug("Removing record at", OUTPUT_PATH)
|
||||||
removeRecord(url, OUTPUT_PATH).catch(() => {
|
removeRecord(url, OUTPUT_PATH).catch(() => {
|
||||||
console.error("Failed to remove old record of rejected url", url);
|
statistics.errors.push('Failed to remove', OUTPUT_PATH)
|
||||||
});
|
console.debug("Failed to remove old record of rejected url", url)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
return false;
|
return false
|
||||||
}
|
}
|
||||||
const { htmlSize, imageSize, videoSize } = metrics.metrics
|
const { htmlSize, imageSize, videoSize } = metrics.metrics
|
||||||
const contentSize = htmlSize + imageSize + videoSize
|
const contentSize = htmlSize + imageSize + videoSize
|
||||||
|
@ -63,76 +76,121 @@ async function updateRecord(runId: string, url: string): Promise<boolean> {
|
||||||
ratio: Math.round(contentSize / weight * 100),
|
ratio: Math.round(contentSize / weight * 100),
|
||||||
size: Math.round(weight / 1024),
|
size: Math.round(weight / 1024),
|
||||||
},
|
},
|
||||||
};
|
}
|
||||||
|
|
||||||
const success = await writeRecord(record, url, OUTPUT_PATH);
|
const success = await writeRecord(record, url, OUTPUT_PATH)
|
||||||
|
|
||||||
if (success) {
|
if (success) {
|
||||||
console.log(url, "successfully updated");
|
statistics.updated.push({ url, weight })
|
||||||
|
console.debug(url, "successfully updated")
|
||||||
} else {
|
} else {
|
||||||
console.error(url, "record could not be written!");
|
statistics.errors.push(`Failed to write record for ${url}`)
|
||||||
|
console.debug(url, "record could not be written!")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkPage(url: string) {
|
async function checkPage(url: string) {
|
||||||
const record = await getPageRecord(url, OUTPUT_PATH);
|
const record = await getPageRecord(url, OUTPUT_PATH)
|
||||||
const lastUpdated = Date.parse(record?.updated || "");
|
const lastUpdated = Date.parse(record?.updated || "")
|
||||||
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD;
|
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD
|
||||||
|
|
||||||
if (!needsCheck) {
|
if (!needsCheck) {
|
||||||
console.log(url, "is up-to-date");
|
statistics.checked++
|
||||||
return true;
|
console.debug(url, "is up-to-date")
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
const runId = await requestMetricsRun(url);
|
const runId = await requestMetricsRun(url)
|
||||||
if (!runId) {
|
if (!runId) {
|
||||||
console.error(url, "updating failed!");
|
statistics.errors.push(`Failed to run metric for ${url}`)
|
||||||
return false;
|
console.debug(url, "updating failed!")
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(url, "new or outdated, runId is", runId);
|
console.debug(url, "new or outdated, runId is", runId)
|
||||||
return runId;
|
return runId
|
||||||
}
|
}
|
||||||
|
|
||||||
function sleep(duration: number) {
|
function sleep(duration: number) {
|
||||||
return new Promise((resolve) => {
|
return new Promise<void>((resolve) => {
|
||||||
setTimeout(() => resolve(), duration);
|
setTimeout(() => resolve(), duration)
|
||||||
});
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
const white = (output: string | number) => colors.white(` ${output} `)
|
||||||
|
const whiteHd = (output: string | number) => colors.bgWhite.bold.black(` ${output} `)
|
||||||
|
const red = (output: string | number) => colors.red(` ${output} `)
|
||||||
|
const redHd = (output: string | number) => colors.bgRed.bold.black(` ${output} `)
|
||||||
|
const yellow = (output: string | number) => colors.yellow(` ${output} `)
|
||||||
|
const yellowHd = (output: string | number) => colors.bgYellow.bold.black(` ${output} `)
|
||||||
|
const blue = (output: string | number) => colors.blue(` ${output} `)
|
||||||
|
const blueHd = (output: string | number) => colors.bgBlue.bold.black(` ${output} `)
|
||||||
|
|
||||||
|
function updateStatusScreen() {
|
||||||
|
const { total, checked, updated, rejected, errors } = statistics
|
||||||
|
|
||||||
|
const tableOutput = new Table(
|
||||||
|
[whiteHd('total'), whiteHd('checked'), blueHd('added/updated'), yellowHd('rejected'), redHd('errors')],
|
||||||
|
[white(total), white(checked), blue(updated.length), yellow(rejected.length), red(errors.length)],
|
||||||
|
)
|
||||||
|
|
||||||
|
tty.cursorLeft.cursorUp.eraseLine()
|
||||||
|
tty.cursorLeft.cursorUp.eraseLine()
|
||||||
|
console.log(tableOutput.toString())
|
||||||
|
}
|
||||||
|
|
||||||
|
function showStatistics() {
|
||||||
|
console.log(new Table(
|
||||||
|
...statistics.rejected.map((page) => [yellowHd('Rejected'), page.url, `${red(page.weight)}kb`]),
|
||||||
|
).toString())
|
||||||
|
|
||||||
|
console.log(new Table(
|
||||||
|
...statistics.errors.map((err) => [redHd('Error'), err]),
|
||||||
|
).toString())
|
||||||
}
|
}
|
||||||
|
|
||||||
async function handleBatch() {
|
async function handleBatch() {
|
||||||
if (!pages.length) return; // done, yeah!
|
updateStatusScreen()
|
||||||
|
if (!pages.length) return showStatistics() // done, yeah!
|
||||||
|
|
||||||
const batch = pages.splice(0, PARALLEL_JOBS);
|
const batch = pages.splice(0, PARALLEL_JOBS)
|
||||||
const jobs = batch.map((url) => checkPage(url));
|
const jobs = batch.map((url) => checkPage(url))
|
||||||
|
|
||||||
while (jobs.length) {
|
while (jobs.length) {
|
||||||
// take the first job and check
|
// take the first job and check
|
||||||
// if the check fails, it will be added back to the end of the list
|
// if the check fails, it will be added back to the end of the list
|
||||||
const runId = await jobs.shift();
|
const job = jobs.shift()
|
||||||
|
const runId = await job
|
||||||
|
|
||||||
// page is up-to-date or YLT has an error
|
// page is up-to-date or YLT has an error
|
||||||
if (runId === true || runId === false) continue;
|
if (!job || runId === undefined || runId === true || runId === false) continue
|
||||||
|
|
||||||
// TODO: handle failures more gracefully
|
// TODO: handle failures more gracefully
|
||||||
const { url, status } = await checkStatus(runId);
|
const { url, status } = await checkStatus(runId)
|
||||||
|
|
||||||
if (status === "failed") {
|
if (status === "failed") {
|
||||||
console.error(url, "YLT analysis failed");
|
statistics.errors.push(`YLT analysis failed for ${url} (run id: ${runId})`)
|
||||||
continue;
|
console.debug(url, "YLT analysis failed")
|
||||||
|
continue
|
||||||
} else if (status === "complete") {
|
} else if (status === "complete") {
|
||||||
console.log(url, "updating record...");
|
console.debug(url, "updating record...")
|
||||||
await updateRecord(runId, url);
|
await updateRecord(runId, url)
|
||||||
continue;
|
continue
|
||||||
} else {
|
} else {
|
||||||
// not done yet, add it back
|
// not done yet, add it back
|
||||||
jobs.push(runId);
|
jobs.push(job)
|
||||||
// wait a bit before checking again
|
// wait a bit before checking again
|
||||||
await sleep(1000);
|
await sleep(1000)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
handleBatch();
|
handleBatch()
|
||||||
}
|
}
|
||||||
|
|
||||||
handleBatch();
|
const debug = Deno.env.get('DEBUG') !== undefined
|
||||||
|
if (!debug) console.debug = () => {} // suppress debug messages
|
||||||
|
|
||||||
|
console.log('Starting...')
|
||||||
|
handleBatch()
|
||||||
|
|
Loading…
Add table
Reference in a new issue