mirror of
https://github.com/nkoehring/250kb-club.git
synced 2025-04-29 18:19:05 +02:00
better output
Shows statistics and uses ANSI codes for coloured, well-formatted output
This commit is contained in:
parent
c4b4e3a387
commit
911d12ca77
4 changed files with 181 additions and 61 deletions
7
deno.json
Normal file
7
deno.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"imports": {
|
||||
"@cliffy/ansi": "jsr:@cliffy/ansi@^1.0.0-rc.7",
|
||||
"@cliffy/table": "jsr:@cliffy/table@^1.0.0-rc.7",
|
||||
"@std/log": "jsr:@std/log@^0.224.12"
|
||||
}
|
||||
}
|
59
deno.lock
generated
59
deno.lock
generated
|
@ -1,9 +1,68 @@
|
|||
{
|
||||
"version": "3",
|
||||
"packages": {
|
||||
"specifiers": {
|
||||
"jsr:@cliffy/ansi@^1.0.0-rc.7": "jsr:@cliffy/ansi@1.0.0-rc.7",
|
||||
"jsr:@cliffy/internal@1.0.0-rc.7": "jsr:@cliffy/internal@1.0.0-rc.7",
|
||||
"jsr:@cliffy/table@^1.0.0-rc.7": "jsr:@cliffy/table@1.0.0-rc.7",
|
||||
"jsr:@std/encoding@~1.0.5": "jsr:@std/encoding@1.0.6",
|
||||
"jsr:@std/fmt@^1.0.3": "jsr:@std/fmt@1.0.3",
|
||||
"jsr:@std/fmt@~1.0.2": "jsr:@std/fmt@1.0.3",
|
||||
"jsr:@std/fs@^1.0.7": "jsr:@std/fs@1.0.8",
|
||||
"jsr:@std/io@^0.225.0": "jsr:@std/io@0.225.0",
|
||||
"jsr:@std/log@^0.224.12": "jsr:@std/log@0.224.12"
|
||||
},
|
||||
"jsr": {
|
||||
"@cliffy/ansi@1.0.0-rc.7": {
|
||||
"integrity": "f71c921cce224c13d322e5cedba4f38e8f7354c7d855c9cb22729362a53f25aa",
|
||||
"dependencies": [
|
||||
"jsr:@cliffy/internal@1.0.0-rc.7",
|
||||
"jsr:@std/encoding@~1.0.5",
|
||||
"jsr:@std/fmt@~1.0.2"
|
||||
]
|
||||
},
|
||||
"@cliffy/internal@1.0.0-rc.7": {
|
||||
"integrity": "10412636ab3e67517d448be9eaab1b70c88eba9be22617b5d146257a11cc9b17"
|
||||
},
|
||||
"@cliffy/table@1.0.0-rc.7": {
|
||||
"integrity": "9fdd9776eda28a0b397981c400eeb1aa36da2371b43eefe12e6ff555290e3180",
|
||||
"dependencies": [
|
||||
"jsr:@std/fmt@~1.0.2"
|
||||
]
|
||||
},
|
||||
"@std/encoding@1.0.6": {
|
||||
"integrity": "ca87122c196e8831737d9547acf001766618e78cd8c33920776c7f5885546069"
|
||||
},
|
||||
"@std/fmt@1.0.3": {
|
||||
"integrity": "97765c16aa32245ff4e2204ecf7d8562496a3cb8592340a80e7e554e0bb9149f"
|
||||
},
|
||||
"@std/fs@1.0.8": {
|
||||
"integrity": "161c721b6f9400b8100a851b6f4061431c538b204bb76c501d02c508995cffe0"
|
||||
},
|
||||
"@std/io@0.225.0": {
|
||||
"integrity": "c1db7c5e5a231629b32d64b9a53139445b2ca640d828c26bf23e1c55f8c079b3"
|
||||
},
|
||||
"@std/log@0.224.12": {
|
||||
"integrity": "d0f002f1340a11f28d482a7a9e1c904c26b8ff2c7dd4fe32175b3ece3e0b18c5",
|
||||
"dependencies": [
|
||||
"jsr:@std/fmt@^1.0.3",
|
||||
"jsr:@std/fs@^1.0.7",
|
||||
"jsr:@std/io@^0.225.0"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"remote": {
|
||||
"https://deno.land/std@0.130.0/_util/assert.ts": "e94f2eb37cebd7f199952e242c77654e43333c1ac4c5c700e929ea3aa5489f74",
|
||||
"https://deno.land/std@0.130.0/_util/deep_assign.ts": "52d4ed44314c5c22e9346264d1ef6c204debf3289be9f5c3c8cf3e8668595113",
|
||||
"https://deno.land/std@0.130.0/encoding/_toml/parser.ts": "70e459891b514906db15dca739d567d59cdcf97e14d630ea79f619a088a187ae",
|
||||
"https://deno.land/std@0.130.0/encoding/toml.ts": "368aef2dbc32dd17911f391e0f5a5eecfe51457ed56a5bc752632f8d6a1723ce"
|
||||
},
|
||||
"workspace": {
|
||||
"dependencies": [
|
||||
"jsr:@cliffy/ansi@^1.0.0-rc.7",
|
||||
"jsr:@cliffy/table@^1.0.0-rc.7",
|
||||
"jsr:@std/log@^0.224.12"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
4
index.d.ts
vendored
4
index.d.ts
vendored
|
@ -10,10 +10,6 @@ type PageRecord = {
|
|||
};
|
||||
}
|
||||
|
||||
type StringMap = {
|
||||
[key: string]: string;
|
||||
}
|
||||
|
||||
type Status = {
|
||||
status: 'awaiting' | 'running' | 'complete' | 'failed';
|
||||
url: string;
|
||||
|
|
172
index.ts
172
index.ts
|
@ -1,54 +1,67 @@
|
|||
import "./index.d.ts";
|
||||
import { Table } from '@cliffy/table'
|
||||
import { tty } from '@cliffy/ansi/tty'
|
||||
import { colors } from '@cliffy/ansi/colors'
|
||||
|
||||
import './index.d.ts'
|
||||
import {
|
||||
url2title,
|
||||
getPageRecord,
|
||||
writeRecord,
|
||||
removeRecord,
|
||||
} from "./analyser/toolkit.ts";
|
||||
} from './analyser/toolkit.ts'
|
||||
import {
|
||||
requestMetricsRun,
|
||||
checkStatus,
|
||||
retrieveMetrics,
|
||||
} from "./analyser/metrics.ts";
|
||||
} from './analyser/metrics.ts'
|
||||
|
||||
const INPUT_FILE = Deno.args[0] ?? "./pages.txt";
|
||||
const OUTPUT_PATH = Deno.args[1] ?? "./content"; // results are written here
|
||||
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000; // recheck pages older than 1 week
|
||||
const REJECT_THRESHOLD = 262144; // 256KB (duh)
|
||||
const PARALLEL_JOBS = 3; // max YLT jobs
|
||||
const INPUT_FILE = Deno.args[0] ?? './pages.txt'
|
||||
const OUTPUT_PATH = Deno.args[1] ?? './content' // results are written here
|
||||
const RECHECK_THRESHOLD = 60 * 60 * 24 * 7 * 1000 // recheck pages older than 1 week
|
||||
const REJECT_THRESHOLD = 262144 // 256KB (duh)
|
||||
const PARALLEL_JOBS = 3 // max YLT jobs
|
||||
|
||||
const now = Date.now();
|
||||
const pages = await getPageList(); // all pages
|
||||
const pagesUpdating: string[] = []; // currently running ylt jobs
|
||||
const now = Date.now()
|
||||
const pages = await getPageList() // all pages
|
||||
|
||||
const statistics = {
|
||||
total: pages.length,
|
||||
checked: 0,
|
||||
updated: [] as { url: string, weight: number }[],
|
||||
rejected: [] as { url: string, weight: number }[],
|
||||
errors: [] as string[],
|
||||
}
|
||||
|
||||
async function getPageList(): Promise<string[]> {
|
||||
const inputContent = await Deno.readTextFile(INPUT_FILE);
|
||||
return inputContent.split("\n").filter((line) => line.startsWith("http"));
|
||||
const inputContent = await Deno.readTextFile(INPUT_FILE)
|
||||
return inputContent.split('\n').filter((line) => line.startsWith('http'))
|
||||
}
|
||||
|
||||
async function updateRecord(runId: string, url: string): Promise<boolean> {
|
||||
const oldRecord = await getPageRecord(url, OUTPUT_PATH);
|
||||
const metrics = await retrieveMetrics(runId);
|
||||
const oldRecord = await getPageRecord(url, OUTPUT_PATH)
|
||||
const metrics = await retrieveMetrics(runId)
|
||||
|
||||
if (!metrics) {
|
||||
console.error("failed to retrieve results for", url, runId);
|
||||
return false;
|
||||
statistics.errors.push(`Failed to retrieve results for ${url} (run id: ${runId})`)
|
||||
console.debug("failed to retrieve results for", url, runId)
|
||||
return false
|
||||
}
|
||||
|
||||
// poor man's toISODateString
|
||||
const now = new Date().toISOString().split("T")[0];
|
||||
|
||||
const weight = metrics.metrics.contentLength;
|
||||
const now = new Date().toISOString().split("T")[0]
|
||||
const weight = metrics.metrics.contentLength
|
||||
|
||||
if (weight > REJECT_THRESHOLD) {
|
||||
console.log(url, "rejected! Weighs", Math.round(weight / 1024), "kb");
|
||||
statistics.rejected.push({ url, weight: Math.round(weight / 1024) })
|
||||
console.debug(url, "rejected! Weighs", Math.round(weight / 1024), "kb")
|
||||
if (oldRecord) {
|
||||
console.log("Removing record at", OUTPUT_PATH)
|
||||
console.debug("Removing record at", OUTPUT_PATH)
|
||||
removeRecord(url, OUTPUT_PATH).catch(() => {
|
||||
console.error("Failed to remove old record of rejected url", url);
|
||||
});
|
||||
statistics.errors.push('Failed to remove', OUTPUT_PATH)
|
||||
console.debug("Failed to remove old record of rejected url", url)
|
||||
})
|
||||
}
|
||||
return false;
|
||||
return false
|
||||
}
|
||||
const { htmlSize, imageSize, videoSize } = metrics.metrics
|
||||
const contentSize = htmlSize + imageSize + videoSize
|
||||
|
@ -63,76 +76,121 @@ async function updateRecord(runId: string, url: string): Promise<boolean> {
|
|||
ratio: Math.round(contentSize / weight * 100),
|
||||
size: Math.round(weight / 1024),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const success = await writeRecord(record, url, OUTPUT_PATH);
|
||||
const success = await writeRecord(record, url, OUTPUT_PATH)
|
||||
|
||||
if (success) {
|
||||
console.log(url, "successfully updated");
|
||||
statistics.updated.push({ url, weight })
|
||||
console.debug(url, "successfully updated")
|
||||
} else {
|
||||
console.error(url, "record could not be written!");
|
||||
statistics.errors.push(`Failed to write record for ${url}`)
|
||||
console.debug(url, "record could not be written!")
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
async function checkPage(url: string) {
|
||||
const record = await getPageRecord(url, OUTPUT_PATH);
|
||||
const lastUpdated = Date.parse(record?.updated || "");
|
||||
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD;
|
||||
const record = await getPageRecord(url, OUTPUT_PATH)
|
||||
const lastUpdated = Date.parse(record?.updated || "")
|
||||
const needsCheck = !record || now - lastUpdated > RECHECK_THRESHOLD
|
||||
|
||||
if (!needsCheck) {
|
||||
console.log(url, "is up-to-date");
|
||||
return true;
|
||||
statistics.checked++
|
||||
console.debug(url, "is up-to-date")
|
||||
return true
|
||||
}
|
||||
|
||||
const runId = await requestMetricsRun(url);
|
||||
const runId = await requestMetricsRun(url)
|
||||
if (!runId) {
|
||||
console.error(url, "updating failed!");
|
||||
return false;
|
||||
statistics.errors.push(`Failed to run metric for ${url}`)
|
||||
console.debug(url, "updating failed!")
|
||||
return false
|
||||
}
|
||||
|
||||
console.log(url, "new or outdated, runId is", runId);
|
||||
return runId;
|
||||
console.debug(url, "new or outdated, runId is", runId)
|
||||
return runId
|
||||
}
|
||||
|
||||
function sleep(duration: number) {
|
||||
return new Promise((resolve) => {
|
||||
setTimeout(() => resolve(), duration);
|
||||
});
|
||||
return new Promise<void>((resolve) => {
|
||||
setTimeout(() => resolve(), duration)
|
||||
})
|
||||
}
|
||||
|
||||
const white = (output: string | number) => colors.white(` ${output} `)
|
||||
const whiteHd = (output: string | number) => colors.bgWhite.bold.black(` ${output} `)
|
||||
const red = (output: string | number) => colors.red(` ${output} `)
|
||||
const redHd = (output: string | number) => colors.bgRed.bold.black(` ${output} `)
|
||||
const yellow = (output: string | number) => colors.yellow(` ${output} `)
|
||||
const yellowHd = (output: string | number) => colors.bgYellow.bold.black(` ${output} `)
|
||||
const blue = (output: string | number) => colors.blue(` ${output} `)
|
||||
const blueHd = (output: string | number) => colors.bgBlue.bold.black(` ${output} `)
|
||||
|
||||
function updateStatusScreen() {
|
||||
const { total, checked, updated, rejected, errors } = statistics
|
||||
|
||||
const tableOutput = new Table(
|
||||
[whiteHd('total'), whiteHd('checked'), blueHd('added/updated'), yellowHd('rejected'), redHd('errors')],
|
||||
[white(total), white(checked), blue(updated.length), yellow(rejected.length), red(errors.length)],
|
||||
)
|
||||
|
||||
tty.cursorLeft.cursorUp.eraseLine()
|
||||
tty.cursorLeft.cursorUp.eraseLine()
|
||||
console.log(tableOutput.toString())
|
||||
}
|
||||
|
||||
function showStatistics() {
|
||||
console.log(new Table(
|
||||
...statistics.rejected.map((page) => [yellowHd('Rejected'), page.url, `${red(page.weight)}kb`]),
|
||||
).toString())
|
||||
|
||||
console.log(new Table(
|
||||
...statistics.errors.map((err) => [redHd('Error'), err]),
|
||||
).toString())
|
||||
}
|
||||
|
||||
async function handleBatch() {
|
||||
if (!pages.length) return; // done, yeah!
|
||||
updateStatusScreen()
|
||||
if (!pages.length) return showStatistics() // done, yeah!
|
||||
|
||||
const batch = pages.splice(0, PARALLEL_JOBS);
|
||||
const jobs = batch.map((url) => checkPage(url));
|
||||
const batch = pages.splice(0, PARALLEL_JOBS)
|
||||
const jobs = batch.map((url) => checkPage(url))
|
||||
|
||||
while (jobs.length) {
|
||||
// take the first job and check
|
||||
// if the check fails, it will be added back to the end of the list
|
||||
const runId = await jobs.shift();
|
||||
const job = jobs.shift()
|
||||
const runId = await job
|
||||
|
||||
// page is up-to-date or YLT has an error
|
||||
if (runId === true || runId === false) continue;
|
||||
if (!job || runId === undefined || runId === true || runId === false) continue
|
||||
|
||||
// TODO: handle failures more gracefully
|
||||
const { url, status } = await checkStatus(runId);
|
||||
const { url, status } = await checkStatus(runId)
|
||||
|
||||
if (status === "failed") {
|
||||
console.error(url, "YLT analysis failed");
|
||||
continue;
|
||||
statistics.errors.push(`YLT analysis failed for ${url} (run id: ${runId})`)
|
||||
console.debug(url, "YLT analysis failed")
|
||||
continue
|
||||
} else if (status === "complete") {
|
||||
console.log(url, "updating record...");
|
||||
await updateRecord(runId, url);
|
||||
continue;
|
||||
console.debug(url, "updating record...")
|
||||
await updateRecord(runId, url)
|
||||
continue
|
||||
} else {
|
||||
// not done yet, add it back
|
||||
jobs.push(runId);
|
||||
jobs.push(job)
|
||||
// wait a bit before checking again
|
||||
await sleep(1000);
|
||||
await sleep(1000)
|
||||
}
|
||||
}
|
||||
|
||||
handleBatch();
|
||||
handleBatch()
|
||||
}
|
||||
|
||||
handleBatch();
|
||||
const debug = Deno.env.get('DEBUG') !== undefined
|
||||
if (!debug) console.debug = () => {} // suppress debug messages
|
||||
|
||||
console.log('Starting...')
|
||||
handleBatch()
|
||||
|
|
Loading…
Add table
Reference in a new issue