#!/usr/bin/env node
"use strict";

/**
 * Command-line entry point.
 *
 * Usage: <this script> <list-file>
 *
 * <list-file> is a text file containing one input filepath per line (blank
 * lines and surrounding whitespace are ignored). Each listed file is read as
 * UTF-8 text and passed through ai_extract (rate limited); the resulting
 * objects are rendered with make_html and written to `report.html` in the
 * current working directory.
 */

import path from 'path';
import fs from 'fs';
import { fileURLToPath } from 'url';

import tqdm from 'tqdm';
import pThrottle from 'p-throttle';

import ai_extract from './lib/ai-extract.mjs';
import make_html from './lib/make-html.mjs';

// ES modules do not define __dirname, so derive it from import.meta.url.
// fileURLToPath decodes percent-escapes and handles Windows drive letters,
// which slicing the "file://" prefix off the URL string does not.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// The first CLI argument names the list file; fail with a readable message
// rather than letting readFileSync throw on `undefined`.
const filepath_list = process.argv[2];
if (filepath_list === undefined) {
	console.error(`Usage: ${path.basename(process.argv[1] ?? "script")} <file-containing-one-filepath-per-line>`);
	process.exit(1);
}

const filepaths = fs.readFileSync(filepath_list, "utf-8")
	.split(`\n`)
	.map(el => el.trim())
	.filter(el => el.length > 0);

console.log(`Hello from Node.js`);
console.log(`>>> FILEPATHS`);
console.log(filepaths.join(`\n`));

// Read every input file up front (in parallel); rejects if any read fails.
const txts = await Promise.all(
	filepaths.map(filepath => fs.promises.readFile(filepath, "utf-8"))
);

// Allow at most 40 ai_extract calls per interval.
const throttler = pThrottle({
	limit: 40,
	interval: 60 * 1000 // every minute
});
const throttled = throttler(ai_extract);

const objs = [];
let i = 0; // Position of the current text within txts / filepaths.
for (const raw_txt of tqdm(txts, { total: txts.length })) {
	console.log(`>>> AI > ${i}`);
	const txt = raw_txt.trim();

	if (txt.length === 0) {
		// Nothing to extract from an empty file: record a placeholder
		// instead of calling ai_extract.
		objs.push({
			date: "unknown",
			item_name: "unknown",
			paid: null,
			vat: null,
			category: "Unknown"
		});
	} else {
		const result = await throttled(txt, false); // bool is whether we're pretend or not - i.e. not making anthropic/claude api calls
		result.i = i;
		console.log(result);
		objs.push(result);
	}

	// Advance for every input, including empty ones, so that the logged
	// index and result.i stay aligned with the input position.
	i++;
}

const html = make_html(objs);

const filepath_out = path.join(process.cwd(), `report.html`);
fs.writeFileSync(filepath_out, html);
console.log(`Written output to ${filepath_out}`);