2024-07-09 21:31:25 +00:00
|
|
|
#!/usr/bin/env node
|
|
|
|
"use strict";
|
|
|
|
|
|
|
|
import path from 'path';
import fs from 'fs';
import { fileURLToPath } from 'url';

import tqdm from 'tqdm';
import pThrottle from 'p-throttle';

import ai_extract from './lib/ai-extract.mjs';
import make_html from './lib/make-html.mjs';
|
|
|
|
|
|
|
|
// ES modules do not get a __dirname binding from Node.js, so derive it from this
// module's own URL. fileURLToPath() decodes percent-escapes (e.g. spaces in the
// path) and handles Windows drive letters, neither of which survives slicing the
// "file://" prefix off the URL string by hand.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
|
|
|
|
|
|
// The first CLI argument is a text file that lists one input filepath per line.
const [filepath_list] = process.argv.slice(2);
if(typeof filepath_list !== "string" || filepath_list.length === 0) {
	// Fail with a usage hint instead of the opaque TypeError that
	// fs.readFileSync(undefined) would otherwise throw.
	console.error(`Usage: <script> <text file listing one input filepath per line>`);
	process.exit(1);
}

// Trimming each line also strips the "\r" left behind by CRLF line endings;
// lines that are empty after trimming are dropped.
const filepaths = fs.readFileSync(filepath_list, "utf-8")
	.split(`\n`)
	.map(el => el.trim())
	.filter(el => el.length > 0);
|
|
|
|
|
|
|
|
// Startup banner, followed by the list of input files, one per line.
console.log(`Hello from Node.js`);
console.log(`>>> FILEPATHS`);
console.log(filepaths.join(`\n`));

// Start every read up front so they proceed concurrently. Promise.all resolves
// to the file contents in the same order as filepaths, and rejects as soon as
// any single read fails.
const pending_reads = filepaths.map(
	filepath => fs.promises.readFile(filepath, { encoding: "utf-8" })
);
const txts = await Promise.all(pending_reads);
|
|
|
|
|
|
|
|
|
|
|
|
// Rate-limit settings handed to p-throttle: 40 calls per 60-second interval.
const throttle_options = {
	limit: 40,
	interval: 60_000, // one minute, in milliseconds
};
const throttler = pThrottle(throttle_options);

// Rate-limited wrapper around ai_extract; it is invoked with the same arguments.
const throttled = throttler(ai_extract);
|
|
|
|
|
|
|
|
// Run the extraction over every input text in order, collecting exactly one
// entry per input in objs.
const objs = [];
// Running position of the current text within txts (and so of its path within
// filepaths).
let i = 0;
for(let txt of tqdm(txts, { total: txts.length })) {
	// Capture and advance the index up front so that no exit path from this
	// iteration can skip the increment. Previously the empty-text branch hit
	// `continue` before `i++`, leaving every later index one (or more) too low.
	const index = i++;
	console.log(`>>> AI > ${index}`);

	txt = txt.trim();
	if(txt.length === 0) {
		// Nothing to extract from: record a placeholder rather than spending a
		// throttled extraction call on empty input.
		// NOTE(review): unlike real results the placeholder carries no `i`
		// field (unchanged from the original) - confirm whether make_html
		// needs one.
		objs.push({
			date: "unknown",
			item_name: "unknown",
			paid: null,
			vat: null,
			category: "Unknown"
		});
		continue;
	}

	// Per the original author's note, the second argument is the "pretend" flag:
	// false means real anthropic/claude api calls are made.
	const result = await throttled(txt, false);
	// Tag the result with the index of the input it came from.
	result.i = index;

	console.log(result);

	objs.push(result);
}
|
|
|
|
|
|
|
|
// Render the collected entries to HTML and save the report into the current
// working directory, then tell the user where it went.
const filepath_out = path.join(process.cwd(), `report.html`);
const html = make_html(objs);

fs.writeFileSync(filepath_out, html);
console.log(`Written output to ${filepath_out}`);
|