receipt-parser/src/index.mjs

67 lines
1.6 KiB
JavaScript
Executable file

#!/usr/bin/env node
"use strict";
import path from 'path';
import fs from 'fs';
import { fileURLToPath } from 'url';
import tqdm from 'tqdm';
import pThrottle from 'p-throttle';
import ai_extract from './lib/ai-extract.mjs';
import make_html from './lib/make-html.mjs';
// ES modules do not define __dirname, so rebuild it from this module's URL.
// fileURLToPath decodes percent-escapes (e.g. "%20" for a space) and handles
// Windows drive letters; slicing the raw "file://" URL string does neither.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// The first CLI argument names a text file that lists one receipt text file
// per line. Each line is trimmed and blank lines are dropped.
const list_filepath = process.argv[2];
if (list_filepath === undefined) {
	// Fail with a usage hint instead of the opaque TypeError that
	// fs.readFileSync raises when it is handed `undefined`.
	console.error(`Usage: index.mjs <text file listing one receipt filepath per line>`);
	process.exit(1);
}
const filepaths = fs.readFileSync(list_filepath, "utf-8")
	.split(`\n`)
	.map(el => el.trim())
	.filter(el => el.length > 0);
console.log(`Hello from Node.js`);
console.log(`>>> FILEPATHS`);
console.log(filepaths.join(`\n`));
// Load the text of every listed receipt up front, reading the files concurrently.
// The resulting array keeps the same order as `filepaths`.
const txts = await Promise.all(filepaths.map(filepath => fs.promises.readFile(filepath, "utf-8")));
// Rate-limited wrapper around the AI extraction call, built with p-throttle.
const throttled = pThrottle({
	interval: 60 * 1000, // length of one throttling window in milliseconds (one minute)
	limit: 40 // call budget handed to p-throttle for each window
})(ai_extract);
// Run the extractor over every receipt in order, collecting one entry per input.
const objs = [];
// Position of the current receipt within `txts`. It advances on every
// iteration, blank receipts included, so the logged number and `result.i`
// always match the receipt's position in the input list.
let i = -1;
for (const raw of tqdm(txts, { total: txts.length })) {
	i++;
	console.log(`>>> AI > ${i}`);
	const txt = raw.trim();
	if (txt.length === 0) {
		// Nothing to extract from: record a placeholder entry without calling the extractor.
		// NOTE(review): placeholder entries carry no `i` field - confirm make_html does not rely on it.
		objs.push({
			date: "unknown",
			item_name: "unknown",
			paid: null,
			vat: null,
			category: "Unknown"
		});
		continue;
	}
	// Second argument is whether we're pretending, i.e. not making anthropic/claude api calls.
	const result = await throttled(txt, false);
	result.i = i;
	console.log(result);
	objs.push(result);
}
// Turn the collected entries into an HTML report and save it as report.html
// in the current working directory.
const report_markup = make_html(objs);
const report_path = path.resolve(process.cwd(), `report.html`);
fs.writeFileSync(report_path, report_markup);
console.log(`Written output to ${report_path}`);