diff --git a/package-lock.json b/package-lock.json
index 55147d1..7cc2c22 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
       "license": "GPL-3.0",
       "dependencies": {
         "@anthropic-ai/sdk": "^0.24.3",
+        "date-fns": "^3.6.0",
         "nightink": "^1.0.2",
         "p-throttle": "^6.1.0",
         "pdf-to-text": "^0.0.7",
@@ -93,6 +94,16 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/date-fns": {
+      "version": "3.6.0",
+      "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-3.6.0.tgz",
+      "integrity": "sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==",
+      "license": "MIT",
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/kossnocorp"
+      }
+    },
     "node_modules/delayed-stream": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
diff --git a/package.json b/package.json
index d2fe103..25ef380 100644
--- a/package.json
+++ b/package.json
@@ -10,6 +10,7 @@
   "license": "GPL-3.0",
   "dependencies": {
     "@anthropic-ai/sdk": "^0.24.3",
+    "date-fns": "^3.6.0",
     "nightink": "^1.0.2",
     "p-throttle": "^6.1.0",
     "pdf-to-text": "^0.0.7",
diff --git a/src/index.mjs b/src/index.mjs
index 5a7f5b0..4ccd2ad 100755
--- a/src/index.mjs
+++ b/src/index.mjs
@@ -42,7 +42,7 @@ for(let txt of tqdm(txts, { total: txts.length })) {
 	if(txt.length === 0) {
 		objs.push({
 			date: "unknown",
-			item_name: "unknown",
+			item_name: `unknown (zero length input for filepath '${filepaths[i]}')`,
 			paid: null,
 			vat: null,
 			category: "Unknown"
diff --git a/src/lib/ai-extract.mjs b/src/lib/ai-extract.mjs
index 4b87186..db06cba 100644
--- a/src/lib/ai-extract.mjs
+++ b/src/lib/ai-extract.mjs
@@ -5,7 +5,7 @@ import trim_specific from "./trim-specific.mjs";
 const prompt = `The input is a receipt, invoice or parking/bus ticket. The user is a theatre production company.
 Output only a single JSON object and NOT an array and nothing else with the following properties:
 
-date (string): the date the item was purchased, formatted as an iso date. If not present, set this value to null.
+date (string): the date the item was purchased, formatted as an iso date. If not present, set this value to null. If there are both an invoice date and an order date available, pick the order date.
 
 item_name (string): The name of the item purchased. Where there are multiple items, summarise them with a single name. Shorten excessively long product names.
 
diff --git a/src/lib/parse-result.mjs b/src/lib/parse-result.mjs
index 84d7b56..c03aea9 100644
--- a/src/lib/parse-result.mjs
+++ b/src/lib/parse-result.mjs
@@ -1,5 +1,48 @@
 "use strict";
 
+import { format as date_format } from 'date-fns';
+
+// Ref https://stackoverflow.com/a/77069029/1460422
+const round_to_dp = (n, p = 2) => (e => Math.round(n * e) / e)(Math.pow(10, p));
+
+/**
+ * Rounds a number (or a string parseFloat can read) to dp decimal places.
+ * null and undefined are handed back silently: parse_result stores null for
+ * "no value", so warning about them is only noise.
+ * @param	{number|string|null}	value	The value to round.
+ * @param	{number}	[dp=2]	Decimal places to round to.
+ * @return	{number|*}	The rounded number, or value unchanged if it is not numeric.
+ */
+function round_silly(value, dp = 2) {
+	if(value === null || typeof value === "undefined")
+		return value;
+	const n_value = typeof value === "number" ? value : parseFloat(value);
+	if(Number.isNaN(n_value)) {
+		console.warn(`Warning: value '${value}' could not be converted to a number successfully, returning original string value`);
+		return value;
+	}
+	return round_to_dp(n_value, dp);
+}
+
+/**
+ * Reformats a parseable date value as dd/MM/yyyy.
+ * null and undefined are handed back as-is: new Date(null) is the Unix epoch,
+ * not an invalid date, so a missing date would otherwise be formatted as a 1970 date.
+ * @param	{string|null}	date_str	The date to convert.
+ * @return	{string|null}	The formatted date, or date_str unchanged if it cannot be parsed.
+ */
+function convert_date_silly(date_str) {
+	if(date_str === null || typeof date_str === "undefined")
+		return date_str;
+	const date_obj = new Date(date_str);
+	if(Number.isNaN(date_obj.getTime())) {
+		console.warn(`Error parsing date value '${date_str}', returning original value`);
+		return date_str;
+	}
+	// Ref https://stackoverflow.com/a/78325134/1460422
+	return date_format(date_obj, "dd/MM/yyyy");
+}
+
 export default function parse_result(obj) {
 	if(typeof obj.vat === "undefined") {
 		if(typeof obj.vat_percent === "undefined") {
@@ -18,5 +61,11 @@ export default function parse_result(obj) {
 	if(typeof obj.net === "undefined")
 		obj.net = null;
 	
+	obj.vat = round_silly(obj.vat, 2);
+	obj.net = round_silly(obj.net, 2);
+	obj.paid = round_silly(obj.paid, 2);
+	
+	obj.date = convert_date_silly(obj.date);
+	
 	return obj; // daisy chain! :D
 }
\ No newline at end of file