Initial commit
This commit is contained in:
commit
bfb837d4f7
11 changed files with 814 additions and 0 deletions
161
.gitignore
vendored
Normal file
161
.gitignore
vendored
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
*.html
|
||||||
|
!src/*.html
|
||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/git,node
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=git,node
|
||||||
|
|
||||||
|
### Git ###
|
||||||
|
# Created by git for backups. To disable backups in Git:
|
||||||
|
# $ git config --global mergetool.keepBackup false
|
||||||
|
*.orig
|
||||||
|
|
||||||
|
# Created by git when using merge tools for conflicts
|
||||||
|
*.BACKUP.*
|
||||||
|
*.BASE.*
|
||||||
|
*.LOCAL.*
|
||||||
|
*.REMOTE.*
|
||||||
|
*_BACKUP_*.txt
|
||||||
|
*_BASE_*.txt
|
||||||
|
*_LOCAL_*.txt
|
||||||
|
*_REMOTE_*.txt
|
||||||
|
|
||||||
|
### Node ###
|
||||||
|
# Logs
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
lerna-debug.log*
|
||||||
|
.pnpm-debug.log*
|
||||||
|
|
||||||
|
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||||
|
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||||
|
|
||||||
|
# Runtime data
|
||||||
|
pids
|
||||||
|
*.pid
|
||||||
|
*.seed
|
||||||
|
*.pid.lock
|
||||||
|
|
||||||
|
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||||
|
lib-cov
|
||||||
|
|
||||||
|
# Coverage directory used by tools like istanbul
|
||||||
|
coverage
|
||||||
|
*.lcov
|
||||||
|
|
||||||
|
# nyc test coverage
|
||||||
|
.nyc_output
|
||||||
|
|
||||||
|
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||||
|
.grunt
|
||||||
|
|
||||||
|
# Bower dependency directory (https://bower.io/)
|
||||||
|
bower_components
|
||||||
|
|
||||||
|
# node-waf configuration
|
||||||
|
.lock-wscript
|
||||||
|
|
||||||
|
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||||
|
build/Release
|
||||||
|
|
||||||
|
# Dependency directories
|
||||||
|
node_modules/
|
||||||
|
jspm_packages/
|
||||||
|
|
||||||
|
# Snowpack dependency directory (https://snowpack.dev/)
|
||||||
|
web_modules/
|
||||||
|
|
||||||
|
# TypeScript cache
|
||||||
|
*.tsbuildinfo
|
||||||
|
|
||||||
|
# Optional npm cache directory
|
||||||
|
.npm
|
||||||
|
|
||||||
|
# Optional eslint cache
|
||||||
|
.eslintcache
|
||||||
|
|
||||||
|
# Optional stylelint cache
|
||||||
|
.stylelintcache
|
||||||
|
|
||||||
|
# Microbundle cache
|
||||||
|
.rpt2_cache/
|
||||||
|
.rts2_cache_cjs/
|
||||||
|
.rts2_cache_es/
|
||||||
|
.rts2_cache_umd/
|
||||||
|
|
||||||
|
# Optional REPL history
|
||||||
|
.node_repl_history
|
||||||
|
|
||||||
|
# Output of 'npm pack'
|
||||||
|
*.tgz
|
||||||
|
|
||||||
|
# Yarn Integrity file
|
||||||
|
.yarn-integrity
|
||||||
|
|
||||||
|
# dotenv environment variable files
|
||||||
|
.env
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# parcel-bundler cache (https://parceljs.org/)
|
||||||
|
.cache
|
||||||
|
.parcel-cache
|
||||||
|
|
||||||
|
# Next.js build output
|
||||||
|
.next
|
||||||
|
out
|
||||||
|
|
||||||
|
# Nuxt.js build / generate output
|
||||||
|
.nuxt
|
||||||
|
dist
|
||||||
|
|
||||||
|
# Gatsby files
|
||||||
|
.cache/
|
||||||
|
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||||
|
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||||
|
# public
|
||||||
|
|
||||||
|
# vuepress build output
|
||||||
|
.vuepress/dist
|
||||||
|
|
||||||
|
# vuepress v2.x temp and cache directory
|
||||||
|
.temp
|
||||||
|
|
||||||
|
# Docusaurus cache and generated files
|
||||||
|
.docusaurus
|
||||||
|
|
||||||
|
# Serverless directories
|
||||||
|
.serverless/
|
||||||
|
|
||||||
|
# FuseBox cache
|
||||||
|
.fusebox/
|
||||||
|
|
||||||
|
# DynamoDB Local files
|
||||||
|
.dynamodb/
|
||||||
|
|
||||||
|
# TernJS port file
|
||||||
|
.tern-port
|
||||||
|
|
||||||
|
# Stores VSCode versions used for testing VSCode extensions
|
||||||
|
.vscode-test
|
||||||
|
|
||||||
|
# yarn v2
|
||||||
|
.yarn/cache
|
||||||
|
.yarn/unplugged
|
||||||
|
.yarn/build-state.yml
|
||||||
|
.yarn/install-state.gz
|
||||||
|
.pnp.*
|
||||||
|
|
||||||
|
### Node Patch ###
|
||||||
|
# Serverless Webpack directories
|
||||||
|
.webpack/
|
||||||
|
|
||||||
|
# Optional stylelint cache
|
||||||
|
|
||||||
|
# SvelteKit build / generate output
|
||||||
|
.svelte-kit
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/git,node
|
7
README.md
Normal file
7
README.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# receipt-parser
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## System Requirements
|
||||||
|
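- `libtesseract-dev`
- `tesseract-ocr` (provides the `tesseract` command that `src/index.sh` uses to OCR images)
- `poppler-utils` (provides the `pdftotext` command that `src/index.sh` uses for PDFs)
- Node.js 18 or newer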
|
318
package-lock.json
generated
Normal file
318
package-lock.json
generated
Normal file
|
@ -0,0 +1,318 @@
|
||||||
|
{
|
||||||
|
"name": "receipt-parser",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"lockfileVersion": 3,
|
||||||
|
"requires": true,
|
||||||
|
"packages": {
|
||||||
|
"": {
|
||||||
|
"name": "receipt-parser",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"license": "GPL-3.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@anthropic-ai/sdk": "^0.24.3",
|
||||||
|
"nightink": "^1.0.2",
|
||||||
|
"p-throttle": "^6.1.0",
|
||||||
|
"pdf-to-text": "^0.0.7",
|
||||||
|
"tqdm": "^2.0.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@anthropic-ai/sdk": {
|
||||||
|
"version": "0.24.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.24.3.tgz",
|
||||||
|
"integrity": "sha512-916wJXO6T6k8R6BAAcLhLPv/pnLGy7YSEBZXZ1XTFbLcTZE8oTy3oDW9WJf9KKZwMvVcePIfoTSvzXHRcGxkQQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "^18.11.18",
|
||||||
|
"@types/node-fetch": "^2.6.4",
|
||||||
|
"abort-controller": "^3.0.0",
|
||||||
|
"agentkeepalive": "^4.2.1",
|
||||||
|
"form-data-encoder": "1.7.2",
|
||||||
|
"formdata-node": "^4.3.2",
|
||||||
|
"node-fetch": "^2.6.7",
|
||||||
|
"web-streams-polyfill": "^3.2.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@types/node": {
|
||||||
|
"version": "18.19.39",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.39.tgz",
|
||||||
|
"integrity": "sha512-nPwTRDKUctxw3di5b4TfT3I0sWDiWoPQCZjXhvdkINntwr8lcoVCKsTgnXeRubKIlfnV+eN/HYk6Jb40tbcEAQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~5.26.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@types/node-fetch": {
|
||||||
|
"version": "2.6.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz",
|
||||||
|
"integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*",
|
||||||
|
"form-data": "^4.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/abort-controller": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"event-target-shim": "^5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/agentkeepalive": {
|
||||||
|
"version": "4.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz",
|
||||||
|
"integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"humanize-ms": "^1.2.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 8.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/asynckit": {
|
||||||
|
"version": "0.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||||
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/combined-stream": {
|
||||||
|
"version": "1.0.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||||
|
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"delayed-stream": "~1.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/delayed-stream": {
|
||||||
|
"version": "1.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||||
|
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=0.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/event-target-shim": {
|
||||||
|
"version": "5.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
|
||||||
|
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/form-data": {
|
||||||
|
"version": "4.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||||
|
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"asynckit": "^0.4.0",
|
||||||
|
"combined-stream": "^1.0.8",
|
||||||
|
"mime-types": "^2.1.12"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/form-data-encoder": {
|
||||||
|
"version": "1.7.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
|
||||||
|
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/formdata-node": {
|
||||||
|
"version": "4.4.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
|
||||||
|
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"node-domexception": "1.0.0",
|
||||||
|
"web-streams-polyfill": "4.0.0-beta.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 12.20"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/formdata-node/node_modules/web-streams-polyfill": {
|
||||||
|
"version": "4.0.0-beta.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
|
||||||
|
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 14"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/html-entities": {
|
||||||
|
"version": "2.5.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.5.2.tgz",
|
||||||
|
"integrity": "sha512-K//PSRMQk4FZ78Kyau+mZurHn3FH0Vwr+H36eE0rPbeYkRRi9YxceYPhuN60UwWorxyKHhqoAJl2OFKa4BVtaA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/mdevils"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://patreon.com/mdevils"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/humanize-ms": {
|
||||||
|
"version": "1.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
|
||||||
|
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"ms": "^2.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/mime-db": {
|
||||||
|
"version": "1.52.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||||
|
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/mime-types": {
|
||||||
|
"version": "2.1.35",
|
||||||
|
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||||
|
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"mime-db": "1.52.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/ms": {
|
||||||
|
"version": "2.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||||
|
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/nightink": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/nightink/-/nightink-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-4m/WBimAdO0TkUOF+UFBR9U09UTF+Y/G+PAMWarQmq7wUxclSPkA/hyi9wq4RUxJleV7PvN782YTQ4BgS7Vd/w==",
|
||||||
|
"license": "MPL-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"html-entities": "^2.3.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/node-domexception": {
|
||||||
|
"version": "1.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
|
||||||
|
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/jimmywarting"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://paypal.me/jimmywarting"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.5.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/node-fetch": {
|
||||||
|
"version": "2.7.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||||
|
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"whatwg-url": "^5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "4.x || >=6.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"encoding": "^0.1.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"encoding": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/p-throttle": {
|
||||||
|
"version": "6.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/p-throttle/-/p-throttle-6.1.0.tgz",
|
||||||
|
"integrity": "sha512-eQMdGTxk2+047La67wefUtt0tEHh7D+C8Jl7QXoFCuIiNYeQ9zWs2AZiJdIAs72rSXZ06t11me2bgalRNdy3SQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/pdf-to-text": {
|
||||||
|
"version": "0.0.7",
|
||||||
|
"resolved": "https://registry.npmjs.org/pdf-to-text/-/pdf-to-text-0.0.7.tgz",
|
||||||
|
"integrity": "sha512-NHWB7u/9q+SZ28UtEgJYljamp61j06oldHdvGik1729pzRFLCO4igbZwm0MOUWoIQUz4nla3n+cf3Jh7uiOZwQ==",
|
||||||
|
"license": "ISC"
|
||||||
|
},
|
||||||
|
"node_modules/tqdm": {
|
||||||
|
"version": "2.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/tqdm/-/tqdm-2.0.3.tgz",
|
||||||
|
"integrity": "sha512-Ju50G550gspkjd1AiJ/jFBHe2dii9s+KPntEsq0o73BqywqzNWPUM8/FD3zM1rOH7OGLoH7pGSGI90Ct+Yd/5Q==",
|
||||||
|
"license": "ISC"
|
||||||
|
},
|
||||||
|
"node_modules/tr46": {
|
||||||
|
"version": "0.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
||||||
|
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/undici-types": {
|
||||||
|
"version": "5.26.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||||
|
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/web-streams-polyfill": {
|
||||||
|
"version": "3.3.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
|
||||||
|
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/webidl-conversions": {
|
||||||
|
"version": "3.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||||
|
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||||
|
"license": "BSD-2-Clause"
|
||||||
|
},
|
||||||
|
"node_modules/whatwg-url": {
|
||||||
|
"version": "5.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||||
|
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"tr46": "~0.0.3",
|
||||||
|
"webidl-conversions": "^3.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
18
package.json
Normal file
18
package.json
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"name": "receipt-parser",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Extracts and processes receipts.",
|
||||||
|
"main": "src/index.mjs",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"no tests implemented\""
|
||||||
|
},
|
||||||
|
"author": "Starbeamrainbowlabs",
|
||||||
|
"license": "GPL-3.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@anthropic-ai/sdk": "^0.24.3",
|
||||||
|
"nightink": "^1.0.2",
|
||||||
|
"p-throttle": "^6.1.0",
|
||||||
|
"pdf-to-text": "^0.0.7",
|
||||||
|
"tqdm": "^2.0.3"
|
||||||
|
}
|
||||||
|
}
|
39
src/index.css
Normal file
39
src/index.css
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
html, body { font-size: 100%; }
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
th, td {
|
||||||
|
padding: 0.75rem;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
background-color: #f2f2f2;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr:nth-child(even) {
|
||||||
|
background-color: #f8f8f8;
|
||||||
|
}
|
||||||
|
|
||||||
|
tr:hover {
|
||||||
|
background-color: #e8e8e8;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
.align-right {
|
||||||
|
text-align: right;
|
||||||
|
}
|
55
src/index.mjs
Executable file
55
src/index.mjs
Executable file
|
@ -0,0 +1,55 @@
|
||||||
|
#!/usr/bin/env node
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
import path from 'path';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
import tqdm from 'tqdm';
|
||||||
|
import pThrottle from 'p-throttle';
|
||||||
|
|
||||||
|
import ai_extract from './lib/ai-extract.mjs';
|
||||||
|
import make_html from './lib/make-html.mjs';
|
||||||
|
|
||||||
|
// Entry point: reads a "file list" (a text file naming one extracted-text
// receipt per line), runs every receipt through ai_extract(), and writes the
// combined results to report.html in the current working directory.

const [ filepath_filelist ] = process.argv.slice(2);
if(typeof filepath_filelist !== "string") {
	// Without this guard fs.readFileSync(undefined) fails with a cryptic ERR_INVALID_ARG_TYPE.
	console.error(`Usage: index.mjs <filelist.txt>\nError: No file list specified. It should contain one text filepath per line.`);
	process.exit(1);
}

// One filepath per line; surrounding whitespace and blank lines are ignored.
const filepaths = fs.readFileSync(filepath_filelist, "utf-8")
	.split(`\n`)
	.map(el => el.trim())
	.filter(el => el.length > 0);

console.log(`Hello from Node.js`);

console.log(`>>> FILEPATHS`);
console.log(filepaths.join(`\n`));

// Load every receipt's text up front; the array order matches the file list.
const txts = await Promise.all(filepaths.map(filepath => fs.promises.readFile(filepath, "utf-8")));

// Allow at most 40 calls to ai_extract per minute.
const throttler = pThrottle({
	limit: 40,
	interval: 60 * 1000 // every minute
});
const throttled = throttler(ai_extract);

const objs = [];
let i = 0;
for(const txt of tqdm(txts, { total: txts.length })) {
	console.log(`>>> AI > ${i}`);
	
	// NOTE(review): the 2nd argument is ai_extract's `pretend` flag. It is
	// hard-coded to true here, so canned data is returned instead of calling
	// the API - confirm whether this is still intended.
	const result = await throttled(txt, true);
	result.i = i; // Index of the receipt, shown in the "#" column of the report
	
	console.log(result);
	
	objs.push(result);
	
	i++;
}

const html = make_html(objs);
const filepath_out = path.join(process.cwd(), `report.html`);
fs.writeFileSync(filepath_out, html);

console.log(`Written output to ${filepath_out}`);
|
64
src/index.sh
Executable file
64
src/index.sh
Executable file
|
@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/env bash
# Converts every receipt given on the command line to plain text (pdftotext
# for PDFs, tesseract for everything else), collects the resulting text files
# in a temporary directory, then hands a list of them to index.mjs.
#
# Usage: index.sh <receipt> [<receipt> ...]

__dirname="$(dirname "$0")";

if [[ "$#" -eq 0 ]]; then
	echo "Usage: $0 <receipt> [<receipt> ...]" >&2;
	echo "Error: No input files specified" >&2;
	exit 1;
fi

temp_dir="$(mktemp --tmpdir -d "receipt-parse-XXXXXXX")";
if [[ -z "${temp_dir}" ]]; then
	echo "Error: Failed to get temporary directory" >&2;
	exit 1;
fi

# Always remove the temporary directory, however the script terminates.
on_exit() {
	rm -rf "${temp_dir}";
}
trap on_exit EXIT;

# Abort on the first failing command so a failed conversion never ends up in
# the file list. Previously this only took effect from the 2nd file onwards.
set -e;

mkdir "${temp_dir}/tmp";
mkdir "${temp_dir}/txt";

echo ">>> Text extraction";

filepath_filelist="${temp_dir}/files.txt";
# Create the list up front so it exists even if every input gets skipped.
: >"${filepath_filelist}";

i=0;
for filename in "$@"; do
	echo "Processing ${filename}";
	filepath="$(realpath -s "${filename}")";
	# '##*.' strips the LONGEST prefix ending in a dot, leaving only the final
	# extension. '#*.' stopped at the first dot and broke on './a.pdf' or 'a.b.pdf'.
	ext="$(echo "${filename##*.}" | tr '[:upper:]' '[:lower:]')";
	
	###
	## Convert to text
	###
	if [[ "${ext}" == "pdf" ]]; then
		filepath_out="${temp_dir}/tmp/out.txt";
		pdftotext "${filepath}" "${filepath_out}";
	elif [[ "${ext}" == "txt" ]]; then
		echo "skipping ${filename} because it's a text file";
		continue
	else
		# tesseract appends its own extension to the output base name, so
		# locate whatever file it produced.
		tesseract "${filepath}" "${temp_dir}/tmp/out";
		filepath_out="$(find "${temp_dir}/tmp" -type f | head -n1)";
	fi
	
	###
	## Move out of tmp dir
	###
	filepath_final="${temp_dir}/txt/${i}.txt";
	mv "${filepath_out}" "${filepath_final}";
	i="$((i+1))";
	
	###
	## Clean up tmp dir ready for the next file
	###
	# The glob must sit OUTSIDE the quotes to be expanded; ':?' guards against
	# an empty temp_dir ever turning this into 'rm -rf /tmp/*'.
	rm -rf "${temp_dir:?}/tmp/"*;
	
	###
	## Log the final filepath
	###
	echo "${filepath_final}" >>"${filepath_filelist}";
done

echo ">>> Running Node.js";
"${__dirname}/index.mjs" "${filepath_filelist}";
|
56
src/lib/ai-extract.mjs
Normal file
56
src/lib/ai-extract.mjs
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
import Anthropic from "@anthropic-ai/sdk";
|
||||||
|
|
||||||
|
import parse_result from "./parse-result.mjs";
|
||||||
|
|
||||||
|
const prompt = `The input is a receipt or invoice. The user is a theatre production company. Output only a JSON object and nothing else with the following properties:
|
||||||
|
|
||||||
|
date (string): the date the item was purchased, formatted as an iso date. If not present, set this value to null.
|
||||||
|
|
||||||
|
item_name (string): The name of the item purchased. Where there are multiple items, summarise them with a single name. Shorten excessively long product names.
|
||||||
|
|
||||||
|
paid (number): The total amount that was paid
|
||||||
|
|
||||||
|
vat (number): The total amount of VAT paid. If this value is NOT present, set the JSON property vat_percent to the VAT percentage. If this is not present either, set the value to null.
|
||||||
|
|
||||||
|
category (string): Estimate a single category for the item purchased from the following options: Props, Consumables, Technical, Travel, Logistics, Costume, Wellbeing, Health/Medical, Training`;
|
||||||
|
|
||||||
|
const anthropic = new Anthropic({
|
||||||
|
apiKey: process.env['ANTHROPIC_API_KEY'], // the default, apparently
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Extracts structured data from the text of a receipt or invoice by sending
 * it to the Anthropic Messages API, with the module-level prompt as the
 * system prompt.
 * @param	{string}	text			The plain text of the receipt.
 * @param	{boolean}	[pretend=false]	If true, no API call is made and a fixed example object is returned instead.
 * @returns	{Promise<object>}	The extracted object after being passed through parse_result().
 * @throws	{TypeError}		If the API reply does not start with a text block.
 * @throws	{SyntaxError}	If the text returned by the model is not valid JSON.
 */
export default async function ai_extract(text, pretend=false) {
	if(pretend) {
		console.log(`WARNING: IN PRETEND MODE.`);
		return parse_result({
			date: "2024-06-25",
			item_name: "3M Picture/Strip, Gale Decoration Kit, T&G Matt Paint",
			paid: 26.95,
			vat_percent: 20,
			category: "Props"
		});
	}
	
	const msg = await anthropic.messages.create({
		model: "claude-3-haiku-20240307",
		max_tokens: 1000,
		temperature: 0,
		system: prompt,
		messages: [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						text
					}
				]
			}
		]
	});
	
	const response = msg.content?.[0]?.text;
	if(typeof response !== "string")
		throw new TypeError(`The AI response did not contain a text block.`);
	
	let obj;
	try {
		obj = JSON.parse(response);
	}
	catch(error) {
		// Show the raw output, otherwise a malformed reply is impossible to debug.
		console.error(`AI OUTPUT (invalid JSON):`, response);
		throw new SyntaxError(`Failed to parse the AI output as JSON: ${error.message}`, { cause: error });
	}
	console.log(`AI OUTPUT:`, response, `OBJ`, obj);
	
	return parse_result(obj);
}
|
26
src/lib/make-html.mjs
Normal file
26
src/lib/make-html.mjs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
import path from 'path';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
import { NightInk } from 'nightink';
|
||||||
|
|
||||||
|
// Load the report template and stylesheet once, at import time. The paths are
// resolved relative to this module via URL objects (which fs accepts directly)
// rather than by slicing import.meta.url, because the raw URL string is
// percent-encoded (e.g. a space becomes %20) and is not a valid filepath.
const template = fs.readFileSync(new URL(`../template.html`, import.meta.url), `utf-8`);
const css = fs.readFileSync(new URL(`../index.css`, import.meta.url), `utf-8`);
|
||||||
|
|
||||||
|
/**
 * Renders the expense report by filling the HTML template with the given
 * receipt objects, their grand total, and the stylesheet.
 * @param	{object[]}	objs	The receipt objects to list. Each one's `paid` property is added to the total.
 * @returns	{string}	The result of rendering the template with NightInk.
 */
export default function make_html(objs) {
	// Entries without a numeric `paid` are left out of the total rather than
	// turning the whole sum into NaN.
	const sum = objs.reduce(
		(acc, obj) => Number.isFinite(obj.paid) ? acc + obj.paid : acc,
		0
	);
	// Round to 2 decimal places so float artefacts such as 0.30000000000000004
	// do not end up in the report.
	const total = Math.round(sum * 100) / 100;
	
	const values = {
		objs,
		total,
		css
	};
	
	console.debug(`VALUES`, values);
	
	return NightInk(template, values);
}
|
22
src/lib/parse-result.mjs
Normal file
22
src/lib/parse-result.mjs
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
/**
 * Fills in the derived `vat` and `net` properties of an extracted receipt.
 *
 * - If `vat` is missing (undefined OR null) and a numeric `vat_percent` is
 *   present, `vat` is computed from `paid`. A `vat_percent` greater than 1 is
 *   treated as a percentage and converted in place to a fraction.
 * - If `vat` cannot be determined, it is set to null.
 * - `net` is set to `paid - vat` when both are known. Otherwise an existing
 *   `net` is left alone, or set to null if there is none.
 *
 * Monetary results are rounded to 2 decimal places.
 *
 * @param	{object}	obj		The receipt object to process. It is modified in place.
 * @returns	{object}	The same object, to allow chaining.
 */
export default function parse_result(obj) {
	const has_paid = typeof obj.paid === "number" && Number.isFinite(obj.paid);
	
	// `== null` matches undefined and null alike: the model may omit vat or set it to null.
	if(obj.vat == null) {
		const has_percent = typeof obj.vat_percent === "number" && Number.isFinite(obj.vat_percent);
		if(!has_percent || !has_paid) {
			obj.vat = null;
		}
		else {
			if(obj.vat_percent > 1)
				obj.vat_percent /= 100;
			// NOTE(review): this takes the rate as a share of `paid`. If `paid`
			// is the VAT-inclusive total, the VAT portion would instead be
			// paid * rate / (1 + rate) - confirm which is intended.
			obj.vat = Math.round(obj.paid * obj.vat_percent * 100) / 100;
		}
	}
	
	if(obj.vat !== null && has_paid)
		obj.net = Math.round((obj.paid - obj.vat) * 100) / 100; // Avoid float artefacts like 21.559999999999999
	
	if(typeof obj.net === "undefined")
		obj.net = null;
	
	return obj; // daisy chain! :D
}
|
48
src/template.html
Normal file
48
src/template.html
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Expense Report</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1><span contenteditable>Project name</span> Expense Report</h1>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>#</th>
|
||||||
|
<th>Date</th>
|
||||||
|
<th>Category</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Net</th>
|
||||||
|
<th>VAT</th>
|
||||||
|
<th>Total</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
{#each objs}
|
||||||
|
<tr>
|
||||||
|
<td contenteditable>{{i}}</td>
|
||||||
|
<td contenteditable>{{date}}</td>
|
||||||
|
<td contenteditable>{{category}}</td>
|
||||||
|
<td contenteditable>{{item_name}}</td>
|
||||||
|
<td contenteditable>{{net}}</td>
|
||||||
|
<td contenteditable>{{vat}}</td>
|
||||||
|
<td contenteditable>{{paid}}</td>
|
||||||
|
</tr>
|
||||||
|
{#endeach}
|
||||||
|
</tbody>
|
||||||
|
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<div class="align-right">
|
||||||
|
<strong>All total:</strong> <output contenteditable>{{total}}</output>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!---------------->
|
||||||
|
<style>
|
||||||
|
{css}
|
||||||
|
</style>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Reference in a new issue