From 1297f41105442aa5530f4096191f035d771a68a1 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Fri, 1 Jul 2022 18:28:39 +0100 Subject: [PATCH] .tfrecord files are too much hassle let's go with a standard of .jsonl.gz instead --- .gitignore | 29 ++ .vscode/settings.json | 2 + rainfallwrangler/package-lock.json | 279 +----------------- rainfallwrangler/package.json | 4 +- ...{TFRecordWriter.mjs => RecordWrangler.mjs} | 32 +- rainfallwrangler/src/lib/io/StreamHelpers.mjs | 52 ++++ .../src/lib/record/RecordBuilder.mjs | 19 ++ .../src/lib/record/RecordsWriter.mjs | 28 ++ .../{tfrecordify => recordify}/meta.mjs | 2 +- .../recordify.mjs} | 4 +- 10 files changed, 161 insertions(+), 290 deletions(-) create mode 100644 .vscode/settings.json rename rainfallwrangler/src/lib/io/{TFRecordWriter.mjs => RecordWrangler.mjs} (56%) create mode 100644 rainfallwrangler/src/lib/io/StreamHelpers.mjs create mode 100644 rainfallwrangler/src/lib/record/RecordBuilder.mjs create mode 100644 rainfallwrangler/src/lib/record/RecordsWriter.mjs rename rainfallwrangler/src/subcommands/{tfrecordify => recordify}/meta.mjs (91%) rename rainfallwrangler/src/subcommands/{tfrecordify/tfrecordify.mjs => recordify/recordify.mjs} (88%) diff --git a/.gitignore b/.gitignore index f07fd77..e0bbf7e 100644 --- a/.gitignore +++ b/.gitignore @@ -339,3 +339,32 @@ cython_debug/ *.code-workspace # End of https://www.toptal.com/developers/gitignore/api/python,node,git,visualstudiocode +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# Support for Project snippet scope +.vscode/*.code-snippets + +# Ignore code-workspaces +*.code-workspace + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/rainfallwrangler/package-lock.json b/rainfallwrangler/package-lock.json index ba933b1..c267806 100644 --- a/rainfallwrangler/package-lock.json +++ b/rainfallwrangler/package-lock.json @@ -13,8 +13,8 @@ "applause-cli": "^1.8.1", "gunzip-maybe": "^1.4.2", "pretty-ms": "^8.0.0", - "terrain50": "^1.10.1", - "tfrecord-stream": "^0.2.0" + "spawn-stream": "^1.0.2", + "terrain50": "^1.10.1" } }, "node_modules/@mapbox/node-pre-gyp": { @@ -163,60 +163,6 @@ "node": ">= 10" } }, - "node_modules/@protobufjs/aspromise": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", - "integrity": "sha1-m4sMxmPWaafY9vXQiToU00jzD78=" - }, - "node_modules/@protobufjs/base64": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", - "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==" - }, - "node_modules/@protobufjs/codegen": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", - "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==" - }, - "node_modules/@protobufjs/eventemitter": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", - "integrity": "sha1-NVy8mLr61ZePntCV85diHx0Ga3A=" - }, - "node_modules/@protobufjs/fetch": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", - "integrity": "sha1-upn7WYYUr2VwDBYZ/wbUVLDYTEU=", - "dependencies": { - "@protobufjs/aspromise": "^1.1.1", - "@protobufjs/inquire": "^1.1.0" - } - }, - "node_modules/@protobufjs/float": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", - "integrity": "sha1-Xp4avctz/Ap8uLKR33jIy9l7h9E=" - }, - "node_modules/@protobufjs/inquire": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", - "integrity": "sha1-/yAOPnzyQp4tyvwRQIKOjMY48Ik=" - }, - "node_modules/@protobufjs/path": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", - "integrity": "sha1-bMKyDFya1q0NzP0hynZz2Nf79o0=" - }, - "node_modules/@protobufjs/pool": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", - "integrity": "sha1-Cf0V8tbTq/qbZbw2ZQbWrXhG/1Q=" - }, - "node_modules/@protobufjs/utf8": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", - "integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA=" - }, "node_modules/@tensorflow/tfjs": { "version": "3.18.0", "resolved": "https://registry.npmjs.org/@tensorflow/tfjs/-/tfjs-3.18.0.tgz", @@ -475,25 +421,11 @@ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "node_modules/awaitify-stream": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/awaitify-stream/-/awaitify-stream-1.0.2.tgz", - "integrity": "sha512-JE6mrRIPxhBQWt9Mu4u2XJF9V9xp4p5+Uxif/Ol/s2TFC/9+offUE50j/KQdaVWwMyGlKrlsE7Ncq/yhc6AJ+w==" - }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, - "node_modules/bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "optional": true, - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -723,20 +655,6 @@ "node": ">=6" } }, - "node_modules/fast-crc32c": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/fast-crc32c/-/fast-crc32c-2.0.0.tgz", - "integrity": "sha512-LIREwygxtxzHF11oLJ4xIVKu/ZWNgrj/QaGvaSD8ZggIsgCyCtSYevlrpWVqNau57ZwezV8K1HFBSjQ7FcRbTQ==", - "optionalDependencies": { - "sse4_crc32": "^6.0.1" - } - }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "optional": true - }, "node_modules/form-data": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.1.tgz", @@ -1067,12 +985,6 @@ "proj4": "^2.3.10" } }, - "node_modules/node-addon-api": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-1.7.2.tgz", - "integrity": "sha512-ibPK3iA+vaY1eEjESkQkM0BbCqFOaZMiXRTtdB0u7b4djtY6JnsjvPdUHVMg6xQt3B8fpTTWHI9A+ADjM9frzg==", - "optional": true - }, "node_modules/node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -1211,31 +1123,6 @@ "wkt-parser": "^1.3.1" } }, - "node_modules/protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "hasInstallScript": true, - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - }, - "bin": { - "pbjs": "bin/pbjs", - "pbts": "bin/pbts" - } - }, "node_modules/pump": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", @@ -1333,25 +1220,16 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, + "node_modules/spawn-stream": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/spawn-stream/-/spawn-stream-1.0.2.tgz", + "integrity": "sha512-gRlPPUISTDoeWqQSJNnqAp9vjHc7b7m3aWRgp1dzL3c183f5Xx2lnVtvPRCQy+pyqfDSx5DZa95Yh7XtPuJE0Q==" + }, "node_modules/sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" }, - "node_modules/sse4_crc32": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/sse4_crc32/-/sse4_crc32-6.0.1.tgz", - "integrity": "sha512-FUTYXpLroqytNKWIfHzlDWoy9E4tmBB/RklNMy6w3VJs+/XEYAHgbiylg4SS43iOk/9bM0BlJ2EDpFAGT66IoQ==", - "hasInstallScript": true, - "optional": true, - "dependencies": { - "bindings": "^1.3.0", - "node-addon-api": "^1.3.0" - }, - "engines": { - "node": ">=4" - } - }, "node_modules/stream-shift": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.1.tgz", @@ -1450,16 +1328,6 @@ "nnng": "^1.0.0" } }, - "node_modules/tfrecord-stream": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/tfrecord-stream/-/tfrecord-stream-0.2.0.tgz", - "integrity": "sha512-B7TtEQUqR6u/0drbPZBUUaBFVp4QbEPBgSiUolSarKuyIYqM6AAgB/Tre5gECoSacEl39hgHBnXm+xjj9zzIcA==", - "dependencies": { - "awaitify-stream": "^1.0.2", - "fast-crc32c": "^2.0.0", - "protobufjs": "^6.8.4" - } - }, "node_modules/through2": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", @@ -1767,60 +1635,6 @@ } } }, - "@protobufjs/aspromise": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", - "integrity": "sha1-m4sMxmPWaafY9vXQiToU00jzD78=" - }, - "@protobufjs/base64": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", - "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==" - }, - "@protobufjs/codegen": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", - "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==" - }, - "@protobufjs/eventemitter": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", - "integrity": "sha1-NVy8mLr61ZePntCV85diHx0Ga3A=" - }, - "@protobufjs/fetch": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", - "integrity": "sha1-upn7WYYUr2VwDBYZ/wbUVLDYTEU=", - "requires": { - "@protobufjs/aspromise": "^1.1.1", - "@protobufjs/inquire": "^1.1.0" - } - }, - "@protobufjs/float": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", - "integrity": "sha1-Xp4avctz/Ap8uLKR33jIy9l7h9E=" - }, - "@protobufjs/inquire": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", - "integrity": "sha1-/yAOPnzyQp4tyvwRQIKOjMY48Ik=" - }, - "@protobufjs/path": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", - "integrity": "sha1-bMKyDFya1q0NzP0hynZz2Nf79o0=" - }, - "@protobufjs/pool": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", - "integrity": "sha1-Cf0V8tbTq/qbZbw2ZQbWrXhG/1Q=" - }, - "@protobufjs/utf8": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", - "integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA=" - }, "@tensorflow/tfjs": { "version": "3.18.0", "resolved": "https://registry.npmjs.org/@tensorflow/tfjs/-/tfjs-3.18.0.tgz", @@ -2040,25 +1854,11 @@ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "awaitify-stream": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/awaitify-stream/-/awaitify-stream-1.0.2.tgz", - "integrity": "sha512-JE6mrRIPxhBQWt9Mu4u2XJF9V9xp4p5+Uxif/Ol/s2TFC/9+offUE50j/KQdaVWwMyGlKrlsE7Ncq/yhc6AJ+w==" - }, "balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, - "bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "optional": true, - "requires": { - "file-uri-to-path": "1.0.0" - } - }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -2246,20 +2046,6 @@ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" }, - "fast-crc32c": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/fast-crc32c/-/fast-crc32c-2.0.0.tgz", - "integrity": "sha512-LIREwygxtxzHF11oLJ4xIVKu/ZWNgrj/QaGvaSD8ZggIsgCyCtSYevlrpWVqNau57ZwezV8K1HFBSjQ7FcRbTQ==", - "requires": { - "sse4_crc32": "^6.0.1" - } - }, - "file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "optional": true - }, "form-data": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.1.tgz", @@ -2534,12 +2320,6 @@ "proj4": "^2.3.10" } }, - "node-addon-api": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-1.7.2.tgz", - "integrity": "sha512-ibPK3iA+vaY1eEjESkQkM0BbCqFOaZMiXRTtdB0u7b4djtY6JnsjvPdUHVMg6xQt3B8fpTTWHI9A+ADjM9frzg==", - "optional": true - }, "node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -2637,26 +2417,6 @@ "wkt-parser": "^1.3.1" } }, - "protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "requires": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - } - }, "pump": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", @@ -2742,21 +2502,16 @@ "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, + "spawn-stream": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/spawn-stream/-/spawn-stream-1.0.2.tgz", + "integrity": "sha512-gRlPPUISTDoeWqQSJNnqAp9vjHc7b7m3aWRgp1dzL3c183f5Xx2lnVtvPRCQy+pyqfDSx5DZa95Yh7XtPuJE0Q==" + }, "sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" }, - "sse4_crc32": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/sse4_crc32/-/sse4_crc32-6.0.1.tgz", - "integrity": "sha512-FUTYXpLroqytNKWIfHzlDWoy9E4tmBB/RklNMy6w3VJs+/XEYAHgbiylg4SS43iOk/9bM0BlJ2EDpFAGT66IoQ==", - "optional": true, - "requires": { - "bindings": "^1.3.0", - "node-addon-api": "^1.3.0" - } - }, "stream-shift": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.1.tgz", @@ -2831,16 +2586,6 @@ "nnng": "^1.0.0" } }, - "tfrecord-stream": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/tfrecord-stream/-/tfrecord-stream-0.2.0.tgz", - "integrity": "sha512-B7TtEQUqR6u/0drbPZBUUaBFVp4QbEPBgSiUolSarKuyIYqM6AAgB/Tre5gECoSacEl39hgHBnXm+xjj9zzIcA==", - "requires": { - "awaitify-stream": "^1.0.2", - "fast-crc32c": "^2.0.0", - "protobufjs": "^6.8.4" - } - }, "through2": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", diff --git a/rainfallwrangler/package.json b/rainfallwrangler/package.json index 85cb773..eff478e 100644 --- a/rainfallwrangler/package.json +++ b/rainfallwrangler/package.json @@ -17,7 +17,7 @@ "applause-cli": "^1.8.1", "gunzip-maybe": "^1.4.2", "pretty-ms": "^8.0.0", - "terrain50": "^1.10.1", - "tfrecord-stream": "^0.2.0" + "spawn-stream": "^1.0.2", + "terrain50": "^1.10.1" } } diff --git a/rainfallwrangler/src/lib/io/TFRecordWriter.mjs b/rainfallwrangler/src/lib/io/RecordWrangler.mjs similarity index 56% rename from rainfallwrangler/src/lib/io/TFRecordWriter.mjs rename to rainfallwrangler/src/lib/io/RecordWrangler.mjs index 404f0b5..0f9a8db 100644 --- a/rainfallwrangler/src/lib/io/TFRecordWriter.mjs +++ b/rainfallwrangler/src/lib/io/RecordWrangler.mjs @@ -3,11 +3,12 @@ import fs from 'fs'; import path from 'path'; -import tfrecord from 'tfrecord-stream'; +import RecordBuilder from '../record/RecordBuilder.mjs'; +import RecordsWriter from '../record/RecordsWriter.mjs'; import pretty_ms from 'pretty-ms'; -class TFRecordWriter { - #builder = tfrecord.createBuilder(); +class RecordWrangler { + #builder = new RecordBuilder(); constructor(dirpath, count_per_file) { this.dirpath = dirpath; @@ -25,13 +26,14 @@ class TFRecordWriter { while(true) { i++; + console.log(`RecordWriter step ${i}`); + // Start writing to a new file when necessary if(writer == null || count_this_file > this.count_per_file) { if(writer !== null) await writer.close(); - const filepath_next = path.join(this.dirpath, `${i_file}.tfrecord`); - writer = await tfrecord.Writer.createFromStream( - fs.createWriteStream(filepath_next) - ); + const filepath_next = path.join(this.dirpath, `${i_file}.jsonl.gz`); + writer = new RecordsWriter(filepath_next); + console.log(`RecordWriter NEW FILE ${filepath_next}`); i_file++; } @@ -45,7 +47,7 @@ class TFRecordWriter { sample_water.value ); - await writer.writeExample(example_next); + await writer.write(example_next); process.stderr.write(`Elapsed: ${pretty_ms(new Date() - time_start)}, Written ${count_this_file}/${i_file}/${i} examples/files/total\r`); } @@ -53,16 +55,10 @@ class TFRecordWriter { } make_example(sample_radar, sample_water) { - console.log(`SAMPLE WATER ${sample_water.flat().length} RAINFALL ${sample_radar.flat().length}`); - const sample_radar_flat1 = sample_radar.flat(); - this.#builder.setFloats("rainfallradar", sample_radar_flat1.flat()); - this.#builder.setInteger("rainfallradar_width", sample_radar[0].length); - this.#builder.setInteger("rainfallradar_channelsize", sample_radar_flat1[0].length); - this.#builder.setFloats("waterdepth", sample_water.flat()); - this.#builder.setInteger("waterdepth_width", sample_water[0].length); - - return this.#builder.releaseExample(); + this.#builder.add("rainfallradar", sample_radar); + this.#builder.add("waterdepth", sample_water.flat); + return this.#builder.release(); } } -export default TFRecordWriter; +export default RecordWrangler; diff --git a/rainfallwrangler/src/lib/io/StreamHelpers.mjs b/rainfallwrangler/src/lib/io/StreamHelpers.mjs new file mode 100644 index 0000000..a15bc9f --- /dev/null +++ b/rainfallwrangler/src/lib/io/StreamHelpers.mjs @@ -0,0 +1,52 @@ +"use strict"; + +/** + * Writes data to a stream, automatically waiting for the drain event if asked. + * See also write_safe. + * @param {stream.Writable} stream_out The writable stream to write to. + * @param {string|Buffer|Uint8Array} data The data to write. + * @return {Promise} A promise that resolves when writing is complete. + * @private + */ +function write_safe(stream_out, data) { + return new Promise(function (resolve, reject) { + // Handle errors + let handler_error = (error) => { + stream_out.off("error", handler_error); + reject(error); + }; + stream_out.on("error", handler_error); + + if(typeof data == "string" ? stream_out.write(data, "utf-8") : stream_out.write(data)) { + // We're good to go + stream_out.off("error", handler_error); + resolve(); + } + else { + // We need to wait for the drain event before continuing + stream_out.once("drain", () => { + stream_out.off("error", handler_error); + resolve(); + }); + } + }); +} + +/** + * Waits for the given stream to end and finish writing data. + * NOTE: This function is not tested and guaranteed yet. (ref #10 the HydroIndexWriter bug) + * @param {stream.Writable} stream The stream to end. + * @param {Buffer|string} [chunk=undefined] Optional. A chunk to write when calling .end(). + * @return {Promise} A Promise that resolves when writing is complete. + * @private + */ +function end_safe(stream, chunk = undefined) { + return new Promise((resolve, _reject) => { + stream.once("finish", resolve); + if(typeof chunk == "undefined") stream.end(); + else stream.end(chunk); + }); +} + + +export { write_safe, end_safe }; diff --git a/rainfallwrangler/src/lib/record/RecordBuilder.mjs b/rainfallwrangler/src/lib/record/RecordBuilder.mjs new file mode 100644 index 0000000..0b7dd7e --- /dev/null +++ b/rainfallwrangler/src/lib/record/RecordBuilder.mjs @@ -0,0 +1,19 @@ +"use strict"; + +class RecordBuilder { + constructor() { + this.acc = new Map(); + } + + add(key, value) { + this.acc.set(key, value); + } + + release() { + const result = this.acc; + this.acc = new Map(); + return result; + } +} + +export default RecordBuilder; \ No newline at end of file diff --git a/rainfallwrangler/src/lib/record/RecordsWriter.mjs b/rainfallwrangler/src/lib/record/RecordsWriter.mjs new file mode 100644 index 0000000..16e0801 --- /dev/null +++ b/rainfallwrangler/src/lib/record/RecordsWriter.mjs @@ -0,0 +1,28 @@ +"use strict"; + +import fs from 'fs'; + +import SpawnStream from 'spawn-stream'; + +import { write_safe, end_safe } from '../io/StreamHelpers.mjs'; + +class RecordsWriter { + #stream_out = fs.createWriteStream(filepath); + #gzip = SpawnStream("gzip"); + + constructor(filepath) { + this.#gzip.pipe(this.#stream_out); + } + + async write(sample) { + console.log(sample); + await write_safe(this.#gzip, JSON.stringify(sample)); + } + + async close() { + await this.#gzip.close(); + await this.#stream_out.close(); + } +} + +export default RecordsWriter; \ No newline at end of file diff --git a/rainfallwrangler/src/subcommands/tfrecordify/meta.mjs b/rainfallwrangler/src/subcommands/recordify/meta.mjs similarity index 91% rename from rainfallwrangler/src/subcommands/tfrecordify/meta.mjs rename to rainfallwrangler/src/subcommands/recordify/meta.mjs index 6ffbae4..eea97db 100644 --- a/rainfallwrangler/src/subcommands/tfrecordify/meta.mjs +++ b/rainfallwrangler/src/subcommands/recordify/meta.mjs @@ -1,7 +1,7 @@ "use strict"; export default function(cli) { - cli.subcommand("tfrecordify", "Converts rianfall radar and water depth data to a directory of tfrecord files.") + cli.subcommand("recordify", "Converts rainfall radar and water depth data to a directory of .jsonl.gz files.") .argument("water", "Path to the water depths file, formatted as a stream of terrain50 objects. May or may not be gzipped.", null, "string") .argument("rainfall", "Path to the rainfall radar data, formatted as jsonl. May or may not be gzipped.", null, "string") .argument("count-file", "The number of records to store in each TFRecord file. See the documentation for the optimal value of this number (default: 4096).", 64*64) diff --git a/rainfallwrangler/src/subcommands/tfrecordify/tfrecordify.mjs b/rainfallwrangler/src/subcommands/recordify/recordify.mjs similarity index 88% rename from rainfallwrangler/src/subcommands/tfrecordify/tfrecordify.mjs rename to rainfallwrangler/src/subcommands/recordify/recordify.mjs index f3fc828..66c728d 100644 --- a/rainfallwrangler/src/subcommands/tfrecordify/tfrecordify.mjs +++ b/rainfallwrangler/src/subcommands/recordify/recordify.mjs @@ -3,7 +3,7 @@ import fs from 'fs'; import settings from '../../settings.mjs'; -import TFRecordWriter from '../../lib/io/TFRecordWriter.mjs'; +import RecordWrangler from '../../lib/io/RecordWrangler.mjs'; import RadarWrangler from '../../lib/RadarWrangler.mjs'; import Terrain50StreamReader from '../../lib/io/Terrain50StreamReader.mjs'; @@ -22,7 +22,7 @@ export default async function() { await fs.promises.mkdir(settings.output, { recursive: true }); console.log("DEBUG", settings); - const writer = new TFRecordWriter(settings.output, settings.count_file); + const writer = new RecordWrangler(settings.output, settings.count_file); const reader_radar = new RadarWrangler(settings.rainfall_pattern); const reader_water = new Terrain50StreamReader();