From 2ccc1be4149d989ffed9db2eb2496634b91b7e92 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs
Date: Thu, 28 Jul 2022 19:48:25 +0100
Subject: [PATCH] =?UTF-8?q?json2tfrecord:=20write=20(untested=20python=20t?=
 =?UTF-8?q?o=20convert=20.jsonl=20=E2=86=92=20.tfrecord?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/lib/python/json2tfrecord.py           | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 rainfallwrangler/src/lib/python/json2tfrecord.py

diff --git a/rainfallwrangler/src/lib/python/json2tfrecord.py b/rainfallwrangler/src/lib/python/json2tfrecord.py
new file mode 100644
index 0000000..eedf795
--- /dev/null
+++ b/rainfallwrangler/src/lib/python/json2tfrecord.py
@@ -0,0 +1,75 @@
+import sys
+import os
+import gzip
+import json
+import argparse
+
+import tensorflow as tf
+
+
+def parse_args():
+    """Parse the command-line arguments of the converter.
+
+    Returns:
+        argparse.Namespace: carries the ``input`` and ``output`` paths.
+    """
+    parser = argparse.ArgumentParser(description="Convert a generated .jsonl.gz file to a .tfrecord.gz file")
+    parser.add_argument("--input", "-i", help="Path to the input file to convert.", required=True)
+    parser.add_argument("--output", "-o", help="Path to the output file to write to.", required=True)
+
+    # sys.argv[0] is the script itself; slicing from 2 dropped the first flag.
+    return parser.parse_args(args=sys.argv[1:])
+
+
+def _tensor_feature(values):
+    """Wrap a nested list of numbers as a serialised float32 tensor feature."""
+    tensor = tf.constant(values, dtype=tf.float32)
+    # serialize_tensor returns a string tensor; BytesList needs raw bytes.
+    serialised = tf.io.serialize_tensor(tensor).numpy()
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[serialised]))
+
+
+def convert(filepath_in, filepath_out):
+    """Convert a gzipped JSON-lines file into a gzipped TFRecord file.
+
+    Every non-blank input line must be a JSON object with "rainfallradar"
+    and "waterdepth" keys. Each one becomes a tf.train.Example whose two
+    features hold the float32 tensors as serialised bytes.
+
+    Args:
+        filepath_in: Path to the .jsonl.gz file to read.
+        filepath_out: Path to the .tfrecord.gz file to write. Readers must
+            pass compression_type="GZIP" to open it.
+    """
+    options = tf.io.TFRecordOptions(compression_type="GZIP")
+    with gzip.open(filepath_in, "rt", encoding="utf-8") as handle, \
+            tf.io.TFRecordWriter(filepath_out, options=options) as writer:
+        for line in handle:
+            # Lines keep their trailing newline, so strip before testing.
+            line = line.strip()
+            if not line:
+                continue
+
+            # json.loads yields a dict: use key lookup, not attributes.
+            obj = json.loads(line)
+
+            record = tf.train.Example(features=tf.train.Features(feature={
+                "rainfallradar": _tensor_feature(obj["rainfallradar"]),
+                "waterdepth": _tensor_feature(obj["waterdepth"]),
+            }))
+            writer.write(record.SerializeToString())
+
+
+def main():
+    """Validate the input path, then run the conversion."""
+    args = parse_args()
+
+    if not os.path.exists(args.input):
+        print(f"Error: No such input file {args.input}", file=sys.stderr)
+        sys.exit(2)
+
+    convert(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file