diff --git a/aimodel/src/parse_args.py b/aimodel/src/parse_args.py
index c7c6053..c8aa9f7 100644
--- a/aimodel/src/parse_args.py
+++ b/aimodel/src/parse_args.py
@@ -24,6 +24,7 @@ Available subcommands:
 	train-predict		Make predictions using a model trained through the train subcommand.
 	train-mono		Train a mono rainfall → water depth model.
 	train-mono-predict	Make predictions using a model trained through the train-mono subcommand.
+	rainfall-stats		Calculate statistics about the rainfall radar data.
 
 For more information, do src/index.py --help.
 """)
diff --git a/aimodel/src/subcommands/rainfall_stats.py b/aimodel/src/subcommands/rainfall_stats.py
new file mode 100755
index 0000000..8bd8d1d
--- /dev/null
+++ b/aimodel/src/subcommands/rainfall_stats.py
@@ -0,0 +1,79 @@
+import io
+import json
+import os
+import sys
+import argparse
+import re
+
+from loguru import logger
+import tensorflow as tf
+from lib.dataset.batched_iterator import batched_iterator
+
+from lib.io.handle_open import handle_open
+from lib.ai.RainfallWaterMono import RainfallWaterMono
+from lib.dataset.dataset_mono import dataset_mono_predict
+from lib.io.find_paramsjson import find_paramsjson
+from lib.io.readfile import readfile
+from lib.vis.segmentation_plot import segmentation_plot
+
+
+MODE_JSONL = 1
+MODE_PNG = 2
+
+def parse_args():
+	"""Build the argument parser for the rainfall-stats subcommand."""
+	parser = argparse.ArgumentParser(description="Calculate statistics about the rainfall radar data.")
+	# parser.add_argument("--config", "-c", help="Filepath to the TOML config file to load.", required=True)
+	parser.add_argument("--input", "-i", help="Path to input directory containing the .tfrecord(.gz) files to predict for. If a single file is passed instead, then only that file will be converted.", required=True)
+	parser.add_argument("--reads-multiplier", help="Optional. The multiplier for the number of files we should read from at once. Defaults to 4. When using this start with 1.5, which means read ceil(NUMBER_OF_CORES * 1.5). Set to a higher number of systems with high read latency to avoid starving the GPU of data. SETTING THIS WILL SCRAMBLE THE ORDER OF THE DATASET.")
+	parser.add_argument("--batch_size", type=int, help="Optional. The batch size to calculate statistics with. Can be larger than normal since we don't have a model loaded. Default: 1024")
+	return parser
+
+def run(args):
+	"""Calculate and print per-batch and overall mean / stddev / max of the rainfall radar data."""
+	# argparse converts --reads-multiplier into the attribute "reads_multiplier".
+	if (not hasattr(args, "reads_multiplier")) or args.reads_multiplier == None:
+		args.reads_multiplier = 4
+	if (not hasattr(args, "batch_size")) or args.batch_size == None:
+		args.batch_size = 1024
+	
+	
+	sys.stderr.write(f"\n\n>>> This is TensorFlow {tf.__version__}\n\n\n")
+	
+	# Note that if using a directory of input files, the output order is NOT GUARANTEED TO BE THE SAME. In fact, it probably won't be (see dataset_mono for more details).
+	dataset = dataset_mono_predict(
+		dirpath_input=args.input,
+		parallel_reads_multiplier=args.reads_multiplier
+	)
+	
+	logger.info("RAINFALL STATS")
+	
+	calc_mean = []
+	calc_stddev = []
+	calc_max = []
+	
+	for batch in batched_iterator(dataset, tensors_in_item=2, batch_size=args.batch_size):
+		rainfall_actual_batch, water_actual_batch = batch
+		
+		rainfall_flat = tf.reshape(rainfall_actual_batch, [-1])
+		
+		batch_mean = tf.math.reduce_mean(rainfall_flat)
+		batch_stddev = tf.math.reduce_std(rainfall_flat)
+		batch_max = tf.math.reduce_max(rainfall_flat)
+		
+		print("BATCH", "mean", batch_mean, "stddev", batch_stddev, "max", batch_max)
+		
+		calc_mean.append(batch_mean)
+		calc_stddev.append(batch_stddev)
+		calc_max.append(batch_max)
+	
+	
+	# NOTE(review): mean-of-means is only exact when every batch is the same size; the final batch may be smaller — confirm acceptable for these stats.
+	calc_mean = tf.math.reduce_mean(tf.stack(calc_mean))
+	calc_max = tf.math.reduce_max(tf.stack(calc_max))
+	
+	print("STDDEV VALUES", tf.stack(calc_stddev).numpy().tolist())
+	print("OVERALL", "mean", calc_mean.numpy().tolist(), "max", calc_max.numpy().tolist())
+	
+	
+	logger.info(">>> Complete")
\ No newline at end of file
diff --git a/aimodel/src/subcommands/train_mono_predict.py b/aimodel/src/subcommands/train_mono_predict.py
index c0e169f..4e3c5c1 100644
--- a/aimodel/src/subcommands/train_mono_predict.py
+++ b/aimodel/src/subcommands/train_mono_predict.py
@@ -149,15 +149,12 @@ def do_jsonl(args, ai, dataset, model_params, do_argmax=False):
 
 	for batch in batched_iterator(dataset, tensors_in_item=2, batch_size=model_params["batch_size"]):
 		rainfall_actual_batch, water_actual_batch = batch
-		print("DEBUG:do_jsonl rainfall_actual_batch", rainfall_actual_batch.shape)
-		print("DEBUG:do_jsonl water_actual_batch", water_actual_batch.shape)
 		water_predict_batch = ai.embed(rainfall_actual_batch)
 		water_actual_batch = tf.unstack(water_actual_batch, axis=0)
 		rainfall_actual_batch = tf.unstack(rainfall_actual_batch, axis=0)
 
 		i_batch = 0
 		for water_predict in water_predict_batch:
-			print("DEBUG:do_jsonl water_predict", water_predict.shape)
 			# [ width, height, softmax_probabilities ] → [ batch, width, height ]
 			if do_argmax:
 				water_predict = tf.math.argmax(water_predict, axis=-1)