mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-16 14:43:01 +00:00
rainfall_stats: initial implementation
This might reveal why we are having problems: if most or all of the rainfall radar data consists of very small numbers, normalising might help.
parent 3131b4f7b3
commit fe57d6aab2

3 changed files with 86 additions and 3 deletions
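
The commit message suggests normalisation without implementing it. As a rough illustration only (the constants and function below are hypothetical, not part of this commit), z-score normalisation using statistics produced by the new rainfall-stats subcommand could look like this:

import tensorflow as tf

# Hypothetical values, as printed by the rainfall-stats subcommand below.
RAINFALL_MEAN = 0.01
RAINFALL_STDDEV = 0.05

def normalise_rainfall(rainfall: tf.Tensor) -> tf.Tensor:
	# Z-score normalisation: centre on the mean and rescale by the standard
	# deviation, so very small raw values end up roughly unit-scale.
	return (rainfall - RAINFALL_MEAN) / RAINFALL_STDDEV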
@@ -24,6 +24,7 @@ Available subcommands:
 	train-predict       Make predictions using a model trained through the train subcommand.
 	train-mono          Train a mono rainfall → water depth model.
 	train-mono-predict  Make predictions using a model trained through the train-mono subcommand.
+	rainfall-stats      Calculate statistics about the rainfall radar data.
 
 For more information, do src/index.py <subcommand> --help.
 """)
aimodel/src/subcommands/rainfall_stats.py (new executable file, 85 lines added)
@@ -0,0 +1,85 @@
+import sys
+import argparse
+
+from loguru import logger
+import tensorflow as tf
+
+from lib.dataset.batched_iterator import batched_iterator
+from lib.dataset.dataset_mono import dataset_mono_predict
+
+
+def parse_args():
+	parser = argparse.ArgumentParser(description="Calculate statistics about the rainfall radar data.")
+	parser.add_argument("--input", "-i", help="Path to input directory containing the .tfrecord(.gz) files to read. If a single file is passed instead, then only that file will be read.", required=True)
+	parser.add_argument("--reads-multiplier", type=float, help="Optional. The multiplier for the number of files we should read from at once: we read ceil(NUMBER_OF_CORES * multiplier) files in parallel. Start with 1.5; set it higher on systems with high read latency to avoid starving the GPU of data. SETTING THIS WILL SCRAMBLE THE ORDER OF THE DATASET. Default: 4.")
+	parser.add_argument("--batch_size", type=int, help="Optional. The batch size to calculate statistics with. Can be larger than normal, since we don't have a model loaded. Default: 1024.")
+	return parser
+
+
+def run(args):
+	if getattr(args, "reads_multiplier", None) is None:
+		args.reads_multiplier = 4
+	if getattr(args, "batch_size", None) is None:
+		args.batch_size = 1024
+	
+	sys.stderr.write(f"\n\n>>> This is TensorFlow {tf.__version__}\n\n\n")
+	
+	# Note that when using a directory of input files, the output order is NOT
+	# guaranteed to be the same. In fact, it probably won't be (see dataset_mono
+	# for more details).
+	dataset = dataset_mono_predict(
+		dirpath_input=args.input,
+		parallel_reads_multiplier=args.reads_multiplier
+	)
+	
+	logger.info("RAINFALL STATS")
+	
+	calc_mean = []
+	calc_stddev = []
+	calc_max = []
+	
+	for batch in batched_iterator(dataset, tensors_in_item=2, batch_size=args.batch_size):
+		rainfall_actual_batch, water_actual_batch = batch  # the water depth tensor is unused here
+		
+		# Flatten the rainfall tensor so statistics cover every value in the batch.
+		rainfall_flat = tf.reshape(rainfall_actual_batch, [-1])
+		
+		batch_mean = tf.math.reduce_mean(rainfall_flat)
+		batch_stddev = tf.math.reduce_std(rainfall_flat)
+		batch_max = tf.math.reduce_max(rainfall_flat)
+		
+		print("BATCH", "mean", batch_mean, "stddev", batch_stddev, "max", batch_max)
+		
+		calc_mean.append(batch_mean)
+		calc_stddev.append(batch_stddev)
+		calc_max.append(batch_max)
+	
+	# Mean of the per-batch means; exact only if every batch is the same size.
+	calc_mean = tf.math.reduce_mean(tf.stack(calc_mean))
+	calc_max = tf.math.reduce_max(tf.stack(calc_max))
+	
+	# Standard deviations can't simply be averaged, so print the per-batch values.
+	print("STDDEV VALUES", tf.stack(calc_stddev).numpy().tolist())
+	print("OVERALL", "mean", calc_mean.numpy().tolist(), "max", calc_max.numpy().tolist())
+	
+	logger.info(">>> Complete")
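
Note that the script prints the per-batch standard deviations rather than a single overall figure: standard deviations cannot simply be averaged, and the mean of per-batch means is only exact when every batch is the same size. A sketch of an exact alternative that accumulates running sums instead (the function name is illustrative, not part of the commit):

import tensorflow as tf

def overall_stats(batches):
	# Accumulate count, sum, and sum of squares so the overall mean and
	# stddev can be recovered exactly, regardless of batch sizes.
	count = tf.constant(0.0)
	total = tf.constant(0.0)
	total_sq = tf.constant(0.0)
	for rainfall_flat in batches:
		count += tf.cast(tf.size(rainfall_flat), tf.float32)
		total += tf.math.reduce_sum(rainfall_flat)
		total_sq += tf.math.reduce_sum(tf.math.square(rainfall_flat))
	mean = total / count
	# Var[x] = E[x^2] - E[x]^2
	stddev = tf.math.sqrt(total_sq / count - tf.math.square(mean))
	return mean, stddev

Through the subcommand dispatcher, an invocation along the lines of src/index.py rainfall-stats --input /path/to/tfrecords --batch_size 2048 should then print the per-batch and overall statistics (the input path here is of course installation-specific).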
@ -149,15 +149,12 @@ def do_jsonl(args, ai, dataset, model_params, do_argmax=False):
|
||||||
for batch in batched_iterator(dataset, tensors_in_item=2, batch_size=model_params["batch_size"]):
|
for batch in batched_iterator(dataset, tensors_in_item=2, batch_size=model_params["batch_size"]):
|
||||||
rainfall_actual_batch, water_actual_batch = batch
|
rainfall_actual_batch, water_actual_batch = batch
|
||||||
|
|
||||||
print("DEBUG:do_jsonl rainfall_actual_batch", rainfall_actual_batch.shape)
|
|
||||||
print("DEBUG:do_jsonl water_actual_batch", water_actual_batch.shape)
|
|
||||||
water_predict_batch = ai.embed(rainfall_actual_batch)
|
water_predict_batch = ai.embed(rainfall_actual_batch)
|
||||||
water_actual_batch = tf.unstack(water_actual_batch, axis=0)
|
water_actual_batch = tf.unstack(water_actual_batch, axis=0)
|
||||||
rainfall_actual_batch = tf.unstack(rainfall_actual_batch, axis=0)
|
rainfall_actual_batch = tf.unstack(rainfall_actual_batch, axis=0)
|
||||||
|
|
||||||
i_batch = 0
|
i_batch = 0
|
||||||
for water_predict in water_predict_batch:
|
for water_predict in water_predict_batch:
|
||||||
print("DEBUG:do_jsonl water_predict", water_predict.shape)
|
|
||||||
# [ width, height, softmax_probabilities ] → [ batch, width, height ]
|
# [ width, height, softmax_probabilities ] → [ batch, width, height ]
|
||||||
if do_argmax:
|
if do_argmax:
|
||||||
water_predict = tf.math.argmax(water_predict, axis=-1)
|
water_predict = tf.math.argmax(water_predict, axis=-1)
|
||||||
|
|
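
As an aside, the argmax step in the hunk above collapses the trailing softmax-probability axis into per-pixel class indices. A minimal self-contained illustration with made-up shapes:

import tensorflow as tf

# A made-up [height, width, num_classes] tensor of softmax probabilities.
water_predict = tf.random.uniform([128, 128, 2])

# argmax over the final axis keeps, for each pixel, the index of its most
# probable class, giving a [height, width] segmentation map.
water_predict = tf.math.argmax(water_predict, axis=-1)
print(water_predict.shape)  # (128, 128)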