From 4dd66a9bdfbaddc85527dddeb7c97dcec3f79e54 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 12 Dec 2024 23:36:54 +0000
Subject: [PATCH] dataset_mono: add water_threshold=None support

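This is for the stupid pointless regression thing: when water_threshold=None, the water depth data is no longer thresholded (binarised) in parse_item.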
Like just let me get on with sample weighting and accounting for extreme event bias already!
---
 aimodel/src/lib/dataset/dataset_mono.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/aimodel/src/lib/dataset/dataset_mono.py b/aimodel/src/lib/dataset/dataset_mono.py
index 1a428be..cd4043c 100644
--- a/aimodel/src/lib/dataset/dataset_mono.py
+++ b/aimodel/src/lib/dataset/dataset_mono.py
@@ -23,7 +23,7 @@ def parse_item(metadata, output_size=100, input_size="same", water_threshold=0.1
 		metadata (dict): Metadata about the shapes of the dataset - rainfall radar, water depth data etc. This should be read automaticallyfrom the metadata.json file that's generated by previous pipeline steps that I forget at this time.
 		output_size (int): The desired output size of the water depth data.
 		input_size (str or int): The desired input size of the rainfall radar data. If "same", it will be set to the same as the output_size.
-		water_threshold (float): The threshold to use for binarizing the water depth data.
+		water_threshold (float|None): The threshold to use for binarizing the water depth data. If None, then no thresholding will be done. IMPORTANT: setting `water_threshold=None` will NOT remove the channels! You gotta do that yourself!
 		water_bins (int): The number of bins to use for the water depth data (e.g. for one-hot encoding).
 		heightmap (tf.Tensor): An optional heightmap to include as an additional channel in the rainfall radar data.
 		rainfall_scale_up (int): A factor to scale up the rainfall radar data.
@@ -113,7 +113,9 @@ def parse_item(metadata, output_size=100, input_size="same", water_threshold=0.1
 		# water = tf.cast(tf.math.greater_equal(water, water_threshold), dtype=tf.int32)
 		# water = tf.one_hot(water, water_bins, axis=-1, dtype=tf.int32)
 		# SPARSE [LOSS dice / sparse cross entropy]
-		water = tf.cast(tf.math.greater_equal(water, water_threshold), dtype=tf.float32)
+		if water_threshold is not None: # if water_threshold=None, then regression mode
+			water = tf.cast(tf.math.greater_equal(water, water_threshold), dtype=tf.float32)
+			# BUG it may be a problem we're [height, width, channel] here rather than [height, width], depending on how dlr works
 		if do_remove_isolated_pixels:
 			water = remove_isolated_pixels(water)