diff --git a/aimodel/slurm-TEST-deeplabv3p-rainfall.job b/aimodel/slurm-TEST-deeplabv3p-rainfall.job
index 89d1eea..7959229 100755
--- a/aimodel/slurm-TEST-deeplabv3p-rainfall.job
+++ b/aimodel/slurm-TEST-deeplabv3p-rainfall.job
@@ -32,6 +32,7 @@ show_help() {
 	echo -e "    PATH_COLOURMAP	The path to the colourmap for predictive purposes." >&2;
 	echo -e "    PATH_CHECKPOINT	The path to a checkcpoint to load. If specified, a model will be loaded instead of being trained." >&2;
 	echo -e "    STEPS_PER_EPOCH	The number of steps to consider an epoch. Defaults to None, which means use the entire dataset." >&2;
+	echo -e "    NO_REMOVE_ISOLATED_PIXELS	Set to any value to stop the engine from removing isolated pixels - that is, water pixels with no adjacent water pixels, either orthogonally or diagonally." >&2;
 	echo -e "    EPOCHS	The number of epochs to train for." >&2;
 	echo -e "    PREDICT_COUNT	The number of items from the (SCRAMBLED) dataset to make a prediction for." >&2;
 	echo -e "    POSTFIX	Postfix to append to the output dir (auto calculated)." >&2;
@@ -59,7 +60,7 @@ DIR_OUTPUT="output/$(date -u --rfc-3339=date)_${CODE}";
 echo -e ">>> Additional args: ${ARGS}";
 
 export PATH=$HOME/software/bin:$PATH;
-export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT;
+export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS;
 
 echo ">>> Installing requirements";
 conda run -n py38 pip install -q -r requirements.txt;
diff --git a/aimodel/src/deeplabv3_plus_test_rainfall.py b/aimodel/src/deeplabv3_plus_test_rainfall.py
index e40315f..b4e0804 100755
--- a/aimodel/src/deeplabv3_plus_test_rainfall.py
+++ b/aimodel/src/deeplabv3_plus_test_rainfall.py
@@ -25,6 +25,7 @@ DIR_RAINFALLWATER = os.environ["DIR_RAINFALLWATER"]
 PATH_HEIGHTMAP = os.environ["PATH_HEIGHTMAP"]
 PATH_COLOURMAP = os.environ["PATH_COLOURMAP"]
 STEPS_PER_EPOCH = int(os.environ["STEPS_PER_EPOCH"]) if "STEPS_PER_EPOCH" in os.environ else None
+REMOVE_ISOLATED_PIXELS = False if "NO_REMOVE_ISOLATED_PIXELS" in os.environ else True
 EPOCHS = int(os.environ["EPOCHS"]) if "EPOCHS" in os.environ else 25
 PREDICT_COUNT = int(os.environ["PREDICT_COUNT"]) if "PREDICT_COUNT" in os.environ else 4
 
@@ -42,6 +43,7 @@ logger.info(f"> DIR_RAINFALLWATER {DIR_RAINFALLWATER}")
 logger.info(f"> PATH_HEIGHTMAP {PATH_HEIGHTMAP}")
 logger.info(f"> PATH_COLOURMAP {PATH_COLOURMAP}")
 logger.info(f"> STEPS_PER_EPOCH {STEPS_PER_EPOCH}")
+logger.info(f"> REMOVE_ISOLATED_PIXELS {REMOVE_ISOLATED_PIXELS} [NO_REMOVE_ISOLATED_PIXELS]")
 logger.info(f"> EPOCHS {EPOCHS}")
 logger.info(f"> DIR_OUTPUT {DIR_OUTPUT}")
 logger.info(f"> PATH_CHECKPOINT {PATH_CHECKPOINT}")
@@ -56,6 +58,7 @@ dataset_train, dataset_validate = dataset_mono(
 	output_size=IMAGE_SIZE,
 	input_size="same",
 	filepath_heightmap=PATH_HEIGHTMAP,
+	remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
 )
 
 logger.info("Train Dataset:", dataset_train)
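Note that the new flag is presence-based: setting NO_REMOVE_ISOLATED_PIXELS to any value, even 0 or an empty string, disables the filter, because only membership in os.environ is tested. A minimal sketch of this gotcha (illustrative only, not part of the commit):

	import os

	# Presence alone disables the filter - the value itself is never inspected.
	os.environ["NO_REMOVE_ISOLATED_PIXELS"] = "0"  # even "0" counts as set
	REMOVE_ISOLATED_PIXELS = False if "NO_REMOVE_ISOLATED_PIXELS" in os.environ else True
	assert REMOVE_ISOLATED_PIXELS is False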
diff --git a/aimodel/src/lib/dataset/dataset_mono.py b/aimodel/src/lib/dataset/dataset_mono.py
index 2abd24c..2486964 100644
--- a/aimodel/src/lib/dataset/dataset_mono.py
+++ b/aimodel/src/lib/dataset/dataset_mono.py
@@ -9,13 +9,13 @@ import tensorflow as tf
 
 from lib.dataset.read_metadata import read_metadata
 
 from ..io.readfile import readfile
-from .shuffle import shuffle
 from .parse_heightmap import parse_heightmap
-
+from .primitives.shuffle import shuffle
+from .primitives.remove_isolated_pixels import remove_isolated_pixels as remove_isolated_pixels_impl # aliased, since the parse_item() keyword argument below shadows the bare name
 
 
 # TO PARSE:
-def parse_item(metadata, output_size=100, input_size="same", water_threshold=0.1, water_bins=2, heightmap=None, rainfall_scale_up=1):
+def parse_item(metadata, output_size=100, input_size="same", water_threshold=0.1, water_bins=2, heightmap=None, rainfall_scale_up=1, remove_isolated_pixels=True):
 	if input_size == "same":
 		input_size = output_size # This is almost always the case with e.g. the DeepLabV3+ model
@@ -91,11 +91,13 @@ def parse_item(metadata, output_size=100, input_size="same", water_threshold=0.1
 	print("DEBUG:dataset BEFORE_SQUEEZE water", water.shape)
 	water = tf.squeeze(water)
 	print("DEBUG:dataset AFTER_SQUEEZE water", water.shape)
-	# LOSS cross entropy
+	# ONE-HOT [LOSS cross entropy]
 	# water = tf.cast(tf.math.greater_equal(water, water_threshold), dtype=tf.int32)
 	# water = tf.one_hot(water, water_bins, axis=-1, dtype=tf.int32)
-	# LOSS dice
+	# SPARSE [LOSS dice]
 	water = tf.cast(tf.math.greater_equal(water, water_threshold), dtype=tf.float32)
+	if remove_isolated_pixels:
+		water = remove_isolated_pixels_impl(water)
 	
 	print("DEBUG DATASET_OUT:rainfall shape", rainfall.shape)
 	print("DEBUG DATASET_OUT:water shape", water.shape)
diff --git a/aimodel/src/lib/dataset/primitives/remove_isolated_pixels.py b/aimodel/src/lib/dataset/primitives/remove_isolated_pixels.py
new file mode 100644
index 0000000..ec61dd7
--- /dev/null
+++ b/aimodel/src/lib/dataset/primitives/remove_isolated_pixels.py
@@ -0,0 +1,19 @@
+import tensorflow as tf
+
+
+def remove_isolated_pixels(binarised_water_labels):
+	# We expect the data in the form [ height, width ], where each value is either 1 or 0 (i.e. BEFORE any one-hot encoding).
+	# Add batch and channel dimensions, since tf.nn.conv2d expects [ batch, height, width, channels ].
+	data = tf.expand_dims(tf.expand_dims(binarised_water_labels, axis=0), axis=-1)
+	
+	# Convolving with a 3x3 kernel of ones sums each pixel's 3x3 neighbourhood, including the pixel itself.
+	conv = tf.squeeze(tf.nn.conv2d(data, tf.ones([3,3,1,1]), 1, "SAME"))
+	
+	# A water pixel whose neighbourhood sum is exactly 1 has no water neighbours, so mark it for removal.
+	data_map_remove = tf.cast(tf.math.equal(tf.math.multiply(
+		binarised_water_labels,
+		conv
+	), 1), tf.float32)
+	
+	# Subtract the isolated pixels, leaving all other water pixels untouched.
+	return tf.math.subtract(binarised_water_labels, data_map_remove)
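To see the new primitive in action, here is a quick worked example (illustrative only, not part of the commit; the import path assumes the working directory is aimodel/src). A water pixel survives only when its 3x3 neighbourhood sum exceeds 1, i.e. it touches at least one other water pixel orthogonally or diagonally:

	import tensorflow as tf

	from lib.dataset.primitives.remove_isolated_pixels import remove_isolated_pixels

	water = tf.constant([
		[1, 0, 0, 0],
		[0, 0, 1, 0],
		[0, 1, 0, 0],
		[0, 0, 0, 1],
	], dtype=tf.float32)

	# The lone pixels at (0,0) and (3,3) have a neighbourhood sum of 1 and are removed;
	# the diagonal pair at (1,2) and (2,1) each sum to 2 and survive.
	print(remove_isolated_pixels(water))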