Merge branch 'main' of github.com:sbrl/research-rainfallradar

Starbeamrainbowlabs 2024-12-12 19:36:09 +00:00
commit 7e00ede747
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
4 changed files with 30 additions and 13 deletions


@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 #SBATCH -J DeepRain
 #SBATCH -N 1
-#SBATCH -n 14
+#SBATCH -n 9
 #SBATCH --gres=gpu:1
 #SBATCH -o %j.%N.%a.deeplab-rainfall.out.log
 #SBATCH -e %j.%N.%a.deeplab-rainfall.err.log
@@ -21,7 +21,21 @@ command_exists() {
 #######################################################
-if command_exists module; then
+if [[ "${SLURM_CLUSTER_NAME}" == "cs-cluster" ]]; then
+	echo "[slurm_runner] csgpu cluster detected, applying CUDA workarounds" >&2;
+	# Fix "Could not load library libcublasLt.so.12. Error: libcublasLt.so.12: cannot open shared object file: No such file or directory" error
+	if [[ -d "${HOME}/cuda" ]]; then
+		echo "[slurm_runner] sourcing extra CUDA setup script" >&2;
+		#shellcheck source=/dev/null
+		source "${HOME}/cuda/activate.sh";
+	fi
+	export XLA_FLAGS="--xla_gpu_cuda_data_dir=/usr/lib/cuda"; # weird... this wasn't needed before? Fixes
+	echo "[slurm_runner] set XLA_FLAGS=\"${XLA_FLAGS}\"" >&2;
+fi
+
+# No modules on the CS cluster
+if command_exists module && [[ "${SLURM_CLUSTER_NAME}" != "cs-cluster" ]]; then
 	module load utilities/multi
 	module load readline/7.0
 	module load gcc/10.2.0
@@ -30,7 +44,7 @@ if command_exists module; then
 	module load python/anaconda/4.6/miniconda/3.7
 else
-	echo "[slurm_runner]: module command not present, not loading modules" >&2;
+	echo "[slurm_runner]: module command not present or csgpu cluster detected, not loading modules" >&2;
 fi
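
Note: one way to confirm the CUDA workarounds above actually took effect is to ask TensorFlow which GPUs it can see before training starts. A hypothetical sanity check, not part of this commit:

import tensorflow as tf

# If libcublasLt.so.12 still fails to load, TensorFlow typically falls
# back to CPU-only mode and this list comes back empty.
gpus = tf.config.list_physical_devices("GPU")
print(f"Visible GPUs: {gpus}")
assert gpus, "No GPU visible - check ~/cuda/activate.sh and XLA_FLAGS"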
@@ -74,7 +88,7 @@ show_help() {
 	exit;
 }
 
-DIR_RAINFALLWATER="${DIR_RAINFALLWATER:-$HOME/rainfallwater_records_tfrecord}";
+DIR_RAINFALLWATER="${DIR_RAINFALLWATER:-$HOME/data/rainfallwater_records_tfrecord}";
 PATH_HEIGHTMAP="${PATH_HEIGHTMAP:-$HOME/data/terrain50-nimrodsized.json.gz}";
 PATH_COLOURMAP="${PATH_COLOURMAP:-$HOME/data/instance-level-human-parsing/instance-level_human_parsing/human_colormap.mat}";
@@ -92,6 +106,9 @@ mkdir -p "${DIR_OUTPUT}";
 echo -e ">>> NOW: $(date)";
 echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
 echo -e ">>> Additional args: ${ARGS}";
+echo -e ">>> GIT COMMIT: $(git rev-parse HEAD)";
+{ echo "*****"; git rev-parse HEAD; git status; git log -1 | cat; } >>"${DIR_OUTPUT}/commit.txt";
+
 export PATH=$HOME/software/bin:$PATH;
 export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;


@@ -20,7 +20,7 @@ import matplotlib.pyplot as plt
 import tensorflow as tf
 
-import lib.primitives.env
+import lib.primitives.env as env
 from lib.dataset.dataset_mono import dataset_mono, dataset_mono_predict
 from lib.ai.components.LossCrossEntropyDice import LossCrossEntropyDice
 from lib.ai.components.MetricDice import metric_dice_coefficient as dice_coefficient
@@ -40,7 +40,7 @@ logger.info(f"Starting at {str(datetime.now().isoformat())}")
 # ███████ ██ ████ ████ ██ ██ ██ ██████ ██ ████ ██ ██
 
 IMAGE_SIZE = env.read("IMAGE_SIZE", int, 128) # was 512; 128 is the highest power of 2 that fits the data
-BATCH_SIZE = env.read("BATCH_SIZE", int, 64)
+BATCH_SIZE = env.read("BATCH_SIZE", int, 32)
 NUM_CLASSES = 2
 DIR_RAINFALLWATER = env.read("DIR_RAINFALLWATER", str)
 PATH_HEIGHTMAP = env.read("PATH_HEIGHTMAP", str)
@@ -48,10 +48,10 @@ PATH_COLOURMAP = env.read("PATH_COLOURMAP", str)
 PARALLEL_READS = env.read("PARALLEL_READS", float, 1.5)
 STEPS_PER_EPOCH = env.read("STEPS_PER_EPOCH", int, None)
 REMOVE_ISOLATED_PIXELS = env.read("NO_REMOVE_ISOLATED_PIXELS", bool, True)
-EPOCHS = env.read("EPOCHS", int, 50)
+EPOCHS = env.read("EPOCHS", int, 25)
 LOSS = env.read("LOSS", str, "cross-entropy-dice") # other possible values: cross-entropy
 DICE_LOG_COSH = env.read("DICE_LOG_COSH", bool, False)
-LEARNING_RATE = env.read("LEARNING_RATE", float, 0.001)
+LEARNING_RATE = env.read("LEARNING_RATE", float, 0.00001)
 WATER_THRESHOLD = env.read("WATER_THRESHOLD", float, 0.1)
 UPSAMPLE = env.read("UPSAMPLE", int, 2)
 SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
@@ -59,7 +59,7 @@ SPLIT_TEST = env.read("SPLIT_TEST", float, 0)
 # NOTE: RANDSEED is declared and handled in src/lib/dataset/primitives/shuffle.py
 STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
-JIT_COMPILE = env.read("JIT_COMPILE", bool, False)
+JIT_COMPILE = env.read("JIT_COMPILE", bool, True)
 DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
 PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
 PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
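
Note: lib/primitives/env itself is not shown in this diff, but from the call sites above, env.read(name, type, default) plausibly reads an environment variable, casts it to the given type, and falls back to the default when unset. A minimal sketch under those assumptions (the real helper may well differ, e.g. in how it parses booleans):

import os

def read(name, kind, default=None):
	# Assumed semantics: unset -> default, otherwise cast to the given type.
	raw = os.environ.get(name)
	if raw is None:
		return default
	if kind is bool:
		# Assumption: empty/"0"/"false"/"no" count as False.
		return raw.strip().lower() not in ("", "0", "false", "no")
	return kind(raw)

# Usage matching the script above:
# BATCH_SIZE = read("BATCH_SIZE", int, 32)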
@@ -95,7 +95,7 @@ if not PREDICT_AS_ONE:
 		do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
 		parallel_reads_multiplier=PARALLEL_READS,
 		percentage_validate=SPLIT_VALIDATE,
-		percentage_test=SPLIT_TESTs
+		percentage_test=SPLIT_TEST
 	)
 	
 	logger.info("Train Dataset:", dataset_train)


@@ -165,7 +165,7 @@ def dataset_mono(dirpath_input, percentage_validate=0.2, percentage_test=0, **kw
 	filepaths_count = len(filepaths)
 	split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
-	split_validatetest=math.floor(filepaths * (1 - percentage_test))
+	split_validatetest=math.floor(filepaths_count * (1 - percentage_test))
 	
 	filepaths_train = filepaths[:split_trainvalidate]
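
Note: the old line passed the list filepaths to math.floor instead of the integer filepaths_count, which raises a TypeError at runtime. With the fix, the two indices slice the file list into train/validate/test splits. A worked example with assumed values (100 files, using the script defaults SPLIT_VALIDATE=0.2, SPLIT_TEST=0):

import math

filepaths_count = 100      # assumed example value
percentage_validate = 0.2
percentage_test = 0.0

split_trainvalidate = math.floor(filepaths_count * (1 - (percentage_validate + percentage_test)))
split_validatetest = math.floor(filepaths_count * (1 - percentage_test))
print(split_trainvalidate, split_validatetest)  # 80 100
# filepaths[:80] -> train, filepaths[80:100] -> validate, filepaths[100:] -> test (empty here)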


@@ -8,6 +8,7 @@ import os
 ###
 ## Changelog:
+# 2024-11-14: Fix crash on line #107 unterminated string literal
 # 2024-09-29: Create this changelog, prepare for reuse
 ##############################################################################
@@ -104,8 +105,7 @@ def print_all(table=True):
 	# Create the table format string
-	format_string = f"| {{:<{width_name}}} | {{:<{
-	width_type}}} | {{:<{width_value}}} | {{:<{width_flags}}} |"
+	format_string = f"| {{:<{width_name}}} | {{:<{width_type}}} | {{:<{width_value}}} | {{:<{width_flags}}} |"
 	
 	# Calculate total width
 	total_width = width_name + width_type + width_value + \
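
Note: the previous commit had broken this f-string across two physical lines, which is the "unterminated string literal" crash recorded in the changelog above; rejoining it onto one line fixes the syntax. The doubled braces escape to literal braces, so the f-string evaluates to a str.format template. An illustration with assumed column widths:

# Assumed example widths; in env.py these are computed from the table contents.
width_name, width_type, width_value, width_flags = 10, 5, 7, 5

format_string = f"| {{:<{width_name}}} | {{:<{width_type}}} | {{:<{width_value}}} | {{:<{width_flags}}} |"
print(format_string)
# | {:<10} | {:<5} | {:<7} | {:<5} |
print(format_string.format("EPOCHS", "int", "25", ""))
# | EPOCHS     | int   | 25      |       |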