mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-12-22 14:15:01 +00:00
Merge branch 'main' of github.com:sbrl/research-rainfallradar
This commit is contained in:
commit
7e00ede747
4 changed files with 30 additions and 13 deletions
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
#SBATCH -J DeepRain
|
#SBATCH -J DeepRain
|
||||||
#SBATCH -N 1
|
#SBATCH -N 1
|
||||||
#SBATCH -n 14
|
#SBATCH -n 9
|
||||||
#SBATCH --gres=gpu:1
|
#SBATCH --gres=gpu:1
|
||||||
#SBATCH -o %j.%N.%a.deeplab-rainfall.out.log
|
#SBATCH -o %j.%N.%a.deeplab-rainfall.out.log
|
||||||
#SBATCH -e %j.%N.%a.deeplab-rainfall.err.log
|
#SBATCH -e %j.%N.%a.deeplab-rainfall.err.log
|
||||||
|
@ -21,7 +21,21 @@ command_exists() {
|
||||||
|
|
||||||
#######################################################
|
#######################################################
|
||||||
|
|
||||||
if command_exists module; then
|
if [[ "${SLURM_CLUSTER_NAME}" == "cs-cluster" ]]; then
|
||||||
|
echo "[slurm_runner] csgpu cluster detected, applying CUDA workarounds" >&2;
|
||||||
|
# Fix "Could not load library libcublasLt.so.12. Error: libcublasLt.so.12: cannot open shared object file: No such file or directory" error
|
||||||
|
if [[ -d "${HOME}/cuda" ]]; then
|
||||||
|
echo "[slurm_runner] sourcing extra CUDA setup script" >&2;
|
||||||
|
#shellcheck source=/dev/null
|
||||||
|
source "${HOME}/cuda/activate.sh";
|
||||||
|
fi
|
||||||
|
|
||||||
|
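export XLA_FLAGS="--xla_gpu_cuda_data_dir=/usr/lib/cuda"; # weird... this wasn't needed before? Points XLA at the CUDA data directory under /usr/lib/cuda (the original note ended at "Fixes" without recording which error this resolves)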
|
||||||
|
echo "[slurm_runner] set XLA_FLAGS=\"${XLA_FLAGS}\"" >&2;
|
||||||
|
fi
|
||||||
|
|
||||||
|
# No modules on the CS cluster
|
||||||
|
if command_exists module && [[ "${SLURM_CLUSTER_NAME}" != "cs-cluster" ]]; then
|
||||||
module load utilities/multi
|
module load utilities/multi
|
||||||
module load readline/7.0
|
module load readline/7.0
|
||||||
module load gcc/10.2.0
|
module load gcc/10.2.0
|
||||||
|
@ -30,7 +44,7 @@ if command_exists module; then
|
||||||
|
|
||||||
module load python/anaconda/4.6/miniconda/3.7
|
module load python/anaconda/4.6/miniconda/3.7
|
||||||
else
|
else
|
||||||
echo "[slurm_runner]: module command not present, not loading modules" >&2;
|
echo "[slurm_runner]: module command not present or csgpu cluster detected, not loading modules" >&2;
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,7 +88,7 @@ show_help() {
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
DIR_RAINFALLWATER="${DIR_RAINFALLWATER:-$HOME/rainfallwater_records_tfrecord}";
|
DIR_RAINFALLWATER="${DIR_RAINFALLWATER:-$HOME/data/rainfallwater_records_tfrecord}";
|
||||||
PATH_HEIGHTMAP="${PATH_HEIGHTMAP:-$HOME/data/terrain50-nimrodsized.json.gz}";
|
PATH_HEIGHTMAP="${PATH_HEIGHTMAP:-$HOME/data/terrain50-nimrodsized.json.gz}";
|
||||||
PATH_COLOURMAP="${PATH_COLOURMAP:-$HOME/data/instance-level-human-parsing/instance-level_human_parsing/human_colormap.mat}";
|
PATH_COLOURMAP="${PATH_COLOURMAP:-$HOME/data/instance-level-human-parsing/instance-level_human_parsing/human_colormap.mat}";
|
||||||
|
|
||||||
|
@ -92,6 +106,9 @@ mkdir -p "${DIR_OUTPUT}";
|
||||||
echo -e ">>> NOW: $(date)";
|
echo -e ">>> NOW: $(date)";
|
||||||
echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
|
echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
|
||||||
echo -e ">>> Additional args: ${ARGS}";
|
echo -e ">>> Additional args: ${ARGS}";
|
||||||
|
echo -e ">>> GIT COMMIT: $(git rev-parse HEAD)";
|
||||||
|
|
||||||
|
{ echo "*****"; git rev-parse HEAD; git status; git log -1 | cat; } >>"${DIR_OUTPUT}/commit.txt";
|
||||||
|
|
||||||
export PATH=$HOME/software/bin:$PATH;
|
export PATH=$HOME/software/bin:$PATH;
|
||||||
export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;
|
export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;
|
||||||
|
|
|
@ -20,7 +20,7 @@ import matplotlib.pyplot as plt
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
import lib.primitives.env
|
import lib.primitives.env as env
|
||||||
from lib.dataset.dataset_mono import dataset_mono, dataset_mono_predict
|
from lib.dataset.dataset_mono import dataset_mono, dataset_mono_predict
|
||||||
from lib.ai.components.LossCrossEntropyDice import LossCrossEntropyDice
|
from lib.ai.components.LossCrossEntropyDice import LossCrossEntropyDice
|
||||||
from lib.ai.components.MetricDice import metric_dice_coefficient as dice_coefficient
|
from lib.ai.components.MetricDice import metric_dice_coefficient as dice_coefficient
|
||||||
|
@ -40,7 +40,7 @@ logger.info(f"Starting at {str(datetime.now().isoformat())}")
|
||||||
# ███████ ██ ████ ████ ██ ██ ██ ██████ ██ ████ ██ ██ ███████ ██ ████ ██
|
# ███████ ██ ████ ████ ██ ██ ██ ██████ ██ ████ ██ ██ ███████ ██ ████ ██
|
||||||
|
|
||||||
IMAGE_SIZE = env.read("IMAGE_SIZE", int, 128) # was 512; 128 is the highest power of 2 that fits the data
|
IMAGE_SIZE = env.read("IMAGE_SIZE", int, 128) # was 512; 128 is the highest power of 2 that fits the data
|
||||||
BATCH_SIZE = env.read("BATCH_SIZE", int, 64)
|
BATCH_SIZE = env.read("BATCH_SIZE", int, 32)
|
||||||
NUM_CLASSES = 2
|
NUM_CLASSES = 2
|
||||||
DIR_RAINFALLWATER = env.read("DIR_RAINFALLWATER", str)
|
DIR_RAINFALLWATER = env.read("DIR_RAINFALLWATER", str)
|
||||||
PATH_HEIGHTMAP = env.read("PATH_HEIGHTMAP", str)
|
PATH_HEIGHTMAP = env.read("PATH_HEIGHTMAP", str)
|
||||||
|
@ -48,10 +48,10 @@ PATH_COLOURMAP = env.read("PATH_COLOURMAP", str)
|
||||||
PARALLEL_READS = env.read("PARALLEL_READS", float, 1.5)
|
PARALLEL_READS = env.read("PARALLEL_READS", float, 1.5)
|
||||||
STEPS_PER_EPOCH = env.read("STEPS_PER_EPOCH", int, None)
|
STEPS_PER_EPOCH = env.read("STEPS_PER_EPOCH", int, None)
|
||||||
REMOVE_ISOLATED_PIXELS = env.read("NO_REMOVE_ISOLATED_PIXELS", bool, True)
|
REMOVE_ISOLATED_PIXELS = env.read("NO_REMOVE_ISOLATED_PIXELS", bool, True)
|
||||||
EPOCHS = env.read("EPOCHS", int, 50)
|
EPOCHS = env.read("EPOCHS", int, 25)
|
||||||
LOSS = env.read("LOSS", str, "cross-entropy-dice") # other possible values: cross-entropy
|
LOSS = env.read("LOSS", str, "cross-entropy-dice") # other possible values: cross-entropy
|
||||||
DICE_LOG_COSH = env.read("DICE_LOG_COSH", bool, False)
|
DICE_LOG_COSH = env.read("DICE_LOG_COSH", bool, False)
|
||||||
LEARNING_RATE = env.read("LEARNING_RATE", float, 0.001)
|
LEARNING_RATE = env.read("LEARNING_RATE", float, 0.00001)
|
||||||
WATER_THRESHOLD = env.read("WATER_THRESHOLD", float, 0.1)
|
WATER_THRESHOLD = env.read("WATER_THRESHOLD", float, 0.1)
|
||||||
UPSAMPLE = env.read("UPSAMPLE", int, 2)
|
UPSAMPLE = env.read("UPSAMPLE", int, 2)
|
||||||
SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
|
SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
|
||||||
|
@ -59,7 +59,7 @@ SPLIT_TEST = env.read("SPLIT_TEST", float, 0)
|
||||||
# NOTE: RANDSEED is declared and handled in src/lib/dataset/primitives/shuffle.py
|
# NOTE: RANDSEED is declared and handled in src/lib/dataset/primitives/shuffle.py
|
||||||
|
|
||||||
STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
|
STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
|
||||||
JIT_COMPILE = env.read("JIT_COMPILE", bool, False)
|
JIT_COMPILE = env.read("JIT_COMPILE", bool, True)
|
||||||
DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
|
DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
|
||||||
PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
|
PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
|
||||||
PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
|
PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
|
||||||
|
@ -95,7 +95,7 @@ if not PREDICT_AS_ONE:
|
||||||
do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
|
do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
|
||||||
parallel_reads_multiplier=PARALLEL_READS,
|
parallel_reads_multiplier=PARALLEL_READS,
|
||||||
percentage_validate=SPLIT_VALIDATE,
|
percentage_validate=SPLIT_VALIDATE,
|
||||||
percentage_test=SPLIT_TESTs
|
percentage_test=SPLIT_TEST
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Train Dataset:", dataset_train)
|
logger.info("Train Dataset:", dataset_train)
|
||||||
|
|
|
@ -165,7 +165,7 @@ def dataset_mono(dirpath_input, percentage_validate=0.2, percentage_test=0, **kw
|
||||||
filepaths_count = len(filepaths)
|
filepaths_count = len(filepaths)
|
||||||
|
|
||||||
split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
|
split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
|
||||||
split_validatetest=math.floor(filepaths * (1 - percentage_test))
|
split_validatetest=math.floor(filepaths_count * (1 - percentage_test))
|
||||||
|
|
||||||
|
|
||||||
filepaths_train = filepaths[:split_trainvalidate]
|
filepaths_train = filepaths[:split_trainvalidate]
|
||||||
|
|
|
@ -8,6 +8,7 @@ import os
|
||||||
###
|
###
|
||||||
|
|
||||||
## Changelog:
|
## Changelog:
|
||||||
|
# 2024-11-14: Fix crash on line #107 unterminated string literal
|
||||||
# 2024-09-29: Create this changelog, prepare for reuse
|
# 2024-09-29: Create this changelog, prepare for reuse
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
@ -104,8 +105,7 @@ def print_all(table=True):
|
||||||
|
|
||||||
|
|
||||||
# Create the table format string
|
# Create the table format string
|
||||||
format_string = f"| {{:<{width_name}}} | {{:<{
|
format_string = f"| {{:<{width_name}}} | {{:<{width_type}}} | {{:<{width_value}}} | {{:<{width_flags}}} |"
|
||||||
width_type}}} | {{:<{width_value}}} | {{:<{width_flags}}} |"
|
|
||||||
|
|
||||||
# Calculate total width
|
# Calculate total width
|
||||||
total_width = width_name + width_type + width_value + \
|
total_width = width_name + width_type + width_value + \
|
||||||
|
|
Loading…
Reference in a new issue