From e5f6e6394f3c3b937e524925dc2105fbe70c4003 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs
Date: Thu, 29 Aug 2024 19:33:40 +0100
Subject: [PATCH] Implement initial UNTESTED support for split_validation and
 split_test

---
 aimodel/slurm-TEST-deeplabv3p-rainfall.job  |  4 +++-
 aimodel/src/deeplabv3_plus_test_rainfall.py | 17 ++++++++++++++---
 aimodel/src/lib/dataset/dataset_mono.py     | 11 +++++------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/aimodel/slurm-TEST-deeplabv3p-rainfall.job b/aimodel/slurm-TEST-deeplabv3p-rainfall.job
index 2c86e3d..206a3cb 100755
--- a/aimodel/slurm-TEST-deeplabv3p-rainfall.job
+++ b/aimodel/slurm-TEST-deeplabv3p-rainfall.job
@@ -43,6 +43,8 @@ show_help() {
 	echo -e "    PATH_CHECKPOINT      The path to a checkpoint to load. If specified, a model will be loaded instead of being trained." >&2;
 	echo -e "    LEARNING_RATE        The learning rate to use. Default: 0.001." >&2;
 	echo -e "    UPSAMPLE             How much to upsample by at the beginning of the model. A value of 1 disables upscaling. Default: 2." >&2;
+	echo -e "    SPLIT_VALIDATE       Percentage of the available files in the dataset to be allocated to the validation split. Default: 0.2" >&2;
+	echo -e "    SPLIT_TEST           Percentage of the available files in the dataset to be allocated to the test split. Default: 0" >&2;
 	echo -e "    STEPS_PER_EXECUTION  How many steps to perform before surfacing from the GPU to e.g. do callbacks. Default: 16." >&2;
 	echo -e "    RANDSEED             The random seed to use when shuffling filepaths. Default: unset, which means use a random value." >&2;
 	echo -e "    JIT_COMPILE          Set to any value to compile the model with XLA." >&2;
@@ -77,7 +79,7 @@ echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
 echo -e ">>> Additional args: ${ARGS}";
 
 export PATH=$HOME/software/bin:$PATH;
-export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE;
+export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;
 
 echo ">>> Installing requirements";
 conda run -n py38 pip install -q -r requirements.txt;
diff --git a/aimodel/src/deeplabv3_plus_test_rainfall.py b/aimodel/src/deeplabv3_plus_test_rainfall.py
index cff279d..daa2d64 100755
--- a/aimodel/src/deeplabv3_plus_test_rainfall.py
+++ b/aimodel/src/deeplabv3_plus_test_rainfall.py
@@ -56,13 +56,13 @@ UPSAMPLE = env.read("UPSAMPLE", int, 2)
 
 SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
 SPLIT_TEST = env.read("SPLIT_TEST", float, 0)
-
 STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
 JIT_COMPILE = env.read("JIT_COMPILE", bool, False)
 DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
 PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
 PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
 PREDICT_AS_ONE = env.read("PREDICT_AS_ONE", bool, False)
+# ~~~
 
 env.val_dir_exists(os.path.join(DIR_OUTPUT, "checkpoints"), create=True)
 
@@ -82,7 +82,7 @@ env.print_all(False)
 # ██████ ██ ██ ██ ██ ██ ███████ ███████ ██
 
 if not PREDICT_AS_ONE:
-	dataset_train, dataset_validate = dataset_mono(
+	dataset_train, dataset_validate, dataset_test = dataset_mono(
 		dirpath_input=DIR_RAINFALLWATER,
 		batch_size=BATCH_SIZE,
 		water_threshold=WATER_THRESHOLD,
@@ -91,11 +91,14 @@ if not PREDICT_AS_ONE:
 		input_size="same",
 		filepath_heightmap=PATH_HEIGHTMAP,
 		do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
-		parallel_reads_multiplier=PARALLEL_READS
+		parallel_reads_multiplier=PARALLEL_READS,
+		percentage_validate=SPLIT_VALIDATE,
+		percentage_test=SPLIT_TEST
 	)
 	
 	logger.info("Train Dataset:", dataset_train)
 	logger.info("Validation Dataset:", dataset_validate)
+	logger.info("Test Dataset:", dataset_test)
 else:
 	dataset_train = dataset_mono_predict(
 		dirpath_input=DIR_RAINFALLWATER,
@@ -253,6 +256,7 @@ if PATH_CHECKPOINT is None:
 	logger.info(">>> Beginning training")
 	history = model.fit(dataset_train,
 		validation_data=dataset_validate,
+		# test_data=dataset_test, # Nope, it doesn't have a param like this so it's time to do this the *hard* way
 		epochs=EPOCHS,
 		callbacks=[
 			tf.keras.callbacks.CSVLogger(
@@ -395,5 +399,12 @@ if not PREDICT_AS_ONE:
 		colormap,
 		model=model
 	)
+	if dataset_test is not None:
+		plot_predictions(
+			os.path.join(DIR_OUTPUT, "predict_test_$$.png"),
+			get_from_batched(dataset_test, PREDICT_COUNT),
+			colormap,
+			model=model
+		)
 
 logger.info(f"Complete at {str(datetime.now().isoformat())}, elapsed {str((datetime.now() - time_start).total_seconds())} seconds")
diff --git a/aimodel/src/lib/dataset/dataset_mono.py b/aimodel/src/lib/dataset/dataset_mono.py
index 2e363fa..7e5e8b3 100644
--- a/aimodel/src/lib/dataset/dataset_mono.py
+++ b/aimodel/src/lib/dataset/dataset_mono.py
@@ -160,19 +160,18 @@ def get_filepaths(dirpath_input, do_shuffle=True):
 	
 	return result
 
-# TODO refactor this to validate_percentage=0.2 and test_percentage=0, but DON'T FORGET TO CHECK ***ALL*** usages of this FIRST and update them afterwards!
-def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kwargs):
+def dataset_mono(dirpath_input, percentage_validate=0.2, percentage_test=0, **kwargs):
 	filepaths = get_filepaths(dirpath_input)
 	filepaths_count = len(filepaths)
 	
 	
-	split_trainvalidate=math.floor(filepaths_count * (1-(validate_percentage+test_percentage)))
-	split_validatetest=math.floor(filepaths * (1 - test_percentage))
+	split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
+	split_validatetest=math.floor(filepaths_count * (1 - percentage_test))
 	
 	filepaths_train = filepaths[:split_trainvalidate]
 	filepaths_validate = filepaths[split_trainvalidate:split_validatetest]
 	filepaths_test = []
-	if test_percentage > 0:
+	if percentage_test > 0:
 		filepaths_test = filepaths[split_validatetest:]
 	
 	print("DEBUG:dataset_mono filepaths_train", filepaths_train, "filepaths_validate", filepaths_validate, "filepaths_test", filepaths_test)
@@ -182,7 +181,7 @@ def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kw
 	dataset_train = make_dataset(filepaths_train, metadata=metadata, **kwargs)
 	dataset_validate = make_dataset(filepaths_validate, metadata=metadata, **kwargs)
 	dataset_test = None
-	if test_percentage > 0:
+	if percentage_test > 0:
 		dataset_test = make_dataset(filepaths_test, metadata=metadata, **kwargs)
 	
 	return dataset_train, dataset_validate, dataset_test #, filepaths
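
Note on the new split arithmetic: percentage_validate and percentage_test reduce to two
cut indices over the shuffled filepath list, so the three splits are contiguous,
non-overlapping slices. A minimal standalone sketch of that logic, using only the Python
standard library (demo_split and the example filenames are hypothetical names for
illustration only, not part of this patch):

	import math

	def demo_split(filepaths, percentage_validate=0.2, percentage_test=0):
		# Mirrors the slicing in dataset_mono(): two cut indices partition the list.
		filepaths_count = len(filepaths)
		split_trainvalidate = math.floor(filepaths_count * (1 - (percentage_validate + percentage_test)))
		split_validatetest = math.floor(filepaths_count * (1 - percentage_test))
		filepaths_train = filepaths[:split_trainvalidate]
		filepaths_validate = filepaths[split_trainvalidate:split_validatetest]
		# The test split is only materialised when a test percentage is requested.
		filepaths_test = filepaths[split_validatetest:] if percentage_test > 0 else []
		return filepaths_train, filepaths_validate, filepaths_test

	# 10 files with SPLIT_VALIDATE=0.2 and SPLIT_TEST=0.2 gives a 6/2/2 split:
	train, validate, test = demo_split([f"sample_{i}.bin" for i in range(10)], 0.2, 0.2)
	print(len(train), len(validate), len(test))  # 6 2 2

With percentage_test left at its default of 0, split_validatetest equals the list length,
so the validation slice runs to the end of the list and the previous 80/20 train/validate
behaviour is preserved. Existing callers still need checking, as the removed TODO comment
warned: the keyword arguments are renamed, and dataset_mono() now returns a third value,
which is None when no test split is requested.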