mirror of https://github.com/sbrl/research-rainfallradar
synced 2024-11-21 17:03:00 +00:00

Commit e5f6e6394f (parent: b2b96ab636)
Implement initial UNTESTED support for split_validation and split_test

3 changed files with 22 additions and 10 deletions
File 1 of 3 (shell training job script; filename not preserved in this view):

@@ -43,6 +43,8 @@ show_help() {
 	echo -e " PATH_CHECKPOINT The path to a checkpoint to load. If specified, a model will be loaded instead of being trained." >&2;
 	echo -e " LEARNING_RATE The learning rate to use. Default: 0.001." >&2;
 	echo -e " UPSAMPLE How much to upsample by at the beginning of the model. A value of 1 disables upscaling. Default: 2." >&2;
+	echo -e " SPLIT_VALIDATE Percentage of the available files in the dataset to be allocated to the validation split. Default: 0.2" >&2;
+	echo -e " SPLIT_TEST Percentage of the available files in the dataset to be allocated to the test split. Default: 0" >&2;
 	echo -e " STEPS_PER_EXECUTION How many steps to perform before surfacing from the GPU to e.g. do callbacks. Default: 16." >&2;
 	echo -e " RANDSEED The random seed to use when shuffling filepaths. Default: unset, which means use a random value." >&2;
 	echo -e " JIT_COMPILE Set to any value to compile the model with XLA." >&2;
@@ -77,7 +79,7 @@ echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
 echo -e ">>> Additional args: ${ARGS}";
 
 export PATH=$HOME/software/bin:$PATH;
-export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE;
+export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;
 
 echo ">>> Installing requirements";
 conda run -n py38 pip install -q -r requirements.txt;
File 2 of 3 (DeepLabV3+ rainfall training script; filename not preserved in this view):

@@ -56,13 +56,13 @@ UPSAMPLE = env.read("UPSAMPLE", int, 2)
 SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
 SPLIT_TEST = env.read("SPLIT_TEST", float, 0)
 
 STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
 JIT_COMPILE = env.read("JIT_COMPILE", bool, False)
 DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
 PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
 PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
 PREDICT_AS_ONE = env.read("PREDICT_AS_ONE", bool, False)
 
 # ~~~
 
 env.val_dir_exists(os.path.join(DIR_OUTPUT, "checkpoints"), create=True)
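For context when reading the hunk above: env.read is this repository's own configuration helper, and its implementation is not part of this diff. A minimal illustrative sketch of the semantics assumed here (read an environment variable, cast non-empty values, otherwise fall back to the default; env_read and its bool behaviour are assumptions, not the repo's actual code):

import os

def env_read(name, kind, default):
	# Hypothetical stand-in for the repo's env.read helper.
	value = os.environ.get(name)
	if value is None or value == "":
		return default
	if kind is bool:
		return True  # assumed "set to any value" semantics, as the JIT_COMPILE help text describes
	return kind(value)

SPLIT_VALIDATE = env_read("SPLIT_VALIDATE", float, 0.2)  # 20% of files by default
SPLIT_TEST = env_read("SPLIT_TEST", float, 0)            # no test split unless requested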
@@ -82,7 +82,7 @@ env.print_all(False)
 # ██████ ██ ██ ██ ██ ██ ███████ ███████ ██
 
 if not PREDICT_AS_ONE:
-	dataset_train, dataset_validate = dataset_mono(
+	dataset_train, dataset_validate, dataset_test = dataset_mono(
 		dirpath_input=DIR_RAINFALLWATER,
 		batch_size=BATCH_SIZE,
 		water_threshold=WATER_THRESHOLD,
@@ -91,11 +91,14 @@ if not PREDICT_AS_ONE:
 		input_size="same",
 		filepath_heightmap=PATH_HEIGHTMAP,
 		do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
-		parallel_reads_multiplier=PARALLEL_READS
+		parallel_reads_multiplier=PARALLEL_READS,
+		percentage_validate=SPLIT_VALIDATE,
+		percentage_test=SPLIT_TEST
 	)
 	
 	logger.info("Train Dataset:", dataset_train)
 	logger.info("Validation Dataset:", dataset_validate)
+	logger.info("Test Dataset:", dataset_test)
 else:
 	dataset_train = dataset_mono_predict(
 		dirpath_input=DIR_RAINFALLWATER,
@@ -253,6 +256,7 @@ if PATH_CHECKPOINT is None:
 	logger.info(">>> Beginning training")
 	history = model.fit(dataset_train,
 		validation_data=dataset_validate,
+		# test_data=dataset_test, # Nope, it doesn't have a param like this so it's time to do this the *hard* way
 		epochs=EPOCHS,
 		callbacks=[
 			tf.keras.callbacks.CSVLogger(
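model.fit() genuinely has no test_data parameter, which is what the new comment points out; the test split therefore has to be handled after training. A minimal sketch of the "hard way", assuming the dataset_test produced by dataset_mono() earlier in this script (the placement is hypothetical; this commit only leaves the comment):

if dataset_test is not None:
	# Evaluate once on the held-out test split after training completes.
	test_metrics = model.evaluate(dataset_test, return_dict=True)
	logger.info(f"Test split metrics: {test_metrics}")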
@@ -395,5 +399,12 @@ if not PREDICT_AS_ONE:
 		colormap,
 		model=model
 	)
+	if dataset_test is not None:
+		plot_predictions(
+			os.path.join(DIR_OUTPUT, "predict_test_$$.png"),
+			get_from_batched(dataset_test, PREDICT_COUNT),
+			colormap,
+			model=model
+		)
 
 logger.info(f"Complete at {str(datetime.now().isoformat())}, elapsed {str((datetime.now() - time_start).total_seconds())} seconds")
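get_from_batched() is another repo helper whose implementation is not shown in this diff. The guarded block above only needs it to pull PREDICT_COUNT individual samples back out of a batched tf.data pipeline, roughly like this sketch (an assumption about its behaviour, not the repo's code):

import tensorflow as tf

def get_from_batched_sketch(dataset: tf.data.Dataset, count: int) -> tf.data.Dataset:
	# Undo batching, then keep only the first `count` individual samples.
	return dataset.unbatch().take(count)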
File 3 of 3 (dataset loader module; filename not preserved in this view):

@@ -160,19 +160,18 @@ def get_filepaths(dirpath_input, do_shuffle=True):
 
 	return result
 
-def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kwargs):
+# TODO refactor this to validate_percentage=0.2 and test_percentage=0, but DON'T FORGET TO CHECK ***ALL*** usages of this FIRST and update them afterwards!
+def dataset_mono(dirpath_input, percentage_validate=0.2, percentage_test=0, **kwargs):
 	filepaths = get_filepaths(dirpath_input)
 	filepaths_count = len(filepaths)
 	
-	split_trainvalidate=math.floor(filepaths_count * (1-(validate_percentage+test_percentage)))
-	split_validatetest=math.floor(filepaths * (1 - test_percentage))
+	split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
+	split_validatetest=math.floor(filepaths_count * (1 - percentage_test))
 	
 	filepaths_train = filepaths[:split_trainvalidate]
 	filepaths_validate = filepaths[split_trainvalidate:split_validatetest]
 	filepaths_test = []
-	if test_percentage > 0:
+	if percentage_test > 0:
 		filepaths_test = filepaths[split_validatetest:]
 	
 	print("DEBUG:dataset_mono filepaths_train", filepaths_train, "filepaths_validate", filepaths_validate, "filepaths_test", filepaths_test)
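To make the split arithmetic above concrete, a worked example assuming 100 input files with percentage_validate=0.2 and percentage_test=0.2 (illustrative numbers, not from the commit):

import math

filepaths_count = 100
split_trainvalidate = math.floor(filepaths_count * (1 - (0.2 + 0.2)))  # = 60
split_validatetest = math.floor(filepaths_count * (1 - 0.2))           # = 80
# filepaths[:60]   -> 60 training files
# filepaths[60:80] -> 20 validation files
# filepaths[80:]   -> 20 test files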
@@ -182,7 +181,7 @@ def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kwargs):
 	dataset_train = make_dataset(filepaths_train, metadata=metadata, **kwargs)
 	dataset_validate = make_dataset(filepaths_validate, metadata=metadata, **kwargs)
 	dataset_test = None
-	if test_percentage > 0:
+	if percentage_test > 0:
 		dataset_test = make_dataset(filepaths_test, metadata=metadata, **kwargs)
 	
 	return dataset_train, dataset_validate, dataset_test #, filepaths
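A hedged usage sketch of the new three-value return: when percentage_test is left at its default of 0, dataset_test comes back as None, so call sites must guard on it exactly as the training script above does (the directory path and batch size here are hypothetical; batch_size is forwarded to make_dataset via **kwargs):

dataset_train, dataset_validate, dataset_test = dataset_mono(
	dirpath_input="/path/to/rainfallwater",  # hypothetical path
	batch_size=64,
	percentage_validate=0.2,
	percentage_test=0,
)
assert dataset_test is None  # no test split was requested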