mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-21 17:03:00 +00:00
Implement initial UNTESTED support for split_validation and split_test
This commit is contained in:
parent
b2b96ab636
commit
e5f6e6394f
3 changed files with 22 additions and 10 deletions
|
@ -43,6 +43,8 @@ show_help() {
|
|||
echo -e " PATH_CHECKPOINT The path to a checkpoint to load. If specified, a model will be loaded instead of being trained." >&2;
|
||||
echo -e " LEARNING_RATE The learning rate to use. Default: 0.001." >&2;
|
||||
echo -e " UPSAMPLE How much to upsample by at the beginning of the model. A value of 1 disables upscaling. Default: 2." >&2;
|
||||
echo -e " SPLIT_VALIDATE Percentage of the available files in the dataset to be allocated to the validation split. Default: 0.2" >&2;
|
||||
echo -e " SPLIT_TEST Percentage of the available files in the dataset to be allocated to the test split. Default: 0.2" >&2;
|
||||
echo -e " STEPS_PER_EXECUTION How many steps to perform before surfacing from the GPU to e.g. do callbacks. Default: 16." >&2;
|
||||
echo -e " RANDSEED The random seed to use when shuffling filepaths. Default: unset, which means use a random value." >&2;
|
||||
echo -e " JIT_COMPILE Set to any value to compile the model with XLA." >&2;
|
||||
|
@ -77,7 +79,7 @@ echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
|
|||
echo -e ">>> Additional args: ${ARGS}";
|
||||
|
||||
export PATH=$HOME/software/bin:$PATH;
|
||||
export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE;
|
||||
export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STEPS_PER_EPOCH DIR_OUTPUT PATH_CHECKPOINT EPOCHS PREDICT_COUNT NO_REMOVE_ISOLATED_PIXELS LOSS LEARNING_RATE DICE_LOG_COSH WATER_THRESHOLD UPSAMPLE STEPS_PER_EXECUTION JIT_COMPILE RANDSEED PREDICT_AS_ONE SPLIT_VALIDATE SPLIT_TEST;
|
||||
|
||||
echo ">>> Installing requirements";
|
||||
conda run -n py38 pip install -q -r requirements.txt;
|
||||
|
|
|
@ -56,13 +56,13 @@ UPSAMPLE = env.read("UPSAMPLE", int, 2)
|
|||
SPLIT_VALIDATE = env.read("SPLIT_VALIDATE", float, 0.2)
|
||||
SPLIT_TEST = env.read("SPLIT_TEST", float, 0)
|
||||
|
||||
|
||||
STEPS_PER_EXECUTION = env.read("STEPS_PER_EXECUTION", int, 1)
|
||||
JIT_COMPILE = env.read("JIT_COMPILE", bool, False)
|
||||
DIR_OUTPUT = env.read("DIR_OUTPUT", str, f"output/{datetime.utcnow().date().isoformat()}_deeplabv3plus_rainfall_TEST")
|
||||
PATH_CHECKPOINT = env.read("PATH_CHECKPOINT", str, None)
|
||||
PREDICT_COUNT = env.read("PREDICT_COUNT", int, 25)
|
||||
PREDICT_AS_ONE = env.read("PREDICT_AS_ONE", bool, False)
|
||||
|
||||
# ~~~
|
||||
|
||||
env.val_dir_exists(os.path.join(DIR_OUTPUT, "checkpoints"), create=True)
|
||||
|
@ -82,7 +82,7 @@ env.print_all(False)
|
|||
# ██████ ██ ██ ██ ██ ██ ███████ ███████ ██
|
||||
|
||||
if not PREDICT_AS_ONE:
|
||||
dataset_train, dataset_validate = dataset_mono(
|
||||
dataset_train, dataset_validate, dataset_test = dataset_mono(
|
||||
dirpath_input=DIR_RAINFALLWATER,
|
||||
batch_size=BATCH_SIZE,
|
||||
water_threshold=WATER_THRESHOLD,
|
||||
|
@ -91,11 +91,14 @@ if not PREDICT_AS_ONE:
|
|||
input_size="same",
|
||||
filepath_heightmap=PATH_HEIGHTMAP,
|
||||
do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
|
||||
parallel_reads_multiplier=PARALLEL_READS
|
||||
parallel_reads_multiplier=PARALLEL_READS,
|
||||
percentage_validate=SPLIT_VALIDATE,
|
||||
percentage_test=SPLIT_TEST
|
||||
)
|
||||
|
||||
logger.info("Train Dataset:", dataset_train)
|
||||
logger.info("Validation Dataset:", dataset_validate)
|
||||
logger.info("Test Dataset:", dataset_test)
|
||||
else:
|
||||
dataset_train = dataset_mono_predict(
|
||||
dirpath_input=DIR_RAINFALLWATER,
|
||||
|
@ -253,6 +256,7 @@ if PATH_CHECKPOINT is None:
|
|||
logger.info(">>> Beginning training")
|
||||
history = model.fit(dataset_train,
|
||||
validation_data=dataset_validate,
|
||||
# test_data=dataset_test, # Nope, it doesn't have a param like this so it's time to do this the *hard* way
|
||||
epochs=EPOCHS,
|
||||
callbacks=[
|
||||
tf.keras.callbacks.CSVLogger(
|
||||
|
@ -395,5 +399,12 @@ if not PREDICT_AS_ONE:
|
|||
colormap,
|
||||
model=model
|
||||
)
|
||||
if dataset_test is not None:
|
||||
plot_predictions(
|
||||
os.path.join(DIR_OUTPUT, "predict_test_$$.png"),
|
||||
get_from_batched(dataset_test, PREDICT_COUNT),
|
||||
colormap,
|
||||
model=model
|
||||
)
|
||||
|
||||
logger.info(f"Complete at {str(datetime.now().isoformat())}, elapsed {str((datetime.now() - time_start).total_seconds())} seconds")
|
||||
|
|
|
@ -160,19 +160,18 @@ def get_filepaths(dirpath_input, do_shuffle=True):
|
|||
|
||||
return result
|
||||
|
||||
# TODO refactor this to validate_percentage=0.2 and test_percentage=0, but DON'T FORGET TO CHECK ***ALL*** usages of this FIRST and update them afterwards!
|
||||
def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kwargs):
|
||||
def dataset_mono(dirpath_input, percentage_validate=0.2, percentage_test=0, **kwargs):
|
||||
filepaths = get_filepaths(dirpath_input)
|
||||
filepaths_count = len(filepaths)
|
||||
|
||||
split_trainvalidate=math.floor(filepaths_count * (1-(validate_percentage+test_percentage)))
|
||||
split_validatetest=math.floor(filepaths * (1 - test_percentage))
|
||||
split_trainvalidate=math.floor(filepaths_count * (1-(percentage_validate+percentage_test)))
|
||||
split_validatetest=math.floor(filepaths_count * (1 - percentage_test))
|
||||
|
||||
|
||||
filepaths_train = filepaths[:split_trainvalidate]
|
||||
filepaths_validate = filepaths[split_trainvalidate:split_validatetest]
|
||||
filepaths_test = []
|
||||
if test_percentage > 0:
|
||||
if percentage_test > 0:
|
||||
filepaths_test = filepaths[split_validatetest:]
|
||||
|
||||
print("DEBUG:dataset_mono filepaths_train", filepaths_train, "filepaths_validate", filepaths_validate, "filepaths_test", filepaths_test)
|
||||
|
@ -182,7 +181,7 @@ def dataset_mono(dirpath_input, validate_percentage=0.2, test_percentage=0, **kw
|
|||
dataset_train = make_dataset(filepaths_train, metadata=metadata, **kwargs)
|
||||
dataset_validate = make_dataset(filepaths_validate, metadata=metadata, **kwargs)
|
||||
dataset_test = None
|
||||
if test_percentage > 0:
|
||||
if percentage_test > 0:
|
||||
dataset_test = make_dataset(filepaths_test, metadata=metadata, **kwargs)
|
||||
|
||||
return dataset_train, dataset_validate, dataset_test #, filepaths
|
||||
|
|
Loading…
Reference in a new issue