dlr: add PARALLEL_READS env var, update docs

Starbeamrainbowlabs 2023-11-30 16:33:22 +00:00
parent 60674fb6b3
commit 0f9f185983
Signed by: sbrl
GPG key ID: 1BE5172E637709C2
3 changed files with 7 additions and 4 deletions

@@ -33,13 +33,14 @@ show_help() {
echo -e " DIR_RAINFALLWATER The path to the directory the .tfrecord files containing the rainfall radar / water depth data." >&2;
echo -e " PATH_HEIGHTMAP The path to the heightmap jsonl file to read in." >&2;
echo -e " PATH_COLOURMAP The path to the colourmap for predictive purposes." >&2;
echo -e " PARALLEL_READS Multiplier for the number of files to read in parallel. 1 = number of CPU cores available. Very useful on high-read-latency systems (e.g. HPC like Viper) to avoid starving the GPU of data. WILL MANGLE THE ORDERING OF DATA. Set to 0 to disable and read data sequentially. WILL ONLY NOT MANGLE DATA IF PREDICT_AS_ONE IS SET. Defaults to 1.5." >&2;
echo -e " STEPS_PER_EPOCH The number of steps to consider an epoch. Defaults to None, which means use the entire dataset." >&2;
echo -e " NO_REMOVE_ISOLATED_PIXELS Set to any value to avoid the engine from removing isolated pixels - that is, water pixels with no other surrounding pixels, either side to side to diagonally." >&2;
echo -e " EPOCHS The number of epochs to train for." >&2;
echo -e " LOSS The loss function to use. Default: cross-entropy (possible values: cross-entropy, cross-entropy-dice)." >&2;
echo -e " DICE_LOG_COSH When in cross-entropy-dice mode, in addition do loss = cel + log(cosh(dice_loss)) instead of just loss = cel + dice_loss." >&2;
echo -e " WATER_THRESHOLD The threshold to cut water off at when training, in metres. Default: 0.1" >&2;
echo -e " PATH_CHECKPOINT The path to a checkcpoint to load. If specified, a model will be loaded instead of being trained." >&2;
echo -e " PATH_CHECKPOINT The path to a checkpoint to load. If specified, a model will be loaded instead of being trained." >&2;
echo -e " LEARNING_RATE The learning rate to use. Default: 0.001." >&2;
echo -e " UPSAMPLE How much to upsample by at the beginning of the model. A value of disables upscaling. Default: 2." >&2;
echo -e " STEPS_PER_EXECUTION How many steps to perform before surfacing from the GPU to e.g. do callbacks. Default: 16." >&2;

@@ -43,6 +43,7 @@ NUM_CLASSES = 2
DIR_RAINFALLWATER = os.environ["DIR_RAINFALLWATER"]
PATH_HEIGHTMAP = os.environ["PATH_HEIGHTMAP"]
PATH_COLOURMAP = os.environ["PATH_COLOURMAP"]
+PARALLEL_READS = float(os.environ["PARALLEL_READS"]) if "PARALLEL_READS" in os.environ else 1.5
STEPS_PER_EPOCH = int(os.environ["STEPS_PER_EPOCH"]) if "STEPS_PER_EPOCH" in os.environ else None
REMOVE_ISOLATED_PIXELS = False if "NO_REMOVE_ISOLATED_PIXELS" in os.environ else True
EPOCHS = int(os.environ["EPOCHS"]) if "EPOCHS" in os.environ else 50
@@ -69,7 +70,7 @@ if not os.path.exists(DIR_OUTPUT):
# ~~~
logger.info("DeepLabV3+ rainfall radar TEST")
for env_name in [ "BATCH_SIZE","NUM_CLASSES", "DIR_RAINFALLWATER", "PATH_HEIGHTMAP", "PATH_COLOURMAP", "STEPS_PER_EPOCH", "REMOVE_ISOLATED_PIXELS", "EPOCHS", "LOSS", "LEARNING_RATE", "DIR_OUTPUT", "PATH_CHECKPOINT", "PREDICT_COUNT", "DICE_LOG_COSH", "WATER_THRESHOLD", "UPSAMPLE", "STEPS_PER_EXECUTION", "JIT_COMPILE", "PREDICT_AS_ONE" ]:
for env_name in [ "BATCH_SIZE","NUM_CLASSES", "DIR_RAINFALLWATER", "PATH_HEIGHTMAP", "PATH_COLOURMAP", "STEPS_PER_EPOCH", "PARALLEL_READS", "REMOVE_ISOLATED_PIXELS", "EPOCHS", "LOSS", "LEARNING_RATE", "DIR_OUTPUT", "PATH_CHECKPOINT", "PREDICT_COUNT", "DICE_LOG_COSH", "WATER_THRESHOLD", "UPSAMPLE", "STEPS_PER_EXECUTION", "JIT_COMPILE", "PREDICT_AS_ONE" ]:
logger.info(f"> {env_name} {str(globals()[env_name])}")
@@ -88,7 +89,8 @@ if not PREDICT_AS_ONE:
output_size=IMAGE_SIZE,
input_size="same",
filepath_heightmap=PATH_HEIGHTMAP,
-do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS
+do_remove_isolated_pixels=REMOVE_ISOLATED_PIXELS,
+parallel_reads_multiplier=PARALLEL_READS
)
logger.info("Train Dataset:", dataset_train)

@@ -178,7 +178,7 @@ def dataset_mono_predict(dirpath_input, batch_size=64, **kwargs):
filepaths=filepaths,
metadata=read_metadata(dirpath_input),
batch_size=batch_size, # WAS None
-shuffle=False, #even with shuffle=False we're not gonna get them all in the same order since we're reading in parallel
+shuffle=False, #even with shuffle=False we're not gonna get them all in the same order since we're reading in parallel by default
**kwargs
)
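
Given the new keyword, a caller that needs records back in on-disk order would presumably pass a zero multiplier through to the reader. A hypothetical call, assuming dataset_mono_predict forwards **kwargs (including parallel_reads_multiplier) to the underlying reader as shown above:

```python
# Hypothetical usage: disable parallel reads so that, together with
# shuffle=False, records come back in the order the files are listed.
dataset = dataset_mono_predict(
    "/path/to/rainfallwater",  # hypothetical dirpath_input
    batch_size=64,
    parallel_reads_multiplier=0,  # 0 → sequential reads, order preserved
)
```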