mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-24 18:23:01 +00:00
slurm dlr: shell out in conda; redirect stderr & stdout to disk inside the experiments folder
Also, if the job restarts, we still save the previous run's results because we append rather than overwrite
This commit is contained in:
parent
0b31c9e700
commit
6ff2864d23
1 changed files with 7 additions and 3 deletions
|
@ -34,8 +34,8 @@ show_help() {
|
||||||
echo -e " NO_REMOVE_ISOLATED_PIXELS Set to any value to avoid the engine from removing isolated pixels - that is, water pixels with no other surrounding pixels, either side to side to diagonally." >&2;
|
echo -e " NO_REMOVE_ISOLATED_PIXELS Set to any value to avoid the engine from removing isolated pixels - that is, water pixels with no other surrounding pixels, either side to side to diagonally." >&2;
|
||||||
echo -e " EPOCHS The number of epochs to train for." >&2;
|
echo -e " EPOCHS The number of epochs to train for." >&2;
|
||||||
echo -e " LOSS The loss function to use. Default: cross-entropy (possible values: cross-entropy, cross-entropy-dice)." >&2;
|
echo -e " LOSS The loss function to use. Default: cross-entropy (possible values: cross-entropy, cross-entropy-dice)." >&2;
|
||||||
echo -e " LEARNING_RATE The learning rate to use. Default: 0.001." >&2;
|
|
||||||
echo -e " PATH_CHECKPOINT The path to a checkcpoint to load. If specified, a model will be loaded instead of being trained." >&2;
|
echo -e " PATH_CHECKPOINT The path to a checkcpoint to load. If specified, a model will be loaded instead of being trained." >&2;
|
||||||
|
echo -e " LEARNING_RATE The learning rate to use. Default: 0.001." >&2;
|
||||||
echo -e " PREDICT_COUNT The number of items from the (SCRAMBLED) dataset to make a prediction for." >&2;
|
echo -e " PREDICT_COUNT The number of items from the (SCRAMBLED) dataset to make a prediction for." >&2;
|
||||||
echo -e " POSTFIX Postfix to append to the output dir (auto calculated)." >&2;
|
echo -e " POSTFIX Postfix to append to the output dir (auto calculated)." >&2;
|
||||||
echo -e " ARGS Optional. Any additional arguments to pass to the python program." >&2;
|
echo -e " ARGS Optional. Any additional arguments to pass to the python program." >&2;
|
||||||
|
@ -59,6 +59,10 @@ fi
|
||||||
|
|
||||||
DIR_OUTPUT="output/$(date -u --rfc-3339=date)_${CODE}";
|
DIR_OUTPUT="output/$(date -u --rfc-3339=date)_${CODE}";
|
||||||
|
|
||||||
|
mkdir -p "${DIR_OUTPUT}";
|
||||||
|
|
||||||
|
echo -e ">>> NOW: $(date)";
|
||||||
|
echo -e ">>> DIR_OUTPUT: ${DIR_OUTPUT}";
|
||||||
echo -e ">>> Additional args: ${ARGS}";
|
echo -e ">>> Additional args: ${ARGS}";
|
||||||
|
|
||||||
export PATH=$HOME/software/bin:$PATH;
|
export PATH=$HOME/software/bin:$PATH;
|
||||||
|
@ -67,6 +71,6 @@ export IMAGE_SIZE BATCH_SIZE DIR_RAINFALLWATER PATH_HEIGHTMAP PATH_COLOURMAP STE
|
||||||
echo ">>> Installing requirements";
|
echo ">>> Installing requirements";
|
||||||
conda run -n py38 pip install -q -r requirements.txt;
|
conda run -n py38 pip install -q -r requirements.txt;
|
||||||
echo ">>> Training model";
|
echo ">>> Training model";
|
||||||
#shellcheck disable=SC2086
|
#shellcheck disable=SC2016
|
||||||
/usr/bin/env time -v conda run -n py38 src/deeplabv3_plus_test_rainfall.py
|
# Run the experiment through `bash -c` inside the py38 conda environment so the
# redirections are performed by the inner shell. Both logs are opened in append
# mode (>>) so a restarted job adds to the previous run's logs instead of
# overwriting them. stdout goes to *.out.log; stderr needs an explicit `2>>`
# to reach *.err.log (a second plain `>>` would just re-point stdout).
# NOTE(review): the single quotes defer expansion of ${DIR_OUTPUT} and
# ${SLURM_JOB_ID} to the inner shell, so both must be in its environment -
# confirm DIR_OUTPUT is exported before this line.
/usr/bin/env time -v conda run -n py38 bash -c 'src/deeplabv3_plus_test_rainfall.py >>"${DIR_OUTPUT}/experiment.${SLURM_JOB_ID}.out.log" 2>>"${DIR_OUTPUT}/experiment.${SLURM_JOB_ID}.err.log"';
|
||||||
echo ">>> exited with code $?";
|
echo ">>> exited with code $?";
|
||||||
|
|
Loading…
Reference in a new issue