diff --git a/aimodel/slurm-pretrain-plot.job b/aimodel/slurm-pretrain-plot.job new file mode 100755 index 0000000..ea469ea --- /dev/null +++ b/aimodel/slurm-pretrain-plot.job @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +#SBATCH -J RainUMAP +#SBATCH -N 1 +#SBATCH -n 14 +#SBATCH --gres=gpu:1 +#SBATCH -o %j.%N.%a.out.log +#SBATCH -e %j.%N.%a.err.log +#SBATCH -p gpu05 +#SBATCH --time=5-00:00:00 + +module load utilities/multi +module load readline/7.0 +module load gcc/10.2.0 +module load cuda/11.5.0 + +module load python/anaconda/4.6/miniconda/3.7 + + +show_help() { + echo -e "Usage:" >&2; + echo -e " [INPUT=\"\$HOME/rainfallwater_records_embed.jsonl.gz\"] [POSTFIX=\"some_string\"] sbatch slurm-pretrain-plot.job" >&2; + echo -e "" >&2; + echo -e "....where:" >&2; + echo -e " INPUT The path to the input file (.jsonl.gz) containing the embedded data to plot (see the pretrain-predict subcommand for embedding data)" >&2; + echo -e " POSTFIX Arbitrary string to add to filename." >&2; + echo -e "" >&2; + echo -e "The code used to identify the run is taken automatically from the filename of the config file." >&2; + exit; +} + +INPUT="${INPUT:-$HOME/rainfallwater_records_embed.jsonl.gz}"; + +if [[ -z "${INPUT}" ]]; then + echo -e "Error: No INPUT environment variable specified.\n" >&2; + show_help; + exit 0; +fi + +if [[ ! -d "${INPUT}" ]]; then + echo -e "Error: The input filepath at '${INPUT}' containing the input .tfrecord dataset either doesn't exist or isn't a directory."; + show_help; + exit 1; +fi + + +CODE="_contrast_embed_umap"; + +if [[ -n "${POSTFIX}" ]]; then + echo -e ">>> Applying postfix of ${POSTFIX}" >&2; + CODE="${CODE}_${POSTFIX}"; +fi + +echo -e ">>> Input dirpath: ${INPUT}" >&2; +echo -e ">>> Code: ${CODE}" >&2; +echo -e ">>> Additional args: ${ARGS}"; + +filepath_output="output/gen$(date -u --rfc-3339=date)_${CODE}.png"; + +export PATH=$HOME/software/bin:$PATH; + +echo ">>> Installing requirements"; +conda run -n py38 pip install -r requirements.txt; +echo ">>> Training model"; +#shellcheck disable=SC2086 +/usr/bin/env time -v conda run -n py38 src/index.py pretrain-plot -i "${INPUT}" -o "${filepath_output}" ${ARGS}; +# src/index.py pretrain --input "${INPUT}" --output "${dir_output}" ${ARGS}; +echo ">>> exited with code $?"; diff --git a/aimodel/slurm-pretrain-predict.job b/aimodel/slurm-pretrain-predict.job new file mode 100755 index 0000000..88ae2d0 --- /dev/null +++ b/aimodel/slurm-pretrain-predict.job @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +#SBATCH -J RainUMAP +#SBATCH -N 1 +#SBATCH -n 14 +#SBATCH --gres=gpu:1 +#SBATCH -o %j.%N.%a.out.log +#SBATCH -e %j.%N.%a.err.log +#SBATCH -p gpu05 +#SBATCH --time=5-00:00:00 + +module load utilities/multi +module load readline/7.0 +module load gcc/10.2.0 +module load cuda/11.5.0 + +module load python/anaconda/4.6/miniconda/3.7 + + +show_help() { + echo -e "Usage:" >&2; + echo -e " [INPUT=\"\$HOME/rainfallwater_records_embed.jsonl.gz\"] [POSTFIX=\"some_string\"] sbatch slurm-pretrain-plot.job" >&2; + echo -e "" >&2; + echo -e "....where:" >&2; + echo -e " INPUT The path to the input file (.jsonl.gz) containing the embedded data to plot (see the pretrain-predict subcommand for embedding data)" >&2; + echo -e " CHECKPOINT The filepath to the checkpoint (.hdf5) file to load" >&2; + echo -e " POSTFIX Arbitrary string to add to filename." >&2; + echo -e "" >&2; + echo -e "The code used to identify the run is taken automatically from the filename of the config file." >&2; + exit; +} + +INPUT="${INPUT:-$HOME/rainfallwater_records_tfrecord}"; +CHECKPOINT="${CHECKPOINT:-output/2022-09-07_pretrain_contrast_dim1024/checkpoints/checkpoint_weights_e32_loss0.693.hdf5}" + +if [[ -z "${INPUT}" ]]; then + echo -e "Error: No INPUT environment variable specified.\n" >&2; + show_help; + exit 0; +fi +if [[ -z "${CHECKPOINT}" ]]; then + echo -e "Error: No CHECKPOINT environment variable specified\n" >&2; + show_help; + exit 0; +fi + +if [[ ! -d "${INPUT}" ]]; then + echo -e "Error: The input filepath at '${INPUT}' containing the input .tfrecord dataset either doesn't exist or isn't a directory."; + show_help; + exit 1; +fi + + +CODE="_contrast_embed_umap"; + +if [[ -n "${POSTFIX}" ]]; then + echo -e ">>> Applying postfix of ${POSTFIX}" >&2; + CODE="${CODE}_${POSTFIX}"; +fi + +echo -e ">>> Input dirpath: ${INPUT}" >&2; +echo -e ">>> Code: ${CODE}" >&2; +echo -e ">>> Additional args: ${ARGS}"; + +filepath_output="output/rainfallwater_records_embed_$(date -u --rfc-3339=date)${CODE}.jsonl.gz"; + +export PATH=$HOME/software/bin:$PATH; + +echo ">>> Installing requirements"; +conda run -n py38 pip install -r requirements.txt; +echo ">>> Training model"; +#shellcheck disable=SC2086 +/usr/bin/env time -v conda run -n py38 src/index.py pretrain-predict --input "${INPUT}" --output "${filepath_output}" -c "${CHECKPOINT}" +echo ">>> exited with code $?";