#!/usr/bin/env bash
#SBATCH -J RainUMAP
#SBATCH -N 1
#SBATCH -n 14
#SBATCH -o %j.%N.%a.pretrain-plot.out.log
#SBATCH -e %j.%N.%a.pretrain-plot.err.log
#SBATCH -p highmem
#SBATCH --time=3-00:00:00
#SBATCH --exclusive

# Slurm batch job that runs `src/index.py pretrain-plot` on an embedded
# .jsonl.gz file and writes a PNG to output/.
#
# Environment variables (all optional):
#   INPUT    Path to the embedded .jsonl.gz file to plot.
#            Defaults to $HOME/rainfallwater_records_embed.jsonl.gz
#   POSTFIX  Arbitrary string appended to the run code / output filename.
#   ARGS     Extra arguments, word-split and passed through to src/index.py.
#
# Exit status: that of the `conda run ... src/index.py pretrain-plot` command,
# or 1 if the input file or output directory cannot be used.
#
# NOTE(review): `set -euo pipefail` is deliberately not enabled here because
# the `module` shell function may not be safe under -e/-u on every cluster -
# confirm before turning strict mode on. Critical steps are checked explicitly.

module load utilities/multi
module load readline/7.0
module load gcc/10.2.0
module load cuda/11.5.0
module load python/anaconda/4.6/miniconda/3.7

# Prints usage information to stderr.
# Does NOT exit: the caller chooses the exit status, so an error path can no
# longer terminate the job with status 0 by accident.
show_help() {
  echo -e "Usage:" >&2;
  echo -e "    [INPUT=\"\$HOME/rainfallwater_records_embed.jsonl.gz\"] [POSTFIX=\"some_string\"] [ARGS=\"extra arguments\"] sbatch slurm-pretrain-plot.job" >&2;
  echo -e "" >&2;
  echo -e "....where:" >&2;
  echo -e "    INPUT     The path to the input file (.jsonl.gz) containing the embedded data to plot (see the pretrain-predict subcommand for embedding data)" >&2;
  echo -e "    POSTFIX   Arbitrary string to add to filename." >&2;
  echo -e "    ARGS      Additional arguments to pass through to 'src/index.py pretrain-plot'." >&2;
  echo -e "" >&2;
  echo -e "The code used to identify the run is '_contrast_embed_umap', with POSTFIX appended if specified." >&2;
}

# INPUT always ends up non-empty thanks to the default, so no separate
# "is it set?" check is needed.
INPUT="${INPUT:-$HOME/rainfallwater_records_embed.jsonl.gz}";

# The input must be an existing, readable regular file (-r alone would also
# accept a directory).
if [[ ! -f "${INPUT}" || ! -r "${INPUT}" ]]; then
  printf "Error: The input file at '%s' either doesn't exist, isn't a regular file, or isn't readable.\n\n" "${INPUT}" >&2;
  show_help;
  exit 1;
fi

CODE="_contrast_embed_umap";

if [[ -n "${POSTFIX:-}" ]]; then
  echo -e ">>> Applying postfix of ${POSTFIX}" >&2;
  CODE="${CODE}_${POSTFIX}";
fi

echo -e ">>> Input filepath: ${INPUT}" >&2;
echo -e ">>> Code: ${CODE}" >&2;
echo -e ">>> Additional args: ${ARGS:-}";

filepath_output="output/gen$(date -u --rfc-3339=date)_${CODE}.png";

# Make sure the directory the plot is written to exists.
if ! mkdir -p "$(dirname "${filepath_output}")"; then
  echo "Error: Failed to create the output directory for '${filepath_output}'." >&2;
  exit 1;
fi

export PATH=$HOME/software/bin:$PATH;

echo ">>> Installing requirements";
# Best-effort, as before: a failed install is reported but does not abort the
# job, since the environment may already have everything it needs.
if ! conda run -n py38 pip install -r requirements.txt; then
  echo ">>> Warning: installing requirements failed; continuing anyway" >&2;
fi

echo ">>> Plotting embeddings";
# ARGS is intentionally unquoted so that it word-splits into separate arguments.
#shellcheck disable=SC2086
conda run -n py38 /usr/bin/env time -v src/index.py pretrain-plot -i "${INPUT}" -o "${filepath_output}" ${ARGS:-};
exit_code=$?;

echo ">>> exited with code ${exit_code}";

# Propagate the real status so Slurm records a failed run as failed rather
# than reporting the exit status of the echo above.
exit "${exit_code}";