#!/usr/bin/env bash
#
# Slurm batch job that converts a dataset from .jsonl.gz to .tfrecord.gz by
# running `src/index.mjs jsonl2tfrecord` inside the `py38` conda environment.
#
# Environment variables (both optional):
#   INPUT   Directory to read from.  Default: $HOME/rainfallwater_records
#   OUTPUT  Directory to write to.   Default: $HOME/rainfallwater_records_tfrecord
#
# NOTE(review): the script uses relative paths (../aimodel, ../rainfallwrangler,
# src/index.mjs), so it presumably has to be submitted from the rainfallwrangler
# directory - confirm.
#
# The shebang must stay on line 1, and nothing other than comments or blank
# lines may appear before the last #SBATCH line: sbatch stops reading
# directives at the first non-comment, non-whitespace line.
#
# Job name, then 1 node with 28 tasks.
#SBATCH -J Json2TfR
#SBATCH -N 1
#SBATCH -n 28
# stdout / stderr log files (%j = job id, %N = node name, %a = array index).
# NOTE(review): %a is only meaningful for job arrays - confirm that this job
# is submitted as one.
#SBATCH -o %j.%N.%a.jsonl2tfrecord.out.log
#SBATCH -e %j.%N.%a.jsonl2tfrecord.err.log
# Partition, and a wall-clock limit of 3 days (days-hours:minutes:seconds).
#SBATCH -p compute
#SBATCH --time=3-00:00:00

# Load the software stack through the cluster's environment modules.
module load utilities/multi
module load readline/7.0
module load gcc/10.2.0

# Not loaded at present; kept for reference.
# module load cuda/11.5.0

# NOTE(review): presumably this is what provides the `conda` command used
# further down - confirm.
module load python/anaconda/4.6/miniconda/3.7

# Source and target directories. A value already present in the environment
# wins; otherwise (unset or empty) the default under $HOME is used.
INPUT="${INPUT:-$HOME/rainfallwater_records}";
OUTPUT="${OUTPUT:-$HOME/rainfallwater_records_tfrecord}";

# Validate the settings before doing any work. Diagnostics go to stderr so
# that they land in the job's error log rather than in the normal output log.
#
# Exit codes: 1 = a required variable is empty, 3 = INPUT is not a usable
# directory.
#
# The two emptiness checks are purely defensive: as long as INPUT and OUTPUT
# are given non-empty defaults earlier in the script they cannot trigger.
if [[ -z "${INPUT}" ]]; then
  echo "Error: No input directory specified in the INPUT environment variable." >&2;
  exit 1;
fi

if [[ -z "${OUTPUT}" ]]; then
  echo "Error: No output directory specified in the OUTPUT environment variable." >&2;
  exit 1;
fi

if [[ ! -d "${INPUT}" ]]; then
  echo "Error: That input directory either doesn't exist, isn't a directory, or we don't have permission to access it." >&2;
  exit 3;
fi

# Put locally installed tools first on the search path so that they take
# precedence over system-wide ones.
export PATH="${HOME}/software/bin:${PATH}";

# Record the effective settings at the top of the job log. Both directories
# are printed so a log can be matched to the data it read and produced.
echo ">>> Settings";

echo "INPUT $INPUT";
echo "OUTPUT $OUTPUT";

# Install the Python dependencies listed in ../aimodel/requirements.txt into
# the py38 conda environment, then move to the rainfallwrangler directory,
# which the conversion step's relative path (src/index.mjs) depends on.
echo ">>> Installing requirements";
cd ../aimodel || { echo "Error: Failed to cd to ai model directory" >&2; exit 1; };
# A failed install is reported but deliberately not fatal: the job carries on
# exactly as before, only the failure is no longer silent.
conda run -n py38 pip install -r requirements.txt \
  || echo "Warning: Failed to install requirements from requirements.txt; continuing anyway" >&2;
cd ../rainfallwrangler || { echo "Error: Failed to cd back to rainfallwrangler directory" >&2; exit 1; };

# Run the conversion inside the py38 conda environment. The external `time`
# program (looked up on PATH via /usr/bin/env) wraps the converter, and -v
# makes it print a resource usage report when the converter finishes.
echo ">>> Converting dataset .jsonl.gz → .tfrecord.gz";
conda run -n py38 /usr/bin/env time -v src/index.mjs jsonl2tfrecord --verbose --source "${INPUT}" --target "${OUTPUT}";
# Capture the status immediately: any later command would overwrite $?.
exit_code=$?;
echo ">>> exited with code ${exit_code}";
# Propagate the converter's status. Without this the script would end with
# the status of the echo above (0), and a failed conversion would be reported
# to the scheduler as a success.
# NOTE(review): assumes this is the last step of the script - confirm.
exit "${exit_code}";