research-rainfallradar/aimodel/src/rainfallwater_data_explorer.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "1f6fdebf-69c5-46ab-a5a8-f9c91f000ff3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "import tensorflow as tf\n",
    "from datetime import datetime\n",
    "\n",
    "from lib.dataset.dataset_mono import dataset_mono_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "4a6a94dd-5c4e-4481-bfae-eb5ccf6214db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input directory; handed to dataset_mono_predict() as dirpath_input further down.\n",
    "# Alternative input directory, currently disabled:\n",
    "# dirpath = \"/home/bryan-smithl/Documents/repos/PhD-Rainfall-Radar/aimodel/output/rainfallwater_records_embed_2022-10-06_contrast_embed_umap_d512e19_tfrecord\"\n",
    "dirpath = \"/mnt/research-data/main/rainfallwater_records_tfrecord\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d1aa931a-ecf2-4134-8e70-87db4ae60736",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DEBUG DATASET:rainfall shape [7, 174, 105] / w 105 h 174\n",
      "DEBUG DATASET:water shape [348, 210]\n",
      "DEBUG DATASET:water_threshold 0.1\n",
      "DEBUG DATASET:water_bins 2\n",
      "DEBUG DATASET:output_size 100\n",
      "DEBUG DATASET:input_size 100\n",
      "DEBUG DATASET:water_offset x 55 y 124\n",
      "DEBUG DATASET:rainfall_offset x 3 y 37\n",
      "DEBUG:dataset BEFORE_SQUEEZE water (100, 100, 1)\n",
      "DEBUG:dataset AFTER_SQUEEZE water (100, 100)\n",
      "DEBUG DATASET_OUT:rainfall shape (100, 100, 7)\n",
      "DEBUG DATASET_OUT:water shape (100, 100)\n"
     ]
    }
   ],
   "source": [
    "# parallel_reads_multiplier=1.5 mangles the ordering of the samples; that is\n",
    "# acceptable here because the following cells only count label values.\n",
    "dataset = dataset_mono_predict(\n",
    "\tdirpath_input=dirpath,\n",
    "\twater_threshold=0.1,\n",
    "\t# shape_water_desired=[94, 94],\n",
    "\tparallel_reads_multiplier=1.5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "456f7c8f-3f7d-4a2c-b361-900588c49612",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processed 23100 batches\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "{msg}\n"
     ]
    }
   ],
   "source": [
    "# Tally how often each label value occurs across the whole dataset.\n",
    "# `counts` keeps one int64 slot per class and is read by the percentages cell.\n",
    "num_classes = 2\n",
    "i = 0\n",
    "counts = tf.zeros([num_classes], dtype=tf.int64)\n",
    "for (items, label) in dataset:\n",
    "\tlabel = tf.cast(label, tf.int32)\n",
    "\t# bincount() only returns max(label)+1 bins, so a batch made up purely of\n",
    "\t# class 0 would give a length-1 tensor that broadcasts into *both* slots of\n",
    "\t# `counts`. minlength pins the result to one bin per class; maxlength is\n",
    "\t# deliberately left unset so an unexpected label still fails loudly on the add.\n",
    "\tstep_counts = tf.math.bincount(tf.reshape(label, [-1]), minlength=num_classes)\n",
    "\tcounts += tf.cast(step_counts, dtype=tf.int64)\n",
    "\ti += 1\n",
    "\tif i % 100 == 0:\n",
    "\t\t# Carriage return keeps the progress report on a single line.\n",
    "\t\tsys.stderr.write(f\"Processed {i} batches\\r\")\n",
    "\n",
    "msg = f\"Complete at {datetime.now()}. Counts:\\n\" + \"\\n\".join(\n",
    "\tf\"{bin_index}: {count}\" for bin_index, count in enumerate(counts.numpy().tolist())\n",
    ")\n",
    "print(f\"\\n{msg}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "74a94efe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Total 14817920000 cells, Percentages:\n",
      "0: 77.92587213320088%\n",
      "1: 22.074127866799117%\n"
     ]
    }
   ],
   "source": [
    "# Express each class tally as a percentage of all counted cells.\n",
    "total = tf.math.reduce_sum(counts)\n",
    "\n",
    "# Divide in float64 first, then scale, so the int64 tallies are not truncated.\n",
    "fractions = tf.cast(counts, tf.float64) / tf.cast(total, tf.float64)\n",
    "percentages = fractions * 100.0\n",
    "\n",
    "msg = f\"Total {total.numpy()} cells, Percentages:\\n\" + \"\\n\".join(\n",
    "    f\"{class_index}: {percent}%\"\n",
    "    for class_index, percent in enumerate(percentages.numpy().tolist())\n",
    ")\n",
    "print(f\"\\n{msg}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"execution_count": 12,`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"id": "1f6fdebf-69c5-46ab-a5a8-f9c91f000ff3",`
			`"metadata": {},`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"outputs": [],`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"source": [`
			`"import sys\n",`
			`"\n",`
			`"import tensorflow as tf\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"from datetime import datetime\n",`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"from lib.dataset.dataset_mono import dataset_mono_predict"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"execution_count": 13,`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"id": "4a6a94dd-5c4e-4481-bfae-eb5ccf6214db",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"# dirpath=\"/home/bryan-smithl/Documents/repos/PhD-Rainfall-Radar/aimodel/output/rainfallwater_records_embed_2022-10-06_contrast_embed_umap_d512e19_tfrecord\"\n",`
			`"dirpath=\"/mnt/research-data/main/rainfallwater_records_tfrecord\""`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"execution_count": 14,`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"id": "d1aa931a-ecf2-4134-8e70-87db4ae60736",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"DEBUG DATASET:rainfall shape [7, 174, 105] / w 105 h 174\n",`
			`"DEBUG DATASET:water shape [348, 210]\n",`
			`"DEBUG DATASET:water_threshold 0.1\n",`
			`"DEBUG DATASET:water_bins 2\n",`
			`"DEBUG DATASET:output_size 100\n",`
			`"DEBUG DATASET:input_size 100\n",`
			`"DEBUG DATASET:water_offset x 55 y 124\n",`
			`"DEBUG DATASET:rainfall_offset x 3 y 37\n",`
			`"DEBUG:dataset BEFORE_SQUEEZE water (100, 100, 1)\n",`
			`"DEBUG:dataset AFTER_SQUEEZE water (100, 100)\n",`
			`"DEBUG DATASET_OUT:rainfall shape (100, 100, 7)\n",`
			`"DEBUG DATASET_OUT:water shape (100, 100)\n"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`}`
			`],`
			`"source": [`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"dataset = dataset_mono_predict(\n",`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"\tdirpath_input=dirpath,\n",`
			`"\twater_threshold=0.1,\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"\t# shape_water_desired=[94, 94],\n",`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"\tparallel_reads_multiplier=1.5 # Mangles the ordering. For counting things this doesn't matter\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`")"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"execution_count": 15,`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"id": "456f7c8f-3f7d-4a2c-b361-900588c49612",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"Processed 23100 batches\r"`
			`]`
			`},`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"{msg}\n"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`}`
			`],`
			`"source": [`
			`"i = 0\n",`
			`"counts = tf.constant([0, 0], dtype=tf.int64)\n",`
			`"for (items, label) in dataset:\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"\tlabel = tf.cast(label, tf.int32)\n",`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`"\tstep_counts = tf.math.bincount(tf.reshape(label, [-1]))\n",`
			`"\tcounts += tf.cast(step_counts, dtype=tf.int64)\n",`
			`"\t# print(\"STEP counts\", counts, \"step_counts\", step_counts)\n",`
			`"\ti += 1\n",`
			`"\tif i % 100 == 0:\n",`
			`"\t\tsys.stderr.write(f\"Processed {i} batches\\r\")\n",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"\n",`
			`"msg = f\"Complete at {datetime.now()}. Counts:\\n\"+\"\\n\".join([ str(i)+\": \"+str(count) for i,count in enumerate(counts.numpy().tolist()) ])\n",`
			`"print(f\"\\n{msg}\")\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 18,`
			`"id": "74a94efe",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"\n",`
			`"Total 14817920000 cells, Percentages:\n",`
			`"0: 77.92587213320088%\n",`
			`"1: 22.074127866799117%\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"total = tf.math.reduce_sum(counts)\n",`
			`"\n",`
			`"percentages = (tf.cast(counts, tf.float64) / tf.cast(total, tf.float64)) * 100.0\n",`
			`"\n",`
			`"msg = f\"Total {total.numpy()} cells, Percentages:\\n\"+\"\\n\".join(\n",`
			`" [str(i)+\": \"+str(count)+\"%\" for i, count in enumerate(percentages.numpy().tolist())]\n",`
			`")\n",`
			`"print(f\"\\n{msg}\")"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3.10.6 64-bit",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
rr de: update to dataset_mono_predict 2023-11-03 15:48:53 +00:00			`"version": "3.10.12"`
count water/nowater pixels in Jupyter Notebook 2022-10-24 17:05:34 +00:00			`},`
			`"vscode": {`
			`"interpreter": {`
			`"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"`
			`}`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`