research-rainfallradar/aimodel/src/UMAP-Random.ipynb

133 lines
738 KiB
Plaintext
Raw Normal View History

2022-09-27 14:52:45 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "626ac255-dcad-448a-a12b-1079fd0f1b46",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/bryan-smithl/.local/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import os\n",
"\n",
"import torch\n",
"import umap\n",
"import umap.plot\n",
"import numpy as np\n",
"import matplotlib.pylab as plt\n",
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2be2a6a2-c712-4218-866d-f29db09eb8b4",
"metadata": {},
"outputs": [],
"source": [
"# Synthetic stand-in for real image features: uniform random values in [0, 1).\n",
"# Seed torch's global generator first so the same tensor is produced on every\n",
"# Restart Kernel -> Run All.\n",
"SEED = 42\n",
"torch.manual_seed(SEED)\n",
"\n",
"features_count = 100000  # number of synthetic feature vectors (rows)\n",
"feature_dim = 512  # length of each feature vector (columns)\n",
"image_features = torch.rand([features_count, feature_dim])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9d130099-0c79-4e58-9e73-4c64ea8b9760",
"metadata": {},
"outputs": [],
"source": [
"# Fit UMAP with its default hyperparameters on the features converted to NumPy.\n",
"# NOTE(review): no random_state is passed, so the embedding may differ between\n",
"# runs; fixing it should make the fit repeatable but is documented to limit\n",
"# UMAP's parallelism - confirm the trade-off before changing.\n",
"dimreducer = (\n",
"    umap.UMAP()\n",
"    .fit(image_features.numpy())\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c6c4b3f9-e778-43aa-8c30-de1ab4427cb9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAACroAAAPkCAYAAAD8zXxvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xT9f7H8XeSDjqBQgEZMpWtgiCggiAogjIEREAQFdy4rz/H9V7BPa4KghtFEGUroDJkD0GGbESW7E1ZbaFtmpzfHyGhaU7SdNEAr6ePSvJd53tOTtY5n3yOxTAMQwAAAAAAAAAAAAAAAAAAAECIsRb1BAAAAAAAAAAAAAAAAAAAAAAzBLoCAAAAAAAAAAAAAAAAAAAgJBHoCgAAAAAAAAAAAAAAAAAAgJBEoCsAAAAAAAAAAAAAAAAAAABCEoGuAAAAAAAAAAAAAAAAAAAACEkEugIAAAAAAAAAAAAAAAAAACAkEegKAAAAAAAAAAAAAAAAAACAkESgKwAAAAAAAAAAAAAAAAAAAEISga4AAAAAAAAAAAAAAAAAAAAISQS6AgAAAABQiE6ePKlly5bp22+/1QsvvKCOHTsW9ZRC3vz582WxWHz+5s+fX9RTwwXm22+/Nd2Xdu7cWdRTwyVo2bJlatmypeLj43XllVdq8ODBcjqdRT0tAAAAAAAAAAh5YUU9AQAAAAAALladO3fWlClTinoaAIAitmrVKrVo0UIZGRmSpOTkZD3zzDM6cOCA3n333SKeHQAAAAAAAACENgJdAQAAAAAoJCdOnCjqKQC4wB0/flyLFi3Snj17lJqaqvLly6tWrVpq1KhRvsZNTU3VggULtHv3bp08eVLlypVT9erVdf3118tqzftFoDIyMrRo0SLt2LFDR48eVWJioqpUqaLmzZsrIiIiz+M6nU4tWbJE27Zt08GDB5WQkKBKlSqpRYsWiomJyfO4krRy5Upt2rRJ+/fvV3x8vCpUqKDmzZurZMmS+Ro3q08++cQT5JrV0KFD9frrr+dr2wAAAAAAAADAxY5AVwAAAACAqQkTJmjjxo3auXOndu7cqX379unkyZNKTk6W3W6X1WpVbGys4uLiVKlSJdWqVUsNGjTQ7bffripVqhT19AEvO3fuVNWqVU3rDMMotP41a9bUli1bfMpnzpypW2+9NcflZnffffdp5MiRPuVLly5V06ZNcz2eJL355pt65ZVXfMqvv/56/f7777keb9euXXrttde0evVq7d+/XydOnFB6erpXm8jISM/rR9WqVVWrVi01btxYt99+u8qUKZOn9QgVu3btUrt27bRp0yZPWd++ffXtt9/mapwdO3bohRde0NSpU322nyRVr15dzz77rB599FFZLJagxz1y5IhefvlljR07VikpKT715cuX16OPPqoXXnhB4eHhQY+bmpqqgQMHasSIEUpKSvKpT0hIUN++ffXaa68pNjY26HHtdrvef/99ffrpp9q3b59PfUxMjLp37653331XiYmJQY9rGIa++OILffjhh9q6datPfUREhDp06KD33ntP1apVC3pcf3bv3m1afubMGR09elTly5fP9zIAAAAAAAAA4GJlMYI5mwMAAAAAuORUqVJFu3btylPfxo0b6z//+Y86dOhQwLO6sLRs2VILFizwKeereGDz589Xq1atfMrnzZunli1b5mnMogh0dTqdio6ONg1SbNSokVasWJHjcrP6+++/Va9ePTkcDp+677//Xr169crVeJKUnp6uKlWq6ODBg6b1eQmg9ff4BcNqtap169Z69dVXdcMNN+RpjKy+/fZb3X///T7lO3bsKJSA/OXLl6tTp04+2zO3ga5jx47VQw89pOTk5Bzbtm7dWpMmTVLx4sVzbDtv3jz16NFDhw8fzrFtgwYNNHXqVFWsWDHHtuvWrVOXLl20ffv2HNtWq1ZNkydPVv369XNsu2fPHnXu3FmrVq3KsW1iYqLGjRsX1L538u
RJdevWTbNnz86xbWxsrIYPH6677747x7aBPPDAAxoxYoRPeVRUlE6cOOGV0TU9PV1r1qzRpk2b9Ndff3n+7rvvPv33v//N1zwAAAAAAAAA4EKU9+uQAQAAAADgx4oVK9SxY0fdcccdphkDgUvB/v37TYNcJdel0qdPn56r8V5//XXTIFdJ+ueff3I9P0n67rvv/Aa5StL//ve/PI2bV06nU7NmzdKNN96oRx99VJmZmed1+XmVmZmpQYMG6YYbbgi4PYPxyy+/qHfv3kEFuUrSnDlzdOeddyojIyNgu5UrV6pDhw5BBblK0urVq3XbbbfpxIkTAdvt2LFDbdu2DSrIVXLtq7fddpvfDKdux44dU9u2bYMKcpVcmWo7deqk1atXB2yXnp6uTp06BRXkKkkpKSnq3bt3rp+v2T322GOmGXIff/xxryBX6VyA+f3336/3339fv/76q3bs2CGn05mvOQAAAAAAAADAhYpAVwAAAABAofn111/VunVr2e32op4KcN7llBH5jTfeCHqsbdu2aezYsX7rcwoaNGMYhj744IOAbX766ac8B9Hm1+eff64+ffoUybJzY/78+WrcuLEGDhyY78Dcffv2qUePHn4Dmv2ZN29ewEyfqamp6ty5s1JTU3M17saNG/XII4/4rXc6nerWrVuug3v379+vu+++O2A25X79+mnTpk25Gjc5OVl33nmn0tLS/LZ58cUXTTNtB5KZmRl0Jlx/GjVqpAULFqhFixaKjY1VjRo19L///U/vvPNOnscEAAAAAAAAgEsFga4AAAAAgDyxWCxBtVu+fLneeuutQp4NEHr27dsXsH7JkiVBB9y9++67AbM55rQsMz///LP+/vvvgG2cTqc++uijXI8diM1mC/r1Y+zYsZowYUKBLr+grF69Wh07dlSrVq20Zs2aAhnzxRdfNA1GjY6OVr9+/fT888+rfv36pn0HDx6snTt3mta98847pvtIWFiYevXqpRdeeEHXX3+9ad9x48Zp6dKlpnUjRozwm3G1Y8eOeumll9S2bVvT+j/++EPjxo0zrZs7d64mT55sWnfzzTfrxRdfVNeuXWW1+h7a3LVrlwYPHmzad/Pmzfrkk09M6xo3bqwXXnhBffr08cmwKkmnTp0KGEwcjGbNmmnBggVKTk7W1q1b9dxzz8lms+VrTAAAAAAAAAC4FBDoCgAAAADIlZ9//lkOh0NOp1NpaWnasWOHJk2apC5duvjtM3To0AvmEuRAQdm7d2+ObYLJ5njgwAGNGjUq38vK7n//+59PmVng4IgRI3T8+PFcj5/d5ZdfroyMDGVmZiozM1OHDx/Wxo0b9e2336pRo0Z++xV0oG1+HTx4ULfccosaNmyon3/+ucDG3blzp77//nuf8ri4OC1btkzDhw/Xe++9p9WrV6tbt24+7dLT0/Xhhx/6lKempppuw7CwMM2YMUPff/+93nnnHf3+++969tlnTef29ttvm5a/+eabpuUjR47UlClT9NZbb2nGjBkaMmSIaTt/P4J4/fXXTcsHDRqkOXPm6O2339bEiRM1adIk06Dp999/3zST+DvvvGNa/sADD2jZsmV65513NGrUKC1cuFDFihXzaTd8+PB8ZXUFAAAAAAAAAORNWFFPAAAAAABwYYmNjfUEw0VGRqpKlSqqUqWKunTpos8//1yPPvqoT5+kpCQtWrRIrVq1Mh0zOTlZmzZt0j///KPDhw8rJSVFNptNJUqUUPny5dWkSROVKVMm13NNTk7Wtm3btHv3bh04cECpqak6c+aMoqKilJCQoGrVqqlp06aKjIzM9dhu//zzj5YtW6ZDhw7JbrerQoUKqlu3rq6++uo8j5lVamqqFi5cqD179igpKUnFixfXZZddpubNm6t06dJ5GjMlJUVbt27Vzp07PdslPT1dxYsXV7ly5dSiRQuVLVvWp9/Jkye1ePFibd++XampqSpVqpQqVKigFi1aKC4uLr+retE5cOBAjm1mzJih9e
vX+83SKUlDhgxRRkZGwHH279+fq7mtXr1aixYt8ikfOHCgT9bK1NRUffXVV/q///u/XC0jO4vFovDwcEmugNrExEQ
"text/plain": [
"<Figure size 3824x1080 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Draw one wide figure: the UMAP embedding on the left and a parallel\n",
"# coordinates plot of the raw features on the right.\n",
"px = 1 / plt.rcParams['figure.dpi']  # matplotlib sizes are in inches; this converts pixels to inches\n",
"width = 3824\n",
"height = 1080\n",
"\n",
"plt.rc(\"font\", size=20)\n",
"plt.rc(\"font\", family=\"Ubuntu\")\n",
"figure = plt.figure(figsize=(width * px, height * px))\n",
"\n",
"# 1: UMAP\n",
"# Keep the Axes returned by add_subplot rather than looking it up again via\n",
"# figure.get_axes()[i] or relying on pyplot's implicit \"current axes\".\n",
"umap_ax = figure.add_subplot(1, 2, 1)\n",
"umap.plot.points(dimreducer, ax=umap_ax)\n",
"umap_ax.set_title(\"UMAP Dimensionality Reduction\", fontsize=20)\n",
"\n",
"# 2: Parallel coordinates\n",
"parallel_ax = figure.add_subplot(1, 2, 2)\n",
"# Only the first 25 feature columns are plotted.\n",
"dataframe = pandas.DataFrame(image_features[:, 0:25].numpy())\n",
"# parallel_coordinates needs a class column; every row gets the same label.\n",
"dataframe[\"Label\"] = [1] * features_count\n",
"pandas.plotting.parallel_coordinates(\n",
"    dataframe,\n",
"    \"Label\",\n",
"    ax=parallel_ax,\n",
"    use_columns=False,\n",
"    axvlines=False,\n",
"    sort_labels=True,\n",
")\n",
"parallel_ax.set_title(\"Parallel coordinates plot\", fontsize=20)\n",
"\n",
"figure.suptitle(f\"Random | UMAP | {features_count} items\", fontsize=28, weight=\"bold\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1d5bda1-2317-4d4f-882d-b79007191bb4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}