From c0a9cb12d818bf54994dcb1ae06b745a0f995efc Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 10 Aug 2022 19:03:25 +0100
Subject: [PATCH] ai: start creating initial model implementation.

it's not hooked up to the CLI yet though.
Focus is still on ensuring the dataset is in the right format though
---
 aimodel/src/lib/ai/RainfallWaterContraster.py | 85 +++++++++++++++++++
 .../ai/components/LayerCheeseMultipleOut.py   | 26 ++++++
 .../ai/components/LayerContrastiveEncoder.py  | 66 ++++++++++++++
 .../src/lib/ai/components/LossContrastive.py  | 37 ++++++++
 aimodel/src/lib/ai/components/__init__.py     |  0
 aimodel/src/lib/ai/helpers/__init__.py        |  2 +
 aimodel/src/lib/ai/helpers/make_callbacks.py  | 25 ++++++
 aimodel/src/lib/ai/helpers/summarywriter.py   | 31 +++++++
 .../lib/ai/model_rainfallwater_contrastive.py | 43 ++++++++++
 aimodel/src/lib/io/readfile.py                |  7 ++
 aimodel/src/lib/io/writefile.py               |  6 ++
 11 files changed, 328 insertions(+)
 create mode 100644 aimodel/src/lib/ai/RainfallWaterContraster.py
 create mode 100644 aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
 create mode 100644 aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
 create mode 100644 aimodel/src/lib/ai/components/LossContrastive.py
 create mode 100644 aimodel/src/lib/ai/components/__init__.py
 create mode 100644 aimodel/src/lib/ai/helpers/__init__.py
 create mode 100644 aimodel/src/lib/ai/helpers/make_callbacks.py
 create mode 100644 aimodel/src/lib/ai/helpers/summarywriter.py
 create mode 100644 aimodel/src/lib/ai/model_rainfallwater_contrastive.py
 create mode 100644 aimodel/src/lib/io/readfile.py
 create mode 100644 aimodel/src/lib/io/writefile.py

diff --git a/aimodel/src/lib/ai/RainfallWaterContraster.py b/aimodel/src/lib/ai/RainfallWaterContraster.py
new file mode 100644
index 0000000..a233c5b
--- /dev/null
+++ b/aimodel/src/lib/ai/RainfallWaterContraster.py
@@ -0,0 +1,85 @@
+import os
+import io
+import re
+import sys
+import json
+
+import tensorflow as tf
+
+from ..io.readfile import readfile
+from ..io.writefile import writefile
+
+from .model_rainfallwater_contrastive import model_rainfallwater_contrastive
+from .helpers import make_callbacks
+from .helpers import summarywriter
+from .components.LayerContrastiveEncoder import LayerContrastiveEncoder
+from .components.LayerCheeseMultipleOut import LayerCheeseMultipleOut
+from .helpers.summarywriter import summarywriter
+
+class RainfallWaterContraster(object):
+	"""Wraps the rainfall / water contrastive model: builds or loads it, trains it, and embeds datasets with it."""
+	def __init__(self, dir_output=None, filepath_checkpoint=None, epochs=50, batch_size=64, **kwargs):
+		"""Creates a new model wrapper, in training mode unless a checkpoint is given.
+		
+		Args:
+			dir_output (str, optional): Directory the summary, params & training output are written to. Required if filepath_checkpoint is not given, otherwise an Exception is raised.
+			filepath_checkpoint (str, optional): Saved model to load instead of building a new one.
+			epochs (int, optional): The number of epochs that train() runs for. Defaults to 50.
+			batch_size (int, optional): Batch size passed on to model_rainfallwater_contrastive(). Defaults to 64.
+			**kwargs: Additional arguments passed on to model_rainfallwater_contrastive().
+		"""
+		super(RainfallWaterContraster, self).__init__()
+		self.dir_output = dir_output
+		self.epochs = epochs
+		self.kwargs = kwargs
+		self.batch_size = batch_size
+		
+		if filepath_checkpoint is None:
+			# Validate before building, so a missing dir_output fails fast rather than after constructing the model.
+			if self.dir_output is None:
+				raise Exception("Error: dir_output was not specified, and since no checkpoint was loaded training mode is activated.")
+			self.model = self.make_model()
+			# makedirs copes with missing parent directories and with the directory already existing.
+			os.makedirs(self.dir_output, exist_ok=True)
+			
+			self.filepath_summary = os.path.join(self.dir_output, "summary.txt")
+			summarywriter(self.model, self.filepath_summary)
+			writefile(os.path.join(self.dir_output, "params.json"), json.dumps(self.model.get_config()))
+		else:
+			self.model = self.load_model(filepath_checkpoint)
+	
+	@staticmethod
+	def from_checkpoint(filepath_checkpoint, filepath_hyperparams):
+		"""Creates an instance from a saved model plus a JSON file holding the constructor's keyword arguments."""
+		hyperparams = json.loads(readfile(filepath_hyperparams))
+		return RainfallWaterContraster(filepath_checkpoint=filepath_checkpoint, **hyperparams)
+	
+	def make_model(self):
+		"""Builds a brand-new model via model_rainfallwater_contrastive()."""
+		return model_rainfallwater_contrastive(batch_size=self.batch_size, **self.kwargs)
+	
+	def load_model(self, filepath_checkpoint):
+		"""
+		Loads a saved model from the given filename.
+		filepath_checkpoint (string): The filepath to load the saved model from.
+		"""
+		return tf.keras.models.load_model(filepath_checkpoint, custom_objects={
+			"LayerContrastiveEncoder": LayerContrastiveEncoder,
+			"LayerCheeseMultipleOut": LayerCheeseMultipleOut
+		})
+	
+	def train(self, dataset_train, dataset_validate):
+		"""Fits the model for self.epochs epochs and returns whatever model.fit() returns."""
+		return self.model.fit(
+			dataset_train, validation_data=dataset_validate,
+			epochs=self.epochs, callbacks=make_callbacks(self.dir_output)
+		)
+	
+	def embed(self, dataset):
+		"""Runs the first element of every batch through the model and returns the per-sample results as one flat list."""
+		result = []
+		for batch in dataset:
+			# Currently, the left and right should be the same
+			left, _ = tf.unstack(self.model(batch[0]), axis=-2)
+			result.extend(tf.unstack(left, axis=0))
+		return result
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
new file mode 100644
index 0000000..e2b0bbb
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
@@ -0,0 +1,26 @@
+import tensorflow as tf
+
+
+class LayerCheeseMultipleOut(tf.keras.layers.Layer):
+	"""Keras layer that stacks a list of input tensors into a single tensor, and holds a temperature variable."""
+	
+	def __init__(self, **kwargs):
+		"""Creates a new cheese multiple out layer.
+		Use this when a model has multiple outputs but a custom loss function needs to see all of them at once.
+		Inputs are expected to be in the form [ batch_size, feature_dim ], and this layer outputs in the form [ batch_size, concat, feature_dim ].
+		This layer also creates a temperature weight for contrastive learning models.
+		
+		Args:
+			**kwargs: Passed through unchanged to tf.keras.layers.Layer.
+		"""
+		super().__init__(**kwargs)
+		self.weights_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([0.07]))
+	
+	def get_config(self):
+		"""Returns the base layer config unchanged, as this layer takes no arguments of its own."""
+		return super().get_config()
+	
+	def call(self, inputs):
+		"""Stacks inputs (a list in the form [ rainfall, water ], each already encoded to [ batch_size, dim ]) along a new second-to-last axis."""
+		stacked = tf.stack(inputs, axis=-2)
+		return stacked
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py b/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
new file mode 100644
index 0000000..654c9d9
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
@@ -0,0 +1,66 @@
+import tensorflow as tf
+
+from tensorflow.keras.applications.resnet_v2 import ResNet50V2
+# from transformers import TFConvNextModel, ConvNextConfig
+from ..helpers.summarywriter import summarylogger
+
+class LayerContrastiveEncoder(tf.keras.layers.Layer):
+	"""Encoder layer: a ResNet50V2 backbone, global average pooling, then a Dense projection to feature_dim."""
+	
+	def __init__(self, input_width, input_height, channels, feature_dim=200, **kwargs):
+		"""Creates a new contrastive learning encoder layer.
+		While this is intended for contrastive learning, this can (in theory) be used anywhere as it's just a generic wrapper layer.
+		The key feature here is that it does not care about the input size or the number of channels.
+		Currently it uses a ResNetV2 internally, but an upgrade to ConvNeXt is planned once Tensorflow Keras' implementation comes out of nightly and into stable.
+		We would use ResNetRS (as it's technically superior), but the implementation is bad and in places outright *wrong* O.o
+		
+		Args:
+			input_width (int): The width of the input in pixels.
+			input_height (int): The height of the input in pixels.
+			channels (int): The number of channels in the input.
+			feature_dim (int, optional): The size of the features dimension in the output, which is in the form [ batch_size, feature_dim ]. Set to a low value (e.g. 25) to be able to plot a sensible parallel coordinates graph. Defaults to 200.
+		"""
+		super(LayerContrastiveEncoder, self).__init__(**kwargs)
+		
+		self.param_input_width	= input_width
+		self.param_input_height	= input_height
+		self.param_channels		= channels
+		self.param_feature_dim	= feature_dim
+		
+		# The main ResNet model that forms the encoder.
+		# It is created with randomly initialised weights (weights=None) and without the classification head.
+		self.encoder = ResNet50V2(
+			include_top=False,
+			input_shape=(self.param_input_width, self.param_input_height, self.param_channels),
+			weights=None,
+			pooling=None
+		)
+		# Dense layer that controls the size of the outputted feature dimension.
+		self.embedding = tf.keras.layers.Dense(self.param_feature_dim)
+		self.embedding_input_shape = [None, 2048] # The output shape of the above ResNet AFTER pooling.
+		
+		summarylogger(self.encoder)
+	
+	def get_config(self):
+		"""Returns the layer config; the keys mirror the __init__ argument names so the layer can be recreated from it."""
+		config = super(LayerContrastiveEncoder, self).get_config()
+		config["input_width"] = self.param_input_width
+		config["input_height"] = self.param_input_height
+		config["channels"] = self.param_channels
+		config["feature_dim"] = self.param_feature_dim
+		return config
+	
+	def build(self, input_shape):
+		"""Marks the layer as built and builds the Dense projection for the pooled encoder output."""
+		# NOTE(review): input_shape[0] looks like the batch dimension rather than a full shape - confirm this is intended.
+		super().build(input_shape=input_shape[0])
+		self.embedding.build(input_shape=tf.TensorShape([ *self.embedding_input_shape ]))
+	
+	def call(self, input_thing):
+		"""Encodes a batch of inputs, returning a tensor in the form [ batch_size, feature_dim ]."""
+		result = self.encoder(input_thing)
+		
+		# Global average pool over both spatial axes -> [ batch_size, channels ]. Unlike a square pooling window sized
+		# from the height alone, this is also correct when the encoder's feature map is not square.
+		result = tf.math.reduce_mean(result, axis=[1, 2])
+		return self.embedding(result)
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
new file mode 100644
index 0000000..fe0aa09
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -0,0 +1,37 @@
+import tensorflow as tf
+
+class LossContrastive(tf.keras.losses.Loss):
+	"""Contrastive loss over stacked rainfall / water embeddings: sample i of one side should match sample i of the other."""
+	def __init__(self, weight_temperature, batch_size):
+		"""Creates a new contrastive loss.
+		
+		Args:
+			weight_temperature (tf.Variable|float): Temperature; the logits are scaled by exp(weight_temperature), clipped to [ 0, 100 ].
+			batch_size (int): The nominal batch size. Stored for reference; the labels are sized from the actual batch.
+		"""
+		super(LossContrastive, self).__init__()
+		self.batch_size = batch_size
+		self.weight_temperature = weight_temperature
+	
+	def call(self, y_true, y_pred):
+		"""Calculates the loss for a single batch.
+		
+		Args:
+			y_true (tf.Tensor): Ignored - the labels are implicit in the pairing of the 2 embeddings.
+			y_pred (tf.Tensor): The model output, which is unstacked along axis -2 into the rainfall and water embeddings.
+		
+		Returns:
+			tf.Tensor: The average of the binary crossentropy taken along axis 0 and along axis 1 of the logits.
+		"""
+		rainfall, water = tf.unstack(y_pred, axis=-2)
+		
+		# Pairwise similarity of every rainfall embedding with every water embedding, scaled by the temperature.
+		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
+		
+		# Matching pairs sit on the diagonal. The labels are sized from the logits rather than self.batch_size so that a smaller final batch doesn't cause a shape mismatch.
+		labels			= tf.eye(tf.shape(logits)[0], dtype=tf.int32)
+		loss_rainfall	= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=0)
+		loss_water		= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=1)
+		
+		return (loss_rainfall + loss_water) / 2
+	
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/__init__.py b/aimodel/src/lib/ai/components/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aimodel/src/lib/ai/helpers/__init__.py b/aimodel/src/lib/ai/helpers/__init__.py
new file mode 100644
index 0000000..069b21c
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/__init__.py
@@ -0,0 +1,2 @@
+from .make_callbacks import make_callbacks
+from .summarywriter import summarywriter
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/helpers/make_callbacks.py b/aimodel/src/lib/ai/helpers/make_callbacks.py
new file mode 100644
index 0000000..9cb5c0b
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/make_callbacks.py
@@ -0,0 +1,25 @@
+import os
+
+import tensorflow as tf
+
+def make_callbacks(dirpath):
+	"""Creates the list of Keras callbacks used for training.
+	
+	Args:
+		dirpath (str): Output directory; checkpoints go in its "checkpoints" subdirectory and metrics in "metrics.tsv".
+	
+	Returns:
+		list: A ModelCheckpoint, a tab-separated CSVLogger, and a ProgbarLogger callback.
+	"""
+	dirpath_checkpoints = os.path.join(dirpath, "checkpoints")
+	# makedirs also creates dirpath itself if it is missing, and doesn't fail if the directory already exists.
+	os.makedirs(dirpath_checkpoints, exist_ok=True)
+	
+	return [
+		tf.keras.callbacks.ModelCheckpoint(
+			filepath=os.path.join(dirpath_checkpoints, "checkpoint_weights_e{epoch:d}_loss{loss:.3f}.hdf5"),
+			monitor="loss"
+		),
+		tf.keras.callbacks.CSVLogger(filename=os.path.join(dirpath, "metrics.tsv"), separator="\t"),
+		tf.keras.callbacks.ProgbarLogger()
+	]
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/helpers/summarywriter.py b/aimodel/src/lib/ai/helpers/summarywriter.py
new file mode 100644
index 0000000..9670acb
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/summarywriter.py
@@ -0,0 +1,31 @@
+import io
+
+from loguru import logger
+
+
+def summarylogger(model):
+    """
+    Writes the summary for a model with the default logging context.
+    Each line of the summary is emitted as a separate info-level loguru message.
+    model (tf.keras.Model): The model to generate the summary from.
+    """
+    def handle_line(line: str):
+        logger.info(line)
+    
+    model.summary(print_fn=handle_line)
+    
+
+def summarywriter(model, filepath_output, append=False):
+    """
+    Writes the summary for a model to a file in the specified location.
+    model (tf.keras.Model): The model to generate the summary from.
+    filepath_output (str):  The path to the file to write the summary to.
+    append (bool):          If True, add to the end of the file instead of overwriting it. Defaults to False.
+    """
+    
+    # The with block closes the file even if model.summary() raises part-way through.
+    with io.open(filepath_output, "a" if append else "w") as handle:
+        def handle_line(line: str):
+            handle.write(f"{line}\n")
+        
+        model.summary(print_fn=handle_line)
diff --git a/aimodel/src/lib/ai/model_rainfallwater_contrastive.py b/aimodel/src/lib/ai/model_rainfallwater_contrastive.py
new file mode 100644
index 0000000..97ea631
--- /dev/null
+++ b/aimodel/src/lib/ai/model_rainfallwater_contrastive.py
@@ -0,0 +1,43 @@
+from pickletools import optimize
+import tensorflow as tf
+
+from .components.LayerContrastiveEncoder import LayerContrastiveEncoder
+from .components.LayerCheeseMultipleOut import LayerCheeseMultipleOut
+from .components.LossContrastive import LossContrastive
+
+def model_rainfallwater_contrastive(shape_rainfall, shape_water, batch_size=64):
+	"""Builds and compiles the rainfall / water contrastive learning model.
+	
+	Args:
+		shape_rainfall (tuple): Rainfall input shape as ( width, height, channels ), without the batch dimension.
+		shape_water (tuple): Water input shape as ( width, height, channels ), without the batch dimension.
+		batch_size (int, optional): Batch size handed to LossContrastive. Defaults to 64.
+	
+	Returns:
+		tf.keras.Model: The compiled model; its single output stacks the rainfall & water embeddings.
+	"""
+	rainfall_width, rainfall_height, rainfall_channels = shape_rainfall
+	water_width, water_height, water_channels = shape_water
+	
+	input_rainfall = tf.keras.layers.Input(shape=shape_rainfall)
+	input_water = tf.keras.layers.Input(shape=shape_water)
+	rainfall = LayerContrastiveEncoder(
+		input_width=rainfall_width,
+		input_height=rainfall_height,
+		channels=rainfall_channels
+	)(input_rainfall)
+	water = LayerContrastiveEncoder(
+		input_width=water_width,
+		input_height=water_height,
+		channels=water_channels
+	)(input_water)
+	
+	# Keep a reference to the layer itself: the temperature variable lives on the layer, not on its output tensor.
+	layer_final = LayerCheeseMultipleOut()
+	final = layer_final([ rainfall, water ])
+	model = tf.keras.Model(inputs=[ input_rainfall, input_water ], outputs=final)
+	model.compile(
+		optimizer="Adam",
+		loss=LossContrastive(weight_temperature=layer_final.weights_temperature, batch_size=batch_size)
+	)
+	return model
\ No newline at end of file
diff --git a/aimodel/src/lib/io/readfile.py b/aimodel/src/lib/io/readfile.py
new file mode 100644
index 0000000..500947b
--- /dev/null
+++ b/aimodel/src/lib/io/readfile.py
@@ -0,0 +1,7 @@
+import io
+
+def readfile(filepath):
+	"""Reads the file at filepath in text mode and returns its entire content as a string."""
+	with io.open(filepath, "r") as handle:
+		# The with block closes the handle even if the read raises.
+		return handle.read()
\ No newline at end of file
diff --git a/aimodel/src/lib/io/writefile.py b/aimodel/src/lib/io/writefile.py
new file mode 100644
index 0000000..596c7c1
--- /dev/null
+++ b/aimodel/src/lib/io/writefile.py
@@ -0,0 +1,6 @@
+import io
+
+def writefile(filepath, content):
+	"""Writes the string content to the file at filepath in text mode, replacing anything already there."""
+	with io.open(filepath, "w") as handle:
+		handle.write(content)
\ No newline at end of file