From c0a9cb12d818bf54994dcb1ae06b745a0f995efc Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs
Date: Wed, 10 Aug 2022 19:03:25 +0100
Subject: [PATCH] ai: start creating initial model implementation.

it's not hooked up to the CLI yet though.
Focus is still on ensuring the dataset is in the right format though
---
Reviewer note: the hunks below were hand-edited during review, so the blob
hashes on the "index" lines no longer match the content of the changed files.

 aimodel/src/lib/ai/RainfallWaterContraster.py | 101 +++++++++++++++++++
 .../ai/components/LayerCheeseMultipleOut.py   |  26 +++++
 .../ai/components/LayerContrastiveEncoder.py  |  64 ++++++++++++
 .../src/lib/ai/components/LossContrastive.py  |  36 +++++++
 aimodel/src/lib/ai/components/__init__.py     |   0
 aimodel/src/lib/ai/helpers/__init__.py        |   2 +
 aimodel/src/lib/ai/helpers/make_callbacks.py  |  25 +++++
 aimodel/src/lib/ai/helpers/summarywriter.py   |  30 ++++++
 .../lib/ai/model_rainfallwater_contrastive.py |  57 +++++++++++
 aimodel/src/lib/io/readfile.py                |   6 ++
 aimodel/src/lib/io/writefile.py               |   6 ++
 11 files changed, 353 insertions(+)
 create mode 100644 aimodel/src/lib/ai/RainfallWaterContraster.py
 create mode 100644 aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
 create mode 100644 aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
 create mode 100644 aimodel/src/lib/ai/components/LossContrastive.py
 create mode 100644 aimodel/src/lib/ai/components/__init__.py
 create mode 100644 aimodel/src/lib/ai/helpers/__init__.py
 create mode 100644 aimodel/src/lib/ai/helpers/make_callbacks.py
 create mode 100644 aimodel/src/lib/ai/helpers/summarywriter.py
 create mode 100644 aimodel/src/lib/ai/model_rainfallwater_contrastive.py
 create mode 100644 aimodel/src/lib/io/readfile.py
 create mode 100644 aimodel/src/lib/io/writefile.py

diff --git a/aimodel/src/lib/ai/RainfallWaterContraster.py b/aimodel/src/lib/ai/RainfallWaterContraster.py
new file mode 100644
index 0000000..a233c5b
--- /dev/null
+++ b/aimodel/src/lib/ai/RainfallWaterContraster.py
@@ -0,0 +1,101 @@
+import os
+import io
+import re
+import sys
+import json
+
+import tensorflow as tf
+
+from ..io.readfile import readfile
+from ..io.writefile import writefile
+
+from .model_rainfallwater_contrastive import model_rainfallwater_contrastive
+from .helpers import make_callbacks
+from .helpers import summarywriter
+from .components.LayerContrastiveEncoder import LayerContrastiveEncoder
+from .components.LayerCheeseMultipleOut import LayerCheeseMultipleOut
+from .helpers.summarywriter import summarywriter
+
+class RainfallWaterContraster(object):
+    def __init__(self, dir_output=None, filepath_checkpoint=None, epochs=50, batch_size=64, **kwargs):
+        """Creates a new rainfall / water contrastive learning model wrapper.
+        If filepath_checkpoint is not specified then a new model is created, otherwise the model is loaded from that checkpoint.
+
+        Args:
+            dir_output (str, optional): The directory to write output files to. Required if filepath_checkpoint is not specified.
+            filepath_checkpoint (str, optional): The filepath to a checkpoint to load the model from.
+            epochs (int, optional): The number of epochs to train for. Defaults to 50.
+            batch_size (int, optional): The batch size, which is passed on when a new model is created. Defaults to 64.
+            **kwargs: Any other arguments are passed to model_rainfallwater_contrastive() when a new model is created.
+
+        Raises:
+            Exception: If neither filepath_checkpoint nor dir_output are specified.
+        """
+        super(RainfallWaterContraster, self).__init__()
+
+        self.dir_output = dir_output
+        self.epochs = epochs
+        self.kwargs = kwargs
+        self.batch_size = batch_size
+
+        if filepath_checkpoint is None:
+            # Validate BEFORE building the model, so a missing dir_output fails fast
+            if self.dir_output is None:
+                raise Exception("Error: dir_output was not specified, and since no checkpoint was loaded training mode is activated.")
+            # exist_ok avoids a check-then-create race, & missing parent directories are created too
+            os.makedirs(self.dir_output, exist_ok=True)
+
+            self.model = self.make_model()
+            self.filepath_summary = os.path.join(self.dir_output, "summary.txt")
+
+            summarywriter(self.model, self.filepath_summary)
+            writefile(os.path.join(self.dir_output, "params.json"), json.dumps(self.model.get_config()))
+        else:
+            self.model = self.load_model(filepath_checkpoint)
+
+    @staticmethod
+    def from_checkpoint(filepath_checkpoint, filepath_hyperparams):
+        """Creates a new instance from a saved checkpoint.
+        filepath_checkpoint (string): The filepath to the checkpoint to load the model from.
+        filepath_hyperparams (string): The filepath to a JSON file containing an object whose items are passed to the constructor as keyword arguments.
+        """
+        hyperparams = json.loads(readfile(filepath_hyperparams))
+        return RainfallWaterContraster(filepath_checkpoint=filepath_checkpoint, **hyperparams)
+
+    def make_model(self):
+        """Makes a new model with the batch size & extra keyword arguments given to the constructor."""
+        return model_rainfallwater_contrastive(batch_size=self.batch_size, **self.kwargs)
+
+    def load_model(self, filepath_checkpoint):
+        """
+        Loads a saved model from the given filename.
+        filepath_checkpoint (string): The filepath to load the saved model from.
+        """
+        return tf.keras.models.load_model(filepath_checkpoint, custom_objects={
+            "LayerContrastiveEncoder": LayerContrastiveEncoder,
+            "LayerCheeseMultipleOut": LayerCheeseMultipleOut
+        })
+
+    def train(self, dataset_train, dataset_validate):
+        """Fits the model to dataset_train for self.epochs epochs, validating against dataset_validate.
+        Returns the return value of model.fit().
+        """
+        return self.model.fit(
+            dataset_train,
+            validation_data=dataset_validate,
+            epochs=self.epochs,
+            callbacks=make_callbacks(self.dir_output)
+        )
+
+    def embed(self, dataset):
+        """Passes the 1st item of every batch in the dataset through the model.
+        Returns a list with 1 tensor per sample, taken from the 1st of the 2 items on the 2nd-to-last axis of the model's output.
+        """
+        result = []
+        for batch in dataset:
+            result_batch = self.model(batch[0])
+            # Currently, the left and right should be the same
+            left, _ = tf.unstack(result_batch, axis=-2)
+            result.extend(tf.unstack(left, axis=0))
+
+        return result
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
new file mode 100644
index 0000000..e2b0bbb
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
@@ -0,0 +1,26 @@
+import tensorflow as tf
+
+
+class LayerCheeseMultipleOut(tf.keras.layers.Layer):
+
+    def __init__(self, **kwargs):
+        """Creates a new cheese multiple out layer.
+        This layer is useful if you have multiple outputs and a custom loss function that requires multiple inputs.
+        Basically, it just concatenates all inputs.
+        Inputs are expected to be in the form [ batch_size, feature_dim ], and this layer outputs in the form [ batch_size, concat, feature_dim ].
+        This layer also creates a temperature weight for contrastive learning models.
+        """
+        super(LayerCheeseMultipleOut, self).__init__(**kwargs)
+
+        self.weights_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([0.07]))
+
+    def get_config(self):
+        config = super(LayerCheeseMultipleOut, self).get_config()
+        return config
+
+    def call(self, inputs):
+        # inputs form: [ rainfall, water ]
+
+        # By this point, the above has already dropped through the encoder, so should be in the form [ batch_size, dim ]
+
+        return tf.stack(inputs, axis=-2)
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py b/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
new file mode 100644
index 0000000..654c9d9
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LayerContrastiveEncoder.py
@@ -0,0 +1,64 @@
+import tensorflow as tf
+
+from tensorflow.keras.applications.resnet_v2 import ResNet50V2
+# from transformers import TFConvNextModel, ConvNextConfig
+from ..helpers.summarywriter import summarylogger
+
+class LayerContrastiveEncoder(tf.keras.layers.Layer):
+
+    def __init__(self, input_width, input_height, channels, feature_dim=200, **kwargs):
+        """Creates a new contrastive learning encoder layer.
+        While this is intended for contrastive learning, this can (in theory) be used anywhere as it's just a generic wrapper layer.
+        The key feature here is that it does not care about the input size or the number of channels.
+        Currently it uses a ResNetV2 internally, but an upgrade to ConvNeXt is planned once Tensorflow Keras' implementation comes out of nightly and into stable.
+        We would use ResNetRS (as it's technically superior), but the implementation is bad and in places outright *wrong* O.o
+
+        Args:
+            input_width (int): The width of the input in pixels.
+            input_height (int): The height of the input in pixels.
+            channels (int): The number of channels in the input.
+            feature_dim (int, optional): The size of the features dimension in the output shape, which is in the form [ batch_size, feature_dim ]. Set to a low value (e.g. 25) to be able to plot a sensible parallel coordinates graph. Defaults to 200.
+        """
+        super(LayerContrastiveEncoder, self).__init__(**kwargs)
+
+        self.param_input_width = input_width
+        self.param_input_height = input_height
+        self.param_channels = channels
+        self.param_feature_dim = feature_dim
+
+        # The main ResNet model that forms the encoder.
+        # NOTE: Every instance of this layer creates its own ResNet, so 2 inputs only share an encoder if they go through the SAME layer instance.
+        self.encoder = ResNet50V2(
+            include_top=False,
+            input_shape=(self.param_input_width, self.param_input_height, self.param_channels),
+            weights=None,
+            pooling=None
+        )
+        # Dense layer that controls the size of the outputted feature dimension.
+        self.embedding = tf.keras.layers.Dense(self.param_feature_dim)
+        self.embedding_input_shape = [None, 2048] # The output shape of the above ResNet AFTER pooling.
+
+        summarylogger(self.encoder)
+
+    def get_config(self):
+        """Returns the config of this layer, including all the constructor arguments so that it can be recreated."""
+        config = super(LayerContrastiveEncoder, self).get_config()
+        config["input_width"] = self.param_input_width
+        config["input_height"] = self.param_input_height
+        # The key must match the name of the constructor argument, as the config is passed back to the constructor when loading
+        config["channels"] = self.param_channels
+        config["feature_dim"] = self.param_feature_dim
+        return config
+
+    def build(self, input_shape):
+        super().build(input_shape=input_shape)
+        self.embedding.build(input_shape=tf.TensorShape([ *self.embedding_input_shape ]))
+
+    def call(self, input_thing):
+        """Encodes the input, returning a tensor in the form [ batch_size, feature_dim ]."""
+        result = self.encoder(input_thing)
+
+        # Global average pool over BOTH spatial axes, so that non-square inputs are handled correctly too
+        result = tf.reduce_mean(result, axis=[1, 2])
+
+        return self.embedding(result)
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
new file mode 100644
index 0000000..fe0aa09
--- /dev/null
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -0,0 +1,36 @@
+import tensorflow as tf
+
+class LossContrastive(tf.keras.losses.Loss):
+    def __init__(self, weight_temperature, batch_size):
+        super(LossContrastive, self).__init__()
+        self.batch_size = batch_size
+        self.weight_temperature = weight_temperature
+
+    def call(self, y_true, y_pred):
+        rainfall, water = tf.unstack(y_pred, axis=-2)
+        print("LOSS:call y_true", y_true.shape)
+        print("LOSS:call y_pred", y_pred.shape)
+        print("BEFORE_RESHAPE rainfall", rainfall)
+        print("BEFORE_RESHAPE water", water)
+
+        # # Ensure the shapes are defined
+        # rainfall = tf.reshape(rainfall, [self.batch_size, rainfall.shape[1]])
+        # water = tf.reshape(water, [self.batch_size, water.shape[1]])
+
+
+        logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
+
+        print("LOGITS", logits)
+
+        labels = tf.eye(self.batch_size, dtype=tf.int32)
+        loss_rainfall = tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=0)
+        loss_water = tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=1)
+
+        loss = (loss_rainfall + loss_water) / 2
+
+        # cosine_similarity results in tensor of range -1 - 1, but tf.sparse.eye has range 0 - 1
+        print("LABELS", labels)
+        print("LOSS_rainfall", loss_rainfall)
+        print("LOSS_water", loss_water)
+        print("LOSS", loss)
+        return loss
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/components/__init__.py b/aimodel/src/lib/ai/components/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aimodel/src/lib/ai/helpers/__init__.py b/aimodel/src/lib/ai/helpers/__init__.py
new file mode 100644
index 0000000..069b21c
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/__init__.py
@@ -0,0 +1,2 @@
+from .make_callbacks import make_callbacks
+from .summarywriter import summarywriter
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/helpers/make_callbacks.py b/aimodel/src/lib/ai/helpers/make_callbacks.py
new file mode 100644
index 0000000..9cb5c0b
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/make_callbacks.py
@@ -0,0 +1,25 @@
+import os
+
+import tensorflow as tf
+
+def make_callbacks(dirpath):
+    dirpath_checkpoints = os.path.join(dirpath, "checkpoints")
+    filepath_metrics = os.path.join(dirpath, "metrics.tsv")
+
+    if not os.path.exists(dirpath_checkpoints):
+        os.mkdir(dirpath_checkpoints)
+
+    return [
+        tf.keras.callbacks.ModelCheckpoint(
+            filepath=os.path.join(
+                dirpath_checkpoints,
+                "checkpoint_weights_e{epoch:d}_loss{loss:.3f}.hdf5"
+            ),
+            monitor="loss"
+        ),
+        tf.keras.callbacks.CSVLogger(
+            filename=filepath_metrics,
+            separator="\t"
+        ),
+        tf.keras.callbacks.ProgbarLogger()
+    ]
\ No newline at end of file
diff --git a/aimodel/src/lib/ai/helpers/summarywriter.py b/aimodel/src/lib/ai/helpers/summarywriter.py
new file mode 100644
index 0000000..9670acb
--- /dev/null
+++ b/aimodel/src/lib/ai/helpers/summarywriter.py
@@ -0,0 +1,30 @@
+import io
+
+from loguru import logger
+
+
+def summarylogger(model):
+    """
+    Writes the summary for a model with the default logging context.
+    model (tf.keras.Model): The model to generate the summary from.
+    """
+
+    def handle_line(line: str):
+        logger.info(line)
+
+    model.summary(print_fn=handle_line)
+
+
+def summarywriter(model, filepath_output, append=False):
+    """
+    Writes the summary for a model to a file in the specified location.
+    model (tf.keras.Model): The model to generate the summary from.
+    filepath_output (str): The path to the file to write the summary to.
+    append (bool): If True the summary is appended to the file instead of overwriting it. Defaults to False.
+    """
+    # The context manager ensures the file is closed even if model.summary() throws
+    with io.open(filepath_output, "a" if append else "w") as handle:
+        def handle_line(line: str):
+            handle.write(f"{line}\n")
+
+        model.summary(print_fn=handle_line)
diff --git a/aimodel/src/lib/ai/model_rainfallwater_contrastive.py b/aimodel/src/lib/ai/model_rainfallwater_contrastive.py
new file mode 100644
index 0000000..97ea631
--- /dev/null
+++ b/aimodel/src/lib/ai/model_rainfallwater_contrastive.py
@@ -0,0 +1,57 @@
+from pickletools import optimize
+import tensorflow as tf
+
+from .components.LayerContrastiveEncoder import LayerContrastiveEncoder
+from .components.LayerCheeseMultipleOut import LayerCheeseMultipleOut
+from .components.LossContrastive import LossContrastive
+
+def model_rainfallwater_contrastive(shape_rainfall, shape_water, batch_size=64):
+    """Makes a new compiled rainfall / water contrastive learning model.
+
+    Args:
+        shape_rainfall (int[]): The shape of a single rainfall sample, in the form [ width, height, channels ].
+        shape_water (int[]): The shape of a single water sample, in the form [ width, height, channels ].
+        batch_size (int, optional): The batch size, which is passed to the contrastive loss function. Defaults to 64.
+
+    Returns:
+        tf.keras.Model: The new compiled model.
+    """
+    rainfall_width, rainfall_height, rainfall_channels = shape_rainfall
+    water_width, water_height, water_channels = shape_water
+
+    input_rainfall = tf.keras.layers.Input(
+        shape=shape_rainfall
+    )
+    input_water = tf.keras.layers.Input(
+        shape=shape_water
+    )
+
+    rainfall = LayerContrastiveEncoder(
+        input_width=rainfall_width,
+        input_height=rainfall_height,
+        channels=rainfall_channels
+    )(input_rainfall)
+    water = LayerContrastiveEncoder(
+        input_width=water_width,
+        input_height=water_height,
+        channels=water_channels
+    )(input_water)
+
+    # Keep a reference to the layer itself: the temperature weight is an attribute of the layer, NOT of its output tensor
+    layer_final = LayerCheeseMultipleOut()
+    final = layer_final([ rainfall, water ])
+
+    model = tf.keras.Model(
+        inputs = [ input_rainfall, input_water ],
+        outputs = final
+    )
+
+    model.compile(
+        optimizer="Adam",
+        loss=LossContrastive(
+            weight_temperature=layer_final.weights_temperature,
+            batch_size=batch_size
+        )
+    )
+
+    return model
\ No newline at end of file
diff --git a/aimodel/src/lib/io/readfile.py b/aimodel/src/lib/io/readfile.py
new file mode 100644
index 0000000..500947b
--- /dev/null
+++ b/aimodel/src/lib/io/readfile.py
@@ -0,0 +1,6 @@
+import io
+
+def readfile(filepath):
+    """Reads the entire content of the file at the given filepath and returns it as a string."""
+    with io.open(filepath, "r") as handle:
+        return handle.read()
\ No newline at end of file
diff --git a/aimodel/src/lib/io/writefile.py b/aimodel/src/lib/io/writefile.py
new file mode 100644
index 0000000..596c7c1
--- /dev/null
+++ b/aimodel/src/lib/io/writefile.py
@@ -0,0 +1,6 @@
+import io
+
+def writefile(filepath, content):
+    """Writes the given string to the file at the given filepath, replacing any existing content."""
+    with io.open(filepath, "w") as handle:
+        handle.write(content)
\ No newline at end of file