From 1d872cb9628710bd09a924c554162be7364d109b Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 16:45:01 +0100
Subject: [PATCH 01/41] contrastive: fix initial temperature value. It should be
 log(1/0.07), but we had it set to 0.07.

---
 aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
index 87c2e43..21f976d 100644
--- a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
+++ b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
@@ -16,7 +16,9 @@ class LayerCheeseMultipleOut(tf.keras.layers.Layer):
 		self.param_batch_size = batch_size
 		self.param_feature_dim = feature_dim
 		
-		self.weight_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([0.07]))
+		self.weight_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([
+			math.log(1 / 0.07)
+		]))
 		self.weight_nce = tf.Variable(
 			name="loss_nce",
 			shape=(batch_size, feature_dim),

From fad1399c2dce85edb61f0dc39fadcca43587176e Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 16:45:20 +0100
Subject: [PATCH 02/41] convnext: whitespace

---
 aimodel/src/lib/ai/components/convnext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/components/convnext.py b/aimodel/src/lib/ai/components/convnext.py
index b49fb38..dc3630a 100644
--- a/aimodel/src/lib/ai/components/convnext.py
+++ b/aimodel/src/lib/ai/components/convnext.py
@@ -147,7 +147,7 @@ def add_convnext_block(y, dim, drop_prob=0, prefix=""):
 		name=f'{prefix}.pwconv1'
 	)(y)
 	
-   
+	
 	y = tf.keras.layers.Activation(
 		'gelu',
 		name=f'{prefix}.act'

From 843cc8dc7bdc54c4435ce56a8154c627e00146ae Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 16:45:45 +0100
Subject: [PATCH 03/41] contrastive: rewrite the loss function. The CLIP paper
 *does* kinda make sense I think

---
 aimodel/src/lib/ai/components/LossContrastive.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index 43e6e74..697e517 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -17,14 +17,17 @@ class LossContrastive(tf.keras.losses.Loss):
 		# rainfall = tf.reshape(rainfall, [self.batch_size, rainfall.shape[1]])
 		# water = tf.reshape(water, [self.batch_size, water.shape[1]])
 		
-		
-		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
+		# logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
+		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.math.exp(self.weight_temperature)
 		
 		# print("LOGITS", logits)
 		
-		labels			= tf.eye(self.batch_size, dtype=tf.int32)
-		loss_rainfall	= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=0)
-		loss_water		= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=1)
+		# labels			= tf.eye(self.batch_size, dtype=tf.int32) # we *would* do this if we were using mean squared error...
+		labels			= tf.range(self.batch_size, dtype=tf.int32) # each row is a different category we think
+		loss_rainfall	= tf.keras.metrics.sparse_categorical_crossentropy(labels, logits, from_logits=True, axis=0)
+		loss_water		= tf.keras.metrics.sparse_categorical_crossentropy(labels, logits, from_logits=True, axis=1)
+		# loss_rainfall	= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=0)
+		# loss_water		= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=1)
 		
 		loss = (loss_rainfall + loss_water) / 2
 		

From 48ae8a5c20468272efa27d8b1821d5748a8821b5 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 16:52:56 +0100
Subject: [PATCH 04/41] LossContrastive: normalise features as per the paper

---
 aimodel/src/lib/ai/components/LossContrastive.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index 697e517..59a2f8f 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -17,6 +17,10 @@ class LossContrastive(tf.keras.losses.Loss):
 		# rainfall = tf.reshape(rainfall, [self.batch_size, rainfall.shape[1]])
 		# water = tf.reshape(water, [self.batch_size, water.shape[1]])
 		
+		# normalise features
+		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
+		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
+		
 		# logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
 		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.math.exp(self.weight_temperature)
 		

From 1b489518d053c282bff9265c585dfd3411bf979c Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 17:05:50 +0100
Subject: [PATCH 05/41] segmenter: add LayerStack2Image to custom_objects

---
 aimodel/src/lib/ai/RainfallWaterSegmenter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aimodel/src/lib/ai/RainfallWaterSegmenter.py b/aimodel/src/lib/ai/RainfallWaterSegmenter.py
index 65a552b..e592d37 100644
--- a/aimodel/src/lib/ai/RainfallWaterSegmenter.py
+++ b/aimodel/src/lib/ai/RainfallWaterSegmenter.py
@@ -14,6 +14,7 @@ from .model_rainfallwater_segmentation import model_rainfallwater_segmentation
 from .helpers import make_callbacks
 from .helpers import summarywriter
 from .components.LayerConvNeXtGamma import LayerConvNeXtGamma
+from .components.LayerStack2Image import LayerStack2Image
 from .helpers.summarywriter import summarywriter
 
 class RainfallWaterSegmenter(object):
@@ -70,6 +71,7 @@ class RainfallWaterSegmenter(object):
 		
 		self.model = tf.keras.models.load_model(filepath_checkpoint, custom_objects={
 			"LayerConvNeXtGamma": LayerConvNeXtGamma,
+			"LayerStack2Image": LayerStack2Image
 		})
 	
 	

From 4f9d543695c12b20a7c26cd1ff895d9d265d219f Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 17:11:36 +0100
Subject: [PATCH 06/41] train_predict: don't pass model_code it's redundant

---
 aimodel/src/subcommands/train_predict.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/aimodel/src/subcommands/train_predict.py b/aimodel/src/subcommands/train_predict.py
index f1ef0da..ac84492 100644
--- a/aimodel/src/subcommands/train_predict.py
+++ b/aimodel/src/subcommands/train_predict.py
@@ -87,13 +87,13 @@ def run(args):
 	logger.info(f"Records per file: {args.records_per_file}")
 	
 	if output_mode == MODE_JSONL:
-		do_jsonl(args, ai, dataset, args.model_code, model_params)
+		do_jsonl(args, ai, dataset, model_params)
 	else:
-		do_png(args, ai, dataset, args.model_code, model_params)
+		do_png(args, ai, dataset, model_params)
 	
 	sys.stderr.write(">>> Complete\n")
 
-def do_png(args, ai, dataset, model_code, model_params):
+def do_png(args, ai, dataset, model_params):
 	if not os.path.exists(os.path.dirname(args.output)):
 		os.mkdir(os.path.dirname(args.output))
 	
@@ -114,7 +114,7 @@ def do_png(args, ai, dataset, model_code, model_params):
 			
 			segmentation_plot(
 				water_actual, water_predict,
-				model_code,
+				args.model_code,
 				args.output.replace("+d", str(i))
 			)
 			

From 74f2cdb9005c507626646878e07c898780c552e3 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 17:12:36 +0100
Subject: [PATCH 07/41] =?UTF-8?q?train=5Fpredict:=20.list()=20=E2=86=92=20?=
 =?UTF-8?q?.tolist()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 aimodel/src/subcommands/train_predict.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aimodel/src/subcommands/train_predict.py b/aimodel/src/subcommands/train_predict.py
index ac84492..c96ce15 100644
--- a/aimodel/src/subcommands/train_predict.py
+++ b/aimodel/src/subcommands/train_predict.py
@@ -165,11 +165,11 @@ def do_jsonl(args, ai, dataset, model_params):
 			
 			item_obj = {}
 			if "rainfall_actual" in args.log:
-				item_obj["rainfall_actual"] = rainfall_actual_batch[i_batch].numpy().list()
+				item_obj["rainfall_actual"] = rainfall_actual_batch[i_batch].numpy().tolist()
 			if "water_actual" in args.log:
-				item_obj["water_actual"] = water_actual.numpy().list()
+				item_obj["water_actual"] = water_actual.numpy().tolist()
 			if "water_predict" in args.log:
-				item_obj["water_predict"] = water_predict.numpy().list()
+				item_obj["water_predict"] = water_predict.numpy().tolist()
 			
 			handle.write(json.dumps(item_obj, separators=(',', ':'))+"\n") # Ref https://stackoverflow.com/a/64710892/1460422
 			

From 33391eaf16145fb74af17c4800a17a6f71f92c85 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 26 Oct 2022 17:21:19 +0100
Subject: [PATCH 08/41] train_predict/jsonl: don't argmax; I'm interested in
 the raw values

---
 aimodel/src/subcommands/train_predict.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/subcommands/train_predict.py b/aimodel/src/subcommands/train_predict.py
index c96ce15..3624cf8 100644
--- a/aimodel/src/subcommands/train_predict.py
+++ b/aimodel/src/subcommands/train_predict.py
@@ -152,7 +152,7 @@ def do_jsonl(args, ai, dataset, model_params):
 		i_batch = 0
 		for water_predict in water_predict_batch:
 			# [ width, height, softmax_probabilities ] → [ batch, width, height ]
-			water_predict = tf.math.argmax(water_predict, axis=-1) 
+			# water_predict = tf.math.argmax(water_predict, axis=-1) 
 			# [ width, height ]
 			water_actual = tf.squeeze(water_actual_batch[i_batch])
 			

From 55dc05e8ce9617c9be0003cbb7c5e58f545a2646 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 16:26:48 +0000
Subject: [PATCH 09/41] contrastive: comment weights that aren't needed

---
 .../ai/components/LayerCheeseMultipleOut.py   | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
index 21f976d..3f9481a 100644
--- a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
+++ b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
@@ -19,19 +19,19 @@ class LayerCheeseMultipleOut(tf.keras.layers.Layer):
 		self.weight_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([
 			math.log(1 / 0.07)
 		]))
-		self.weight_nce = tf.Variable(
-			name="loss_nce",
-			shape=(batch_size, feature_dim),
-			initial_value=tf.random.truncated_normal(
-				(feature_dim),
-				stddev=1.0 / math.sqrt(128)
-			)
-		)
-		self.weight_nce_bias = tf.Variable(
-			name="loss_nce_bias",
-			shape=(feature_dim),
-			initial_value=tf.zeros((feature_dim))
-		)
+		# self.weight_nce = tf.Variable(
+		# 	name="loss_nce",
+		# 	shape=(batch_size, feature_dim),
+		# 	initial_value=tf.random.truncated_normal(
+		# 		[feature_dim],
+		# 		stddev=1.0 / math.sqrt(128)
+		# 	)
+		# )
+		# self.weight_nce_bias = tf.Variable(
+		# 	name="loss_nce_bias",
+		# 	shape=(feature_dim),
+		# 	initial_value=tf.zeros((feature_dim))
+		# )
 	
 	def get_config(self):
 		config = super(LayerCheeseMultipleOut, self).get_config()

From 458faa96d2adb803444e4ed5b173d3357902b001 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 17:18:21 +0000
Subject: [PATCH 10/41] loss: fixup

---
 aimodel/src/lib/ai/components/LossContrastive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index 59a2f8f..356147f 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -19,7 +19,7 @@ class LossContrastive(tf.keras.losses.Loss):
 		
 		# normalise features
 		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
-		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
+		water = water / tf.math.l2_normalize(water, axis=1)
 		
 		# logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
 		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.math.exp(self.weight_temperature)

From b986b069e23073f9b84bf9777d8efa0c477baab9 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 17:50:29 +0000
Subject: [PATCH 11/41] debug party time

---
 .../ai/components/LayerCheeseMultipleOut.py   |  2 +-
 .../src/lib/ai/components/LossContrastive.py  | 30 +++++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
index 3f9481a..5e6d13f 100644
--- a/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
+++ b/aimodel/src/lib/ai/components/LayerCheeseMultipleOut.py
@@ -44,4 +44,4 @@ class LayerCheeseMultipleOut(tf.keras.layers.Layer):
 		
 		# By this point, the above has already dropped through the encoder, so should be in the form [ batch_size, dim ]
 		
-		return tf.stack(inputs, axis=-2)
\ No newline at end of file
+		return tf.stack(inputs, axis=-2)
diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index 356147f..d54cf62 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -1,3 +1,5 @@
+import math
+
 import tensorflow as tf
 
 class LossContrastive(tf.keras.losses.Loss):
@@ -10,8 +12,8 @@ class LossContrastive(tf.keras.losses.Loss):
 		rainfall, water = tf.unstack(y_pred, axis=-2)
 		# print("LOSS:call y_true", y_true.shape)
 		# print("LOSS:call y_pred", y_pred.shape)
-		# print("BEFORE_RESHAPE rainfall", rainfall)
-		# print("BEFORE_RESHAPE water", water)
+		print("START rainfall", rainfall)
+		print("START water", water)
 		
 		# # Ensure the shapes are defined
 		# rainfall = tf.reshape(rainfall, [self.batch_size, rainfall.shape[1]])
@@ -21,24 +23,42 @@ class LossContrastive(tf.keras.losses.Loss):
 		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
 		water = water / tf.math.l2_normalize(water, axis=1)
 		
+		print("AFTER_L2 rainfall", rainfall)
+		print("AFTER_L2 water", water)
+		
 		# logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.clip_by_value(tf.math.exp(self.weight_temperature), 0, 100)
 		logits = tf.linalg.matmul(rainfall, tf.transpose(water)) * tf.math.exp(self.weight_temperature)
 		
-		# print("LOGITS", logits)
+		print("LOGITS", logits)
 		
 		# labels			= tf.eye(self.batch_size, dtype=tf.int32) # we *would* do this if we were using mean squared error...
 		labels			= tf.range(self.batch_size, dtype=tf.int32) # each row is a different category we think
 		loss_rainfall	= tf.keras.metrics.sparse_categorical_crossentropy(labels, logits, from_logits=True, axis=0)
 		loss_water		= tf.keras.metrics.sparse_categorical_crossentropy(labels, logits, from_logits=True, axis=1)
+		
+		
 		# loss_rainfall	= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=0)
 		# loss_water		= tf.keras.metrics.binary_crossentropy(labels, logits, from_logits=True, axis=1)
-		
+		print("LABELS", labels)
+		print("LOSS_RAINFALL", loss_rainfall)
+		print("LOSS_WATER", loss_water)
+				
 		loss = (loss_rainfall + loss_water) / 2
 		
+		print("LOSS", loss)
 		# cosine_similarity results in tensor of range -1 - 1, but tf.sparse.eye has range 0 - 1
 		# print("LABELS", labels)
 		# print("LOSS_rainfall", loss_rainfall)
 		# print("LOSS_water", loss_water)
 		# print("LOSS", loss)
 		return loss
-	
\ No newline at end of file
+
+
+if __name__ == "__main__":
+	weight_temperature = tf.Variable(name="loss_temperature", shape=1, initial_value=tf.constant([
+		math.log(1 / 0.07)
+	]))
+	loss = LossContrastive(weight_temperature=weight_temperature, batch_size=64)
+	
+	tensor_input = tf.random.uniform([64, 2, 512])
+	print(loss(tensor_input))
\ No newline at end of file

From 5e60319024325c284adbcd9fc5398507c77841d0 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 17:56:49 +0000
Subject: [PATCH 12/41] fixup

---
 aimodel/src/lib/ai/components/LossContrastive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index d54cf62..c656c96 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -61,4 +61,4 @@ if __name__ == "__main__":
 	loss = LossContrastive(weight_temperature=weight_temperature, batch_size=64)
 	
 	tensor_input = tf.random.uniform([64, 2, 512])
-	print(loss(tensor_input))
\ No newline at end of file
+	print(loss(tf.constant(1), tensor_input))
\ No newline at end of file

From dbe35ee943953ff64ca504b0d6dad63b42ec2de3 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 18:09:03 +0000
Subject: [PATCH 13/41] loss: comment l2 norm

---
 aimodel/src/lib/ai/components/LossContrastive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index c656c96..57378ce 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -20,8 +20,8 @@ class LossContrastive(tf.keras.losses.Loss):
 		# water = tf.reshape(water, [self.batch_size, water.shape[1]])
 		
 		# normalise features
-		rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
-		water = water / tf.math.l2_normalize(water, axis=1)
+		# rainfall = rainfall / tf.math.l2_normalize(rainfall, axis=1)
+		# water = water / tf.math.l2_normalize(water, axis=1)
 		
 		print("AFTER_L2 rainfall", rainfall)
 		print("AFTER_L2 water", water)

From 172cf9d8cea2e95c8c219eff26863e23a75e6a93 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 18:19:43 +0000
Subject: [PATCH 14/41] tweak

---
 aimodel/src/lib/ai/components/LossContrastive.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/aimodel/src/lib/ai/components/LossContrastive.py b/aimodel/src/lib/ai/components/LossContrastive.py
index 57378ce..6f3b3bc 100644
--- a/aimodel/src/lib/ai/components/LossContrastive.py
+++ b/aimodel/src/lib/ai/components/LossContrastive.py
@@ -46,6 +46,10 @@ class LossContrastive(tf.keras.losses.Loss):
 		loss = (loss_rainfall + loss_water) / 2
 		
 		print("LOSS", loss)
+		
+		loss = tf.math.reduce_mean(loss)
+		
+		print("LOSS FINAL", loss)
 		# cosine_similarity results in tensor of range -1 - 1, but tf.sparse.eye has range 0 - 1
 		# print("LABELS", labels)
 		# print("LOSS_rainfall", loss_rainfall)

From dfef7db4212163b1dd09d2b82dcf54f6bdee738b Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 18:26:34 +0000
Subject: [PATCH 15/41] moar debugging

---
 aimodel/src/subcommands/pretrain.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/aimodel/src/subcommands/pretrain.py b/aimodel/src/subcommands/pretrain.py
index 2506b8f..5a2f68a 100644
--- a/aimodel/src/subcommands/pretrain.py
+++ b/aimodel/src/subcommands/pretrain.py
@@ -21,6 +21,13 @@ def parse_args():
 	
 	return parser
 
+
+def count_batches(dataset):
+	count = 0
+	for _ in dataset:
+		count += 1
+	return count
+
 def run(args):
 	if (not hasattr(args, "water_size")) or args.water_size == None:
 		args.water_size = 100
@@ -40,7 +47,11 @@ def run(args):
 		dirpath_input=args.input,
 		batch_size=args.batch_size,
 	)
-	dataset_metadata = read_metadata(args.input)
+	
+	print("BATCHES_TRAIN", count_batches(dataset_train))
+	print("BATCHES_VALIDATE", count_batches(dataset_validate))
+	
+	
 	
 	# for (items, label) in dataset_train:
 	# 	print("ITEMS", len(items), [ item.shape for item in items ])
@@ -59,4 +70,3 @@ def run(args):
 	)
 	
 	ai.train(dataset_train, dataset_validate)
-	
\ No newline at end of file

From da32d75778659599e8017760c4c367177fcf3ed7 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 18:36:28 +0000
Subject: [PATCH 16/41] make_callbacks: display steps, not samples

---
 aimodel/src/lib/ai/helpers/make_callbacks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/helpers/make_callbacks.py b/aimodel/src/lib/ai/helpers/make_callbacks.py
index bfb112a..f5fadd2 100644
--- a/aimodel/src/lib/ai/helpers/make_callbacks.py
+++ b/aimodel/src/lib/ai/helpers/make_callbacks.py
@@ -24,5 +24,5 @@ def make_callbacks(dirpath, model_predict):
 			filename=filepath_metrics,
 			separator="\t"
 		),
-		tf.keras.callbacks.ProgbarLogger()
+		tf.keras.callbacks.ProgbarLogger(count_mode="steps") # batches
 	]
\ No newline at end of file

From cf872ef739be4fcd7b7074bd40351b4d8f9bdfc8 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 18:40:58 +0000
Subject: [PATCH 17/41] how could I be so *stupid*......

---
 aimodel/src/lib/ai/RainfallWaterContraster.py | 2 +-
 aimodel/src/subcommands/pretrain.py           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/aimodel/src/lib/ai/RainfallWaterContraster.py b/aimodel/src/lib/ai/RainfallWaterContraster.py
index cce6469..154c5bf 100644
--- a/aimodel/src/lib/ai/RainfallWaterContraster.py
+++ b/aimodel/src/lib/ai/RainfallWaterContraster.py
@@ -85,7 +85,7 @@ class RainfallWaterContraster(object):
 			validation_data=dataset_validate,
 			epochs=self.epochs,
 			callbacks=make_callbacks(self.dir_output, self.model_predict),
-			steps_per_epoch=10 # For testing
+			# steps_per_epoch=10 # For testing
 		)
 	
 	def embed(self, dataset):
diff --git a/aimodel/src/subcommands/pretrain.py b/aimodel/src/subcommands/pretrain.py
index 5a2f68a..753f547 100644
--- a/aimodel/src/subcommands/pretrain.py
+++ b/aimodel/src/subcommands/pretrain.py
@@ -48,8 +48,8 @@ def run(args):
 		batch_size=args.batch_size,
 	)
 	
-	print("BATCHES_TRAIN", count_batches(dataset_train))
-	print("BATCHES_VALIDATE", count_batches(dataset_validate))
+	# print("BATCHES_TRAIN", count_batches(dataset_train)) # 18500 for the full 2006-2020 dataset
+	# print("BATCHES_VALIDATE", count_batches(dataset_validate)) # 4653 for the full 2006-2020 dataset
 	
 	
 	

From 5f8d6dc6ea7b29446a6430855a24bc9d4a21dae1 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Mon, 31 Oct 2022 19:26:10 +0000
Subject: [PATCH 18/41] Add metrics every 64 batches. This is important because
 with large batches it can be difficult to tell what's happening inside each
 epoch.

---
 .../lib/ai/components/CallbackNBatchCsv.py    | 30 +++++++++++++++++++
 aimodel/src/lib/ai/helpers/make_callbacks.py  |  6 ++++
 2 files changed, 36 insertions(+)
 create mode 100644 aimodel/src/lib/ai/components/CallbackNBatchCsv.py

diff --git a/aimodel/src/lib/ai/components/CallbackNBatchCsv.py b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
new file mode 100644
index 0000000..de7491b
--- /dev/null
+++ b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
@@ -0,0 +1,30 @@
+import tensorflow as tf
+
+from lib.io.handle_open import handle_open
+
+class CallbackNBatchCsv(tf.keras.callbacks.Callback):
+	def __init__(self, filepath, n_batches=1, separator="\t", **kwargs) -> None:
+		super().__init__(**kwargs)
+		
+		self.n_batches = n_batches
+		self.separator = separator
+		
+		self.handle = handle_open(filepath)
+		
+		
+		self.batches_seen = 0
+		self.keys = None
+	
+	def write_header(self, logs): # logs = metrics
+		self.keys = logs.keys()
+		self.keys.sort()
+		self.handle.write("\t".join(self.keys)+"\n")
+			
+	def on_batch_end(self, batch, logs=None): # logs = metrics
+		if self.batches_seen == 0:
+			self.write_header(logs)
+		
+		if self.batches_seen % self.n_batches == 0:
+			self.handle.write(self.separator.join([str(logs[key]) for key in self.keys]) + "\n")
+		
+		self.batches_seen += 1
diff --git a/aimodel/src/lib/ai/helpers/make_callbacks.py b/aimodel/src/lib/ai/helpers/make_callbacks.py
index f5fadd2..8d3f5d8 100644
--- a/aimodel/src/lib/ai/helpers/make_callbacks.py
+++ b/aimodel/src/lib/ai/helpers/make_callbacks.py
@@ -3,10 +3,12 @@ import os
 import tensorflow as tf
 
 from ..components.CallbackCustomModelCheckpoint import CallbackCustomModelCheckpoint
+from ..components.CallbackNBatchCsv import CallbackNBatchCsv
 
 def make_callbacks(dirpath, model_predict):
 	dirpath_checkpoints = os.path.join(dirpath, "checkpoints")
 	filepath_metrics = os.path.join(dirpath, "metrics.tsv")
+	filepath_metrics_batch = os.path.join(dirpath, "metrics_batch64.tsv")
 	
 	if not os.path.exists(dirpath_checkpoints):
 		os.mkdir(dirpath_checkpoints)
@@ -24,5 +26,9 @@ def make_callbacks(dirpath, model_predict):
 			filename=filepath_metrics,
 			separator="\t"
 		),
+		CallbackNBatchCsv(
+			filepath=filepath_metrics_batch,
+			n_batches=64
+		),
 		tf.keras.callbacks.ProgbarLogger(count_mode="steps") # batches
 	]
\ No newline at end of file

From 91152ebb1cde1c05929e44b28dc96bff71254977 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 18:29:47 +0000
Subject: [PATCH 19/41] wrangler:recordify: update CLI help. We only output
 .jsonl.gz to a DIRECTORY, so update the CLI help to reflect this.

---
 rainfallwrangler/src/subcommands/recordify/meta.mjs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rainfallwrangler/src/subcommands/recordify/meta.mjs b/rainfallwrangler/src/subcommands/recordify/meta.mjs
index ea39200..bba4455 100644
--- a/rainfallwrangler/src/subcommands/recordify/meta.mjs
+++ b/rainfallwrangler/src/subcommands/recordify/meta.mjs
@@ -11,5 +11,5 @@ export default function(cli) {
 				.reverse();
 		})
 		.argument("water-offset", "Make the water depth data be this many time steps ahead of the rainfall radar data. (default: 1)", 1, "integer")
-		.argument("output", "The path to the directory to write the generated TFRecord files to.", null, "string");
+		.argument("output", "The path to the directory to write the generated .jsonl.gz files to.", null, "string");
 }

From fecc63b6a2e66bde1f0734760bdd89c40aa87ab6 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 18:56:27 +0000
Subject: [PATCH 20/41] wrangler: write high-level job file

---
 rainfallwrangler/slurm-recordify.job | 71 ++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100755 rainfallwrangler/slurm-recordify.job

diff --git a/rainfallwrangler/slurm-recordify.job b/rainfallwrangler/slurm-recordify.job
new file mode 100755
index 0000000..c7c97e8
--- /dev/null
+++ b/rainfallwrangler/slurm-recordify.job
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#SBATCH -J RW2jsonl
+#SBATCH -N 1
+#SBATCH -n 1
+#SBATCH -o %j.%N.%a.out
+#SBATCH -e %j.%N.%a.err
+#SBATCH -p compute
+#SBATCH --time=3-00:00:00
+#SBATCH --mem=8096
+# * 8GB RAM
+
+module load utilities/multi
+module load readline/7.0
+module load gcc/10.2.0
+
+# module load cuda/11.5.0
+
+module load python/anaconda/4.6/miniconda/3.7
+
+RAINFALL="${RAINFALL:-$HOME/data/nimrod_ceda.jsonl.gz}";
+WATER="${WATER:-$HOME/data/WaterDepths-new.stream.asc.gz}";
+OUTPUT="${OUTPUT}";
+COUNT_FILE="${COUNT_FILE:4096}";
+
+if [[ -z "${WATER}" ]]; then
+	echo "Error: No input water depth file specified in the WATER environment variable.";
+	exit 1;
+fi
+if [[ -z "${RAINFALL}" ]]; then
+	echo "Error: No input rainfall file specified in the RAINFALL environment variables.";
+	exit 1;
+fi
+
+if [[ -z "${OUTPUT}" ]]; then
+	echo "Error: No output directory specified in the OUTPUT environment variable.";
+	exit 1;
+fi
+
+if [[ ! -d "${OUTPUT}" ]]; then
+	echo "Error: That input directory either doesn't exist, isn't a directory, or we don't have permission to access it.";
+	exit 3;
+fi
+
+export PATH=$HOME/software/bin:$PATH;
+
+
+OUTPUT_UNIQ="${OUTPUT%/}_uniq"; # Stript trailing slash, if present
+OUTPUT_TFRECORD="${OUTPUT%/}_tfrecord"; # Stript trailing slash, if present
+
+echo ">>> Settings";
+
+echo "RAINFALL $RAINFALL";
+echo "WATER $WATER";
+echo "OUTPUT $OUTPUT";
+echo "ARGS $ARGS";
+
+echo ">>> Installing requirements";
+cd ../aimodel || { echo "Error: Failed to cd to ai model directory"; exit 1; };
+conda run -n py38 pip install -r requirements.txt;
+cd ../rainfallwrangler || { echo "Error: Failed to cd back to rainfallwrangler directory"; exit 1; };
+npm install;
+echo ">>> Converting dataset to .jsonl.gz";
+/usr/bin/env time -v src/index.mjs recordify --verbose --rainfall "${RAINFALL}" --water "${WATER}" --output "${OUTPUT}" --count-file "${COUNT_FILE}" ${ARGS};
+echo ">>> Deduplicating dataset";
+# This also automatically recompresses for us - hence the source/target rather than in-place
+srun --comment 'RainUniq' --exclusive -p compute /usr/bin/env time -v src/index.mjs uniq --source "${OUTPUT}" --target "${OUTPUT_UNIQ}" --count-file "${COUNT_FILE}";
+echo ">>> Removing intermediate output";
+rm -r "${OUTPUT}";
+echo ">>> Queuing .jsonl.gz → tfrecord";
+INPUT="${OUTPUT_UNIQ}" OUTPUT="${OUTPUT_TFRECORD}" sbatch ./slurm-jsonl2tfrecord.job;
+echo ">>> exited with code $?";

From f8341e7d895b18b11960366b9f91b6213d7a75c4 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 18:59:15 +0000
Subject: [PATCH 21/41] slurm: add .log

---
 rainfallwrangler/slurm-jsonl2tfrecord.job | 4 ++--
 rainfallwrangler/slurm-recordify.job      | 4 ++--
 rainfallwrangler/slurm-wrangle-uniq.job   | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/rainfallwrangler/slurm-jsonl2tfrecord.job b/rainfallwrangler/slurm-jsonl2tfrecord.job
index 253ef55..5be5273 100755
--- a/rainfallwrangler/slurm-jsonl2tfrecord.job
+++ b/rainfallwrangler/slurm-jsonl2tfrecord.job
@@ -2,8 +2,8 @@
 #SBATCH -J Json2TfR
 #SBATCH -N 1
 #SBATCH -n 28
-#SBATCH -o %j.%N.%a.out
-#SBATCH -e %j.%N.%a.err
+#SBATCH -o %j.%N.%a.out.log
+#SBATCH -e %j.%N.%a.err.log
 #SBATCH -p compute
 #SBATCH --time=3-00:00:00
 
diff --git a/rainfallwrangler/slurm-recordify.job b/rainfallwrangler/slurm-recordify.job
index c7c97e8..1825c65 100755
--- a/rainfallwrangler/slurm-recordify.job
+++ b/rainfallwrangler/slurm-recordify.job
@@ -2,8 +2,8 @@
 #SBATCH -J RW2jsonl
 #SBATCH -N 1
 #SBATCH -n 1
-#SBATCH -o %j.%N.%a.out
-#SBATCH -e %j.%N.%a.err
+#SBATCH -o %j.%N.%a.out.log
+#SBATCH -e %j.%N.%a.err.log
 #SBATCH -p compute
 #SBATCH --time=3-00:00:00
 #SBATCH --mem=8096
diff --git a/rainfallwrangler/slurm-wrangle-uniq.job b/rainfallwrangler/slurm-wrangle-uniq.job
index c8dc40e..d97d1e3 100755
--- a/rainfallwrangler/slurm-wrangle-uniq.job
+++ b/rainfallwrangler/slurm-wrangle-uniq.job
@@ -2,8 +2,8 @@
 #SBATCH -J RainUniq
 #SBATCH -N 1
 #SBATCH -n 28
-#SBATCH -o %j.%N.%a.out
-#SBATCH -e %j.%N.%a.err
+#SBATCH -o %j.%N.%a.out.log
+#SBATCH -e %j.%N.%a.err.log
 #SBATCH -p compute
 #SBATCH --time=3-00:00:00
 

From a69fa9f0f38302cf3db1ded120b2273052c5b9f8 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 18:59:55 +0000
Subject: [PATCH 22/41] slurm: rename

---
 rainfallwrangler/{slurm-recordify.job => slurm-process.job} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename rainfallwrangler/{slurm-recordify.job => slurm-process.job} (100%)

diff --git a/rainfallwrangler/slurm-recordify.job b/rainfallwrangler/slurm-process.job
similarity index 100%
rename from rainfallwrangler/slurm-recordify.job
rename to rainfallwrangler/slurm-process.job

From 79b231198f5ee960b3ab3b359165afadee348049 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 19:03:37 +0000
Subject: [PATCH 23/41] slurm-process: check input files are readable

---
 rainfallwrangler/slurm-process.job | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 1825c65..251ef2d 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -36,8 +36,17 @@ if [[ -z "${OUTPUT}" ]]; then
 	exit 1;
 fi
 
+if [[ ! -r "${RAINFALL}" ]]; then # -r: true only if the path exists and is readable
+	echo "Error: That input rainfall file either doesn't exist, or we don't have permission to read it.";
+	exit 3;
+fi
+if [[ ! -r "${WATER}" ]]; then # -r: true only if the path exists and is readable
+	echo "Error: That input water depth file either doesn't exist, or we don't have permission to read it.";
+	exit 3;
+fi
+
 if [[ ! -d "${OUTPUT}" ]]; then
-	echo "Error: That input directory either doesn't exist, isn't a directory, or we don't have permission to access it.";
+	echo "Error: That output directory either doesn't exist, isn't a directory, or we don't have permission to access it.";
 	exit 3;
 fi
 

From c17a4ca05abe184c632fdbc0e599b9997553fdec Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 19:38:04 +0000
Subject: [PATCH 24/41] slurm: fix sanity logic

---
 rainfallwrangler/slurm-process.job | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 251ef2d..5189c91 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -46,8 +46,7 @@ if [[ ! -r "${WATER}" ]]; then
 fi
 
 if [[ ! -d "${OUTPUT}" ]]; then
-	echo "Error: That output directory either doesn't exist, isn't a directory, or we don't have permission to access it.";
-	exit 3;
+	mkdir "${OUTPUT}";
 fi
 
 export PATH=$HOME/software/bin:$PATH;
@@ -56,6 +55,8 @@ export PATH=$HOME/software/bin:$PATH;
 OUTPUT_UNIQ="${OUTPUT%/}_uniq"; # Stript trailing slash, if present
 OUTPUT_TFRECORD="${OUTPUT%/}_tfrecord"; # Stript trailing slash, if present
 
+mkd -p "${OUTPUT_UNIQ}" "${OUTPUT_TFRECORD}";
+
 echo ">>> Settings";
 
 echo "RAINFALL $RAINFALL";

From 784b8ed35c357bf0b2f205dd50755f8542374c3e Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 19:53:21 +0000
Subject: [PATCH 25/41] recordify: catch NaN --count-file

---
 rainfallwrangler/src/subcommands/recordify/recordify.mjs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rainfallwrangler/src/subcommands/recordify/recordify.mjs b/rainfallwrangler/src/subcommands/recordify/recordify.mjs
index ca8f37b..5a66229 100644
--- a/rainfallwrangler/src/subcommands/recordify/recordify.mjs
+++ b/rainfallwrangler/src/subcommands/recordify/recordify.mjs
@@ -18,6 +18,10 @@ export default async function() {
 	
 	if(typeof settings.output !== "string")
 		throw new Error(`Error: No output directory specified.`);
+	if(typeof settings.count_file !== "number")
+		throw new Error(`Error: --count-file was not specified.`);
+	if(isNaN(settings.count_file))
+		throw new Error(`Error: --count-file was not a number. process.argv: ${process.argv.join(" ")}`);
 	
 	if(!fs.existsSync(settings.output))
 		await fs.promises.mkdir(settings.output, { recursive: true });

From bc0e5f05a812f668751340418ec1f240351b7dd4 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 19:55:04 +0000
Subject: [PATCH 26/41] slurm: fixup

---
 rainfallwrangler/slurm-process.job | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 5189c91..51d8a8c 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -55,13 +55,14 @@ export PATH=$HOME/software/bin:$PATH;
 OUTPUT_UNIQ="${OUTPUT%/}_uniq"; # Stript trailing slash, if present
 OUTPUT_TFRECORD="${OUTPUT%/}_tfrecord"; # Stript trailing slash, if present
 
-mkd -p "${OUTPUT_UNIQ}" "${OUTPUT_TFRECORD}";
+mkdir -p "${OUTPUT_UNIQ}" "${OUTPUT_TFRECORD}";
 
 echo ">>> Settings";
 
 echo "RAINFALL $RAINFALL";
 echo "WATER $WATER";
 echo "OUTPUT $OUTPUT";
+echo "COUNT_FILE $COUNT_FILE";
 echo "ARGS $ARGS";
 
 echo ">>> Installing requirements";

From 441ad92b128c4cfdd2747d4a9283c220116cce80 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Tue, 1 Nov 2022 19:57:15 +0000
Subject: [PATCH 27/41] slurm: fixup

---
 rainfallwrangler/slurm-process.job | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 51d8a8c..7f499af 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -9,6 +9,8 @@
 #SBATCH --mem=8096
 # * 8GB RAM
 
+set -e;
+
 module load utilities/multi
 module load readline/7.0
 module load gcc/10.2.0
@@ -20,7 +22,7 @@ module load python/anaconda/4.6/miniconda/3.7
 RAINFALL="${RAINFALL:-$HOME/data/nimrod_ceda.jsonl.gz}";
 WATER="${WATER:-$HOME/data/WaterDepths-new.stream.asc.gz}";
 OUTPUT="${OUTPUT}";
-COUNT_FILE="${COUNT_FILE:4096}";
+COUNT_FILE="${COUNT_FILE:-4096}";
 
 if [[ -z "${WATER}" ]]; then
 	echo "Error: No input water depth file specified in the WATER environment variable.";

From f2ae74ce7b3fa023644f670c9e32cfa26a918ffc Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 2 Nov 2022 17:38:26 +0000
Subject: [PATCH 28/41] how could I be so stupid..... round 2

---
 aimodel/src/lib/ai/RainfallWaterSegmenter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/RainfallWaterSegmenter.py b/aimodel/src/lib/ai/RainfallWaterSegmenter.py
index e592d37..dee79cd 100644
--- a/aimodel/src/lib/ai/RainfallWaterSegmenter.py
+++ b/aimodel/src/lib/ai/RainfallWaterSegmenter.py
@@ -82,7 +82,7 @@ class RainfallWaterSegmenter(object):
 			validation_data=dataset_validate,
 			epochs=self.epochs,
 			callbacks=make_callbacks(self.dir_output, self.model),
-			steps_per_epoch=10 # For testing
+			# steps_per_epoch=10 # For testing
 		)
 	
 	def embed(self, rainfall_embed):

From 3206d6b7e74f1bc89d31f8444e9241e44bb20a21 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 3 Nov 2022 17:12:27 +0000
Subject: [PATCH 29/41] slurm: rename segmenter job name

---
 aimodel/slurm-train.job | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/slurm-train.job b/aimodel/slurm-train.job
index 86368a7..ac2f092 100755
--- a/aimodel/slurm-train.job
+++ b/aimodel/slurm-train.job
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-#SBATCH -J RainAIv3
+#SBATCH -J RainAISG
 #SBATCH -N 1
 #SBATCH -n 14
 #SBATCH --gres=gpu:1

From 1375201c5f2930a3ec7c3f60c23b1ddc0044ed5a Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 3 Nov 2022 18:29:00 +0000
Subject: [PATCH 30/41] CallbackNBatchCsv: open_handle mode

---
 aimodel/src/lib/ai/components/CallbackNBatchCsv.py | 2 +-
 aimodel/src/lib/io/handle_open.py                  | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/aimodel/src/lib/ai/components/CallbackNBatchCsv.py b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
index de7491b..e04058c 100644
--- a/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
+++ b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
@@ -9,7 +9,7 @@ class CallbackNBatchCsv(tf.keras.callbacks.Callback):
 		self.n_batches = n_batches
 		self.separator = separator
 		
-		self.handle = handle_open(filepath)
+		self.handle = handle_open(filepath, "w")
 		
 		
 		self.batches_seen = 0
diff --git a/aimodel/src/lib/io/handle_open.py b/aimodel/src/lib/io/handle_open.py
index a167ea0..bd3f046 100644
--- a/aimodel/src/lib/io/handle_open.py
+++ b/aimodel/src/lib/io/handle_open.py
@@ -2,7 +2,10 @@ import io
 import gzip
 
 
-def handle_open(filepath, mode):
+def handle_open(filepath, mode, force_textwrite_gzip=True):
+	if mode == "w" and filepath.endswith(".gz") and force_textwrite_gzip: # test the *path* suffix, not the mode string
+		mode = "wt" # gzip.open() treats plain "w" as binary; "wt" selects text mode
+	
 	if filepath.endswith(".gz"):
 		return gzip.open(filepath, mode)
 	else:

From 4dddcfcb420ad4163212ba69a5299f2d43737706 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 16:01:28 +0000
Subject: [PATCH 31/41] pretrain_predict: missing \n

---
 aimodel/src/subcommands/pretrain_predict.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/subcommands/pretrain_predict.py b/aimodel/src/subcommands/pretrain_predict.py
index c714754..dd0aa9d 100644
--- a/aimodel/src/subcommands/pretrain_predict.py
+++ b/aimodel/src/subcommands/pretrain_predict.py
@@ -140,4 +140,4 @@ def run(args):
 	
 	handle.close()
 	
-	sys.stderr.write(">>> Complete\n")
\ No newline at end of file
+	sys.stderr.write("\n>>> Complete\n")
\ No newline at end of file

From 44ad51f48320cc485c53918ebc339649e61c24b1 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 16:40:21 +0000
Subject: [PATCH 32/41] =?UTF-8?q?CallbackNBatchCsv:=20bugfix=20.sort()=20?=
 =?UTF-8?q?=E2=86=92=20sorted()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 aimodel/src/lib/ai/components/CallbackNBatchCsv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/ai/components/CallbackNBatchCsv.py b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
index e04058c..c308a03 100644
--- a/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
+++ b/aimodel/src/lib/ai/components/CallbackNBatchCsv.py
@@ -17,7 +17,7 @@ class CallbackNBatchCsv(tf.keras.callbacks.Callback):
 	
 	def write_header(self, logs): # logs = metrics
 		self.keys = logs.keys()
-		self.keys.sort()
+		self.keys = sorted(self.keys)
 		self.handle.write("\t".join(self.keys)+"\n")
 			
 	def on_batch_end(self, batch, logs=None): # logs = metrics

From 0353072d15029f721d320daa01bc4b628131a77f Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 17:02:07 +0000
Subject: [PATCH 33/41] allow pretrain to run on gpu: we've slashed the size of
 the 2nd encoder, so it should fit now?

---
 aimodel/slurm-pretrain.job | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/slurm-pretrain.job b/aimodel/slurm-pretrain.job
index 817c205..7c92810 100755
--- a/aimodel/slurm-pretrain.job
+++ b/aimodel/slurm-pretrain.job
@@ -5,7 +5,7 @@
 #SBATCH --gres=gpu:1
 #SBATCH -o %j.%N.%a.out.log
 #SBATCH -e %j.%N.%a.err.log
-#SBATCH -p gpu05
+#SBATCH -p gpu05,gpu
 #SBATCH --time=5-00:00:00
 #SBATCH --mem=61440
 # 61440 = 60GiB memory required

From 0166b4d09e68675ee2ee61d48df5c0e36e1b4aef Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 17:11:10 +0000
Subject: [PATCH 34/41] slurm-process: change log file names

---
 rainfallwrangler/slurm-process.job | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 7f499af..194fb56 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -1,9 +1,9 @@
 #!/usr/bin/env bash
-#SBATCH -J RW2jsonl
+#SBATCH -J RWrangle
 #SBATCH -N 1
 #SBATCH -n 1
-#SBATCH -o %j.%N.%a.out.log
-#SBATCH -e %j.%N.%a.err.log
+#SBATCH -o %j.%N.%a.rainwrangle.out.log
+#SBATCH -e %j.%N.%a.rainwrangle.err.log
 #SBATCH -p compute
 #SBATCH --time=3-00:00:00
 #SBATCH --mem=8096

From a7626640631c376c428eafae5da88a2aec1b924d Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 17:23:20 +0000
Subject: [PATCH 35/41] slurm-process: -n28 for uniq call

---
 rainfallwrangler/slurm-process.job | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index 194fb56..bab0970 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -76,7 +76,7 @@ echo ">>> Converting dataset to .jsonl.gz";
 /usr/bin/env time -v src/index.mjs recordify --verbose --rainfall "${RAINFALL}" --water "${WATER}" --output "${OUTPUT}" --count-file "${COUNT_FILE}" ${ARGS};
 echo ">>> Deduplicating dataset";
 # This also automatically recompresses for us - hence the source/target rather than in-place
-srun --comment 'RainUniq' --exclusive -p compute /usr/bin/env time -v src/index.mjs uniq --source "${OUTPUT}" --target "${OUTPUT_UNIQ}" --count-file "${COUNT_FILE}";
+srun --comment 'RainUniq' --exclusive -p compute -n28 /usr/bin/env time -v src/index.mjs uniq --source "${OUTPUT}" --target "${OUTPUT_UNIQ}" --count-file "${COUNT_FILE}";
 echo ">>> Removing intermediate output";
 rm -r "${OUTPUT}";
 echo ">>> Queuing .jsonl.gz → tfrecord";

From ddbf2cb73462699bd0e3b1ce6a21cf5087d7515b Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Fri, 4 Nov 2022 17:42:50 +0000
Subject: [PATCH 36/41] =?UTF-8?q?slurm-process:=20-n28=20=E2=86=92=20--exc?=
 =?UTF-8?q?lusive?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 rainfallwrangler/slurm-process.job | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rainfallwrangler/slurm-process.job b/rainfallwrangler/slurm-process.job
index bab0970..5ba862b 100755
--- a/rainfallwrangler/slurm-process.job
+++ b/rainfallwrangler/slurm-process.job
@@ -76,7 +76,7 @@ echo ">>> Converting dataset to .jsonl.gz";
 /usr/bin/env time -v src/index.mjs recordify --verbose --rainfall "${RAINFALL}" --water "${WATER}" --output "${OUTPUT}" --count-file "${COUNT_FILE}" ${ARGS};
 echo ">>> Deduplicating dataset";
 # This also automatically recompresses for us - hence the source/target rather than in-place
-srun --comment 'RainUniq' --exclusive -p compute -n28 /usr/bin/env time -v src/index.mjs uniq --source "${OUTPUT}" --target "${OUTPUT_UNIQ}" --count-file "${COUNT_FILE}";
+srun --comment 'RainUniq' --exclusive -p compute --exclusive /usr/bin/env time -v src/index.mjs uniq --source "${OUTPUT}" --target "${OUTPUT_UNIQ}" --count-file "${COUNT_FILE}";
 echo ">>> Removing intermediate output";
 rm -r "${OUTPUT}";
 echo ">>> Queuing .jsonl.gz → tfrecord";

From e519b0adb3108549931219d41d82cb0c35c91322 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Wed, 9 Nov 2022 16:43:05 +0000
Subject: [PATCH 37/41] GzipChildProcess: spawn-stream is buggy IIRC

---
 rainfallwrangler/src/lib/io/GzipChildProcess.mjs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rainfallwrangler/src/lib/io/GzipChildProcess.mjs b/rainfallwrangler/src/lib/io/GzipChildProcess.mjs
index 6263dfc..7aa2998 100644
--- a/rainfallwrangler/src/lib/io/GzipChildProcess.mjs
+++ b/rainfallwrangler/src/lib/io/GzipChildProcess.mjs
@@ -9,7 +9,6 @@ import { end_safe } from './StreamHelpers.mjs';
 
 /**
  * Spawns and manages a gzip child process.
- * @deprecated Use spawn-stream instead
  * @extends EventEmitter
  */
 class GzipChildProcess extends EventEmitter {

From 0894bd09e83fd8d87f21ef5d475c29373e8a10f2 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 10 Nov 2022 19:45:41 +0000
Subject: [PATCH 38/41] train_predict: add error message for params.json not
 found

---
 aimodel/src/subcommands/train_predict.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aimodel/src/subcommands/train_predict.py b/aimodel/src/subcommands/train_predict.py
index 3624cf8..4fed4b3 100644
--- a/aimodel/src/subcommands/train_predict.py
+++ b/aimodel/src/subcommands/train_predict.py
@@ -40,6 +40,8 @@ def run(args):
 	
 	if (not hasattr(args, "params")) or args.params == None:
 		args.params = find_paramsjson(args.checkpoint)
+	if args.params == None:
+		logger.error("Error: Failed to find params.json. Please ensure it's either in the same directory as the checkpoint or 1 level above")
 	if (not hasattr(args, "read_multiplier")) or args.read_multiplier == None:
 		args.read_multiplier = 0
 	if (not hasattr(args, "records_per_file")) or args.records_per_file == None:

From aa7d9b8cf6e9e9b51ecf63c238be7a04e4c82ba4 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 10 Nov 2022 19:46:09 +0000
Subject: [PATCH 39/41] fixup

---
 aimodel/src/subcommands/train_predict.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aimodel/src/subcommands/train_predict.py b/aimodel/src/subcommands/train_predict.py
index 4fed4b3..4eb9727 100644
--- a/aimodel/src/subcommands/train_predict.py
+++ b/aimodel/src/subcommands/train_predict.py
@@ -42,6 +42,7 @@ def run(args):
 		args.params = find_paramsjson(args.checkpoint)
 	if args.params == None:
 		logger.error("Error: Failed to find params.json. Please ensure it's either in the same directory as the checkpoint or 1 level above")
+		return
 	if (not hasattr(args, "read_multiplier")) or args.read_multiplier == None:
 		args.read_multiplier = 0
 	if (not hasattr(args, "records_per_file")) or args.records_per_file == None:

From 0aa2ce19f5d7e58cffc0239e7091e52ef790585b Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 10 Nov 2022 19:53:30 +0000
Subject: [PATCH 40/41] read_metadata: support file inputs as well as dirs

---
 aimodel/src/lib/dataset/read_metadata.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/aimodel/src/lib/dataset/read_metadata.py b/aimodel/src/lib/dataset/read_metadata.py
index 2a1dc5f..03f6941 100644
--- a/aimodel/src/lib/dataset/read_metadata.py
+++ b/aimodel/src/lib/dataset/read_metadata.py
@@ -5,6 +5,9 @@ import json
 from ..io.readfile import readfile
 
 def read_metadata(dirpath_dataset):
-	filepath_metadata = os.path.join(dirpath_dataset, "metadata.json")
+	if os.path.isfile(dirpath_dataset):
+		filepath_metadata = os.path.join(os.path.dirname(dirpath_dataset), "metadata.jsonl")
+	else:
+		filepath_metadata = os.path.join(dirpath_dataset, "metadata.json")
 	
 	return json.loads(readfile(filepath_metadata))
\ No newline at end of file

From daf691bf435b843521a2cd0873a2f740434d8dc9 Mon Sep 17 00:00:00 2001
From: Starbeamrainbowlabs <sbrl@starbeamrainbowlabs.com>
Date: Thu, 10 Nov 2022 19:55:00 +0000
Subject: [PATCH 41/41] typo

---
 aimodel/src/lib/dataset/read_metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aimodel/src/lib/dataset/read_metadata.py b/aimodel/src/lib/dataset/read_metadata.py
index 03f6941..6ae44b5 100644
--- a/aimodel/src/lib/dataset/read_metadata.py
+++ b/aimodel/src/lib/dataset/read_metadata.py
@@ -6,7 +6,7 @@ from ..io.readfile import readfile
 
 def read_metadata(dirpath_dataset):
 	if os.path.isfile(dirpath_dataset):
-		filepath_metadata = os.path.join(os.path.dirname(dirpath_dataset), "metadata.jsonl")
+		filepath_metadata = os.path.join(os.path.dirname(dirpath_dataset), "metadata.json")
 	else:
 		filepath_metadata = os.path.join(dirpath_dataset, "metadata.json")