mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-22 09:13:01 +00:00
resize rainfall to be 1/2 size of current
This commit is contained in:
parent
8a86728b54
commit
3e4128c0a8
2 changed files with 34 additions and 23 deletions
|
@ -6,40 +6,47 @@ from loguru import logger
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from ..io.readfile import readfile
|
||||||
from .shuffle import shuffle
|
from .shuffle import shuffle
|
||||||
|
|
||||||
|
|
||||||
# TO PARSE:
|
|
||||||
@tf.function
def parse_item(item):
    """Decode one serialised TFRecord example into model inputs.

    Args:
        item: A serialised example holding the string features
            "rainfallradar" and "waterdepth", each a serialised float32
            tensor.

    Returns:
        ``((rainfall, water), tf.ones(1))`` where ``rainfall`` has been
        transposed to put its first axis last and ``water`` has gained a
        trailing axis.
    """
    feature_spec = {
        "rainfallradar": tf.io.FixedLenFeature([], tf.string),
        "waterdepth": tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(item, features=feature_spec)

    rainfall_raw = tf.io.parse_tensor(example["rainfallradar"], out_type=tf.float32)
    water_raw = tf.io.parse_tensor(example["waterdepth"], out_type=tf.float32)

    # [channels, width, height] → [width, height, channels] - ref ConvNeXt does not support data_format=channels_first
    rainfall = tf.transpose(rainfall_raw, [1, 2, 0])
    # [width, height] → [width, height, channels]
    water = tf.expand_dims(water_raw, axis=-1)

    # TODO: The shape of the resulting tensor can't be statically determined, so we need to reshape here
    print("DEBUG:dataset ITEM rainfall:shape", rainfall.shape, "water:shape", water.shape)
    # TODO: Any other additional parsing here, since multiple .map() calls are not optimal
    model_inputs = (rainfall, water)
    return (model_inputs, tf.ones(1))
|
|
||||||
|
|
||||||
def make_dataset(filenames, compression_type="GZIP", parallel_reads_multiplier=1.5, shuffle_buffer_size=128, batch_size=64):
|
# TO PARSE:
|
||||||
|
def parse_item(metadata):
    """Build the per-record parsing function used with ``Dataset.map()``.

    Args:
        metadata: Mapping loaded from the dataset's metadata JSON file. Its
            "waterdepth" entry is passed to ``tf.image.resize`` as the
            target size for the rainfall tensor.
            NOTE(review): presumably this is the 2D shape of the water
            depth data - confirm against whatever writes metadata.json.

    Returns:
        A ``tf.function``-wrapped callable that takes one serialised
        example and returns ``((rainfall, water), tf.ones(1))``.

    Raises:
        KeyError: If ``metadata`` has no "waterdepth" entry.
    """
    # The caller builds metadata with json.loads(), which yields a plain
    # dict, so the value has to be looked up by key: attribute access
    # (metadata.waterdepth) raises AttributeError. Doing the lookup here,
    # eagerly, also surfaces a missing key when the parser is built rather
    # than later while the mapped function is being traced.
    rainfall_target_size = metadata["waterdepth"]

    def parse_item_inner(item):
        parsed = tf.io.parse_single_example(item, features={
            "rainfallradar": tf.io.FixedLenFeature([], tf.string),
            "waterdepth": tf.io.FixedLenFeature([], tf.string)
        })
        rainfall = tf.io.parse_tensor(parsed["rainfallradar"], out_type=tf.float32)
        water = tf.io.parse_tensor(parsed["waterdepth"], out_type=tf.float32)

        # [channels, width, height] → [width, height, channels] - ref ConvNeXt does not support data_format=channels_first
        rainfall = tf.transpose(rainfall, [1, 2, 0])
        # [width, height] → [width, height, channels]
        water = tf.expand_dims(water, axis=-1)

        # NOTE(review): tf.image.resize documents its size argument as
        # [new_height, new_width] - confirm the axis order stored in the
        # metadata matches the layout described in the comments above.
        rainfall = tf.image.resize(rainfall, tf.constant(rainfall_target_size))

        # TODO: The shape of the resulting tensor can't be statically determined, so we need to reshape here
        print("DEBUG:dataset ITEM rainfall:shape", rainfall.shape, "water:shape", water.shape)
        # TODO: Any other additional parsing here, since multiple .map() calls are not optimal
        return ((rainfall, water), tf.ones(1))

    return tf.function(parse_item_inner)
|
||||||
|
|
||||||
|
def make_dataset(filenames, metadata, compression_type="GZIP", parallel_reads_multiplier=1.5, shuffle_buffer_size=128, batch_size=64):
|
||||||
return tf.data.TFRecordDataset(filenames,
|
return tf.data.TFRecordDataset(filenames,
|
||||||
compression_type=compression_type,
|
compression_type=compression_type,
|
||||||
num_parallel_reads=math.ceil(os.cpu_count() * parallel_reads_multiplier)
|
num_parallel_reads=math.ceil(os.cpu_count() * parallel_reads_multiplier)
|
||||||
).shuffle(shuffle_buffer_size) \
|
).shuffle(shuffle_buffer_size) \
|
||||||
.map(parse_item, num_parallel_calls=tf.data.AUTOTUNE) \
|
.map(parse_item(metadata), num_parallel_calls=tf.data.AUTOTUNE) \
|
||||||
.batch(batch_size) \
|
.batch(batch_size) \
|
||||||
.prefetch(tf.data.AUTOTUNE)
|
.prefetch(tf.data.AUTOTUNE)
|
||||||
|
|
||||||
|
|
||||||
def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5):
|
def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5):
|
||||||
|
filepath_meta = os.path.join(dirpath_input, "metadata.json")
|
||||||
filepaths = shuffle(list(filter(
|
filepaths = shuffle(list(filter(
|
||||||
lambda filepath: str(filepath).endswith(".tfrecord.gz"),
|
lambda filepath: str(filepath).endswith(".tfrecord.gz"),
|
||||||
[ file.path for file in os.scandir(dirpath_input) ] # .path on a DirEntry object yields the absolute filepath
|
[ file.path for file in os.scandir(dirpath_input) ] # .path on a DirEntry object yields the absolute filepath
|
||||||
|
@ -50,8 +57,10 @@ def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_m
|
||||||
filepaths_train = filepaths[:dataset_splitpoint]
|
filepaths_train = filepaths[:dataset_splitpoint]
|
||||||
filepaths_validate = filepaths[dataset_splitpoint:]
|
filepaths_validate = filepaths[dataset_splitpoint:]
|
||||||
|
|
||||||
dataset_train = make_dataset(filepaths_train, batch_size=batch_size, parallel_reads_multiplier=parallel_reads_multiplier)
|
metadata = json.loads(readfile(filepath_meta))
|
||||||
dataset_validate = make_dataset(filepaths_validate, batch_size=batch_size, parallel_reads_multiplier=parallel_reads_multiplier)
|
|
||||||
|
dataset_train = make_dataset(filepaths_train, metadata, batch_size=batch_size, parallel_reads_multiplier=parallel_reads_multiplier)
|
||||||
|
dataset_validate = make_dataset(filepaths_validate, metadata, batch_size=batch_size, parallel_reads_multiplier=parallel_reads_multiplier)
|
||||||
|
|
||||||
return dataset_train, dataset_validate #, filepaths
|
return dataset_train, dataset_validate #, filepaths
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,8 @@ def convert(filepath_in, filepath_out):
|
||||||
rainfall = tf.constant(obj["rainfallradar"], dtype=tf.float32)
|
rainfall = tf.constant(obj["rainfallradar"], dtype=tf.float32)
|
||||||
water = tf.constant(obj["waterdepth"], dtype=tf.float32)
|
water = tf.constant(obj["waterdepth"], dtype=tf.float32)
|
||||||
|
|
||||||
|
# TODO: cast float32 → divide by max_value → clip 0-1 (or -1 to +1? I don't know)
|
||||||
|
|
||||||
###
|
###
|
||||||
## 3: Print shape definitions (required when parsing)
|
## 3: Print shape definitions (required when parsing)
|
||||||
###
|
###
|
||||||
|
|
Loading…
Reference in a new issue