mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-22 09:13:01 +00:00
datasets: add shuffle=True/False to get_filepaths.
This is important because otherwise it SCRAMBLES the filenames, which is a disaster for making predictions in the right order....!
This commit is contained in:
parent
fe43ddfbf9
commit
63e909d9fc
2 changed files with 19 additions and 8 deletions
|
@ -66,11 +66,17 @@ def make_dataset(filepaths, metadata, shape_watch_desired=[100,100], compression
|
||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
def get_filepaths(dirpath_input, shuffle=True):
	"""Return the filepaths of all .tfrecord.gz files directly inside a directory.
	
	Args:
		dirpath_input (str): The directory to scan (non-recursive).
		shuffle (bool): If True, return the filepaths in a random order.
			If False, return them sorted numerically by their leading
			filename component (e.g. "12.tfrecord.gz" → 12) — important
			so predictions come out in a stable, correct order.
	
	Returns:
		list[str]: The absolute filepaths of the matching files.
	"""
	import random  # local import: this file's top-level import block is not visible here
	result = [
		entry.path  # .path on a DirEntry object yields the absolute filepath
		for entry in os.scandir(dirpath_input)
		if entry.path.endswith(".tfrecord.gz")
	]
	if shuffle:
		# BUGFIX: the original did `result = shuffle(result)`, but `shuffle`
		# here is the boolean parameter, not a function — calling it raises
		# TypeError. random.shuffle shuffles in-place and returns None.
		random.shuffle(result)
	else:
		# BUGFIX: sort on the basename, not the full path. DirEntry.path is
		# absolute, so int("/some/dir/12") in the original raised ValueError.
		result.sort(key=lambda filepath: int(os.path.basename(filepath).split(".", 1)[0]))
	return result
||||||
def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5):
|
def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5):
|
||||||
filepaths = get_filepaths(dirpath_input)
|
filepaths = get_filepaths(dirpath_input)
|
||||||
|
@ -99,7 +105,7 @@ def dataset_predict(dirpath_input, parallel_reads_multiplier=1.5, prefetch=True)
|
||||||
Returns:
|
Returns:
|
||||||
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
||||||
"""
|
"""
|
||||||
filepaths = get_filepaths(dirpath_input) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
filepaths = get_filepaths(dirpath_input, shuffle=False) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||||
|
|
||||||
return make_dataset(
|
return make_dataset(
|
||||||
filepaths=filepaths,
|
filepaths=filepaths,
|
||||||
|
|
|
@ -63,11 +63,16 @@ def make_dataset(filepaths, metadata, shape_water_desired=[100,100], water_thres
|
||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
def get_filepaths(dirpath_input, shuffle=True):
	"""Return the filepaths of all .tfrecord.gz files directly inside a directory.
	
	Args:
		dirpath_input (str): The directory to scan (non-recursive).
		shuffle (bool): If True, return the filepaths in a random order.
			If False, return them sorted numerically by their leading
			filename component (e.g. "12.tfrecord.gz" → 12) — important
			so predictions come out in a stable, correct order.
	
	Returns:
		list[str]: The absolute filepaths of the matching files.
	"""
	import random  # local import: this file's top-level import block is not visible here
	result = [
		entry.path  # .path on a DirEntry object yields the absolute filepath
		for entry in os.scandir(dirpath_input)
		if entry.path.endswith(".tfrecord.gz")
	]
	if shuffle:
		# BUGFIX: the original did `result = shuffle(result)`, but `shuffle`
		# here is the boolean parameter, not a function — calling it raises
		# TypeError. random.shuffle shuffles in-place and returns None.
		random.shuffle(result)
	else:
		# BUGFIX: sort on the basename, not the full path. DirEntry.path is
		# absolute, so int("/some/dir/12") in the original raised ValueError.
		result.sort(key=lambda filepath: int(os.path.basename(filepath).split(".", 1)[0]))
	return result
||||||
def dataset_segmenter(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5, water_threshold=0.1, shape_water_desired=[100,100]):
|
def dataset_segmenter(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5, water_threshold=0.1, shape_water_desired=[100,100]):
|
||||||
filepaths = get_filepaths(dirpath_input)
|
filepaths = get_filepaths(dirpath_input)
|
||||||
|
@ -97,7 +102,7 @@ def dataset_predict(dirpath_input, parallel_reads_multiplier=1.5, prefetch=True,
|
||||||
Returns:
|
Returns:
|
||||||
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
||||||
"""
|
"""
|
||||||
filepaths = get_filepaths(dirpath_input) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
filepaths = get_filepaths(dirpath_input, shuffle=False) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||||
|
|
||||||
return make_dataset(
|
return make_dataset(
|
||||||
filepaths=filepaths,
|
filepaths=filepaths,
|
||||||
|
|
Loading…
Reference in a new issue