mirror of
https://github.com/sbrl/research-rainfallradar
synced 2024-11-25 10:32:59 +00:00
datasets: add shuffle=True/False to get_filepaths.
This is important because otherwise it SCRAMBLES the filenames, which is a disaster for making predictions in the right order!
This commit is contained in:
parent
fe43ddfbf9
commit
63e909d9fc
2 changed files with 19 additions and 8 deletions
|
@ -66,11 +66,17 @@ def make_dataset(filepaths, metadata, shape_watch_desired=[100,100], compression
|
|||
return dataset
|
||||
|
||||
|
||||
def get_filepaths(dirpath_input):
|
||||
return shuffle(list(filter(
|
||||
def get_filepaths(dirpath_input, shuffle=True):
|
||||
result = list(filter(
|
||||
lambda filepath: str(filepath).endswith(".tfrecord.gz"),
|
||||
[ file.path for file in os.scandir(dirpath_input) ] # .path on a DirEntry object yields the absolute filepath
|
||||
)))
|
||||
))
|
||||
if shuffle:
|
||||
result = shuffle(result)
|
||||
else:
|
||||
result = sorted(result, key=lambda filepath: int(filepath.split(".", 1)[0]))
|
||||
|
||||
return result
|
||||
|
||||
def dataset(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5):
|
||||
filepaths = get_filepaths(dirpath_input)
|
||||
|
@ -99,7 +105,7 @@ def dataset_predict(dirpath_input, parallel_reads_multiplier=1.5, prefetch=True)
|
|||
Returns:
|
||||
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
||||
"""
|
||||
filepaths = get_filepaths(dirpath_input) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||
filepaths = get_filepaths(dirpath_input, shuffle=False) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||
|
||||
return make_dataset(
|
||||
filepaths=filepaths,
|
||||
|
|
|
@ -63,11 +63,16 @@ def make_dataset(filepaths, metadata, shape_water_desired=[100,100], water_thres
|
|||
return dataset
|
||||
|
||||
|
||||
def get_filepaths(dirpath_input):
|
||||
return shuffle(list(filter(
|
||||
def get_filepaths(dirpath_input, shuffle=True):
|
||||
result = list(filter(
|
||||
lambda filepath: str(filepath).endswith(".tfrecord.gz"),
|
||||
[ file.path for file in os.scandir(dirpath_input) ] # .path on a DirEntry object yields the absolute filepath
|
||||
)))
|
||||
))
|
||||
if shuffle:
|
||||
result = shuffle(result)
|
||||
else:
|
||||
result = sorted(result, key=lambda filepath: int(filepath.split(".", 1)[0]))
|
||||
return result
|
||||
|
||||
def dataset_segmenter(dirpath_input, batch_size=64, train_percentage=0.8, parallel_reads_multiplier=1.5, water_threshold=0.1, shape_water_desired=[100,100]):
|
||||
filepaths = get_filepaths(dirpath_input)
|
||||
|
@ -97,7 +102,7 @@ def dataset_predict(dirpath_input, parallel_reads_multiplier=1.5, prefetch=True,
|
|||
Returns:
|
||||
tf.data.Dataset: A tensorflow Dataset for the given input files.
|
||||
"""
|
||||
filepaths = get_filepaths(dirpath_input) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||
filepaths = get_filepaths(dirpath_input, shuffle=False) if os.path.isdir(dirpath_input) else [ dirpath_input ]
|
||||
|
||||
return make_dataset(
|
||||
filepaths=filepaths,
|
||||
|
|
Loading…
Reference in a new issue