Skip to content

Commit

Permalink
Update raster patches loader to only load patches without nodata #58
Browse files: browse the repository at this point in the history
  • Loading branch information
bdubayah committed Oct 14, 2021
1 parent 90759bd commit 0b659a2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 15 deletions.
29 changes: 19 additions & 10 deletions dora_exp_pipeline/dora_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,13 @@ class RasterPatchLoader(DataLoader):
def __init__(self):
super(RasterPatchLoader, self).__init__('raster_patches')

def _load(self, dir_path: str, patch_size: int) -> dict:
def _load(self, dir_path: str, patch_size: int, stride: int = 1,
nodata: object = None) -> dict:
if not os.path.exists(dir_path):
raise RuntimeError(f'Directory not found: '
f'{os.path.abspath(dir_path)}')
if stride < 1:
raise RuntimeError('Stride must be >= 1')

# List of supported file types
file_types = ['.tif']
Expand All @@ -215,15 +218,21 @@ def _load(self, dir_path: str, patch_size: int) -> dict:
# we want to put it in channels-last order
img = np.moveaxis(img, 0, -1)
# extract patches from raster image
# i, j are patch center coordinates
w = int(patch_size/2)
for i in range(w, img.shape[0]-w):
for j in range(w, img.shape[1]-w):
patch = img[i-w:i+(w + 1), j-w:j+(w + 1)]
# append the patch coordinate as the id
data_dict['id'].append('%d-%d' % (i, j))
# append the patch data
data_dict['data'].append(patch.flatten())
# i, j are patch top left coordinates
i = 0
while (i + patch_size) <= img.shape[0]:
j = 0
while (j + patch_size) <= img.shape[1]:
patch = img[i:i+patch_size, j:j+patch_size]
if np.any(patch == nodata):
j += 1
else:
# append the patch coordinate as the id
data_dict['id'].append('%d-%d' % (i, j))
# append the patch data
data_dict['data'].append(patch.flatten())
j += stride
i += stride
else:
raise RuntimeError(f'File extension not supported. '
f'Valid file extensions: '
Expand Down
16 changes: 11 additions & 5 deletions dora_exp_pipeline/dora_results_organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def run(self, data_ids, dts_scores, dts_sels, data_to_score,

@abstractmethod
def _run(self, data_ids, dts_scores, dts_sels, data_to_score,
outlier_alg_name, logger, seed, top_n, **params):
outlier_alg_name, out_dir, logger, seed, top_n, **params):
raise RuntimeError('This function must be implemented in a child class')


Expand Down Expand Up @@ -235,16 +235,22 @@ def _run(self, data_ids, dts_scores, dts_sels, data_to_score,
scores = np.reshape(np.array(scores), [height, width])
elif data_format == 'patches':
# Check that top_n wasn't specified to be a subset of the pixels
if top_n != ((height-(patch_size-1))*(width-(patch_size-1))):
if top_n != len(data_ids):
raise RuntimeError('Cannot use top_n with ReshapeRaster')
scores = np.zeros([height, width])
# Keep track of overlapping patches at each pixel
counts = np.zeros([height, width])
for ex, idx in enumerate(data_ids):
# get the patch center coordinates
# get the patch top left coordinates
i, j = idx.split('-')
i = int(i)
j = int(j)
# fill in the score for that index
scores[i, j] = dts_scores[ex]
# fill in the score for that patch
score = dts_scores[ex]
scores[i:i+patch_size, j:j+patch_size] += score
counts[i:i+patch_size, j:j+patch_size] += 1
counts[counts == 0] = np.nan
scores /= counts
else:
raise RuntimeError("data_format must be 'pixels' or 'patches'")

Expand Down

0 comments on commit 0b659a2

Please sign in to comment.