Skip to content

Commit

Permalink
Added adaptive tile placement
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristopherKotthoff committed Dec 11, 2024
1 parent 1823cd7 commit c2eddaf
Showing 1 changed file with 82 additions and 18 deletions.
100 changes: 82 additions & 18 deletions detectree2/preprocessing/tiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
import concurrent.futures
import json
import logging
import math
import os
import pickle
import random
import shutil
import warnings # noqa: F401
from math import ceil
from pathlib import Path
from typing import List, Tuple

import cv2
import geopandas as gpd
Expand Down Expand Up @@ -376,6 +377,62 @@ def process_tile_train_helper(args):
return process_tile_train(*args)


def _calculate_tile_placements(
img_path: str,
buffer: int,
tile_width: int,
tile_height: int,
crowns: gpd.GeoDataFrame = None,
tile_placement: str = "grid",
) -> List[Tuple[int, int]]:
"""Internal method for calculating the placement of tiles"""

if tile_placement == "grid":
with rasterio.open(img_path) as data:
coordinates = [(minx, miny) for minx in np.arange(math.ceil(data.bounds[0]) + buffer, data.bounds[2] - tile_width - buffer, tile_width, int) for miny in np.arange(math.ceil(data.bounds[1]) + buffer, data.bounds[3] - tile_height - buffer, tile_height, int)]
elif tile_placement == "adaptive":

if crowns is None:
logger.warning('Crowns must be supplied if tile_placement="adaptive" (crowns is None). Assuming tiling for test dataset, and tile placement will be done with tile_placement == "grid" instead.')
return _calculate_tile_placements(img_path, buffer, tile_width, tile_height)

logger.info("Starting Union of Crowns")
unioned_crowns = crowns.union_all()
logger.info(f"Finished Union of Crowns")

area_width = crowns.total_bounds[2]-crowns.total_bounds[0]
area_height = crowns.total_bounds[3]-crowns.total_bounds[1]
required_tiles_x = math.ceil(area_width/tile_width)
required_tiles_y = math.ceil(area_height/tile_height)
combined_tiles_width = required_tiles_x*tile_width
combined_tiles_height = required_tiles_y*tile_height
x_offset = (combined_tiles_width-area_width)/2
y_offset = (combined_tiles_height-area_height)/2


logger.info("Starting Tile Placement Generation")
coordinates=[]
for row in range(required_tiles_y):
bar = gpd.GeoSeries([box(crowns.total_bounds[0]-x_offset, crowns.total_bounds[1]-y_offset+ row*tile_height,crowns.total_bounds[2]+x_offset, crowns.total_bounds[1]-y_offset+ (row+1)*tile_height)], crs=crowns.crs)

intersection = unioned_crowns.intersection(bar)
if intersection.is_empty.all():
continue

intersection_width = intersection.total_bounds[2]-intersection.total_bounds[0]
required_intersection_tiles_x = math.ceil(intersection_width/tile_width)
combined_intersection_tiles_width = required_intersection_tiles_x*tile_width
x_intersection_offset = (combined_intersection_tiles_width-intersection_width)/2

for col in range(required_intersection_tiles_x):
coordinates.append((int(intersection.total_bounds[0]-x_intersection_offset)+col*tile_width, int(crowns.total_bounds[1]-y_offset)+ row*tile_height))
logger.info(f"Finished Tile Placement Generation")
else:
raise ValueError('Unsupported tile_placement method. Must be "grid" or "adaptive"')

return coordinates


def tile_data(
img_path: str,
out_dir: str,
Expand All @@ -388,24 +445,30 @@ def tile_data(
dtype_bool: bool = False,
mode: str = "rgb",
class_column: str = None, # Allow class column to be passed here
tile_placement: str = "grid",
) -> None:
"""Tiles up orthomosaic and corresponding crowns (if supplied) into training/prediction tiles.
Tiles up large rasters into managable tiles for training and prediction. If crowns are not supplied the function
will tile up the entire landscape for prediction. If crowns are supplied the function will tile these with the image
and skip tiles without a minimum coverage of crowns. The 'threshold' can be varied to ensure a good coverage of
crowns across a traing tile. Tiles that do not have sufficient coverage are skipped.
Tiles up large rasters into manageable tiles for training and prediction. If crowns are not supplied, the function
will tile up the entire landscape for prediction. If crowns are supplied, the function will tile these with the image
and skip tiles without a minimum coverage of crowns. The 'threshold' can be varied to ensure good coverage of
crowns across a training tile. Tiles that do not have sufficient coverage are skipped.
Args:
img_path: Path to the orthomosaic
out_dir: Output directory
buffer: Overlapping buffer of tiles in meters (UTM)
tile_width: Tile width in meters
tile_height: Tile height in meters
crowns: Crown polygons as a geopandas dataframe
threshold: Min proportion of the tile covered by crowns to be accepted {0,1}
nan_theshold: Max proportion of tile covered by nans
crowns: Crown polygons as a GeoPandas DataFrame
threshold: Minimum proportion of the tile covered by crowns to be accepted [0,1]
nan_threshold: Maximum proportion of tile covered by NaNs [0,1]
dtype_bool: Flag to edit dtype to prevent black tiles
mode: Type of the raster data ("rgb" or "ms")
class_column: Name of the column in `crowns` DataFrame for class-based tiling
tile_placement: Strategy for placing tiles.
"grid" for fixed grid placement based on the bounds of the input image, optimized for speed.
"adaptive" for dynamic placement of tiles based on crowns, adjusts based on data features for better coverage.
Returns:
None
Expand All @@ -416,16 +479,15 @@ def tile_data(
with rasterio.open(img_path) as data:
crs = data.crs.to_epsg() # Update CRS handling to avoid deprecated syntax

tile_args = [
(img_path, out_dir, buffer, tile_width, tile_height, dtype_bool, minx, miny, crs, tilename, crowns,
threshold, nan_threshold, mode, class_column)
for minx in np.arange(ceil(data.bounds[0]) + buffer, data.bounds[2] - tile_width - buffer, tile_width, int)
for miny in np.arange(ceil(data.bounds[1]) + buffer, data.bounds[3] - tile_height - buffer, tile_height,
int)
]
tile_coordinates = _calculate_tile_placements(img_path, buffer, tile_width, tile_height, crowns, tile_placement)
tile_args = [
(img_path, out_dir, buffer, tile_width, tile_height, dtype_bool, minx, miny, crs, tilename, crowns, threshold,
nan_threshold, mode, class_column)
for minx, miny in tile_coordinates
]

with concurrent.futures.ProcessPoolExecutor() as executor: # Use ProcessPoolExecutor here
list(executor.map(process_tile_train_helper, tile_args))
with concurrent.futures.ProcessPoolExecutor() as executor: # Use ProcessPoolExecutor here
list(executor.map(process_tile_train_helper, tile_args))

logger.info("Tiling complete")

Expand Down Expand Up @@ -609,6 +671,7 @@ def to_traintest_folders( # noqa: C901
dtype_bool = False # Change dtype to uint8 to avoid black tiles
mode = "rgb" # Use 'rgb' for regular 3-channel imagery, 'ms' for multispectral
class_column = "species" # Column in the crowns file to use as the class label
tile_placement = "adaptive" # Determines how tiles are placed; can be either "grid" or "adaptive"

# Read in the crowns
crowns = gpd.read_file(crown_path)
Expand All @@ -633,7 +696,8 @@ def to_traintest_folders( # noqa: C901
nan_threshold=nan_threshold,
dtype_bool=dtype_bool,
mode=mode,
class_column=class_column # Use the selected class column (e.g., 'species', 'status')
class_column=class_column, # Use the selected class column (e.g., 'species', 'status')
tile_placement=tile_placement
)

# Split the data into training and validation sets (optional)
Expand Down

0 comments on commit c2eddaf

Please sign in to comment.