diff --git a/detectree2/preprocessing/tiling.py b/detectree2/preprocessing/tiling.py index 914575e..b727d3a 100644 --- a/detectree2/preprocessing/tiling.py +++ b/detectree2/preprocessing/tiling.py @@ -7,13 +7,14 @@ import concurrent.futures import json import logging +import math import os import pickle import random import shutil import warnings # noqa: F401 -from math import ceil from pathlib import Path +from typing import List, Tuple import cv2 import geopandas as gpd @@ -376,6 +377,62 @@ def process_tile_train_helper(args): return process_tile_train(*args) +def _calculate_tile_placements( + img_path: str, + buffer: int, + tile_width: int, + tile_height: int, + crowns: gpd.GeoDataFrame = None, + tile_placement: str = "grid", +) -> List[Tuple[int, int]]: + """Internal method for calculating the placement of tiles""" + + if tile_placement == "grid": + with rasterio.open(img_path) as data: + coordinates = [(minx, miny) for minx in np.arange(math.ceil(data.bounds[0]) + buffer, data.bounds[2] - tile_width - buffer, tile_width, int) for miny in np.arange(math.ceil(data.bounds[1]) + buffer, data.bounds[3] - tile_height - buffer, tile_height, int)] + elif tile_placement == "adaptive": + + if crowns is None: + logger.warning('Crowns must be supplied if tile_placement="adaptive" (crowns is None). Assuming tiling for test dataset, and tile placement will be done with tile_placement == "grid" instead.') + return _calculate_tile_placements(img_path, buffer, tile_width, tile_height) + + logger.info("Starting Union of Crowns") + unioned_crowns = crowns.union_all() + logger.info(f"Finished Union of Crowns") + + area_width = crowns.total_bounds[2]-crowns.total_bounds[0] + area_height = crowns.total_bounds[3]-crowns.total_bounds[1] + required_tiles_x = math.ceil(area_width/tile_width) + required_tiles_y = math.ceil(area_height/tile_height) + combined_tiles_width = required_tiles_x*tile_width + combined_tiles_height = required_tiles_y*tile_height + x_offset = (combined_tiles_width-area_width)/2 + y_offset = (combined_tiles_height-area_height)/2 + + + logger.info("Starting Tile Placement Generation") + coordinates=[] + for row in range(required_tiles_y): + bar = gpd.GeoSeries([box(crowns.total_bounds[0]-x_offset, crowns.total_bounds[1]-y_offset+ row*tile_height,crowns.total_bounds[2]+x_offset, crowns.total_bounds[1]-y_offset+ (row+1)*tile_height)], crs=crowns.crs) + + intersection = unioned_crowns.intersection(bar) + if intersection.is_empty.all(): + continue + + intersection_width = intersection.total_bounds[2]-intersection.total_bounds[0] + required_intersection_tiles_x = math.ceil(intersection_width/tile_width) + combined_intersection_tiles_width = required_intersection_tiles_x*tile_width + x_intersection_offset = (combined_intersection_tiles_width-intersection_width)/2 + + for col in range(required_intersection_tiles_x): + coordinates.append((int(intersection.total_bounds[0]-x_intersection_offset)+col*tile_width, int(crowns.total_bounds[1]-y_offset)+ row*tile_height)) + logger.info(f"Finished Tile Placement Generation") + else: + raise ValueError('Unsupported tile_placement method. Must be "grid" or "adaptive"') + + return coordinates + + def tile_data( img_path: str, out_dir: str, @@ -388,13 +445,14 @@ def tile_data( dtype_bool: bool = False, mode: str = "rgb", class_column: str = None, # Allow class column to be passed here + tile_placement: str = "grid", ) -> None: """Tiles up orthomosaic and corresponding crowns (if supplied) into training/prediction tiles. - Tiles up large rasters into managable tiles for training and prediction. If crowns are not supplied the function - will tile up the entire landscape for prediction. If crowns are supplied the function will tile these with the image - and skip tiles without a minimum coverage of crowns. The 'threshold' can be varied to ensure a good coverage of - crowns across a traing tile. Tiles that do not have sufficient coverage are skipped. + Tiles up large rasters into manageable tiles for training and prediction. If crowns are not supplied, the function + will tile up the entire landscape for prediction. If crowns are supplied, the function will tile these with the image + and skip tiles without a minimum coverage of crowns. The 'threshold' can be varied to ensure good coverage of + crowns across a training tile. Tiles that do not have sufficient coverage are skipped. Args: img_path: Path to the orthomosaic @@ -402,10 +460,15 @@ def tile_data( buffer: Overlapping buffer of tiles in meters (UTM) tile_width: Tile width in meters tile_height: Tile height in meters - crowns: Crown polygons as a geopandas dataframe - threshold: Min proportion of the tile covered by crowns to be accepted {0,1} - nan_theshold: Max proportion of tile covered by nans + crowns: Crown polygons as a GeoPandas DataFrame + threshold: Minimum proportion of the tile covered by crowns to be accepted [0,1] + nan_threshold: Maximum proportion of tile covered by NaNs [0,1] dtype_bool: Flag to edit dtype to prevent black tiles + mode: Type of the raster data ("rgb" or "ms") + class_column: Name of the column in `crowns` DataFrame for class-based tiling + tile_placement: Strategy for placing tiles. + "grid" for fixed grid placement based on the bounds of the input image, optimized for speed. + "adaptive" for dynamic placement of tiles based on crowns, adjusts based on data features for better coverage. Returns: None @@ -416,16 +479,15 @@ def tile_data( with rasterio.open(img_path) as data: crs = data.crs.to_epsg() # Update CRS handling to avoid deprecated syntax - tile_args = [ - (img_path, out_dir, buffer, tile_width, tile_height, dtype_bool, minx, miny, crs, tilename, crowns, - threshold, nan_threshold, mode, class_column) - for minx in np.arange(ceil(data.bounds[0]) + buffer, data.bounds[2] - tile_width - buffer, tile_width, int) - for miny in np.arange(ceil(data.bounds[1]) + buffer, data.bounds[3] - tile_height - buffer, tile_height, - int) - ] + tile_coordinates = _calculate_tile_placements(img_path, buffer, tile_width, tile_height, crowns, tile_placement) + tile_args = [ + (img_path, out_dir, buffer, tile_width, tile_height, dtype_bool, minx, miny, crs, tilename, crowns, threshold, + nan_threshold, mode, class_column) + for minx, miny in tile_coordinates + ] - with concurrent.futures.ProcessPoolExecutor() as executor: # Use ProcessPoolExecutor here - list(executor.map(process_tile_train_helper, tile_args)) + with concurrent.futures.ProcessPoolExecutor() as executor: # Use ProcessPoolExecutor here + list(executor.map(process_tile_train_helper, tile_args)) logger.info("Tiling complete") @@ -609,6 +671,7 @@ def to_traintest_folders( # noqa: C901 dtype_bool = False # Change dtype to uint8 to avoid black tiles mode = "rgb" # Use 'rgb' for regular 3-channel imagery, 'ms' for multispectral class_column = "species" # Column in the crowns file to use as the class label + tile_placement = "adaptive" # Determines the way that tiles are are placed, can be either "grid" or "adaptive" # Read in the crowns crowns = gpd.read_file(crown_path) @@ -633,7 +696,8 @@ def to_traintest_folders( # noqa: C901 nan_threshold=nan_threshold, dtype_bool=dtype_bool, mode=mode, - class_column=class_column # Use the selected class column (e.g., 'species', 'status') + class_column=class_column, # Use the selected class column (e.g., 'species', 'status') + tile_placement=tile_placement ) # Split the data into training and validation sets (optional)