Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: apply a cutline if supplied #240

Merged
merged 12 commits into from
Dec 6, 2022
2 changes: 2 additions & 0 deletions scripts/files/files_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ def get_file_name_from_path(path: str) -> str:
def is_tiff(path: str) -> bool:
    """Tell whether a path points at a TIFF file, judging by its extension only.

    Args:
        path (str): the file path to check.

    Returns:
        bool: True if the path ends with ``.tiff`` or ``.tif`` (case-insensitive).
    """
    return path.lower().endswith((".tiff", ".tif"))

def is_vrt(path: str) -> bool:
    """Tell whether a path points at a VRT file, judging by its extension only.

    Args:
        path (str): the file path to check.

    Returns:
        bool: True if the path ends with ``.vrt`` (case-insensitive).
    """
    return path.lower().endswith(".vrt")

def is_json(path: str) -> bool:
    """Tell whether a path points at a JSON file, judging by its extension only.

    Args:
        path (str): the file path to check.

    Returns:
        bool: True if the path ends with ``.json`` (case-insensitive).
    """
    return path.lower().endswith(".json")
6 changes: 3 additions & 3 deletions scripts/gdal/gdal_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ def get_gdal_version() -> str:

def run_gdal(
command: List[str],
input_file: Optional[str] = None,
input_file: str,
output_file: Optional[str] = None,
) -> "subprocess.CompletedProcess[bytes]":
"""Run the GDAL command. The permissions to access to the input file are applied to the gdal environment.

Args:
command (List[str]): each arguments of the GDAL command.
input_file (str, optional): the input file path.
input_file (str): the input file path.
output_file (str, optional): the output file path.

Raises:
Expand Down Expand Up @@ -102,7 +102,7 @@ def run_gdal(
if proc.stderr:
get_log().warning("run_gdal_stderr", command=command_to_string(temp_command), stderr=proc.stderr.decode())

get_log().debug("run_gdal_succeeded", command=command_to_string(temp_command), stdout=proc.stdout.decode())
get_log().trace("run_gdal_succeeded", command=command_to_string(temp_command), stdout=proc.stdout.decode())
blacha marked this conversation as resolved.
Show resolved Hide resolved

return proc

Expand Down
187 changes: 103 additions & 84 deletions scripts/gdal/gdal_preset.py
Original file line number Diff line number Diff line change
@@ -1,122 +1,141 @@
from typing import List
from typing import List, Optional

from linz_logger import get_log
from scripts.gdal.gdalinfo import gdal_info, GdalInfoBands

GDAL_PRESET_LZW = [
"gdal_translate",
"-q",
"-scale",

# gdal_translate arguments that assign EPSG:2193 (NZTM) as the source projection
NZTM_SOURCE = ["-a_srs", "EPSG:2193"]

# Scale imagery from 0-255 to 0-254 then set 255 as NO_DATA
# Useful for imagery that does not have an alpha band
SCALE_254_ADD_NO_DATA = [
"-scale",
"0",
"255",
"0",
"254",
"-a_srs",
"EPSG:2193",
"-a_nodata",
"255",
"-b",
"1",
"-b",
"2",
"-b",
"3",
"-of",
"COG",
"-co",
"compress=lzw",
"-co",
"num_threads=all_cpus",
"-co",
"predictor=2",
"-co",
"overview_compress=webp",
"-co",
"bigtiff=yes",
"-co",
"overview_resampling=lanczos",
"-co",
"blocksize=512",
"-co",
"overview_quality=90",
"-co",
"sparse_ok=true",
"255"
]

GDAL_PRESET_WEBP = [
"gdal_translate",
BASE_COG = [
# Quiet mode: suppress the progress monitor and other non-error output
"-q",
"-a_srs",
"EPSG:2193",
"-b",
"1",
"-b",
"2",
"-b",
"3",
# Output to a COG
"-of",
"COG",
# Tile the image into 512x512px images
"-co",
"compress=webp",
"blocksize=512",
# Ensure all CPUs are used for gdal translate
"-co",
"num_threads=all_cpus",
# If not all tiles are needed in the tiff, instead of writing empty images write a null byte
# this significantly reduces the size of tiffs which are very sparse
"-co",
"quality=100",
"-co",
"overview_compress=webp",
"sparse_ok=true",
# Force everything into big tiff
# this converts all offsets from 32bit to 64bit to support TIFFs > 4GB in size
"-co",
"bigtiff=yes",
]

COMPRESS_LZW = [
# Compress as LZW
"-co",
"overview_resampling=lanczos",
"-co",
"blocksize=512",
"-co",
"overview_quality=90",
"compress=lzw",
# Predictor two reduces file size
blacha marked this conversation as resolved.
Show resolved Hide resolved
"-co",
"sparse_ok=true",
"predictor=2",
]

GDAL_PRESET_GRAY_WEBP = [
"gdal_translate",
"-q",
"-a_srs",
"EPSG:2193",
"-b",
"1",
"-b",
"1",
"-b",
"1",
"-a_nodata",
"255",
"-of",
"COG",
COMPRESS_WEBP_LOSSLESS =[
# Compress into WebP
"-co",
"compress=webp",
"-co",
"num_threads=all_cpus",
# Compress losslessly
"-co",
"quality=100",
]

WEBP_OVERVIEWS = [
# When creating overviews also compress them into Webp
"-co",
"overview_compress=webp",
"-co",
"bigtiff=yes",
# When resampling overviews use lanczos
# see https://github.com/linz/basemaps/blob/master/docs/imagery/cog.quality.md
"-co",
"overview_resampling=lanczos",
"-co",
"blocksize=512",
# Reduce quality of overviews to 90%
"-co",
"overview_quality=90",
"-co",
"sparse_ok=true",
]


def get_gdal_command(preset: str) -> List[str]:
get_log().info("gdal_preset", preset=preset)
gdal_command:List[str] = ["gdal_translate"]

gdal_command.extend(BASE_COG)
gdal_command.extend(NZTM_SOURCE)

if preset == "lzw":
return GDAL_PRESET_LZW
if preset == "webp":
return GDAL_PRESET_WEBP
if preset == "gray_webp":
return GDAL_PRESET_GRAY_WEBP
raise Exception(f"Unknown GDAL preset: {preset}")
gdal_command.extend(SCALE_254_ADD_NO_DATA)
gdal_command.extend(COMPRESS_LZW)

elif preset == "webp":
gdal_command.extend(COMPRESS_WEBP_LOSSLESS)

gdal_command.extend(WEBP_OVERVIEWS)

return gdal_command

def find_band(bands: List[GdalInfoBands], color: str) -> Optional[GdalInfoBands]:
    """Find a band from its color interpretation.

    Args:
        bands (List[GdalInfoBands]): the bands to search, in order.
        color (str): the ``colorInterpretation`` value to look for.

    Returns:
        Optional[GdalInfoBands]: the first band whose ``colorInterpretation`` equals
        ``color``, or None if no band matches.
    """
    return next((band for band in bands if band["colorInterpretation"] == color), None)

def get_gdal_band_offset(file: str) -> List[str]:
    """Determine what band numbers to use for the "-b" overrides for gdal_translate.

    The bands of ``file`` are read with ``gdal_info`` and selected by their color
    interpretation. If an alpha band is found, it is always appended after the colour bands.

    Args:
        file (str): path of the file to inspect.

    Returns:
        List[str]: a flat list of ``-b <band>`` arguments:
            - gray imagery: the gray band repeated three times (as R, G and B);
            - otherwise the red, green and blue bands, in that order;
            - if any of red/green/blue cannot be found: bands 1, 2 and 3.
    """
    # Only the band layout is needed, so statistics are not requested (second argument)
    info = gdal_info(file, False)

    bands = info["bands"]

    alpha_band = find_band(bands, "Alpha")
    alpha_band_info: List[str] = []
    if alpha_band:
        alpha_band_info.extend(["-b", str(alpha_band["band"])])

    # Grey scale imagery, set R,G and B to just the grey_band
    grey_band = find_band(bands, "Gray")
    if grey_band:
        grey_band_index = str(grey_band["band"])
        return ["-b", grey_band_index, "-b", grey_band_index, "-b", grey_band_index] + alpha_band_info

    band_red = find_band(bands, "Red")
    band_green = find_band(bands, "Green")
    band_blue = find_band(bands, "Blue")

    if band_red is None or band_green is None or band_blue is None:
        # Each logged field is True when the corresponding band could not be found
        get_log().warning(
            "gdal_info_bands_failed",
            band_red=band_red is None,
            band_green=band_green is None,
            band_blue=band_blue is None,
        )
        # Fall back to assuming the first three bands are R, G and B
        return ["-b", "1", "-b", "2", "-b", "3"] + alpha_band_info

    return ["-b", str(band_red["band"]), "-b", str(band_green["band"]), "-b", str(band_blue["band"])] + alpha_band_info


def get_cutline_command(cutline: str) -> List[str]:
    """Get a command to create a virtual file which has a cutline and alpha applied.

    Only the program name and its options are returned; no input or output file path
    is included in the list.

    Args:
        cutline (str): the value passed to the ``-cutline`` option.

    Returns:
        List[str]: the ``gdalwarp`` command arguments.
    """
    return [
        "gdalwarp",
        # Outputting a VRT makes things faster as it's not recomputing everything
        "-of",
        "VRT",
        # Apply the cutline
        "-cutline",
        cutline,
        # Ensure the target has an alpha channel
        "-dstalpha",
    ]
41 changes: 30 additions & 11 deletions scripts/gdal/gdalinfo.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,43 @@
import json
import re
from typing import Any, Dict, List, TypedDict

from linz_logger import get_log

from scripts.gdal.gdal_helper import GDALExecutionException, run_gdal


def gdal_info(path: str) -> Dict[Any, Any]:
class GdalInfoBands(TypedDict):
    """Typed view of one entry of the ``bands`` list in the parsed ``gdalinfo -json`` output."""

    # Band number
    band: int
    # Block size of the band — presumably [width, height]; TODO confirm
    block: List[int]
    # Data type name of the band
    type: str
    # Color interpretation of the band, e.g. "Red", "Green", "Blue", "Gray" or "Alpha"
    colorInterpretation: str

class GdalInfo(TypedDict):
    """Typed view of the parsed ``gdalinfo -json`` output.

    The free-form sections are typed with ``typing.Any``; the builtin function ``any``
    is not a valid type.
    """

    description: str
    driverShortName: str
    driverLongName: str
    files: List[str]
    size: List[int]
    geoTransform: List[float]
    metadata: Dict[Any, Any]
    cornerCoordinates: Dict[Any, Any]
    extent: Dict[Any, Any]
    # One entry per band of the file
    bands: List[GdalInfoBands]

def gdal_info(path: str, stats: bool = True) -> GdalInfo:
# Set GDAL_PAM_ENABLED to NO to temporarily disable PAM support and prevent creation of an auxiliary XML file.
gdalinfo_command = ["gdalinfo", "-stats", "-json", "--config", "GDAL_PAM_ENABLED", "NO"]
gdalinfo_result = {}
gdalinfo_command = ["gdalinfo", "-json", "--config", "GDAL_PAM_ENABLED", "NO"]

# Stats take a while to generate, so only generate them if needed
if stats:
gdalinfo_command.append("-stats")

try:
gdalinfo_process = run_gdal(gdalinfo_command, path)
try:
gdalinfo_result = json.loads(gdalinfo_process.stdout)
except json.JSONDecodeError as e:
get_log().error("load_gdalinfo_result_error", file=path, error=e)
raise e
return gdalinfo_result
return json.loads(gdalinfo_process.stdout)
except json.JSONDecodeError as e:
get_log().error("load_gdalinfo_result_error", file=path, error=e)
raise e
except GDALExecutionException as gee:
get_log().error("gdalinfo_failed", file=path, error=str(gee))
raise gee
Expand Down
Loading