Skip to content

Commit

Permalink
support bands_metadata for GTiff output assets
Browse files Browse the repository at this point in the history
* use tiffset to write GDAL metadata with "role" support

Open-EO/openeo-geotrellis-extensions#317

* encapsulate tiffset

Open-EO/openeo-geotrellis-extensions#317

* fix tests that rely on test layer with tile size 4

The spec demands that TIFF tiles have a tile size that is a multiple of 16. We write GeoTIFFs
with a tile size that is equal to the TileLayerRDD's tile size so a tile size of 4 will produce GeoTiffs
that are not compliant. Some tools cope, like gdalinfo (with a warning), but some will simply fail,
like tiffset.

Open-EO/openeo-geotrellis-extensions#317

* fix some tests

Open-EO/openeo-geotrellis-extensions#317

* try workaround for tiffset segfault in container

Open-EO/openeo-geotrellis-extensions#317

* fix tests

Open-EO/openeo-geotrellis-extensions#317

* add TODOs

Open-EO/openeo-geotrellis-extensions#317

* test tiffset

Open-EO/openeo-geotrellis-extensions#317

* fix tests

Open-EO/openeo-geotrellis-extensions#317

* fix interaction with asset-per-band

Open-EO/openeo-geotrellis-extensions#317

* include details if tiffset fails

Open-EO/openeo-geotrellis-extensions#317

* simplify asset-per-band mapping

Open-EO/openeo-geotrellis-extensions#317

* tiffset is called from Scala instead + adapt/add tests

Open-EO/openeo-geotrellis-extensions#317

* cleanup

Open-EO/openeo-geotrellis-extensions#317

* fix tests

KeyError: DESCRIPTION

Open-EO/openeo-geotrellis-extensions#317

* quick fix test

py4j.protocol.Py4JJavaError: An error occurred while calling z:org.openeo.geotrellis.geotiff.package.saveRDDAllowAssetPerBand.
: java.io.IOException: tiffset -sf 42112 /tmp/GDALMetadata_14266360818144674820.xml.tmp /tmp/18350960666773006729.tif failed; output was: _TIFFVSetField: /tmp/18350960666773006729.tif: Bad value 5 for "TileWidth" tag.
	at org.openeo.geotrellis.geotiff.package$.embedGdalMetadata(package.scala:940)
	at org.openeo.geotrellis.geotiff.package$.$anonfun$writeGeoTiff$2(package.scala:839)
	at org.openeo.geotrellis.geotiff.package$.$anonfun$writeGeoTiff$2$adapted(package.scala:839)
	at scala.Option.foreach(Option.scala:407)
	at org.openeo.geotrellis.geotiff.package$.writeGeoTiff(package.scala:839)
	at org.openeo.geotrellis.geotiff.package$.writeTiff(package.scala:588)
	at org.openeo.geotrellis.geotiff.package$.saveRDDGeneric(package.scala:415)
	at org.openeo.geotrellis.geotiff.package$.saveRDDAllowAssetPerBand(package.scala:254)
	at org.openeo.geotrellis.geotiff.package.saveRDDAllowAssetPerBand(package.scala)
	at jdk.internal.reflect.GeneratedMethodAccessor439.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)

Open-EO/openeo-geotrellis-extensions#317

* update version and CHANGELOG

Open-EO/openeo-geotrellis-extensions#317
  • Loading branch information
bossie authored Oct 28, 2024
1 parent 636c09a commit a84ea45
Show file tree
Hide file tree
Showing 17 changed files with 280 additions and 72 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ without compromising stable operations.

## Unreleased

## 0.47.0

- Support `bands_metadata` format option to set band-specific scale, offset and other metadata on `GTiff` output assets ([Open-EO/openeo-geotrellis-extensions#317](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/317))

## 0.46.0

- Automatic Python UDF dependency handling: add option to work with ZIP archive
Expand Down
2 changes: 1 addition & 1 deletion openeogeotrellis/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.46.2a1"
__version__ = "0.47.0a1"
19 changes: 9 additions & 10 deletions openeogeotrellis/collections/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@ def dates_between(start: datetime, end: datetime) -> List[datetime]:


def load_test_collection(
collection_id: str,
collection_metadata: GeopysparkCubeMetadata,
extent, srs: str,
from_date: str, to_date: str,
bands=None,
correlation_id: str = "NA",
tile_size: int,
collection_metadata: GeopysparkCubeMetadata,
extent,
srs: str,
from_date: str,
to_date: str,
bands=None,
correlation_id: str = "NA",
) -> Dict[int, geopyspark.TiledRasterLayer]:
"""
Load synthetic data as test collection
:param collection_id:
:param tile_size:
:param collection_metadata:
:param extent:
:param srs:
Expand All @@ -45,10 +47,7 @@ def load_test_collection(
:param correlation_id:
:return:
"""
# TODO: support more test collections
assert collection_id == "TestCollection-LonLat4x4"
grid_size: float = 1.0
tile_size = 4

# TODO: support other srs'es?
assert srs == "EPSG:4326"
Expand Down
23 changes: 12 additions & 11 deletions openeogeotrellis/geopysparkdatacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1821,14 +1821,15 @@ def return_netcdf_assets(asset_paths, bands, nodata):
overviews = format_options.get("overviews", "AUTO")
overview_resample = format_options.get("overview_method", "near")
colormap = format_options.get("colormap", None)
description = format_options.get("file_metadata",{}).get("description","")
description = format_options.get("file_metadata", {}).get("description", "")
filename_prefix = get_jvm().scala.Option.apply(format_options.get("filename_prefix", None))
separate_asset_per_band_tmp = (
smart_bool(format_options.get("separate_asset_per_band"))
if "separate_asset_per_band" in format_options
else None
)
separate_asset_per_band = get_jvm().scala.Option.apply(separate_asset_per_band_tmp)
bands_metadata = format_options.get("bands_metadata", {}) # band_name -> (tag -> value)

if separate_asset_per_band.isDefined() and format != "GTIFF":
raise OpenEOApiException("separate_asset_per_band is only supported with format GTIFF")
Expand Down Expand Up @@ -1910,6 +1911,9 @@ def color_to_int(color):
for index, band_name in enumerate(self.metadata.band_dimension.band_names):
gtiff_options.addBandTag(index, "DESCRIPTION", str(band_name))

for tag_name, tag_value in bands_metadata.get(band_name, {}).items():
gtiff_options.addBandTag(index, tag_name.upper(), str(tag_value))

bands = []
if self.metadata.has_band_dimension():
bands = [b._asdict() for b in self.metadata.bands]
Expand Down Expand Up @@ -1963,17 +1967,14 @@ def color_to_int(color):
# TODO: contains a bbox so rename
timestamped_paths = [(timestamped_path._1(), timestamped_path._2(), timestamped_path._3())
for timestamped_path in timestamped_paths]
for index, tup in enumerate(timestamped_paths):
path, timestamp, bbox = tup
tmp_bands = bands
if band_indices_per_file:
band_indices = band_indices_per_file[index]
tmp_bands = [b for i, b in enumerate(bands) if i in band_indices]
for index, (path, timestamp, bbox) in enumerate(timestamped_paths):
assets[str(pathlib.Path(path).name)] = {
"href": str(path),
"type": "image/tiff; application=geotiff",
"roles": ["data"],
"bands": tmp_bands,
"bands": (
[bands[i] for i in band_indices_per_file[index]] if band_indices_per_file else bands
),
"nodata": nodata,
"datetime": timestamp,
"bbox": to_latlng_bbox(bbox),
Expand All @@ -2000,20 +2001,20 @@ def color_to_int(color):
str(save_filename),
zlevel,
get_jvm().scala.Option.apply(crop_extent),
gtiff_options)
gtiff_options,
)

paths_tuples = [
(timestamped_path._1(), timestamped_path._2()) for timestamped_path in paths_tuples
]
assets = {}
for path, band_indices in paths_tuples:
file_name = pathlib.Path(path).name
tmp_bands = [b for i, b in enumerate(bands) if i in band_indices]
assets[file_name] = {
"href": str(path),
"type": "image/tiff; application=geotiff",
"roles": ["data"],
"bands": tmp_bands,
"bands": [bands[i] for i in band_indices],
"nodata": nodata,
}
return assets
Expand Down
3 changes: 2 additions & 1 deletion openeogeotrellis/integrations/gdal.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,8 @@ def _get_raster_statistics(gdal_info: GDALInfo, band_name: Optional[str] = None)
# just the empty string.
gdal_band_stats = band_metadata.get("", {})
band_name_out = (
band_name or gdal_band_stats.get("long_name") or gdal_band_stats.get("DESCRIPTION") or str(band_num)
band_name or gdal_band_stats.get("long_name") or band.get("description")
or gdal_band_stats.get("DESCRIPTION") or str(band_num)
)

def to_float_or_none(x: Optional[str]):
Expand Down
14 changes: 11 additions & 3 deletions openeogeotrellis/layercatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,10 +689,18 @@ def file_agera5_pyramid():
elif layer_source_type == 'accumulo':
pyramid = accumulo_pyramid()
elif layer_source_type == 'testing':
import re

tile_cols, tile_rows = map(int, re.match(r".*?(\d+)x(\d+)", collection_id).groups())
assert tile_cols == tile_rows

pyramid = load_test_collection(
collection_id=collection_id, collection_metadata=metadata,
extent=extent, srs=srs,
from_date=from_date, to_date=to_date,
tile_size=tile_cols,
collection_metadata=metadata,
extent=extent,
srs=srs,
from_date=from_date,
to_date=to_date,
bands=bands,
correlation_id=correlation_id
)
Expand Down
2 changes: 1 addition & 1 deletion tests/data_collections/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_load_test_collection():

extent = get_jvm().geotrellis.vector.Extent(1.0, 2.0, 2.0, 4.0)
pyramid = load_test_collection(
collection_id="TestCollection-LonLat4x4",
tile_size=4,
collection_metadata=collection_metadata,
extent=extent,
srs="EPSG:4326",
Expand Down
16 changes: 9 additions & 7 deletions tests/datacube_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,21 @@

from openeogeotrellis.service_registry import InMemoryServiceRegistry

matrix_of_one = np.zeros((1, 4, 4))
TILE_SIZE = 16 # multiple of 16 as this is used for the GeoTIFF tile size as well and mandated by its spec

matrix_of_one = np.zeros((1, TILE_SIZE, TILE_SIZE))
matrix_of_one.fill(1)

matrix_of_two = np.zeros((1, 4, 4))
matrix_of_two = np.zeros((1, TILE_SIZE, TILE_SIZE))
matrix_of_two.fill(2)

matrix_of_nodata = np.zeros((1, 4, 4))
matrix_of_nodata = np.zeros((1, TILE_SIZE, TILE_SIZE))
matrix_of_nodata.fill(-1)

extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0}
extent_webmerc = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 445277.96317309426, 'ymax': 445640.1096560266}
layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 4, 'tileRows': 4}

extent = {"xmin": 0.0, "ymin": 0.0, "xmax": 4.0, "ymax": 4.0}
extent_webmerc = {"xmin": 0.0, "ymin": 0.0, "xmax": 445277.96317309426, "ymax": 445640.1096560266}
# TODO: shouldn't layoutCols/Rows be 2 as we're adding 4 tiles to it? Or at least make it easier to reason about?
layout = {"layoutCols": 1, "layoutRows": 1, "tileCols": TILE_SIZE, "tileRows": TILE_SIZE}


openeo_metadata = {
Expand Down
63 changes: 61 additions & 2 deletions tests/layercatalog.json
Original file line number Diff line number Diff line change
Expand Up @@ -908,12 +908,71 @@
"x": {
"type": "spatial",
"axis": "x",
"reference_system": {"$schema":"https://proj.org/schemas/v0.2/projjson.schema.json","type":"GeodeticCRS","name":"AUTO 42001 (Universal Transverse Mercator)","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"area":"World","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"OGC","version":"1.3","code":"Auto42001"}}
"reference_system": 4326
},
"y": {
"type": "spatial",
"axis": "y",
"reference_system": {"$schema":"https://proj.org/schemas/v0.2/projjson.schema.json","type":"GeodeticCRS","name":"AUTO 42001 (Universal Transverse Mercator)","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"area":"World","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"OGC","version":"1.3","code":"Auto42001"}}
"reference_system": 4326
},
"t": {
"type": "temporal"
},
"bands": {
"type": "bands",
"values": [
"Flat:0",
"Flat:1",
"Flat:2",
"TileCol",
"TileRow",
"TileColRow:10",
"Longitude",
"Latitude",
"Year",
"Month",
"Day"
]
}
},
"extent": {
"spatial": {
"bbox": [
[
-180,
-56,
180,
83
]
]
},
"temporal": {
"interval": [
[
"2000-01-01",
null
]
]
}
}
},
{
"id": "TestCollection-LonLat16x16",
"_vito": {
"data_source": {
"type": "testing"
}
},
"cube:dimensions": {
"x": {
"type": "spatial",
"axis": "x",
"reference_system": 4326
},
"y": {
"type": "spatial",
"axis": "y",
"reference_system": 4326
},
"t": {
"type": "temporal"
Expand Down
69 changes: 67 additions & 2 deletions tests/test_api_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
as_geojson_feature,
as_geojson_feature_collection,
)
from osgeo import gdal
from pystac import (
Asset,
Catalog,
Expand All @@ -56,6 +57,7 @@
)
from shapely.geometry import GeometryCollection, Point, Polygon, box, mapping

from openeogeotrellis._version import __version__
from openeogeotrellis.backend import JOB_METADATA_FILENAME
from openeogeotrellis.config.config import EtlApiConfig
from openeogeotrellis.job_registry import ZkJobRegistry
Expand Down Expand Up @@ -2086,7 +2088,7 @@ def test_apply_neighborhood_filter_spatial(api100, tmp_path):
"lc": {
"process_id": "load_collection",
"arguments": {
"id": "TestCollection-LonLat4x4",
"id": "TestCollection-LonLat16x16",
"temporal_extent": ["2020-03-01", "2020-03-10"],
"spatial_extent": {"west": 0.0, "south": 0.0, "east": 32.0, "north": 32.0},
"bands": ["Longitude", "Day"]
Expand Down Expand Up @@ -2124,7 +2126,7 @@ def test_apply_neighborhood_filter_spatial(api100, tmp_path):
with rasterio.open(tmp_path / "apply_neighborhood.tif") as ds:
print(ds.bounds)
assert ds.bounds.right == 11
assert ds.width == 4
assert ds.width == 16


def test_aggregate_spatial_netcdf_feature_names(api100, tmp_path):
Expand Down Expand Up @@ -4776,3 +4778,66 @@ def test_aggregate_temporal_period_from_merge_cubes_on_time_dimension_contains_f
np.datetime64("2024-03-01"),
],
)


def test_geotiff_scale_offset(api110, tmp_path):
response = api110.check_result(
{
"loadcollection1": {
"process_id": "load_collection",
"arguments": {
"id": "TestCollection-LonLat16x16",
"temporal_extent": ["2021-01-05", "2021-01-06"],
"spatial_extent": {"west": 0.0, "south": 50.0, "east": 5.0, "north": 55.0},
"bands": ["Flat:1", "Flat:2"],
},
},
"saveresult1": {
"process_id": "save_result",
"arguments": {
"data": {"from_node": "loadcollection1"},
"format": "GTiff",
"options": {
"bands_metadata": {
"Flat:1": {
"SCALE": 1.23,
"ARBITRARY": "value",
},
"Flat:2": {
"OFFSET": 4.56,
},
"Flat:3": {
"SCALE": 7.89,
"OFFSET": 10.11
}
}
},
},
"result": True,
},
}
)

output_file = tmp_path / "out.tif"

with open(output_file, mode="wb") as f:
f.write(response.data)

raster = gdal.Open(str(output_file))
head_metadata = raster.GetMetadata()
assert head_metadata["AREA_OR_POINT"] == "Area"
assert head_metadata["PROCESSING_SOFTWARE"] == __version__

band_count = raster.RasterCount
assert band_count == 2

first_band = raster.GetRasterBand(1)
assert first_band.GetDescription() == "Flat:1"
assert first_band.GetScale() == 1.23
assert first_band.GetOffset() == 0.0
assert first_band.GetMetadata()["ARBITRARY"] == "value"

second_band = raster.GetRasterBand(2)
assert second_band.GetDescription() == "Flat:2"
assert second_band.GetScale() == 1.0
assert second_band.GetOffset() == 4.56
Loading

0 comments on commit a84ea45

Please sign in to comment.