Skip to content

Commit

Permalink
Issue Open-EO#567/Open-EO#591 Finetune detection of actual temporal d…
Browse files Browse the repository at this point in the history
…imension name in `load_stac`

- move logic to _StacMetadataParser (less logic nesting)
- improve test coverage (and add DummyStacDictBuilder utility)
  • Loading branch information
soxofaan committed Aug 20, 2024
1 parent 5eae436 commit 587f657
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 140 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `load_stac`/`metadata_from_stac`: add support for extracting actual temporal dimension metadata ([#567](https://github.com/Open-EO/openeo-python-client/issues/567))

### Changed

### Removed

### Fixed

- apply_dimension with a 'target_dimension' argument was not correctly adjusting datacube metadata on the client side, causing a mismatch.
- `apply_dimension` with a `target_dimension` argument was not correctly adjusting datacube metadata on the client side, causing a mismatch.


## [0.31.0] - 2024-07-26
Expand Down
68 changes: 36 additions & 32 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union

import pystac
import pystac.extensions.datacube
import pystac.extensions.eo
import pystac.extensions.item_assets

Expand All @@ -23,6 +24,7 @@ class DimensionAlreadyExistsException(MetadataException):
pass


# TODO: make these dimension classes immutable data classes
class Dimension:
"""Base class for dimensions."""

Expand Down Expand Up @@ -539,30 +541,6 @@ def metadata_from_stac(url: str) -> CubeMetadata:

# TODO move these nested functions and other logic to _StacMetadataParser

def get_temporal_metadata(
    spec: Union[pystac.Collection, pystac.Item, pystac.Catalog],
    complain: Callable[[str], None] = warnings.warn,
) -> TemporalDimension:
    """
    Build the temporal dimension of a STAC object from its datacube extension metadata.

    :param spec: STAC object to inspect through the PySTAC (1.9+) ``ext`` extension accessor.
    :param complain: callback that receives a message when no usable metadata is found.
    :return: the first dimension in ``cube:dimensions`` with type "temporal",
        or a default dimension named "t" with an open extent when there is none.
    """
    # Dimension info is in `cube:dimensions`
    # Check if the datacube extension is present
    if spec.ext.has("cube"):
        for name, dimension in spec.ext.cube.dimensions.items():
            if dimension.dim_type == "temporal":
                return TemporalDimension(name=name, extent=dimension.extent)
        # Extension is present but declares no temporal dimension:
        # fall back on the default instead of failing with an IndexError.
        complain("No temporal dimension in cube:dimensions metadata")
    else:
        complain("No cube:dimensions metadata")
    return TemporalDimension(name="t", extent=[None, None])

def get_temporal_metadata_old(spec: dict, complain: Callable[[str], None] = warnings.warn) -> TemporalDimension:
    """
    Look up the temporal dimension in the dict representation of a STAC object.

    :param spec: STAC object as a plain dict.
    :param complain: callback that receives a message when there is no ``cube:dimensions`` metadata.
    :return: the first dimension of type "temporal", or ``None`` when there is none
        (the caller is expected to handle that case).
    """
    # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`)
    dimensions = deep_get(spec, "cube:dimensions", default=None)
    if not dimensions:
        dimensions = deep_get(spec, "properties", "cube:dimensions", default=None)
    if not dimensions:
        dimensions = {}
        complain("No cube:dimensions metadata")

    first_temporal = next(
        ((dim_name, dim_info) for dim_name, dim_info in dimensions.items() if dim_info.get("type") == "temporal"),
        None,
    )
    if first_temporal is None:
        return None
    dim_name, dim_info = first_temporal
    return TemporalDimension(name=dim_name, extent=dim_info.get("extent"))

def get_band_metadata(eo_bands_location: dict) -> List[Band]:
# TODO: return None iso empty list when no metadata?
return [
Expand Down Expand Up @@ -608,17 +586,17 @@ def is_band_asset(asset: pystac.Asset) -> bool:
bands = get_band_metadata(catalog.extra_fields.get("summaries", {}))
else:
raise ValueError(stac_object)
if _PYSTAC_1_9_EXTENSION_INTERFACE:
temporal_dimension = get_temporal_metadata(stac_object)
else:
temporal_dimension = get_temporal_metadata_old(stac_object.to_dict())
if temporal_dimension is None:
temporal_dimension = TemporalDimension(name="t", extent=[None, None])
# TODO: conditionally include band dimension when there was actual indication of band metadata?
band_dimension = BandDimension(name="bands", bands=bands)
metadata = CubeMetadata(dimensions=[band_dimension, temporal_dimension])
return metadata
dimensions = [band_dimension]

# TODO: is it possible to derive the actual name of temporal dimension that the backend will use?
temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
if temporal_dimension:
dimensions.append(temporal_dimension)

metadata = CubeMetadata(dimensions=dimensions)
return metadata

# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
Expand Down Expand Up @@ -692,3 +670,29 @@ def get_bands_from_item_assets(
if asset_bands:
bands.update(asset_bands)
return bands

def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
    """
    Extract the temporal dimension from a STAC Collection/Item (if any)

    :param stac_obj: STAC object to inspect. Only Items and Collections are considered.
    :return: the temporal dimension (with extent ``[None, None]`` when the metadata has no extent)
        if the ``cube:dimensions`` metadata declares exactly one temporal dimension, ``None`` otherwise.
    """
    # TODO: also extract temporal dimension from assets?
    if not isinstance(stac_obj, (pystac.Item, pystac.Collection)):
        return None

    if _PYSTAC_1_9_EXTENSION_INTERFACE:
        if not stac_obj.ext.has("cube"):
            return None
        temporal_dims = [
            (name, dim.extent or [None, None])
            for (name, dim) in stac_obj.ext.cube.dimensions.items()
            if dim.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL
        ]
    else:
        # Legacy PySTAC: work on the raw dict representation.
        # Collections carry `cube:dimensions` at top level, Items under `properties`.
        stac_dict = stac_obj.to_dict()
        cube_dimensions = (
            stac_dict.get("cube:dimensions")
            or (stac_dict.get("properties") or {}).get("cube:dimensions")
            or {}
        )
        temporal_dims = [
            # Normalize a missing/empty extent the same way as the PySTAC 1.9+ code path.
            (name, dim.get("extent") or [None, None])
            for (name, dim) in cube_dimensions.items()
            if dim.get("type") == "temporal"
        ]

    # Only report a temporal dimension when the metadata is unambiguous.
    if len(temporal_dims) == 1:
        name, extent = temporal_dims[0]
        return TemporalDimension(name=name, extent=extent)
    return None
94 changes: 94 additions & 0 deletions openeo/testing/stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List, Optional, Union


class DummyStacDictBuilder:
    """
    Helper to compactly produce STAC Item/Collection/Catalog/... dicts for test purposes

    All builders are class methods taking keyword-only arguments and returning
    a freshly built plain ``dict``.
    """

    # TODO: move this to more generic test utilities module
    _EXT_DATACUBE = "https://stac-extensions.github.io/datacube/v2.2.0/schema.json"

    @classmethod
    def item(
        cls,
        *,
        id: str = "item123",
        stac_version: str = "1.0.0",
        datetime: str = "2024-03-08",
        properties: Optional[dict] = None,
        cube_dimensions: Optional[dict] = None,
        stac_extensions: Optional[List[str]] = None,
        **kwargs,
    ) -> dict:
        """
        Build a STAC Item dict.

        :param id: item identifier
        :param stac_version: value of the "stac_version" field
        :param datetime: default for the "datetime" property (an explicit "datetime" in ``properties`` wins)
        :param properties: additional item properties (not modified)
        :param cube_dimensions: if given, stored as "cube:dimensions" property
            and the datacube extension is added to "stac_extensions"
        :param stac_extensions: STAC extension URLs to list (not modified)
        :param kwargs: additional top-level fields (these override the generated ones)
        """
        # Work on a shallow copy so the caller's `properties` dict is never mutated.
        properties = dict(properties or {})
        properties.setdefault("datetime", datetime)

        if cube_dimensions is not None:
            properties["cube:dimensions"] = cube_dimensions
            stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE)

        d = {
            "type": "Feature",
            "stac_version": stac_version,
            "id": id,
            "geometry": None,
            "properties": properties,
            **kwargs,
        }

        if stac_extensions is not None:
            d["stac_extensions"] = stac_extensions
        return d

    @classmethod
    def _add_stac_extension(cls, stac_extensions: Union[List[str], None], stac_extension: str) -> List[str]:
        """Return a new list with the given extensions, with `stac_extension` appended if not yet present."""
        stac_extensions = list(stac_extensions or [])
        if stac_extension not in stac_extensions:
            stac_extensions.append(stac_extension)
        return stac_extensions

    @classmethod
    def collection(
        cls,
        *,
        id: str = "collection123",
        description: str = "Collection 123",
        stac_version: str = "1.0.0",
        stac_extensions: Optional[List[str]] = None,
        license: str = "proprietary",
        extent: Optional[dict] = None,
        cube_dimensions: Optional[dict] = None,
        summaries: Optional[dict] = None,
    ) -> dict:
        """
        Build a STAC Collection dict.

        :param id: collection identifier
        :param description: collection description
        :param stac_version: value of the "stac_version" field
        :param stac_extensions: STAC extension URLs to list (not modified)
        :param license: value of the "license" field
        :param extent: collection extent (a dummy spatial/temporal extent is used when not given)
        :param cube_dimensions: if given, stored as top-level "cube:dimensions"
            and the datacube extension is added to "stac_extensions"
        :param summaries: if given, stored as "summaries"
        """
        if extent is None:
            extent = {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}}

        d = {
            "type": "Collection",
            "stac_version": stac_version,
            "id": id,
            "description": description,
            "license": license,
            "extent": extent,
            "links": [],
        }
        if cube_dimensions is not None:
            d["cube:dimensions"] = cube_dimensions
            stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE)
        if summaries is not None:
            d["summaries"] = summaries
        if stac_extensions is not None:
            d["stac_extensions"] = stac_extensions
        return d

    @classmethod
    def catalog(
        cls, *, id: str = "catalog123", stac_version: str = "1.0.0", description: str = "Catalog 123"
    ) -> dict:
        """
        Build a STAC Catalog dict.

        :param id: catalog identifier
        :param stac_version: value of the "stac_version" field
        :param description: catalog description
        """
        d = {
            "type": "Catalog",
            "stac_version": stac_version,
            "id": id,
            "description": description,
            "links": [],
        }
        return d
23 changes: 7 additions & 16 deletions tests/rest/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
paginate,
)
from openeo.rest.vectorcube import VectorCube
from openeo.testing.stac import DummyStacDictBuilder
from openeo.util import ContextTimer, dict_no_none

from .auth.test_cli import auth_config, refresh_token_store
Expand Down Expand Up @@ -2584,24 +2585,14 @@ def test_load_stac_from_job_empty_result(self, con120, requests_mock):
}
}

def test_load_stac_reduce_temporal(self, con120, tmp_path):
@pytest.mark.parametrize("temporal_dim", ["t", "datezz"])
def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
# TODO: reusable utility to create/generate a STAC resource for testing
# (a file, but preferably a URL, but that requires urllib mocking)
stac_path = tmp_path / "stac.json"
stac_data = {
"type": "Collection",
"id": "test-collection",
"stac_version": "1.0.0",
"description": "Test collection",
"links": [],
"title": "Test Collection",
"extent": {
"spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]},
"temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]},
},
"license": "proprietary",
"summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]},
}
stac_data = DummyStacDictBuilder.collection(
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
)
stac_path.write_text(json.dumps(stac_data))

cube = con120.load_stac(str(stac_path))
Expand All @@ -2615,7 +2606,7 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path):
"process_id": "reduce_dimension",
"arguments": {
"data": {"from_node": "loadstac1"},
"dimension": "t",
"dimension": temporal_dim,
"reducer": {
"process_graph": {
"max1": {
Expand Down
Loading

0 comments on commit 587f657

Please sign in to comment.