Skip to content

Commit

Permalink
Merge branch 'issue567_stac_metadata'
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Aug 21, 2024
2 parents e930f72 + ff6294b commit 35b5fc7
Show file tree
Hide file tree
Showing 6 changed files with 303 additions and 58 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `load_stac`/`metadata_from_stac`: add support for extracting actual temporal dimension metadata ([#567](https://github.com/Open-EO/openeo-python-client/issues/567))

### Changed

### Removed

### Fixed

- apply_dimension with a 'target_dimension' argument was not correctly adjusting datacube metadata on the client side, causing a mismatch.
- `apply_dimension` with a `target_dimension` argument was not correctly adjusting datacube metadata on the client side, causing a mismatch.


## [0.31.0] - 2024-07-26
Expand Down
44 changes: 39 additions & 5 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union

import pystac
import pystac.extensions.datacube
import pystac.extensions.eo
import pystac.extensions.item_assets

Expand All @@ -23,6 +24,7 @@ class DimensionAlreadyExistsException(MetadataException):
pass


# TODO: make these dimension classes immutable data classes
class Dimension:
"""Base class for dimensions."""

Expand Down Expand Up @@ -591,12 +593,15 @@ def is_band_asset(asset: pystac.Asset) -> bool:

# TODO: conditionally include band dimension when there was actual indication of band metadata?
band_dimension = BandDimension(name="bands", bands=bands)
# TODO #567 get actual temporal extent information from metadata (if any)
# TODO #567 is it possible to derive the actual name of temporal dimension that the backend will use?
temporal_dimension = TemporalDimension(name="t", extent=[None, None])
metadata = CubeMetadata(dimensions=[band_dimension, temporal_dimension])
return metadata
dimensions = [band_dimension]

# TODO: is it possible to derive the actual name of the temporal dimension that the backend will use?
temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
if temporal_dimension:
dimensions.append(temporal_dimension)

metadata = CubeMetadata(dimensions=dimensions)
return metadata

# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
Expand Down Expand Up @@ -670,3 +675,32 @@ def get_bands_from_item_assets(
if asset_bands:
bands.update(asset_bands)
return bands

def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
    """
    Extract the temporal dimension from a STAC Collection/Item (if any)

    Looks at the datacube extension metadata ("cube:dimensions") of the given
    STAC object and builds a :py:class:`TemporalDimension` from it.

    :param stac_obj: STAC object (Item, Collection, ...) to inspect.
    :return: the temporal dimension when exactly one dimension of type "temporal"
        is declared, otherwise ``None`` (no datacube metadata, no temporal dimension,
        or more than one temporal dimension).
    """
    # TODO: also extract temporal dimension from assets?
    if _PYSTAC_1_9_EXTENSION_INTERFACE:
        # NOTE(review): the `hasattr` guard presumably covers STAC object types
        # whose `ext` accessor does not expose `cube` (e.g. catalogs) -- confirm.
        if not (stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube")):
            return None
        temporal_dims = [
            (n, d.extent or [None, None])
            for (n, d) in stac_obj.ext.cube.dimensions.items()
            if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL
        ]
    else:
        # Legacy code path: read the raw "cube:dimensions" field directly.
        if isinstance(stac_obj, pystac.Item):
            cube_dimensions = stac_obj.properties.get("cube:dimensions", {})
        elif isinstance(stac_obj, pystac.Collection):
            cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {})
        else:
            cube_dimensions = {}
        temporal_dims = [
            # Use `or` (instead of a `get` default) so that an explicit `"extent": null`
            # is normalized to an open extent too, consistent with the branch above.
            (n, d.get("extent") or [None, None])
            for (n, d) in cube_dimensions.items()
            if d.get("type") == "temporal"
        ]

    # Only a single, unambiguous temporal dimension is supported.
    if len(temporal_dims) == 1:
        name, extent = temporal_dims[0]
        return TemporalDimension(name=name, extent=extent)
    return None
110 changes: 110 additions & 0 deletions openeo/testing/stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from typing import List, Optional, Union


class StacDummyBuilder:
    """
    Helper to compactly produce STAC Item/Collection/Catalog/... dicts for test purposes

    .. warning::
        This is an experimental API subject to change.
    """

    # Schema URL of the STAC datacube extension, declared automatically
    # in "stac_extensions" when `cube_dimensions` is given.
    _EXT_DATACUBE = "https://stac-extensions.github.io/datacube/v2.2.0/schema.json"

    @classmethod
    def item(
        cls,
        *,
        id: str = "item123",
        stac_version: str = "1.0.0",
        datetime: str = "2024-03-08",
        properties: Optional[dict] = None,
        cube_dimensions: Optional[dict] = None,
        stac_extensions: Optional[List[str]] = None,
        **kwargs,
    ) -> dict:
        """
        Create a STAC Item represented as dictionary.

        :param id: item identifier.
        :param stac_version: value for the "stac_version" field.
        :param datetime: default "datetime" property (only used when ``properties``
            does not already define one).
        :param properties: optional item properties (not modified by this method).
        :param cube_dimensions: optional "cube:dimensions" mapping to put in the properties;
            when given, the datacube extension is added to "stac_extensions".
        :param stac_extensions: optional list of extension schema URLs (not modified).
        :param kwargs: additional top-level fields; these override the generated
            top-level fields on key collision.
        :return: STAC Item as a dictionary.
        """
        # Work on a shallow copy, so the dict passed by the caller is not mutated.
        properties = dict(properties or {})
        properties.setdefault("datetime", datetime)

        if cube_dimensions is not None:
            properties["cube:dimensions"] = cube_dimensions
            stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE)

        d = {
            "type": "Feature",
            "stac_version": stac_version,
            "id": id,
            "geometry": None,
            "properties": properties,
            "links": [],
            "assets": {},
            **kwargs,
        }

        if stac_extensions is not None:
            d["stac_extensions"] = stac_extensions
        return d

    @classmethod
    def _add_stac_extension(cls, stac_extensions: Union[List[str], None], stac_extension: str) -> List[str]:
        """
        Return a new list with the given extensions, including ``stac_extension``
        (appended when not yet present). The input list is left untouched.
        """
        stac_extensions = list(stac_extensions or [])
        if stac_extension not in stac_extensions:
            stac_extensions.append(stac_extension)
        return stac_extensions

    @classmethod
    def collection(
        cls,
        *,
        id: str = "collection123",
        description: str = "Collection 123",
        stac_version: str = "1.0.0",
        stac_extensions: Optional[List[str]] = None,
        license: str = "proprietary",
        extent: Optional[dict] = None,
        cube_dimensions: Optional[dict] = None,
        summaries: Optional[dict] = None,
    ) -> dict:
        """
        Create a STAC Collection represented as dictionary.

        :param id: collection identifier.
        :param description: value for the "description" field.
        :param stac_version: value for the "stac_version" field.
        :param stac_extensions: optional list of extension schema URLs (not modified).
        :param license: value for the "license" field.
        :param extent: optional "extent" object; a small dummy spatial/temporal
            extent is used when omitted.
        :param cube_dimensions: optional top-level "cube:dimensions" mapping;
            when given, the datacube extension is added to "stac_extensions".
        :param summaries: optional "summaries" object.
        :return: STAC Collection as a dictionary.
        """
        if extent is None:
            extent = {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}}

        d = {
            "type": "Collection",
            "stac_version": stac_version,
            "id": id,
            "description": description,
            "license": license,
            "extent": extent,
            "links": [],
        }
        if cube_dimensions is not None:
            d["cube:dimensions"] = cube_dimensions
            stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE)
        if summaries is not None:
            d["summaries"] = summaries
        if stac_extensions is not None:
            d["stac_extensions"] = stac_extensions
        return d

    @classmethod
    def catalog(
        cls,
        *,
        id: str = "catalog123",
        stac_version: str = "1.0.0",
        description: str = "Catalog 123",
        stac_extensions: Optional[List[str]] = None,
    ) -> dict:
        """
        Create a STAC Catalog represented as dictionary.

        :param id: catalog identifier.
        :param stac_version: value for the "stac_version" field.
        :param description: value for the "description" field.
        :param stac_extensions: optional list of extension schema URLs,
            only included in the result when given.
        :return: STAC Catalog as a dictionary.
        """
        d = {
            "type": "Catalog",
            "stac_version": stac_version,
            "id": id,
            "description": description,
            "links": [],
        }
        if stac_extensions is not None:
            d["stac_extensions"] = stac_extensions
        return d
23 changes: 7 additions & 16 deletions tests/rest/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
paginate,
)
from openeo.rest.vectorcube import VectorCube
from openeo.testing.stac import StacDummyBuilder
from openeo.util import ContextTimer, dict_no_none

from .auth.test_cli import auth_config, refresh_token_store
Expand Down Expand Up @@ -2584,24 +2585,14 @@ def test_load_stac_from_job_empty_result(self, con120, requests_mock):
}
}

def test_load_stac_reduce_temporal(self, con120, tmp_path):
@pytest.mark.parametrize("temporal_dim", ["t", "datezz"])
def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
# TODO: reusable utility to create/generate a STAC resource for testing
# (a file, but preferably a URL, but that requires urllib mocking)
stac_path = tmp_path / "stac.json"
stac_data = {
"type": "Collection",
"id": "test-collection",
"stac_version": "1.0.0",
"description": "Test collection",
"links": [],
"title": "Test Collection",
"extent": {
"spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]},
"temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]},
},
"license": "proprietary",
"summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]},
}
stac_data = StacDummyBuilder.collection(
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
)
stac_path.write_text(json.dumps(stac_data))

cube = con120.load_stac(str(stac_path))
Expand All @@ -2615,7 +2606,7 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path):
"process_id": "reduce_dimension",
"arguments": {
"data": {"from_node": "loadstac1"},
"dimension": "t",
"dimension": temporal_dim,
"reducer": {
"process_graph": {
"max1": {
Expand Down
97 changes: 61 additions & 36 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import json
import re
from typing import List, Union
from typing import List, Optional, Union

import pytest

Expand All @@ -19,6 +19,7 @@
TemporalDimension,
metadata_from_stac,
)
from openeo.testing.stac import StacDummyBuilder


def test_metadata_get():
Expand Down Expand Up @@ -792,57 +793,31 @@ def filter_bbox(self, bbox):
"test_stac, expected",
[
(
{
"type": "Collection",
"id": "test-collection",
"stac_version": "1.0.0",
"description": "Test collection",
"links": [],
"title": "Test Collection",
"extent": {
"spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]},
"temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]},
},
"license": "proprietary",
"summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]},
},
StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}]}),
["B01", "B02"],
),
# TODO: test asset handling in collection?
(
{
"type": "Catalog",
"id": "test-catalog",
"stac_version": "1.0.0",
"description": "Test Catalog",
"links": [],
},
StacDummyBuilder.catalog(),
[],
),
(
{
"type": "Feature",
"stac_version": "1.0.0",
"id": "test-item",
"properties": {"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]},
"geometry": {"coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]], "type": "Polygon"},
"links": [],
"assets": {},
"bbox": [0, 1, 0, 1],
"stac_extensions": [],
},
StacDummyBuilder.item(
properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]}
),
["SCL", "B08"],
),
# TODO: test asset handling in item?
],
)
def test_metadata_from_stac(tmp_path, test_stac, expected):
def test_metadata_from_stac_bands(tmp_path, test_stac, expected):
path = tmp_path / "stac.json"
path.write_text(json.dumps(test_stac))
metadata = metadata_from_stac(path)
metadata = metadata_from_stac(str(path))
assert metadata.band_names == expected



@pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface")
@pytest.mark.parametrize("eo_extension_is_declared", [False, True])
def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_path, eo_extension_is_declared, caplog):
Expand All @@ -859,7 +834,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
path = tmp_path / "stac.json"
path.write_text(json.dumps(stac_data))

metadata = metadata_from_stac(path)
metadata = metadata_from_stac(str(path))
assert sorted(metadata.band_names) == [
"2m_temperature_max",
"2m_temperature_min",
Expand All @@ -872,3 +847,53 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
for m in caplog.messages
)
assert warn_count == (0 if eo_extension_is_declared else 1)


@pytest.mark.parametrize(
    ["stac_dict", "expected"],
    [
        (
            StacDummyBuilder.item(),
            None,
        ),
        (
            StacDummyBuilder.item(cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}),
            ("t", ["2024-04-04", "2024-06-06"]),
        ),
        (
            StacDummyBuilder.item(
                cube_dimensions={"datezz": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}
            ),
            ("datezz", ["2024-04-04", "2024-06-06"]),
        ),
        (
            StacDummyBuilder.collection(),
            None,
        ),
        (
            StacDummyBuilder.collection(
                cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}
            ),
            ("t", ["2024-04-04", "2024-06-06"]),
        ),
        (
            StacDummyBuilder.catalog(),
            None,
        ),
        (
            # Note: a catalog is not supposed to have datacube extension enabled, but we should not choke on that
            StacDummyBuilder.catalog(stac_extensions=[StacDummyBuilder._EXT_DATACUBE]),
            None,
        ),
    ],
)
def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected):
    """
    Check the temporal dimension that `metadata_from_stac` derives from a STAC document.

    Each case writes `stac_dict` to a JSON file and loads it through `metadata_from_stac`.
    `expected` is either a `(name, extent)` tuple the temporal dimension must match,
    or `None` when no temporal dimension should be present.
    """
    path = tmp_path / "stac.json"
    path.write_text(json.dumps(stac_dict))
    # Pass the path as a string (not a `pathlib.Path` object).
    metadata = metadata_from_stac(str(path))
    if expected:
        dim = metadata.temporal_dimension
        assert isinstance(dim, TemporalDimension)
        assert (dim.name, dim.extent) == expected
    else:
        assert not metadata.has_temporal_dimension()
Loading

0 comments on commit 35b5fc7

Please sign in to comment.