diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b83adabe..3bafa06f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457)) + ### Changed - `MultiBackendJobManager`: costs has been added as a column in tracking databases ([[#588](https://github.com/Open-EO/openeo-python-client/issues/588)]) diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index f0da58849..d3e60816e 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -12,7 +12,9 @@ import datetime import logging import pathlib +import re import typing +import urllib.parse import warnings from builtins import staticmethod from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union @@ -584,7 +586,9 @@ def filter_bbox( ) @openeo_process - def filter_spatial(self, geometries) -> DataCube: + def filter_spatial( + self, geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube] + ) -> DataCube: """ Limits the data cube over the spatial dimensions to the specified geometries. @@ -597,10 +601,24 @@ def filter_spatial(self, geometries) -> DataCube: More specifically, pixels outside of the bounding box of the given geometry will not be available after filtering. All pixels inside the bounding box that are not retained will be set to null (no data). - :param geometries: One or more geometries used for filtering, specified as GeoJSON in EPSG:4326. 
+ :param geometries: One or more geometries used for filtering. Can be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (that is valid for the back-end) to a GeoJSON file. + - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. + :return: A data cube restricted to the specified geometries. The dimensions and dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions have less (or the same) dimension labels. + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. """ valid_geojson_types = [ "Point", "MultiPoint", "LineString", "MultiLineString", @@ -1052,15 +1070,29 @@ def _get_geometry_argument( :param crs: value that encodes a coordinate reference system. See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. 
""" + if isinstance(geometry, Parameter): + return geometry + elif isinstance(geometry, _FromNodeMixin): + return geometry.from_node() + + if isinstance(geometry, str) and re.match(r"^https?://", geometry, flags=re.I): + # Geometry provided as URL: load with `load_url` (with best-effort format guess) + url = urllib.parse.urlparse(geometry) + suffix = pathlib.Path(url.path.lower()).suffix + format = { + ".json": "GeoJSON", + ".geojson": "GeoJSON", + ".pq": "Parquet", + ".parquet": "Parquet", + ".geoparquet": "Parquet", + }.get(suffix, suffix.split(".")[-1]) + return self.connection.load_url(url=geometry, format=format) + if isinstance(geometry, (str, pathlib.Path)): # Assumption: `geometry` is path to polygon is a path to vector file at backend. # TODO #104: `read_vector` is non-standard process. # TODO: If path exists client side: load it client side? return PGNode(process_id="read_vector", arguments={"filename": str(geometry)}) - elif isinstance(geometry, Parameter): - return geometry - elif isinstance(geometry, _FromNodeMixin): - return geometry.from_node() if isinstance(geometry, shapely.geometry.base.BaseGeometry): geometry = mapping(geometry) @@ -1107,8 +1139,18 @@ def aggregate_spatial( Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) over the spatial dimensions. - :param geometries: a shapely geometry, a GeoJSON-style dictionary, - a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file. + :param geometries: The geometries to aggregate in. Can be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() `), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (that is valid for the back-end) to a GeoJSON file. 
+ - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. + :param reducer: the "child callback": the name of a single openEO process, or a callback function as discussed in :ref:`callbackfunctions`, @@ -1128,10 +1170,13 @@ def aggregate_spatial( By default, longitude-latitude (EPSG:4326) is assumed. See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. - :param context: Additional data to be passed to the reducer process. - .. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends. See https://github.com/Open-EO/openeo-processes/issues/235 for details. + + :param context: Additional data to be passed to the reducer process. + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. """ valid_geojson_types = [ "Point", "MultiPoint", "LineString", "MultiLineString", @@ -1461,8 +1506,18 @@ def apply_polygon( the GeometriesOverlap exception is thrown. Each sub data cube is passed individually to the given process. - :param geometries: Polygons, provided as a shapely geometry, a GeoJSON-style dictionary, - a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file. + :param geometries: Polygons to apply the process to. Can be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (that is valid for the back-end) to a GeoJSON file. + - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. 
+ :param process: "child callback" function, see :ref:`callbackfunctions` :param mask_value: The value used for pixels outside the polygon. :param context: Additional data to be passed to the process. @@ -1473,6 +1528,9 @@ def apply_polygon( Argument ``polygons`` was renamed to ``geometries``. While deprecated, the old name ``polygons`` is still supported as keyword argument for backwards compatibility. + + .. versionchanged:: 0.36.0 + Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process. """ # TODO drop support for legacy `polygons` argument: # remove `kwargs, remove default `None` value for `geometries` and `process` @@ -1957,14 +2015,27 @@ def mask_polygon( The pixel values are replaced with the value specified for `replacement`, which defaults to `no data`. - :param mask: The geometry to mask with: a shapely geometry, a GeoJSON-style dictionary, - a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file. + :param mask: The geometry to mask with. Can be provided in different ways: + + - a shapely geometry + - a GeoJSON-style dictionary, + - a public URL to the geometries in a vector format that is supported by the backend + (also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`), + e.g. GeoJSON, GeoParquet, etc. + A ``load_url`` process will automatically be added to the process graph. + - a path (that is valid for the back-end) to a GeoJSON file. + - a :py:class:`~openeo.rest.vectorcube.VectorCube` instance. + - a :py:class:`~openeo.api.process.Parameter` instance. + :param srs: The spatial reference system of the provided polygon. By default longitude-latitude (EPSG:4326) is assumed. .. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends. See https://github.com/Open-EO/openeo-processes/issues/235 for details. :param replacement: the value to replace the masked pixels with + + .. 
versionchanged:: 0.36.0 + Support passing a URL as ``mask`` argument, which will be loaded with the ``load_url`` process. """ valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"] mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs) diff --git a/tests/rest/datacube/test_datacube100.py b/tests/rest/datacube/test_datacube100.py index d565fb119..e0a2b289a 100644 --- a/tests/rest/datacube/test_datacube100.py +++ b/tests/rest/datacube/test_datacube100.py @@ -347,21 +347,47 @@ def test_filter_bbox_args_and_kwargs_conflict(con100: Connection, args, kwargs, con100.load_collection("S2").filter_bbox(*args, **kwargs) -def test_filter_spatial(con100: Connection, recwarn): - img = con100.load_collection("S2") +def test_filter_spatial(con100: Connection): + cube = con100.load_collection("S2") polygon = shapely.geometry.box(0, 0, 1, 1) - masked = img.filter_spatial(geometries=polygon) - assert sorted(masked.flat_graph().keys()) == ["filterspatial1", "loadcollection1"] - assert masked.flat_graph()["filterspatial1"] == { - "process_id": "filter_spatial", - "arguments": { - "data": {"from_node": "loadcollection1"}, - "geometries": { - "type": "Polygon", - "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), - } + masked = cube.filter_spatial(geometries=polygon) + assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == { + "filterspatial1": { + "process_id": "filter_spatial", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": { + "type": "Polygon", + "coordinates": [[[1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0], [1.0, 0.0]]], + }, + }, + } + } + + +@pytest.mark.parametrize( + ["url", "expected_format"], + [ + ("https://example.com/geometry.json", "GeoJSON"), + ("https://example.com/geometry.geojson", "GeoJSON"), + ("https://example.com/geometry.GeoJSON", "GeoJSON"), + ("https://example.com/geometry.pq", 
"Parquet"), + ("https://example.com/geometry.parquet", "Parquet"), + ("https://example.com/geometry.GeoParquet", "Parquet"), + ], +) +def test_filter_spatial_geometry_url(con100: Connection, url, expected_format): + cube = con100.load_collection("S2") + masked = cube.filter_spatial(geometries=url) + assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"url": url, "format": expected_format}, + }, + "filterspatial1": { + "process_id": "filter_spatial", + "arguments": {"data": {"from_node": "loadcollection1"}, "geometries": {"from_node": "loadurl1"}}, }, - "result": True } @@ -595,6 +621,44 @@ def test_aggregate_spatial_geometry_from_node(con100: Connection, get_geometries } +@pytest.mark.parametrize( + ["url", "expected_format"], + [ + ("https://example.com/geometry.json", "GeoJSON"), + ("https://example.com/geometry.geojson", "GeoJSON"), + ("https://example.com/geometry.GeoJSON", "GeoJSON"), + ("https://example.com/geometry.pq", "Parquet"), + ("https://example.com/geometry.parquet", "Parquet"), + ("https://example.com/geometry.GeoParquet", "Parquet"), + ], +) +def test_aggregate_spatial_geometry_url(con100: Connection, url, expected_format): + cube = con100.load_collection("S2") + result = cube.aggregate_spatial(geometries=url, reducer="mean") + assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"url": url, "format": expected_format}, + }, + "aggregatespatial1": { + "process_id": "aggregate_spatial", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": {"from_node": "loadurl1"}, + "reducer": { + "process_graph": { + "mean1": { + "process_id": "mean", + "arguments": {"data": {"from_parameter": "data"}}, + "result": True, + } + } + }, + }, + }, + } + + def test_aggregate_spatial_window(con100: Connection): img = con100.load_collection("S2") size = [5, 3] 
@@ -810,6 +874,35 @@ def test_mask_polygon_from_node(con100: Connection, get_geometries): } +@pytest.mark.parametrize( + ["url", "expected_format"], + [ + ("https://example.com/geometry.json", "GeoJSON"), + ("https://example.com/geometry.geojson", "GeoJSON"), + ("https://example.com/geometry.GeoJSON", "GeoJSON"), + ("https://example.com/geometry.pq", "Parquet"), + ("https://example.com/geometry.parquet", "Parquet"), + ("https://example.com/geometry.GeoParquet", "Parquet"), + ], +) +def test_mask_polygon_geometry_url(con100: Connection, url, expected_format): + cube = con100.load_collection("S2") + masked = cube.mask_polygon(mask=url) + assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"url": url, "format": expected_format}, + }, + "maskpolygon1": { + "process_id": "mask_polygon", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "mask": {"from_node": "loadurl1"}, + }, + }, + } + + def test_mask_raster(con100: Connection): img = con100.load_collection("S2") mask = con100.load_collection("MASK") @@ -1768,6 +1861,49 @@ def test_apply_polygon_context(con100: Connection, geometries_argument, geometri } +@pytest.mark.parametrize( + ["url", "expected_format"], + [ + ("https://example.com/geometry.json", "GeoJSON"), + ("https://example.com/geometry.geojson", "GeoJSON"), + ("https://example.com/geometry.GeoJSON", "GeoJSON"), + ("https://example.com/geometry.pq", "Parquet"), + ("https://example.com/geometry.parquet", "Parquet"), + ("https://example.com/geometry.GeoParquet", "Parquet"), + ], +) +def test_apply_polygon_geometry_url(con100: Connection, url, expected_format): + cube = con100.load_collection("S2") + process = UDF(code="myfancycode", runtime="Python") + result = cube.apply_polygon(geometries=url, process=process) + assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == { + "loadurl1": { + "process_id": "load_url", + 
"arguments": {"url": url, "format": expected_format}, + }, + "applypolygon1": { + "process_id": "apply_polygon", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": {"from_node": "loadurl1"}, + "process": { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "data": {"from_parameter": "data"}, + "runtime": "Python", + "udf": "myfancycode", + }, + "result": True, + } + } + }, + }, + }, + } + + def test_metadata_load_collection_100(con100, requests_mock): requests_mock.get(API_URL + "/collections/SENTINEL2", json={ "cube:dimensions": {