From 55402b74bacf3938f5fd613188e9a14ecd250832 Mon Sep 17 00:00:00 2001
From: Stefaan Lippens
Date: Thu, 3 Aug 2023 19:57:26 +0200
Subject: [PATCH] Issue #197 add basic pg based test of UDF on VectorCube

---
 openeo_driver/ProcessGraphDeserializer.py | 10 +--
 openeo_driver/datacube.py                 |  9 +++
 openeo_driver/dummy/dummy_backend.py      |  8 ++-
 openeo_driver/testing.py                  | 47 ++++++++++++--
 tests/test_testing.py                     | 36 +++++++++++
 tests/test_views_execute.py               | 78 +++++++++++++++++++++--
 6 files changed, 172 insertions(+), 16 deletions(-)

diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py
index 8c3bc8a8..90fb1860 100644
--- a/openeo_driver/ProcessGraphDeserializer.py
+++ b/openeo_driver/ProcessGraphDeserializer.py
@@ -677,10 +677,10 @@ def apply_neighborhood(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
 
 @process
 def apply_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
-    data_cube = args.get_required("data", expected_type=DriverDataCube)
+    data_cube = args.get_required("data", expected_type=(DriverDataCube, DriverVectorCube))
     process = args.get_deep("process", "process_graph", expected_type=dict)
     dimension = args.get_required(
-        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
+        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
     )
     target_dimension = args.get_optional("target_dimension", default=None, expected_type=str)
     context = args.get_optional("context", default=None)
@@ -748,7 +748,7 @@ def reduce_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
     data_cube: DriverDataCube = args.get_required("data", expected_type=DriverDataCube)
     reduce_pg = args.get_deep("reducer", "process_graph", expected_type=dict)
     dimension = args.get_required(
-        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
+        "dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
     )
     context = args.get_optional("context", default=None)
     return data_cube.reduce_dimension(reducer=reduce_pg, dimension=dimension, context=context, env=env)
@@ -924,7 +924,7 @@ def aggregate_temporal(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
     dimension = args.get_optional(
         "dimension",
         default=lambda: data_cube.metadata.temporal_dimension.name,
-        validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
+        validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
     )
     context = args.get_optional("context", default=None)
 
@@ -941,7 +941,7 @@ def aggregate_temporal_period(args: ProcessArgs, env: EvalEnv) -> DriverDataCube
     dimension = args.get_optional(
         "dimension",
         default=lambda: data_cube.metadata.temporal_dimension.name,
-        validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
+        validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
     )
     context = args.get_optional("context", default=None)
 
diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py
index e33cd9b1..a9a1afcc 100644
--- a/openeo_driver/datacube.py
+++ b/openeo_driver/datacube.py
@@ -61,6 +61,9 @@ def __eq__(self, o: object) -> bool:
                 return True
         return False
 
+    def get_dimension_names(self) -> List[str]:
+        return self.metadata.dimension_names()
+
     def _not_implemented(self):
         """Helper to raise a NotImplemented exception containing method name"""
         raise NotImplementedError("DataCube method not implemented: {m!r}".format(m=inspect.stack()[1].function))
@@ -511,6 +514,12 @@ def to_legacy_save_result(self) -> Union["AggregatePolygonResult", "JSONResult"]
                 f"Unsupported cube configuration {cube.dims} for _write_legacy_aggregate_polygon_result_json"
             )
 
+    def get_dimension_names(self) -> List[str]:
+        if self._cube is None:
+            return [self.DIM_GEOMETRIES]
+        else:
+            return list(self._cube.dims)
+
     def get_bounding_box(self) -> Tuple[float, float, float, float]:
         # TODO: cache bounding box?
         # TODO #114 #141 Open-EO/openeo-geopyspark-driver#239: option to buffer point geometries (if any)
diff --git a/openeo_driver/dummy/dummy_backend.py b/openeo_driver/dummy/dummy_backend.py
index d2d29004..cbdd35b9 100644
--- a/openeo_driver/dummy/dummy_backend.py
+++ b/openeo_driver/dummy/dummy_backend.py
@@ -181,10 +181,12 @@ def __init__(self, metadata: CollectionMetadata = None):
         self.apply_tiles = Mock(name="apply_tiles", return_value=self)
         self.apply_tiles_spatiotemporal = Mock(name="apply_tiles_spatiotemporal", return_value=self)
 
-        # Create mock methods for remaining data cube methods that are not yet defined
-        already_defined = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
+        # Create mock methods for remaining DriverDataCube methods that are not yet defined directly by DummyDataCube
+        to_keep = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
+        to_keep.update(m for m in DriverDataCube.__dict__.keys() if m.startswith("_"))
+        to_keep.update(["get_dimension_names"])
         for name, method in DriverDataCube.__dict__.items():
-            if not name.startswith('_') and name not in already_defined and callable(method):
+            if not name in to_keep and callable(method):
                 setattr(self, name, Mock(name=name, return_value=self))
 
         for name in [n for n, m in DummyDataCube.__dict__.items() if getattr(m, '_mock_side_effect', False)]:
diff --git a/openeo_driver/testing.py b/openeo_driver/testing.py
index 95a90b99..d978a922 100644
--- a/openeo_driver/testing.py
+++ b/openeo_driver/testing.py
@@ -10,7 +10,7 @@
 import re
 import urllib.request
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union, Collection
 from unittest import mock
 
 import pytest
@@ -532,9 +532,48 @@ def to_geojson_feature_collection(self) -> dict:
         return approxify(result, rel=self.rel, abs=self.abs)
 
 
-def caplog_with_custom_formatter(
-    caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]
-):
+class ApproxGeoJSONByBounds:
+    """pytest assert helper to build a matcher to check if a certain GeoJSON construct is within expected bounds"""
+
+    def __init__(
+        self,
+        *args,
+        types: Collection[str] = ("Polygon", "MultiPolygon"),
+        rel: Optional[float] = None,
+        abs: Optional[float] = None,
+    ):
+        bounds = args[0] if len(args) == 1 else args
+        assert isinstance(bounds, (list, tuple)) and len(bounds) == 4
+        self.expected_bounds = [float(b) for b in bounds]
+        self.rel = rel
+        self.abs = abs
+        self.expected_types = set(types)
+        self.actual_info = []
+
+    def __eq__(self, other):
+        try:
+            assert isinstance(other, dict), "Not a dict"
+            assert "type" in other, "No 'type' field"
+            assert other["type"] in self.expected_types, f"Wrong type {other['type']!r}"
+            assert "coordinates" in other, "No 'coordinates' field"
+
+            actual_bounds = shapely.geometry.shape(other).bounds
+            matching = actual_bounds == pytest.approx(self.expected_bounds, rel=self.rel, abs=self.abs)
+            if not matching:
+                self.actual_info.append(f"expected bounds {self.expected_bounds} != actual bounds: {actual_bounds}")
+            return matching
+        except Exception as e:
+            self.actual_info.append(str(e))
+            return False
+
+    def __repr__(self):
+        msg = f"<{type(self).__name__} types={self.expected_types} bounds={self.expected_bounds} rel={self.rel}, abs={self.abs}>"
+        if self.actual_info:
+            msg += "\n" + "\n".join(f" # {i}" for i in self.actual_info)
+        return msg
+
+
+def caplog_with_custom_formatter(caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]):
     """
     Context manager to set a custom formatter on the caplog fixture.
 
diff --git a/tests/test_testing.py b/tests/test_testing.py
index dac39ea7..23ba81d4 100644
--- a/tests/test_testing.py
+++ b/tests/test_testing.py
@@ -21,6 +21,7 @@
     caplog_with_custom_formatter,
     ephemeral_fileserver,
     preprocess_check_and_replace,
+    ApproxGeoJSONByBounds,
 )
 
 
@@ -284,3 +285,38 @@ def test_caplog_with_custom_formatter(caplog, format):
         "[WARNING] still not good (root)",
         "WARNING root:test_testing.py:XXX hmm bad times",
     ]
+
+
+class TestApproxGeoJSONByBounds:
+    def test_basic(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, abs=0.1)
+
+    @pytest.mark.parametrize(
+        ["data", "expected_message"],
+        [
+            ("nope", "# Not a dict"),
+            ({"foo": "bar"}, " # No 'type' field"),
+            ({"type": "Polygommm", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}, " # Wrong type 'Polygommm'"),
+            ({"type": "Polygon"}, " # No 'coordinates' field"),
+        ],
+    )
+    def test_invalid_construct(self, data, expected_message):
+        expected = ApproxGeoJSONByBounds(1, 2, 3, 4)
+        assert data != expected
+        assert expected_message in repr(expected)
+
+    def test_out_of_bounds(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        expected = ApproxGeoJSONByBounds(11, 22, 33, 44, abs=0.1)
+        assert geometry != expected
+        assert "# expected bounds [11.0, 22.0, 33.0, 44.0] != actual bounds: (1.0, 1.0, 3.0, 4.0)" in repr(expected)
+
+    def test_types(self):
+        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon"], abs=0.1)
+        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon", "Point"], abs=0.1)
+
+        expected = ApproxGeoJSONByBounds(1, 1, 3, 4, types=["MultiPolygon"], abs=0.1)
+        assert geometry != expected
+        assert "Wrong type 'Polygon'" in repr(expected)
diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py
index 11f1bab5..4ece7ad3 100644
--- a/tests/test_views_execute.py
+++ b/tests/test_views_execute.py
@@ -33,6 +33,7 @@
     generate_unique_test_process_id,
     preprocess_check_and_replace,
     preprocess_regex_check_and_replace,
+    ApproxGeoJSONByBounds,
 )
 from openeo_driver.util.geometry import as_geojson_feature, as_geojson_feature_collection
 from openeo_driver.util.ioformats import IOFORMATS
@@ -1310,7 +1311,7 @@ def test_run_udf_on_vector_read_vector(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
             },
-            "result": "true",
+            "result": True,
         },
     }
     resp = api100.check_result(process_graph)
@@ -1355,8 +1356,8 @@ def test_run_udf_on_vector_get_geometries(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
             },
-            "result": "true"
-        }
+            "result": True,
+        },
     }
     resp = api100.check_result(process_graph)
     assert resp.json == [
@@ -1401,7 +1402,7 @@ def test_run_udf_on_vector_load_uploaded_files(api100, udf_code):
                 "udf": udf_code,
                 "runtime": "Python",
            },
-            "result": "true",
+            "result": True,
         },
     }
     resp = api100.check_result(process_graph)
@@ -3522,3 +3523,72 @@ def test_request_costs_for_failed_request(api, backend_implementation):
         assert env["correlation_id"] == "r-abc123"
 
     get_request_costs.assert_called_with(TEST_USER, "r-abc123", False)
+
+
+class TestVectorCubeRunUDF:
+    """
+    Tests about running UDF based manipulations on vector cubes
+
+    References:
+    - https://github.com/Open-EO/openeo-python-driver/issues/197
+    - https://github.com/Open-EO/openeo-python-driver/pull/200
+    - https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
+    """
+
+    def test_apply_dimension_run_udf_change_geometry(self, api100):
+        udf_code = """
+            from openeo.udf import UdfData, FeatureCollection
+            def process_geometries(udf_data: UdfData) -> UdfData:
+                [feature_collection] = udf_data.get_feature_collection_list()
+                gdf = feature_collection.data
+                gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
+                udf_data.set_feature_collection_list([
+                    FeatureCollection(id="_", data=gdf),
+                ])
+        """
+        udf_code = textwrap.dedent(udf_code)
+        process_graph = {
+            "get_vector_data": {
+                "process_id": "load_uploaded_files",
+                "arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
+            },
+            "apply_dimension": {
+                "process_id": "apply_dimension",
+                "arguments": {
+                    "data": {"from_node": "get_vector_data"},
+                    "dimension": "properties",
+                    "process": {
+                        "process_graph": {
+                            "runudf1": {
+                                "process_id": "run_udf",
+                                "arguments": {
+                                    "data": {"from_node": "get_vector_data"},
+                                    "udf": udf_code,
+                                    "runtime": "Python",
+                                },
+                                "result": True,
+                            }
+                        },
+                    },
+                },
+                "result": True,
+            },
+        }
+        resp = api100.check_result(process_graph)
+        assert resp.json == DictSubSet(
+            {
+                "type": "FeatureCollection",
+                "features": [
+                    {
+                        "type": "Feature",
+                        "geometry": ApproxGeoJSONByBounds(0, 0, 4, 4, types=["Polygon"], abs=0.1),
+                        "properties": {"id": "first", "pop": 1234},
+                    },
+                    {
+                        "type": "Feature",
+                        "geometry": ApproxGeoJSONByBounds(2, 1, 6, 5, types=["Polygon"], abs=0.1),
+                        "properties": {"id": "second", "pop": 5678},
+                    },
+                ],
+            }
+        )
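
--
Usage sketch (illustrative, not part of the diff above): how the new
ApproxGeoJSONByBounds matcher from openeo_driver/testing.py is meant to be used
in a pytest assertion. The geometry and bounds are made-up example values,
mirroring the patch's own test_basic/test_types cases.

    from openeo_driver.testing import ApproxGeoJSONByBounds

    def test_polygon_roughly_within_bounds():
        # A GeoJSON-style dict, e.g. pulled from an API response.
        geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
        # Matches when the shapely bounds (minx, miny, maxx, maxy) of the geometry
        # approximately equal (1, 1, 3, 4) and the GeoJSON type is an allowed one.
        assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon"], abs=0.1)

On a failed match, repr() of the matcher includes the collected mismatch info
(e.g. expected vs. actual bounds), which pytest shows in the assertion diff.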