From eed14890cff7ccb4c01279278033de690917900a Mon Sep 17 00:00:00 2001
From: JeroenVerstraelen
Date: Sun, 9 Jun 2024 23:36:55 +0200
Subject: [PATCH] use DriverVectorCube in chunk_polygon

Open-EO/openeo-python-driver#288
---
 openeogeotrellis/geopysparkdatacube.py | 13 ++-----------
 tests/test_chunk_polygon.py            |  5 +++--
 tests/test_error.py                    |  4 ++--
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/openeogeotrellis/geopysparkdatacube.py b/openeogeotrellis/geopysparkdatacube.py
index 4149e16c2..754295827 100644
--- a/openeogeotrellis/geopysparkdatacube.py
+++ b/openeogeotrellis/geopysparkdatacube.py
@@ -572,9 +572,7 @@ def partitionByKey(spatialkey):
     def chunk_polygon(
         self,
         reducer: Union[ProcessGraphVisitor, Dict],
-        # TODO: it's wrong to use MultiPolygon as a collection of polygons. MultiPolygons should be handled as single, atomic "features"
-        # also see https://github.com/Open-EO/openeo-python-driver/issues/288
-        chunks: MultiPolygon,
+        chunks: DriverVectorCube,
         mask_value: float,
         env: EvalEnv,
         context: Optional[dict] = None,
@@ -584,19 +582,13 @@
         if isinstance(reducer, dict):
             reducer = GeoPySparkBackendImplementation.accept_process_graph(reducer)

-        chunks: List[Polygon] = chunks.geoms
         jvm = get_jvm()
         result_collection = None

         if isinstance(reducer, SingleNodeUDFProcessGraphVisitor):
             udf, udf_context = self._extract_udf_code_and_context(process=reducer, context=context, env=env)
-            # Polygons should use the same projection as the rdd.
-            # TODO Usage of GeometryCollection should be avoided. It's abused here like a FeatureCollection,
-            # but a GeometryCollections is conceptually just single "feature".
-            # What you want here is proper support for FeatureCollections or at least a list of individual geometries.
-            # also see https://github.com/Open-EO/openeo-python-driver/issues/71, https://github.com/Open-EO/openeo-python-driver/issues/288
             reprojected_polygons: jvm.org.openeo.geotrellis.ProjectedPolygons \
-                = to_projected_polygons(jvm, GeometryCollection(chunks))
+                = to_projected_polygons(jvm, chunks)
             band_names = self.metadata.band_dimension.band_names

             def rdd_function(rdd, _zoom):
@@ -604,7 +596,6 @@ def rdd_function(rdd, _zoom):
                     udf, rdd, reprojected_polygons, band_names, udf_context, mask_value
                 )

-            # All JEP implementation work with float cell types.
             float_cube = self.apply_to_levels(lambda layer: self._convert_celltype(layer, "float32"))
             result_collection = float_cube._apply_to_levels_geotrellis_rdd(
                 rdd_function, self.metadata, gps.LayerType.SPACETIME
diff --git a/tests/test_chunk_polygon.py b/tests/test_chunk_polygon.py
index 2246a8ced..a5f7bff84 100644
--- a/tests/test_chunk_polygon.py
+++ b/tests/test_chunk_polygon.py
@@ -6,6 +6,7 @@
 from shapely.geometry import Polygon, MultiPolygon

 from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
+from openeo_driver.datacube import DriverVectorCube
 from .data import get_test_data_file, TEST_DATA_ROOT


@@ -14,7 +15,7 @@
 #
 # Note: In order to run these tests you need to set several environment variables.
 # If you use the virtual environment venv (with JEP and Numpy installed):
-# 1. LD_LIBRARY_PATH = .../venv/lib/python3.6/site-packages/jep
+# 1. LD_LIBRARY_PATH = .../venv/lib/python3.8/site-packages/jep
 # This will look for the shared library 'jep.so'. This is the compiled C code that binds Java and Python objects.
 def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
@@ -36,7 +37,7 @@ def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
     env = EvalEnv()

     polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
-    chunks = MultiPolygon([polygon1])
+    chunks = DriverVectorCube.from_geometry(polygon1)
     cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
     result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
     result_layer: TiledRasterLayer = result_cube.pyramid.levels[0]
diff --git a/tests/test_error.py b/tests/test_error.py
index 3e9e0ef17..e071c03ca 100644
--- a/tests/test_error.py
+++ b/tests/test_error.py
@@ -1,11 +1,11 @@
 from geopyspark import TiledRasterLayer, Extent
 from openeo_driver.utils import EvalEnv
 from py4j.protocol import Py4JJavaError
-from shapely.geometry import MultiPolygon

 from openeogeotrellis.backend import GeoPySparkBackendImplementation
 from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
 from openeogeotrellis.utils import get_jvm
+from openeo_driver.datacube import DriverVectorCube


 # Note: Ensure that the python environment has all the required modules installed.
@@ -44,7 +44,7 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
     env = EvalEnv()

     polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
-    chunks = MultiPolygon([polygon1])
+    chunks = DriverVectorCube.from_geometry(polygon1)
     cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
     try:
         result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
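
After this change, callers build the chunks argument of chunk_polygon as a DriverVectorCube instead of a shapely MultiPolygon. A minimal sketch of the new call pattern, mirroring the updated tests; the cube data cube and the udf_add_to_bands process graph are assumed to come from the surrounding test fixtures and are not defined here:

    from geopyspark import Extent
    from openeo_driver.datacube import DriverVectorCube
    from openeo_driver.utils import EvalEnv

    # Wrap a single shapely polygon in a DriverVectorCube
    # (previously this was MultiPolygon([polygon1])).
    polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
    chunks = DriverVectorCube.from_geometry(polygon1)

    # chunk_polygon now receives the vector cube directly and hands it to
    # to_projected_polygons() on the JVM side, instead of unpacking .geoms
    # and wrapping them in a GeometryCollection.
    result_cube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=EvalEnv())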