Skip to content

Commit

Permalink
use DriverVectorCube in chunk_polygon Open-EO/openeo-python-driver#288
Browse files Browse the repository at this point in the history
  • Loading branch information
JeroenVerstraelen committed Jun 9, 2024
1 parent 5739cb6 commit eed1489
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 15 deletions.
13 changes: 2 additions & 11 deletions openeogeotrellis/geopysparkdatacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,7 @@ def partitionByKey(spatialkey):
def chunk_polygon(
self,
reducer: Union[ProcessGraphVisitor, Dict],
# TODO: it's wrong to use MultiPolygon as a collection of polygons. MultiPolygons should be handled as single, atomic "features"
# also see https://github.com/Open-EO/openeo-python-driver/issues/288
chunks: MultiPolygon,
chunks: DriverVectorCube,
mask_value: float,
env: EvalEnv,
context: Optional[dict] = None,
Expand All @@ -584,27 +582,20 @@ def chunk_polygon(

if isinstance(reducer, dict):
reducer = GeoPySparkBackendImplementation.accept_process_graph(reducer)
chunks: List[Polygon] = chunks.geoms
jvm = get_jvm()

result_collection = None
if isinstance(reducer, SingleNodeUDFProcessGraphVisitor):
udf, udf_context = self._extract_udf_code_and_context(process=reducer, context=context, env=env)
# Polygons should use the same projection as the rdd.
# TODO Usage of GeometryCollection should be avoided. It's abused here like a FeatureCollection,
# but a GeometryCollections is conceptually just single "feature".
# What you want here is proper support for FeatureCollections or at least a list of individual geometries.
# also see https://github.com/Open-EO/openeo-python-driver/issues/71, https://github.com/Open-EO/openeo-python-driver/issues/288
reprojected_polygons: jvm.org.openeo.geotrellis.ProjectedPolygons \
= to_projected_polygons(jvm, GeometryCollection(chunks))
= to_projected_polygons(jvm, chunks)
band_names = self.metadata.band_dimension.band_names

def rdd_function(rdd, _zoom):
return jvm.org.openeo.geotrellis.udf.Udf.runChunkPolygonUserCode(
udf, rdd, reprojected_polygons, band_names, udf_context, mask_value
)

# All JEP implementation work with float cell types.
float_cube = self.apply_to_levels(lambda layer: self._convert_celltype(layer, "float32"))
result_collection = float_cube._apply_to_levels_geotrellis_rdd(
rdd_function, self.metadata, gps.LayerType.SPACETIME
Expand Down
5 changes: 3 additions & 2 deletions tests/test_chunk_polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from shapely.geometry import Polygon, MultiPolygon

from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
from openeo_driver.datacube import DriverVectorCube
from .data import get_test_data_file, TEST_DATA_ROOT


Expand All @@ -14,7 +15,7 @@
#
# Note: In order to run these tests you need to set several environment variables.
# If you use the virtual environment venv (with JEP and Numpy installed):
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.6/site-packages/jep
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.8/site-packages/jep
# This will look for the shared library 'jep.so'. This is the compiled C code that binds Java and Python objects.

def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
Expand All @@ -36,7 +37,7 @@ def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
env = EvalEnv()

polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
chunks = MultiPolygon([polygon1])
chunks = DriverVectorCube.from_geometry(polygon1)
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
result_layer: TiledRasterLayer = result_cube.pyramid.levels[0]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_error.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from geopyspark import TiledRasterLayer, Extent
from openeo_driver.utils import EvalEnv
from py4j.protocol import Py4JJavaError
from shapely.geometry import MultiPolygon

from openeogeotrellis.backend import GeoPySparkBackendImplementation
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
from openeogeotrellis.utils import get_jvm
from openeo_driver.datacube import DriverVectorCube


# Note: Ensure that the python environment has all the required modules installed.
Expand Down Expand Up @@ -44,7 +44,7 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
env = EvalEnv()

polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
chunks = MultiPolygon([polygon1])
chunks = DriverVectorCube.from_geometry(polygon1)
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
try:
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
Expand Down

0 comments on commit eed1489

Please sign in to comment.