diff --git a/CHANGELOG.md b/CHANGELOG.md index 572ebd5e..dc8ebcaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ without compromising stable operations. ## Unreleased +## 0.47.0 + +- Support `bands_metadata` format option to set band-specific scale, offset and other metadata on `GTiff` output assets ([Open-EO/openeo-geotrellis-extensions#317](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/317)) + ## 0.46.0 - Automatic Python UDF dependency handling: add option to work with ZIP archive diff --git a/openeogeotrellis/_version.py b/openeogeotrellis/_version.py index 3a9787ab..8cd10d80 100644 --- a/openeogeotrellis/_version.py +++ b/openeogeotrellis/_version.py @@ -1 +1 @@ -__version__ = "0.46.2a1" +__version__ = "0.47.0a1" diff --git a/openeogeotrellis/collections/testing.py b/openeogeotrellis/collections/testing.py index ecf0a409..e26f419e 100644 --- a/openeogeotrellis/collections/testing.py +++ b/openeogeotrellis/collections/testing.py @@ -26,16 +26,18 @@ def dates_between(start: datetime, end: datetime) -> List[datetime]: def load_test_collection( - collection_id: str, - collection_metadata: GeopysparkCubeMetadata, - extent, srs: str, - from_date: str, to_date: str, - bands=None, - correlation_id: str = "NA", + tile_size: int, + collection_metadata: GeopysparkCubeMetadata, + extent, + srs: str, + from_date: str, + to_date: str, + bands=None, + correlation_id: str = "NA", ) -> Dict[int, geopyspark.TiledRasterLayer]: """ Load synthetic data as test collection - :param collection_id: + :param tile_size: :param collection_metadata: :param extent: :param srs: @@ -45,10 +47,7 @@ def load_test_collection( :param correlation_id: :return: """ - # TODO: support more test collections - assert collection_id == "TestCollection-LonLat4x4" grid_size: float = 1.0 - tile_size = 4 # TODO: support other srs'es? assert srs == "EPSG:4326" diff --git a/openeogeotrellis/geopysparkdatacube.py b/openeogeotrellis/geopysparkdatacube.py index ab41f70a..726fe418 100644 --- a/openeogeotrellis/geopysparkdatacube.py +++ b/openeogeotrellis/geopysparkdatacube.py @@ -1821,7 +1821,7 @@ def return_netcdf_assets(asset_paths, bands, nodata): overviews = format_options.get("overviews", "AUTO") overview_resample = format_options.get("overview_method", "near") colormap = format_options.get("colormap", None) - description = format_options.get("file_metadata",{}).get("description","") + description = format_options.get("file_metadata", {}).get("description", "") filename_prefix = get_jvm().scala.Option.apply(format_options.get("filename_prefix", None)) separate_asset_per_band_tmp = ( smart_bool(format_options.get("separate_asset_per_band")) @@ -1829,6 +1829,7 @@ def return_netcdf_assets(asset_paths, bands, nodata): else None ) separate_asset_per_band = get_jvm().scala.Option.apply(separate_asset_per_band_tmp) + bands_metadata = format_options.get("bands_metadata", {}) # band_name -> (tag -> value) if separate_asset_per_band.isDefined() and format != "GTIFF": raise OpenEOApiException("separate_asset_per_band is only supported with format GTIFF") @@ -1910,6 +1911,9 @@ def color_to_int(color): for index, band_name in enumerate(self.metadata.band_dimension.band_names): gtiff_options.addBandTag(index, "DESCRIPTION", str(band_name)) + for tag_name, tag_value in bands_metadata.get(band_name, {}).items(): + gtiff_options.addBandTag(index, tag_name.upper(), str(tag_value)) + bands = [] if self.metadata.has_band_dimension(): bands = [b._asdict() for b in self.metadata.bands] @@ -1963,17 +1967,14 @@ def color_to_int(color): # TODO: contains a bbox so rename timestamped_paths = [(timestamped_path._1(), timestamped_path._2(), timestamped_path._3()) for timestamped_path in timestamped_paths] - for index, tup in enumerate(timestamped_paths): - path, timestamp, bbox = tup - tmp_bands = bands - if band_indices_per_file: - band_indices = band_indices_per_file[index] - tmp_bands = [b for i, b in enumerate(bands) if i in band_indices] + for index, (path, timestamp, bbox) in enumerate(timestamped_paths): assets[str(pathlib.Path(path).name)] = { "href": str(path), "type": "image/tiff; application=geotiff", "roles": ["data"], - "bands": tmp_bands, + "bands": ( + [bands[i] for i in band_indices_per_file[index]] if band_indices_per_file else bands + ), "nodata": nodata, "datetime": timestamp, "bbox": to_latlng_bbox(bbox), @@ -2000,7 +2001,8 @@ def color_to_int(color): str(save_filename), zlevel, get_jvm().scala.Option.apply(crop_extent), - gtiff_options) + gtiff_options, + ) paths_tuples = [ (timestamped_path._1(), timestamped_path._2()) for timestamped_path in paths_tuples @@ -2008,12 +2010,11 @@ def color_to_int(color): assets = {} for path, band_indices in paths_tuples: file_name = pathlib.Path(path).name - tmp_bands = [b for i, b in enumerate(bands) if i in band_indices] assets[file_name] = { "href": str(path), "type": "image/tiff; application=geotiff", "roles": ["data"], - "bands": tmp_bands, + "bands": [bands[i] for i in band_indices], "nodata": nodata, } return assets diff --git a/openeogeotrellis/integrations/gdal.py b/openeogeotrellis/integrations/gdal.py index 8d30bf84..4d1d757a 100644 --- a/openeogeotrellis/integrations/gdal.py +++ b/openeogeotrellis/integrations/gdal.py @@ -479,7 +479,8 @@ def _get_raster_statistics(gdal_info: GDALInfo, band_name: Optional[str] = None) # just the empty string. gdal_band_stats = band_metadata.get("", {}) band_name_out = ( - band_name or gdal_band_stats.get("long_name") or gdal_band_stats.get("DESCRIPTION") or str(band_num) + band_name or gdal_band_stats.get("long_name") or band.get("description") + or gdal_band_stats.get("DESCRIPTION") or str(band_num) ) def to_float_or_none(x: Optional[str]): diff --git a/openeogeotrellis/layercatalog.py b/openeogeotrellis/layercatalog.py index c4c09647..2617ff06 100644 --- a/openeogeotrellis/layercatalog.py +++ b/openeogeotrellis/layercatalog.py @@ -689,10 +689,18 @@ def file_agera5_pyramid(): elif layer_source_type == 'accumulo': pyramid = accumulo_pyramid() elif layer_source_type == 'testing': + import re + + tile_cols, tile_rows = map(int, re.match(r".*?(\d+)x(\d+)", collection_id).groups()) + assert tile_cols == tile_rows + pyramid = load_test_collection( - collection_id=collection_id, collection_metadata=metadata, - extent=extent, srs=srs, - from_date=from_date, to_date=to_date, + tile_size=tile_cols, + collection_metadata=metadata, + extent=extent, + srs=srs, + from_date=from_date, + to_date=to_date, bands=bands, correlation_id=correlation_id ) diff --git a/tests/data_collections/test_testing.py b/tests/data_collections/test_testing.py index 01bd82b9..2f6dcfc2 100644 --- a/tests/data_collections/test_testing.py +++ b/tests/data_collections/test_testing.py @@ -57,7 +57,7 @@ def test_load_test_collection(): extent = get_jvm().geotrellis.vector.Extent(1.0, 2.0, 2.0, 4.0) pyramid = load_test_collection( - collection_id="TestCollection-LonLat4x4", + tile_size=4, collection_metadata=collection_metadata, extent=extent, srs="EPSG:4326", diff --git a/tests/datacube_fixtures.py b/tests/datacube_fixtures.py index 69f8cba7..578066c0 100644 --- a/tests/datacube_fixtures.py +++ b/tests/datacube_fixtures.py @@ -7,19 +7,21 @@ from openeogeotrellis.service_registry import InMemoryServiceRegistry -matrix_of_one = np.zeros((1, 4, 4)) +TILE_SIZE = 16 # multiple of 16 as this is used for the GeoTIFF tile size as well and mandated by its spec + +matrix_of_one = np.zeros((1, TILE_SIZE, TILE_SIZE)) matrix_of_one.fill(1) -matrix_of_two = np.zeros((1, 4, 4)) +matrix_of_two = np.zeros((1, TILE_SIZE, TILE_SIZE)) matrix_of_two.fill(2) -matrix_of_nodata = np.zeros((1, 4, 4)) +matrix_of_nodata = np.zeros((1, TILE_SIZE, TILE_SIZE)) matrix_of_nodata.fill(-1) -extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0} -extent_webmerc = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 445277.96317309426, 'ymax': 445640.1096560266} -layout = {'layoutCols': 1, 'layoutRows': 1, 'tileCols': 4, 'tileRows': 4} - +extent = {"xmin": 0.0, "ymin": 0.0, "xmax": 4.0, "ymax": 4.0} +extent_webmerc = {"xmin": 0.0, "ymin": 0.0, "xmax": 445277.96317309426, "ymax": 445640.1096560266} +# TODO: shouldn't layoutCols/Rows be 2 as we're adding 4 tiles to it? Or at least make it easier to reason about? +layout = {"layoutCols": 1, "layoutRows": 1, "tileCols": TILE_SIZE, "tileRows": TILE_SIZE} openeo_metadata = { diff --git a/tests/layercatalog.json b/tests/layercatalog.json index be7aa411..fd0ffe27 100644 --- a/tests/layercatalog.json +++ b/tests/layercatalog.json @@ -908,12 +908,71 @@ "x": { "type": "spatial", "axis": "x", - "reference_system": {"$schema":"https://proj.org/schemas/v0.2/projjson.schema.json","type":"GeodeticCRS","name":"AUTO 42001 (Universal Transverse Mercator)","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"area":"World","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"OGC","version":"1.3","code":"Auto42001"}} + "reference_system": 4326 }, "y": { "type": "spatial", "axis": "y", - "reference_system": {"$schema":"https://proj.org/schemas/v0.2/projjson.schema.json","type":"GeodeticCRS","name":"AUTO 42001 (Universal Transverse Mercator)","datum":{"type":"GeodeticReferenceFrame","name":"World Geodetic System 1984","ellipsoid":{"name":"WGS 84","semi_major_axis":6378137,"inverse_flattening":298.257223563}},"coordinate_system":{"subtype":"ellipsoidal","axis":[{"name":"Geodetic latitude","abbreviation":"Lat","direction":"north","unit":"degree"},{"name":"Geodetic longitude","abbreviation":"Lon","direction":"east","unit":"degree"}]},"area":"World","bbox":{"south_latitude":-90,"west_longitude":-180,"north_latitude":90,"east_longitude":180},"id":{"authority":"OGC","version":"1.3","code":"Auto42001"}} + "reference_system": 4326 + }, + "t": { + "type": "temporal" + }, + "bands": { + "type": "bands", + "values": [ + "Flat:0", + "Flat:1", + "Flat:2", + "TileCol", + "TileRow", + "TileColRow:10", + "Longitude", + "Latitude", + "Year", + "Month", + "Day" + ] + } + }, + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -56, + 180, + 83 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2000-01-01", + null + ] + ] + } + } + }, + { + "id": "TestCollection-LonLat16x16", + "_vito": { + "data_source": { + "type": "testing" + } + }, + "cube:dimensions": { + "x": { + "type": "spatial", + "axis": "x", + "reference_system": 4326 + }, + "y": { + "type": "spatial", + "axis": "y", + "reference_system": 4326 }, "t": { "type": "temporal" diff --git a/tests/test_api_result.py b/tests/test_api_result.py index 33678ab7..7e58c13f 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -45,6 +45,7 @@ as_geojson_feature, as_geojson_feature_collection, ) +from osgeo import gdal from pystac import ( Asset, Catalog, @@ -56,6 +57,7 @@ ) from shapely.geometry import GeometryCollection, Point, Polygon, box, mapping +from openeogeotrellis._version import __version__ from openeogeotrellis.backend import JOB_METADATA_FILENAME from openeogeotrellis.config.config import EtlApiConfig from openeogeotrellis.job_registry import ZkJobRegistry @@ -2086,7 +2088,7 @@ def test_apply_neighborhood_filter_spatial(api100, tmp_path): "lc": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "temporal_extent": ["2020-03-01", "2020-03-10"], "spatial_extent": {"west": 0.0, "south": 0.0, "east": 32.0, "north": 32.0}, "bands": ["Longitude", "Day"] @@ -2124,7 +2126,7 @@ def test_apply_neighborhood_filter_spatial(api100, tmp_path): with rasterio.open(tmp_path / "apply_neighborhood.tif") as ds: print(ds.bounds) assert ds.bounds.right == 11 - assert ds.width == 4 + assert ds.width == 16 def test_aggregate_spatial_netcdf_feature_names(api100, tmp_path): @@ -4776,3 +4778,66 @@ def test_aggregate_temporal_period_from_merge_cubes_on_time_dimension_contains_f np.datetime64("2024-03-01"), ], ) + + +def test_geotiff_scale_offset(api110, tmp_path): + response = api110.check_result( + { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "TestCollection-LonLat16x16", + "temporal_extent": ["2021-01-05", "2021-01-06"], + "spatial_extent": {"west": 0.0, "south": 50.0, "east": 5.0, "north": 55.0}, + "bands": ["Flat:1", "Flat:2"], + }, + }, + "saveresult1": { + "process_id": "save_result", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "format": "GTiff", + "options": { + "bands_metadata": { + "Flat:1": { + "SCALE": 1.23, + "ARBITRARY": "value", + }, + "Flat:2": { + "OFFSET": 4.56, + }, + "Flat:3": { + "SCALE": 7.89, + "OFFSET": 10.11 + } + } + }, + }, + "result": True, + }, + } + ) + + output_file = tmp_path / "out.tif" + + with open(output_file, mode="wb") as f: + f.write(response.data) + + raster = gdal.Open(str(output_file)) + head_metadata = raster.GetMetadata() + assert head_metadata["AREA_OR_POINT"] == "Area" + assert head_metadata["PROCESSING_SOFTWARE"] == __version__ + + band_count = raster.RasterCount + assert band_count == 2 + + first_band = raster.GetRasterBand(1) + assert first_band.GetDescription() == "Flat:1" + assert first_band.GetScale() == 1.23 + assert first_band.GetOffset() == 0.0 + assert first_band.GetMetadata()["ARBITRARY"] == "value" + + second_band = raster.GetRasterBand(2) + assert second_band.GetDescription() == "Flat:2" + assert second_band.GetScale() == 1.0 + assert second_band.GetOffset() == 4.56 diff --git a/tests/test_batch_result.py b/tests/test_batch_result.py index 8486dfea..c3ea5c4c 100644 --- a/tests/test_batch_result.py +++ b/tests/test_batch_result.py @@ -19,8 +19,10 @@ from openeo_driver.util.geometry import validate_geojson_coordinates from openeo_driver.utils import EvalEnv from openeo_driver.workspace import DiskWorkspace +from osgeo import gdal from shapely.geometry import Point, Polygon, shape +from openeogeotrellis._version import __version__ from openeogeotrellis.backend import JOB_METADATA_FILENAME from openeogeotrellis.config import get_backend_config from openeogeotrellis.deploy.batch_job import run_job @@ -277,7 +279,7 @@ def test_separate_asset_per_band(tmp_path, from_node, expected_names): "process_id": "load_collection", "arguments": { "bands": ["TileRow", "TileCol"], - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "properties": {}, "spatial_extent": {"west": 0.0, "south": 50.0, "east": 5.0, "north": 55.0}, "temporal_extent": ["2021-06-01", "2021-06-16"], @@ -863,7 +865,7 @@ def test_multiple_image_collection_results(tmp_path): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "spatial_extent": {"west": 0.0, "south": 50.0, "east": 5.0, "north": 55.0}, "temporal_extent": ["2021-01-04", "2021-01-06"], "bands": ["Flat:2"] @@ -912,7 +914,7 @@ def test_export_workspace(tmp_path, remove_original): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "temporal_extent": ["2021-01-05", "2021-01-06"], "spatial_extent": {"west": 0.0, "south": 0.0, "east": 1.0, "north": 2.0}, "bands": ["Flat:2"] @@ -1013,7 +1015,7 @@ def test_export_workspace_with_asset_per_band(tmp_path): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "temporal_extent": ["2021-01-05", "2021-01-06"], "spatial_extent": {"west": 0.0, "south": 0.0, "east": 1.0, "north": 2.0}, "bands": ["Longitude", "Latitude"], @@ -1097,10 +1099,10 @@ def test_export_workspace_with_asset_per_band(tmp_path): { "name": "Latitude", "statistics": { - "maximum": 1.75, - "mean": 0.875, + "maximum": 1.9375, + "mean": 0.96875, "minimum": 0.0, - "stddev": 0.57282196186948, + "stddev": 0.57706829101936, "valid_percent": 100.0, }, } @@ -1154,7 +1156,7 @@ def test_multiple_top_level_side_effects(tmp_path, caplog): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "spatial_extent": {"west": 5, "south": 50, "east": 5.1, "north": 50.1}, "temporal_extent": ["2024-07-11", "2024-07-21"], "bands": ["Flat:1"] @@ -1229,8 +1231,8 @@ def test_multiple_top_level_side_effects(tmp_path, caplog): "final.tif": lambda dataset: dataset.res == (80, 80) }), ("pg02.json", { - "B04.tif": lambda dataset: dataset.tags(1)["DESCRIPTION"] == "B04", - "B11.tif": lambda dataset: dataset.tags(1)["DESCRIPTION"] == "B11", + "B04.tif": lambda dataset: dataset.descriptions == ("B04",), + "B11.tif": lambda dataset: dataset.descriptions == ("B11",), }), ]) def test_multiple_save_results(tmp_path, process_graph_file, output_file_predicates): @@ -1306,7 +1308,7 @@ def test_load_ml_model_via_jobid(tmp_path): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "temporal_extent": ["2021-01-01", "2021-02-01"], "spatial_extent": {"west": 0.0, "south": 0.0, "east": 1.0, "north": 2.0}, "bands": ["TileRow", "TileCol"] @@ -1454,7 +1456,7 @@ def test_multiple_save_result_single_export_workspace(tmp_path): "loadcollection1": { "process_id": "load_collection", "arguments": { - "id": "TestCollection-LonLat4x4", + "id": "TestCollection-LonLat16x16", "temporal_extent": ["2021-01-05", "2021-01-06"], "spatial_extent": {"west": 0.0, "south": 0.0, "east": 1.0, "north": 2.0}, "bands": ["Flat:2"], @@ -1565,3 +1567,72 @@ def test_vectorcube_write_assets(tmp_path): dependencies={}, user_id="jenkins", ) + + +def test_geotiff_scale_offset(tmp_path): + process_graph = { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "TestCollection-LonLat16x16", + "temporal_extent": ["2021-01-05", "2021-01-06"], + "spatial_extent": {"west": 0.0, "south": 50.0, "east": 5.0, "north": 55.0}, + "bands": ["Flat:2"], + }, + }, + "saveresult1": { + "process_id": "save_result", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "format": "GTiff", + "options": { + "bands_metadata": { + "Flat:2": { + "SCALE": 1.23, + "OFFSET": 4.56, + "ARBITRARY": "value", + }, + } + }, + }, + "result": True, + }, + } + + process = { + "process_graph": process_graph, + "description": "some description", + } + + run_job( + process, + output_file=tmp_path / "out.tif", + metadata_file=tmp_path / "job_metadata.json", + api_version="2.0.0", + job_dir=tmp_path, + dependencies=[], + ) + + # metadata should be embedded in the tiff, not in a sidecar file + aux_files = [tmp_path / aux_file for aux_file in os.listdir(tmp_path) if aux_file.endswith(".tif.aux.xml")] + for aux_file in aux_files: + aux_file.unlink() + + output_tiffs = [tmp_path / tiff_file for tiff_file in os.listdir(tmp_path) if tiff_file.endswith(".tif")] + assert len(output_tiffs) == 1 + output_tiff = output_tiffs[0] + + raster = gdal.Open(str(output_tiff)) + head_metadata = raster.GetMetadata() + assert head_metadata["AREA_OR_POINT"] == "Area" + assert head_metadata["PROCESSING_SOFTWARE"] == __version__ + assert head_metadata["ImageDescription"] == "some description" + + band_count = raster.RasterCount + assert band_count == 1 + band = raster.GetRasterBand(1) + assert band.GetDescription() == "Flat:2" + assert band.GetScale() == 1.23 + assert band.GetOffset() == 4.56 + band_metadata = band.GetMetadata() + assert band_metadata["ARBITRARY"] == "value" diff --git a/tests/test_chunk_polygon.py b/tests/test_chunk_polygon.py index 6acfb0e4..f8b1e2c9 100644 --- a/tests/test_chunk_polygon.py +++ b/tests/test_chunk_polygon.py @@ -45,15 +45,15 @@ def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates): assert result_layer.layer_type == LayerType.SPACETIME results_numpy = result_layer.to_numpy_rdd().collect() - band0_month10 = np.zeros((4, 4)) - band1_month10 = np.zeros((4, 4)) + band0_month10 = np.zeros((16, 16)) + band1_month10 = np.zeros((16, 16)) band0_month10.fill(1012) band1_month10.fill(1101) for key_and_tile in results_numpy: instant: datetime.datetime = key_and_tile[0].instant tile: Tile = key_and_tile[1] cells: np.ndarray = tile.cells - assert cells.shape == (2, 4, 4) + assert cells.shape == (2, 16, 16) assert tile.cell_type == 'FLOAT' if instant.month == 10: np.testing.assert_array_equal(cells, np.array([band0_month10, band1_month10])) diff --git a/tests/test_load_collection.py b/tests/test_load_collection.py index 28f313a3..86ae392c 100644 --- a/tests/test_load_collection.py +++ b/tests/test_load_collection.py @@ -415,7 +415,7 @@ def test_load_disk_collection_pyramid( cube = cube.rename_labels("bands", ["band1", "bands"]) assert len(cube.metadata.spatial_dimensions) == 2 - assert len(cube.pyramid.levels) == 2 + assert len(cube.pyramid.levels) == 4 def test_load_disk_collection_batch(imagecollection_with_two_bands_and_three_dates,backend_implementation,tmp_path): @@ -497,6 +497,4 @@ def test_load_stac_collection_with_property_filters(catalog, tmp_path, requests_ with rasterio.open(output_file) as ds: assert ds.count == len(expected_bands) - for band_index, band_name in enumerate(expected_bands): - # ds.tags(0) has global metadata, band metadata starts from 1 - assert ds.tags(band_index + 1)["DESCRIPTION"] == expected_bands[band_index] + assert list(ds.descriptions) == expected_bands diff --git a/tests/test_multiple_date.py b/tests/test_multiple_date.py index 8f98f58c..2cb4d900 100644 --- a/tests/test_multiple_date.py +++ b/tests/test_multiple_date.py @@ -154,18 +154,18 @@ def test_reproject_spatial(self): imagecollection = GeopysparkDataCube(pyramid=input, metadata=self.collection_metadata) - ref_path = str(self.temp_folder / "reproj_ref.tiff") + ref_path = str(self.temp_folder / "reproj_ref.nc") imagecollection.reduce_dimension(reducer=reducer("max"), dimension="t", env=EvalEnv()).save_result( - ref_path, format="GTIFF" + ref_path, format="netCDF" ) resampled = imagecollection.resample_spatial(resolution=0,projection="EPSG:3395",method="max") metadata = resampled.pyramid.levels[0].layer_metadata print(metadata) self.assertTrue("proj=merc" in metadata.crs) - path = str(self.temp_folder / "reprojected.tiff") + path = str(self.temp_folder / "reprojected.nc") res = resampled.reduce_dimension(reducer=reducer("max"), dimension="t", env=EvalEnv()) - res.save_result(path, format="GTIFF") + res.save_result(path, format="netCDF") with rasterio.open(ref_path) as ref_ds: with rasterio.open(path) as ds: diff --git a/tests/test_reduce_dimension.py b/tests/test_reduce_dimension.py index 8fd99e92..0360ba77 100644 --- a/tests/test_reduce_dimension.py +++ b/tests/test_reduce_dimension.py @@ -41,16 +41,16 @@ def test_reduce_bands(imagecollection_with_two_bands_and_three_dates): cube = imagecollection_with_two_bands_and_three_dates ts = _timeseries_stitch(cube) assert len(ts) == 3 - assert set(t.cells.shape for t in ts.values()) == {(2, 8, 8)} + assert set(t.cells.shape for t in ts.values()) == {(2, 32, 32)} reducer = _simple_reducer("sum") env = EvalEnv() cube = cube.reduce_dimension(dimension="bands", reducer=reducer, env=env) ts = _timeseries_stitch(cube) assert len(ts) == 3 - assert_array_almost_equal(ts[dt.datetime(2017, 9, 25, 11, 37, 0)].cells, np.full((1, 8, 8), 3.0)) - assert_array_almost_equal(ts[dt.datetime(2017, 9, 30, 0, 37, 0)].cells, np.full((1, 8, 8), np.nan)) - assert_array_almost_equal(ts[dt.datetime(2017, 9, 25, 11, 37, 0)].cells, np.full((1, 8, 8), 3.0)) + assert_array_almost_equal(ts[dt.datetime(2017, 9, 25, 11, 37, 0)].cells, np.full((1, 32, 32), 3.0)) + assert_array_almost_equal(ts[dt.datetime(2017, 9, 30, 0, 37, 0)].cells, np.full((1, 32, 32), np.nan)) + assert_array_almost_equal(ts[dt.datetime(2017, 9, 25, 11, 37, 0)].cells, np.full((1, 32, 32), 3.0)) @pytest.mark.parametrize("udf", [("udf_noop"), ("udf_noop_jep")]) @@ -59,19 +59,19 @@ def test_reduce_bands_reduce_time(imagecollection_with_two_bands_and_three_dates cube = imagecollection_with_two_bands_and_three_dates ts = _timeseries_stitch(cube) assert len(ts) == 3 - assert set(t.cells.shape for t in ts.values()) == {(2, 8, 8)} + assert set(t.cells.shape for t in ts.values()) == {(2, 32, 32)} reducer = _simple_reducer("sum") env = EvalEnv() cube = cube.reduce_dimension(dimension="bands", reducer=reducer, env=env) ts = _timeseries_stitch(cube) assert len(ts) == 3 - assert set(t.cells.shape for t in ts.values()) == {(1, 8, 8)} + assert set(t.cells.shape for t in ts.values()) == {(1, 32, 32)} cube = cube.reduce_dimension(dimension='t', reducer=udf, env=env) stiched = _stitch(cube) - assert stiched.cells.shape == (1, 8, 8) - expected = np.full((1, 8, 8), 3.0) + assert stiched.cells.shape == (1, 32, 32) + expected = np.full((1, 32, 32), 3.0) assert_array_almost_equal(stiched.cells, expected) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index dea87035..818030ce 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -400,7 +400,7 @@ def test_multiple_zonal_statistics(imagecollection_with_two_bands_and_three_date result = imagecollection_with_two_bands_and_three_dates.aggregate_spatial(polygon, callback) assert isinstance(result, AggregatePolygonResultCSV) assert result.get_data() == { - "2017-09-25T11:37:00Z": [[1.0, 1.0, 1.0, 2.0, 1.0, 2.0]], + "2017-09-25T11:37:00Z": [[16.0, 16.0, 1.0, 32.0, 16.0, 2.0]], "2017-09-30T00:37:00Z": [ [ pytest.approx(np.nan, nan_ok=True), @@ -411,7 +411,7 @@ def test_multiple_zonal_statistics(imagecollection_with_two_bands_and_three_date pytest.approx(np.nan, nan_ok=True), ] ], - "2017-10-25T11:37:00Z": [[2.0, 1.0, 2.0, 1.0, 1.0, 1.0]], + "2017-10-25T11:37:00Z": [[32.0, 16.0, 2.0, 16.0, 16.0, 1.0]], } diff --git a/tests/test_vectorcube.py b/tests/test_vectorcube.py index 26b88424..89deaa42 100644 --- a/tests/test_vectorcube.py +++ b/tests/test_vectorcube.py @@ -237,7 +237,7 @@ def test_aggregatespatialresultcsv_vector_to_raster(imagecollection_with_two_ban space_time_key = output_cube_np[i][0] tile: geopyspark.Tile = output_cube_np[i][1] assert space_time_key in expected_values.keys() - assert tile.cells.shape == (2, 4, 4) + assert tile.cells.shape == (2, 16, 16) mean_band0 = np.unique(tile.cells[0]).tolist() mean_band1 = np.unique(tile.cells[1]).tolist() expected_band_values = expected_values[space_time_key]