From efd6ccedb35e47928f018aeafce3d826a2132e7e Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Tue, 12 Nov 2024 12:31:54 -0500 Subject: [PATCH 1/4] Update spatial io dependencies --- apis/python/requirements_spatial.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apis/python/requirements_spatial.txt b/apis/python/requirements_spatial.txt index de9f3213b3..d91e347662 100644 --- a/apis/python/requirements_spatial.txt +++ b/apis/python/requirements_spatial.txt @@ -1,5 +1,6 @@ geopandas tifffile pillow -spatialdata -xarray>=2024.05.0 +spatialdata>=0.2.5 +xarray +dask From 6ccf214e3e4a1641ff61e31aaf8fd212c8cb438a Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Fri, 1 Nov 2024 16:11:29 -0400 Subject: [PATCH 2/4] Export MultiscaleImage to SpatialData multiscale image --- .../src/tiledbsoma/experimental/outgest.py | 82 ++++++++++++++++++ .../tests/test_export_multiscale_image.py | 86 +++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/apis/python/src/tiledbsoma/experimental/outgest.py b/apis/python/src/tiledbsoma/experimental/outgest.py index f8246f2d85..1fe776590a 100644 --- a/apis/python/src/tiledbsoma/experimental/outgest.py +++ b/apis/python/src/tiledbsoma/experimental/outgest.py @@ -256,3 +256,85 @@ def to_spatial_data_image( attrs={"transform": transformations}, context=image.context, ) + + +def to_spatial_data_multiscale_image( + image: MultiscaleImage, + *, + scene_id: str, + scene_dim_map: Dict[str, str], + transform: somacore.CoordinateTransform, +) -> xr.DataTree: + """Export a MultiscaleImage to a DataTree.""" + + # Check for channel axis. + if not image.has_channel_axis: + raise NotImplementedError( + "Support for exporting a MultiscaleImage to without a channel axis to " + "SpatialData is not yet implemented." + ) + + # Convert from SOMA axis names to SpatialData axis names. + orig_axis_names = image.coordinate_space.axis_names + if len(orig_axis_names) not in {2, 3}: + raise NotImplementedError( + f"Support for converting a '{len(orig_axis_names)}'D is not yet implemented." + ) + new_axis_names, image_dim_map = _convert_axis_names( + orig_axis_names, image.data_axis_order + ) + + # Get the transformtion from the image level to the scene: + # If the result is a single scale transform (or identity transform), output a + # single transformation. Otherwise, convert to a SpatialData sequence of + # transformations. + inv_transform = transform.inverse_transform() + if isinstance(transform, somacore.ScaleTransform): + # inv_transform @ scale_transform -> applies scale_transform first + spatial_data_transformations = tuple( + _transform_to_spatial_data( + inv_transform @ image.get_transform_from_level(level), + image_dim_map, + scene_dim_map, + ) + for level in range(image.level_count) + ) + + else: + sd_scale_transforms = tuple( + _transform_to_spatial_data( + image.get_transform_from_level(level), image_dim_map, image_dim_map + ) + for level in range(1, image.level_count) + ) + sd_inv_transform = _transform_to_spatial_data( + inv_transform, image_dim_map, scene_dim_map + ) + + # First level transform is always the identity, so just directly use + # inv_transform. For remaining transformations, + # Sequence([sd_transform1, sd_transform2]) -> applies sd_transform1 first + spatial_data_transformations = (sd_inv_transform,) + tuple( + sd.transformations.Sequence([scale_transform, sd_inv_transform]) + for scale_transform in sd_scale_transforms + ) + + # Create the datatree + image_datasets = { + f"scale{index}": xr.Dataset( + { + "image": dense_nd_array_to_data_array( + uri=image.level_uri(index), + dim_names=new_axis_names, + attrs={ + "transform": {scene_id: spatial_data_transformations[index]} + }, + context=image.context, + ) + } + ) + for index, (soma_name, val) in enumerate(image.levels().items()) + } + multiscale_image = xr.DataTree.from_dict(image_datasets) + + return multiscale_image diff --git a/apis/python/tests/test_export_multiscale_image.py b/apis/python/tests/test_export_multiscale_image.py index 6ab2238437..652fd9bfb5 100644 --- a/apis/python/tests/test_export_multiscale_image.py +++ b/apis/python/tests/test_export_multiscale_image.py @@ -133,3 +133,89 @@ def test_export_image_level_to_spatial_data( metadata = dict(image2d.attrs) assert len(metadata) == 1 assert metadata["transform"] == {"scene0": expected_transformation} + + +@pytest.mark.parametrize( + "transform,expected_transformation", + [ + ( + somacore.IdentityTransform(("x_scene", "y_scene"), ("x_image", "y_image")), + [ + sd.transformations.Identity(), + sd.transformations.Scale([2, 2], ("x", "y")), + sd.transformations.Scale([4, 4], ("x", "y")), + ], + ), + ( + somacore.ScaleTransform( + ("x_scene", "y_scene"), ("x_image", "y_image"), [0.25, 0.5] + ), + [ + sd.transformations.Scale([4, 2], ("x", "y")), + sd.transformations.Scale([8, 4], ("x", "y")), + sd.transformations.Scale([16, 8], ("x", "y")), + ], + ), + ( + somacore.AffineTransform( + ("x_scene", "y_scene"), ("x_image", "y_image"), [[1, 0, 1], [0, 1, 2]] + ), + [ + sd.transformations.Affine( + np.array([[1, 0, -1], [0, 1, -2], [0, 0, 1]]), + ("x", "y"), + ("x", "y"), + ), + sd.transformations.Sequence( + [ + sd.transformations.Scale([2, 2], ("x", "y")), + sd.transformations.Affine( + np.array([[1, 0, -1], [0, 1, -2], [0, 0, 1]]), + ("x", "y"), + ("x", "y"), + ), + ] + ), + sd.transformations.Sequence( + [ + sd.transformations.Scale([4, 4], ("x", "y")), + sd.transformations.Affine( + np.array([[1, 0, -1], [0, 1, -2], [0, 0, 1]]), + ("x", "y"), + ("x", "y"), + ), + ] + ), + ], + ), + ], +) +def test_export_full_image_to_spatial_data( + sample_multiscale_image_2d, sample_2d_data, transform, expected_transformation +): + image2d = soma_outgest.to_spatial_data_multiscale_image( + sample_multiscale_image_2d, + scene_id="scene0", + scene_dim_map={"x_scene": "x", "y_scene": "y"}, + transform=transform, + ) + + assert isinstance(image2d, xr.DataTree) + + # Validate the model. + schema = sd.models.get_model(image2d) + assert schema == sd.models.Image2DModel + + # Check the correct data exists. + for index in range(3): + data_array = image2d[f"scale{index}"]["image"] + print(f"{index}: {data_array}") + + # Check data. + result = data_array.data.compute() + np.testing.assert_equal(result, sample_2d_data[index]) + + # Check the metadata. + metadata = dict(data_array.attrs) + assert len(metadata) == 1 + assert metadata["transform"] == {"scene0": expected_transformation[index]} From 522282a0a64c76645b22d5436f76e532ba8a068f Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 20 Nov 2024 14:30:45 -0500 Subject: [PATCH 3/4] Update MultiscaleImage export to work with multiple SpatialData versions --- .../src/tiledbsoma/experimental/_util.py | 20 +++++++ .../experimental/_xarray_backend.py | 37 +++++++++++-- .../src/tiledbsoma/experimental/outgest.py | 53 ++++++++++--------- .../tests/test_export_multiscale_image.py | 5 +- 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/apis/python/src/tiledbsoma/experimental/_util.py b/apis/python/src/tiledbsoma/experimental/_util.py index 38833e0a0a..24211ae725 100644 --- a/apis/python/src/tiledbsoma/experimental/_util.py +++ b/apis/python/src/tiledbsoma/experimental/_util.py @@ -16,6 +16,26 @@ def _str_to_int(value: str) -> int: return int(value) +def _version_less_than(version: str, upper_bound: Tuple[int, int, int]) -> bool: + split_version = version.split(".") + try: + major = _str_to_int(split_version[0]) + minor = _str_to_int(split_version[1]) + patch = _str_to_int(split_version[2]) + except ValueError as err: + raise ValueError(f"Unable to parse version {version}.") from err + print(f"Actual: {(major, minor, patch)} and Compare: {upper_bound}") + return ( + major < upper_bound[0] + or (major == upper_bound[0] and minor < upper_bound[1]) + or ( + major == upper_bound[0] + and minor == upper_bound[1] + and patch < upper_bound[2] + ) + ) + + def _read_visium_software_version( gene_expression_path: Union[str, Path] ) -> Tuple[int, int, int]: diff --git a/apis/python/src/tiledbsoma/experimental/_xarray_backend.py b/apis/python/src/tiledbsoma/experimental/_xarray_backend.py index e709426ea3..bc7240948f 100644 --- a/apis/python/src/tiledbsoma/experimental/_xarray_backend.py +++ b/apis/python/src/tiledbsoma/experimental/_xarray_backend.py @@ -3,11 +3,25 @@ # # Licensed under the MIT License. import json -from typing import Any, Mapping, Optional, Tuple, Union +import warnings +from typing import Any, Mapping, Optional, Sequence, Tuple, Union import dask.array as da import numpy as np -from xarray import DataArray + +from ._util import _version_less_than + +try: + import spatialdata as sd + from spatialdata.models.models import DataTree +except ImportError as err: + warnings.warn("Experimental spatial exporter requires the spatialdatda package.") + raise err +try: + import xarray as xr +except ImportError as err: + warnings.warn("Experimental spatial exporter requires the spatialdatda package.") + raise err from .. import DenseNDArray from ..options._soma_tiledb_context import SOMATileDBContext @@ -112,7 +126,7 @@ def dense_nd_array_to_data_array( chunks: Optional[Tuple[int, ...]] = None, attrs: Optional[Mapping[str, Any]] = None, context: Optional[SOMATileDBContext] = None, -) -> DataArray: +) -> xr.DataArray: """Create a :class:`xarray.DataArray` that accesses a SOMA :class:`DenseNDarray` through dask. @@ -139,4 +153,19 @@ def dense_nd_array_to_data_array( fancy=False, ) - return DataArray(data, dims=dim_names, attrs=attrs) + return xr.DataArray(data, dims=dim_names, attrs=attrs) + + +def images_to_datatree(image_data_arrays: Sequence[xr.DataArray]) -> DataTree: + # If SpatialData version < 0.2.6 use the legacy xarray_datatree implementation + # of the DataTree. + if _version_less_than(sd.__version__, (0, 2, 5)): + return DataTree.from_dict( + {f"scale{index}": image for index, image in enumerate(image_data_arrays)} + ) + return DataTree.from_dict( + { + f"scale{index}": xr.Dataset({"image": image}) + for index, image in enumerate(image_data_arrays) + } + ) diff --git a/apis/python/src/tiledbsoma/experimental/outgest.py b/apis/python/src/tiledbsoma/experimental/outgest.py index 1fe776590a..174a32cc3a 100644 --- a/apis/python/src/tiledbsoma/experimental/outgest.py +++ b/apis/python/src/tiledbsoma/experimental/outgest.py @@ -2,17 +2,29 @@ # Copyright (c) 2024 TileDB, Inc # # Licensed under the MIT License. -from typing import Dict, Optional, Tuple, Union - -import geopandas as gpd +import warnings +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union + +try: + import geopandas as gpd +except ImportError as err: + warnings.warn("Experimental spatial outgestor requires the geopandas package.") + raise err import pandas as pd import somacore -import spatialdata as sd -import xarray as xr + +try: + import spatialdata as sd +except ImportError as err: + warnings.warn("Experimental spatial outgestor requires the spatialdata package.") + raise err from .. import MultiscaleImage, PointCloudDataFrame from .._constants import SOMA_JOINID -from ._xarray_backend import dense_nd_array_to_data_array +from ._xarray_backend import dense_nd_array_to_data_array, images_to_datatree + +if TYPE_CHECKING: + from spatialdata.models.models import DataArray, DataTree def _convert_axis_names( @@ -195,7 +207,7 @@ def to_spatial_data_image( scene_id: str, scene_dim_map: Dict[str, str], transform: somacore.CoordinateTransform, -) -> xr.DataArray: +) -> "DataArray": """Export a level of a :class:`MultiscaleImage` to a :class:`spatialdata.Image2DModel` or :class:`spatialdata.Image3DModel`. """ @@ -264,7 +276,7 @@ def to_spatial_data_multiscale_image( scene_id: str, scene_dim_map: Dict[str, str], transform: somacore.CoordinateTransform, -) -> xr.DataTree: +) -> "DataTree": """Export a MultiscaleImage to a DataTree.""" # Check for channel axis. @@ -319,22 +331,15 @@ def to_spatial_data_multiscale_image( for scale_transform in sd_scale_transforms ) - # Create the datatree - image_datasets = { - f"scale{index}": xr.Dataset( - { - "image": dense_nd_array_to_data_array( - uri=image.level_uri(index), - dim_names=new_axis_names, - attrs={ - "transform": {scene_id: spatial_data_transformations[index]} - }, - context=image.context, - ) - } + # Create a sequence of resolution level. + image_data_arrays = tuple( + dense_nd_array_to_data_array( + uri=image.level_uri(index), + dim_names=new_axis_names, + attrs={"transform": {scene_id: spatial_data_transformations[index]}}, + context=image.context, ) for index, (soma_name, val) in enumerate(image.levels().items()) - } - multiscale_image = xr.DataTree.from_dict(image_datasets) + ) - return multiscale_image + return images_to_datatree(image_data_arrays) diff --git a/apis/python/tests/test_export_multiscale_image.py b/apis/python/tests/test_export_multiscale_image.py index 652fd9bfb5..ab0d0dc9b2 100644 --- a/apis/python/tests/test_export_multiscale_image.py +++ b/apis/python/tests/test_export_multiscale_image.py @@ -9,7 +9,6 @@ soma_outgest = pytest.importorskip("tiledbsoma.experimental.outgest") sd = pytest.importorskip("spatialdata") -xr = pytest.importorskip("xarray") @pytest.fixture(scope="module") @@ -119,7 +118,7 @@ def test_export_image_level_to_spatial_data( transform=transform, ) - assert isinstance(image2d, xr.DataArray) + assert isinstance(image2d, sd.models.models.DataArray) # Validate the model. schema = sd.models.get_model(image2d) @@ -200,7 +199,7 @@ def test_export_full_image_to_spatial_data( transform=transform, ) - assert isinstance(image2d, xr.DataTree) + assert isinstance(image2d, sd.models.models.DataTree) # Validate the model. schema = sd.models.get_model(image2d) From 44226aa5adb99772a6f90a8d5f96748f4af8466f Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Fri, 22 Nov 2024 08:39:14 -0500 Subject: [PATCH 4/4] Remove stray print statements --- apis/python/src/tiledbsoma/experimental/_util.py | 1 - apis/python/tests/test_export_multiscale_image.py | 1 - 2 files changed, 2 deletions(-) diff --git a/apis/python/src/tiledbsoma/experimental/_util.py b/apis/python/src/tiledbsoma/experimental/_util.py index 24211ae725..13e03c43c6 100644 --- a/apis/python/src/tiledbsoma/experimental/_util.py +++ b/apis/python/src/tiledbsoma/experimental/_util.py @@ -24,7 +24,6 @@ def _version_less_than(version: str, upper_bound: Tuple[int, int, int]) -> bool: patch = _str_to_int(split_version[2]) except ValueError as err: raise ValueError(f"Unable to parse version {version}.") from err - print(f"Actual: {(major, minor, patch)} and Compare: {upper_bound}") return ( major < upper_bound[0] or (major == upper_bound[0] and minor < upper_bound[1]) diff --git a/apis/python/tests/test_export_multiscale_image.py b/apis/python/tests/test_export_multiscale_image.py index ab0d0dc9b2..0c56452d86 100644 --- a/apis/python/tests/test_export_multiscale_image.py +++ b/apis/python/tests/test_export_multiscale_image.py @@ -208,7 +208,6 @@ def test_export_full_image_to_spatial_data( # Check the correct data exists. for index in range(3): data_array = image2d[f"scale{index}"]["image"] - print(f"{index}: {data_array}") # Check data. result = data_array.data.compute()