Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Add PointCloudDataFrame to SpatialData points model #3357

Merged
merged 2 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions apis/python/src/tiledbsoma/experimental/outgest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Dict, Optional, Tuple, Union

import geopandas as gpd
import pandas as pd
import somacore
import spatialdata as sd
import xarray as xr
Expand Down Expand Up @@ -80,6 +81,45 @@ def _transform_to_spatial_data(
)


def to_spatial_data_points(
    points: PointCloudDataFrame,
    *,
    scene_id: str,
    scene_dim_map: Dict[str, str],
    transform: somacore.CoordinateTransform,
    soma_joinid_name: str,
) -> pd.DataFrame:
    """Export a :class:`PointCloudDataFrame` to a pandas data frame of points.

    The returned data frame carries the point-to-scene transformation in its
    ``attrs`` so that it can be used as SpatialData points.

    Args:
        points: The point cloud data frame to convert to SpatialData points.
        scene_id: The ID of the scene this point cloud dataframe is from.
        scene_dim_map: A mapping from the axis names of the scene to the
            corresponding SpatialData dimension names.
        transform: The transformation from the coordinate space of the scene this
            point cloud is in to the coordinate space of the point cloud.
        soma_joinid_name: The name to use for the SOMA joinid column.

    Returns:
        A :class:`pandas.DataFrame` with the contents of ``points``, the SOMA
        joinid column renamed to ``soma_joinid_name``, and
        ``attrs["transform"]`` set to a dictionary mapping ``scene_id`` to the
        SpatialData transformation from the points to the scene.
    """

    # Only the axis-name -> dimension-name mapping is needed here; the list of
    # converted axis names returned alongside it is not used.
    orig_axis_names = points.coordinate_space.axis_names
    _, points_dim_map = _convert_axis_names(orig_axis_names)

    # Create the SpatialData transform from the points to the Scene (inverse of the
    # transform SOMA stores).
    transforms = {
        scene_id: _transform_to_spatial_data(
            transform.inverse_transform(), points_dim_map, scene_dim_map
        )
    }

    # Read the pandas dataframe, rename SOMA_JOINID, add metadata, and return.
    # NOTE(review): the spatial columns are not renamed via ``points_dim_map``;
    # presumably the stored axis names already match the SpatialData dimension
    # names -- confirm against callers.
    df: pd.DataFrame = points.read().concat().to_pandas()
    df.rename(columns={SOMA_JOINID: soma_joinid_name}, inplace=True)
    df.attrs["transform"] = transforms
    return df


def to_spatial_data_shapes(
points: PointCloudDataFrame,
*,
Expand Down
37 changes: 36 additions & 1 deletion apis/python/tests/test_export_point_cloud_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
import shapely
Expand Down Expand Up @@ -47,7 +48,7 @@ def test_export_to_shapes_2d(sample_point_cloud_dataframe_2d):
),
)

# Validate that this is validate storage for the SpatialData "Shapes"
# Check this is valid storage for the SpatialData "Shapes" model.
spatialdata.models.ShapesModel.validate(shape)

# Check the dataframe.
Expand All @@ -68,3 +69,37 @@ def test_export_to_shapes_2d(sample_point_cloud_dataframe_2d):
print(f"{key}: {val}")
assert len(metadata) == 1
assert metadata["transform"] == {"scene0": spatialdata.transformations.Identity()}


def test_export_to_points_2d(sample_point_cloud_dataframe_2d):
    """Test exporting a simple point cloud to a SpatialData points model."""
    # Export PointCloudDataFrame to points.
    points = soma_outgest.to_spatial_data_points(
        sample_point_cloud_dataframe_2d,
        scene_id="scene0",
        scene_dim_map={"x_scene": "x", "y_scene": "y"},
        soma_joinid_name="obs_id",
        transform=somacore.IdentityTransform(
            ("x_scene", "y_scene"), ("x_points", "y_points")
        ),
    )

    # Check this is valid storage for the SpatialData "Points" model.
    spatialdata.models.PointsModel.validate(points)

    # Check the dataframe.
    expected = pd.DataFrame.from_dict(
        {
            "x": [0, 0, 0.5, 0.5],
            "y": [0, 0.5, 0, 0.5],
            "obs_id": np.arange(4),
        }
    )
    # ``all(df)`` would iterate over the column labels (always truthy), so
    # reduce the element-wise comparison over every cell instead.
    assert (points == expected).all(axis=None)

    # Check the metadata.
    metadata = dict(points.attrs)
    for key, val in metadata.items():
        print(f"{key}: {val}")
    assert len(metadata) == 1
    assert metadata["transform"] == {"scene0": spatialdata.transformations.Identity()}
Loading