Skip to content

Commit

Permalink
[python] Fix some dense+2.27 failing test cases (#3265)
Browse files Browse the repository at this point in the history
* Fix for dense domain/maxdomain with core 2.27

* improve a comment

* Fix a one-off unit-test issue with dense + 2.27

* Make this file loggable via `$SPDLOG_LEVEL` like other files

* misc neatens

* missed a spot

* spec-conformity

* fix ci
  • Loading branch information
johnkerl authored Oct 30, 2024
1 parent 7a9c67e commit 130ccc6
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 26 deletions.
53 changes: 36 additions & 17 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,25 @@ def create(

index_column_schema = []
index_column_data = {}

for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())
index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
# [0] core max domain lo
# [1] core max domain hi
# [2] core extent parameter
# If present, these next two signal to use the current-domain feature:
# [3] core current domain lo
# [4] core current domain hi

if dim_shape is None:
raise ValueError("DenseNDArray shape slots must be numeric")

if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
Expand All @@ -117,11 +133,6 @@ def create(

if dim_shape == 0:
raise ValueError("DenseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

index_column_data[pa_field.name] = [
0,
Expand All @@ -138,8 +149,7 @@ def create(
TileDBCreateOptions.from_platform_config(platform_config),
)

index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]
index_column_schema.append(pa_field)
index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down Expand Up @@ -349,16 +359,25 @@ def _dim_capacity_and_extent(
dim_shape: Optional[int],
create_options: TileDBCreateOptions,
) -> Tuple[int, int]:
"""Given a user-specified shape along a particular dimension, returns a tuple of
the TileDB capacity and extent for that dimension, suitable for schema creation.
The user-specified shape cannot be ``None`` for :class:`DenseNDArray`.
"""Given a user-specified shape (maybe ``None``) along a particular dimension,
returns a tuple of the TileDB capacity and extent for that dimension, suitable
for schema creation. If the user-specified shape is None, the largest possible
int64 is returned for the capacity -- which is particularly suitable for
maxdomain.
"""
if dim_shape is None or dim_shape <= 0:
raise ValueError(
"SOMADenseNDArray shape must be a non-zero-length tuple of positive ints"
)

dim_capacity = dim_shape
dim_extent = min(dim_shape, create_options.dim_tile(dim_name, 2048))
if dim_shape is None:
dim_capacity = 2**63 - 1
dim_extent = min(dim_capacity, create_options.dim_tile(dim_name, 2048))
# For core: "domain max expanded to multiple of tile extent exceeds max value
# representable by domain type. Reduce domain max by 1 tile extent to allow for
# expansion."
dim_capacity -= dim_extent
else:
if dim_shape <= 0:
raise ValueError(
"SOMASparseNDArray shape must be a non-zero-length tuple of positive ints or Nones"
)
dim_capacity = dim_shape
dim_extent = min(dim_shape, create_options.dim_tile(dim_name, 2048))

return (dim_capacity, dim_extent)
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def create(
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())

index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
Expand Down Expand Up @@ -504,7 +503,8 @@ def _dim_capacity_and_extent(
"""Given a user-specified shape (maybe ``None``) along a particular dimension,
returns a tuple of the TileDB capacity and extent for that dimension, suitable
for schema creation. If the user-specified shape is None, the largest possible
int64 is returned for the capacity.
int64 is returned for the capacity -- which is particularly suitable for
maxdomain.
"""
if dim_shape is None:
dim_capacity = 2**63 - 1
Expand Down
9 changes: 6 additions & 3 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@
NotCreateableError,
SOMAError,
)
from .._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from .._flags import (
DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN,
NEW_SHAPE_FEATURE_FLAG_ENABLED,
)
from .._soma_array import SOMAArray
from .._soma_object import AnySOMAObject, SOMAObject
from .._tdb_handles import RawHandle
Expand Down Expand Up @@ -1322,7 +1325,7 @@ def _create_from_matrix(
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())
elif cls.is_sparse:
elif cls.is_sparse or DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
shape = tuple(None for _ in matrix.shape)
else:
shape = matrix.shape
Expand Down Expand Up @@ -1909,7 +1912,7 @@ def _write_matrix_to_denseNDArray(
else:
tensor = pa.Tensor.from_numpy(chunk.toarray())
if matrix.ndim == 2:
soma_ndarray.write((slice(i, i2), slice(None)), tensor)
soma_ndarray.write((slice(i, i2), slice(0, ncol)), tensor)
else:
soma_ndarray.write((slice(i, i2),), tensor)

Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def src_matrix(request):
],
indirect=True,
)
def test_io_create_from_matrix_Dense_nd_array(tmp_path, tdb_create_options, src_matrix):
def test_io_create_from_matrix_dense_nd_array(tmp_path, tdb_create_options, src_matrix):
"""
Test soma.io.from_matrix to a DenseNDArray.
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_io_create_from_matrix_Dense_nd_array(tmp_path, tdb_create_options, src_
],
indirect=True,
)
def test_io_create_from_matrix_Sparse_nd_array(
def test_io_create_from_matrix_sparse_nd_array(
tmp_path, tdb_create_options, src_matrix
):
"""
Expand Down
7 changes: 6 additions & 1 deletion apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,12 @@ def test_dense_nd_array_basics(tmp_path):

with tiledbsoma.DenseNDArray.open(uri) as dnda:
assert dnda.shape == (100, 200)
assert dnda.maxshape == (100, 200)
if tiledbsoma._flags.DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
assert len(dnda.maxshape)
assert dnda.maxshape[0] > 2**62
assert dnda.maxshape[1] > 2**62
else:
assert dnda.maxshape == (100, 200)

assert dnda.non_empty_domain() == ((0, 0), (0, 0))

Expand Down
2 changes: 1 addition & 1 deletion apis/r/R/SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SOMADenseNDArray <- R6::R6Class(
read_arrow_table = function(
coords = NULL,
result_order = "auto",
log_level = "warn"
log_level = "auto"
) {
private$check_open_for_read()

Expand Down

0 comments on commit 130ccc6

Please sign in to comment.