Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Fix some dense+2.27 failing test cases #3265

Merged
merged 8 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 36 additions & 17 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,25 @@

index_column_schema = []
index_column_data = {}

for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())
index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
# [0] core max domain lo
# [1] core max domain hi
# [2] core extent parameter
# If present, these next two signal to use the current-domain feature:
# [3] core current domain lo
# [4] core current domain hi

if dim_shape is None:
raise ValueError("DenseNDArray shape slots must be numeric")

if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
Expand All @@ -117,11 +133,6 @@

if dim_shape == 0:
raise ValueError("DenseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

index_column_data[pa_field.name] = [
0,
Expand All @@ -138,8 +149,7 @@
TileDBCreateOptions.from_platform_config(platform_config),
)

index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]
index_column_schema.append(pa_field)
index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down Expand Up @@ -349,16 +359,25 @@
dim_shape: Optional[int],
create_options: TileDBCreateOptions,
) -> Tuple[int, int]:
"""Given a user-specified shape along a particular dimension, returns a tuple of
the TileDB capacity and extent for that dimension, suitable for schema creation.
The user-specified shape cannot be ``None`` for :class:`DenseNDArray`.
"""Given a user-specified shape (maybe ``None``) along a particular dimension,
returns a tuple of the TileDB capacity and extent for that dimension, suitable
for schema creation. If the user-specified shape is None, the largest possible
int64 is returned for the capacity -- which is particularly suitable for
maxdomain.
"""
if dim_shape is None or dim_shape <= 0:
raise ValueError(
"SOMADenseNDArray shape must be a non-zero-length tuple of positive ints"
)

dim_capacity = dim_shape
dim_extent = min(dim_shape, create_options.dim_tile(dim_name, 2048))
if dim_shape is None:
dim_capacity = 2**63 - 1
dim_extent = min(dim_capacity, create_options.dim_tile(dim_name, 2048))

Check warning on line 370 in apis/python/src/tiledbsoma/_dense_nd_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dense_nd_array.py#L369-L370

Added lines #L369 - L370 were not covered by tests
# For core: "domain max expanded to multiple of tile extent exceeds max value
# representable by domain type. Reduce domain max by 1 tile extent to allow for
# expansion."
dim_capacity -= dim_extent

Check warning on line 374 in apis/python/src/tiledbsoma/_dense_nd_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dense_nd_array.py#L374

Added line #L374 was not covered by tests
else:
if dim_shape <= 0:
raise ValueError(
"SOMASparseNDArray shape must be a non-zero-length tuple of positive ints or Nones"
)
dim_capacity = dim_shape
dim_extent = min(dim_shape, create_options.dim_tile(dim_name, 2048))

return (dim_capacity, dim_extent)
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def create(
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())

index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
Expand Down Expand Up @@ -504,7 +503,8 @@ def _dim_capacity_and_extent(
"""Given a user-specified shape (maybe ``None``) along a particular dimension,
returns a tuple of the TileDB capacity and extent for that dimension, suitable
for schema creation. If the user-specified shape is None, the largest possible
int64 is returned for the capacity.
int64 is returned for the capacity -- which is particularly suitable for
maxdomain.
"""
if dim_shape is None:
dim_capacity = 2**63 - 1
Expand Down
9 changes: 6 additions & 3 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@
NotCreateableError,
SOMAError,
)
from .._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from .._flags import (
DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN,
NEW_SHAPE_FEATURE_FLAG_ENABLED,
)
from .._soma_array import SOMAArray
from .._soma_object import AnySOMAObject, SOMAObject
from .._tdb_handles import RawHandle
Expand Down Expand Up @@ -1322,7 +1325,7 @@
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())
elif cls.is_sparse:
elif cls.is_sparse or DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:

Check warning on line 1328 in apis/python/src/tiledbsoma/io/ingest.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/io/ingest.py#L1328

Added line #L1328 was not covered by tests
shape = tuple(None for _ in matrix.shape)
else:
shape = matrix.shape
Expand Down Expand Up @@ -1909,7 +1912,7 @@
else:
tensor = pa.Tensor.from_numpy(chunk.toarray())
if matrix.ndim == 2:
soma_ndarray.write((slice(i, i2), slice(None)), tensor)
soma_ndarray.write((slice(i, i2), slice(0, ncol)), tensor)
else:
soma_ndarray.write((slice(i, i2),), tensor)

Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def src_matrix(request):
],
indirect=True,
)
def test_io_create_from_matrix_Dense_nd_array(tmp_path, tdb_create_options, src_matrix):
def test_io_create_from_matrix_dense_nd_array(tmp_path, tdb_create_options, src_matrix):
"""
Test soma.io.from_matrix to a DenseNDArray.

Expand Down Expand Up @@ -94,7 +94,7 @@ def test_io_create_from_matrix_Dense_nd_array(tmp_path, tdb_create_options, src_
],
indirect=True,
)
def test_io_create_from_matrix_Sparse_nd_array(
def test_io_create_from_matrix_sparse_nd_array(
tmp_path, tdb_create_options, src_matrix
):
"""
Expand Down
7 changes: 6 additions & 1 deletion apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,12 @@ def test_dense_nd_array_basics(tmp_path):

with tiledbsoma.DenseNDArray.open(uri) as dnda:
assert dnda.shape == (100, 200)
assert dnda.maxshape == (100, 200)
if tiledbsoma._flags.DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
assert len(dnda.maxshape)
assert dnda.maxshape[0] > 2**62
assert dnda.maxshape[1] > 2**62
else:
assert dnda.maxshape == (100, 200)

assert dnda.non_empty_domain() == ((0, 0), (0, 0))

Expand Down
2 changes: 1 addition & 1 deletion apis/r/R/SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SOMADenseNDArray <- R6::R6Class(
read_arrow_table = function(
coords = NULL,
result_order = "auto",
log_level = "warn"
log_level = "auto"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this meant to be included in this PR?

Copy link
Member Author

@johnkerl johnkerl Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it was -- I should have commented. It actually overrides the environment variable SPDLOG_LEVEL and makes requested log statements NOT appear :(. This has already been fixed for other classes except this one. I thought this was a nice little thing to include in this 'misc. bits' PR.

Sorry for the lack of clarity!

) {
private$check_open_for_read()

Expand Down
Loading