Skip to content

Commit

Permalink
[python] Use bindings for DenseNDArray readpath
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Feb 13, 2024
1 parent 5fe06d0 commit 229e297
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 32 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/_arrow_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def tiledb_schema_to_arrow(
if attr.enum_label is not None: # enumerated
if A is None:
A = tiledb.open(uri, ctx=ctx)
info = A.enum(name)
info = A.enum(attr.enum_label)
arrow_schema_dict[name] = pa.dictionary(
index_type=arrow_type_from_tiledb_dtype(attr.dtype),
value_type=arrow_type_from_tiledb_dtype(
Expand Down
29 changes: 24 additions & 5 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
from typing_extensions import Self

from . import _util
from . import pytiledbsoma as clib
from ._common_nd_array import NDArray
from ._exception import SOMAError
from ._tdb_handles import ArrayWrapper
from ._tdb_handles import DenseNDArrayWrapper
from ._util import dense_indices_to_shape
from .options._tiledb_create_options import TileDBCreateOptions

Expand Down Expand Up @@ -72,7 +73,7 @@ class DenseNDArray(NDArray, somacore.DenseNDArray):

__slots__ = ()

_reader_wrapper_type = ArrayWrapper
_reader_wrapper_type = DenseNDArrayWrapper

def read(
self,
Expand Down Expand Up @@ -107,7 +108,7 @@ def read(
Lifecycle:
Experimental.
"""
del partitions, platform_config # Currently unused.
del partitions # Currently unused.
self._check_open_read()
result_order = somacore.ResultOrder(result_order)

Expand All @@ -123,13 +124,31 @@ def read(
#
# The only exception is if the array has been created but no data have been written at
# all, in which case the best we can do is use the schema shape.
data_shape = self._handle.schema.shape
handle: clib.DenseNDArrayWrapper = self._handle._handle

data_shape = handle.shape
ned = self.non_empty_domain()
if ned is not None:
data_shape = tuple(slot[1] + 1 for slot in ned)
target_shape = dense_indices_to_shape(coords, data_shape, result_order)

sr = self._soma_reader(result_order=result_order)
config = handle.config().copy()
config.update(platform_config or {})

ts = None
if handle.timestamp is not None:
ts = (0, handle.timestamp)

sr = clib.SOMADenseNDArray.open(
uri=handle.uri,
mode=clib.OpenMode.read,
platform_config=config,
column_names=[],
result_order=_util.to_clib_result_order(result_order),
timestamp=ts,
)

# sr = self._soma_reader(result_order=result_order)

self._set_reader_coords(sr, coords)

Expand Down
93 changes: 69 additions & 24 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,15 @@ def _open_with_clib_wrapper(
mode: options.OpenMode,
context: SOMATileDBContext,
timestamp: Optional[OpenTimestamp] = None,
) -> "DataFrameWrapper":
) -> "SOMAArrayWrapper":
open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
config = {k: str(v) for k, v in context.tiledb_config.items()}
timestamp_ms = context._open_timestamp_ms(timestamp)
obj = clib.SOMAObject.open(uri, open_mode, config, (0, timestamp_ms))
if obj.type == "SOMADataFrame":
return DataFrameWrapper._from_soma_object(obj, context)
elif obj.type == "SOMADenseNDArray":
return DenseNDArrayWrapper._from_soma_object(obj, context)
raise SOMAError(f"clib.SOMAObject {obj.type!r} not yet supported")


Expand Down Expand Up @@ -309,8 +311,8 @@ def _do_initial_reads(self, reader: tiledb.Group) -> None:
}


class DataFrameWrapper(Wrapper[clib.SOMADataFrame]):
"""Wrapper around a Pybind11 SOMADataFrame handle."""
class SOMAArrayWrapper(Wrapper[clib.SOMAArray]):
"""Base class for Pybind11 SOMAArrayWrapper handles."""

@classmethod
def _opener(
Expand All @@ -319,19 +321,8 @@ def _opener(
mode: options.OpenMode,
context: SOMATileDBContext,
timestamp: int,
) -> clib.SOMADataFrame:
open_mode = clib.OpenMode.read if mode == "r" else clib.OpenMode.write
config = {k: str(v) for k, v in context.tiledb_config.items()}
column_names: List[str] = []
result_order = clib.ResultOrder.automatic
return clib.SOMADataFrame.open(
uri,
open_mode,
config,
column_names,
result_order,
(0, timestamp),
)
) -> clib.SOMAArray:
raise NotImplementedError

# Covariant types should normally not be in parameters, but this is for
# internal use only so it's OK.
Expand All @@ -354,17 +345,13 @@ def meta(self) -> "MetadataWrapper":

@property
def ndim(self) -> int:
return len(self._handle.index_column_names)

@property
def count(self) -> int:
return int(self._handle.count)
return len(self._handle.dimension_names)

def _cast_domain(
self, domain: Callable[[str, DTypeLike], Tuple[object, object]]
) -> Tuple[Tuple[object, object], ...]:
result = []
for name in self._handle.index_column_names:
for name in self._handle.dimension_names:
dtype = self._handle.schema.field(name).type
if pa.types.is_timestamp(dtype):
np_dtype = np.dtype(dtype.to_pandas_dtype())
Expand Down Expand Up @@ -395,12 +382,12 @@ def non_empty_domain(self) -> Tuple[Tuple[object, object], ...]:
@property
def attr_names(self) -> Tuple[str, ...]:
return tuple(
f.name for f in self.schema if f.name not in self._handle.index_column_names
f.name for f in self.schema if f.name not in self._handle.dimension_names
)

@property
def dim_names(self) -> Tuple[str, ...]:
return tuple(self._handle.index_column_names)
return tuple(self._handle.dimension_names)

def enum(self, label: str) -> tiledb.Enumeration:
# The DataFrame handle may either be ArrayWrapper or DataFrameWrapper.
Expand All @@ -409,6 +396,64 @@ def enum(self, label: str) -> tiledb.Enumeration:
raise NotImplementedError


class DataFrameWrapper(SOMAArrayWrapper, Wrapper[clib.SOMADataFrame]):
    """Wrapper around a Pybind11 SOMADataFrame handle."""

    @classmethod
    def _opener(
        cls,
        uri: str,
        mode: options.OpenMode,
        context: SOMATileDBContext,
        timestamp: int,
    ) -> clib.SOMADataFrame:
        """Open ``uri`` via ``clib.SOMADataFrame.open`` and return the handle.

        Args:
            uri: Location of the dataframe to open.
            mode: ``"r"`` selects ``clib.OpenMode.read``; any other value
                selects ``clib.OpenMode.write``.
            context: Supplies ``tiledb_config``; every value is passed to
                clib as a string.
            timestamp: Upper bound of the ``(0, timestamp)`` pair handed to
                clib.
        """
        if mode == "r":
            clib_mode = clib.OpenMode.read
        else:
            clib_mode = clib.OpenMode.write

        # clib expects a str -> str mapping, so stringify every config value.
        str_config = {}
        for key, value in context.tiledb_config.items():
            str_config[key] = str(value)

        # An empty column list and automatic result order are always used here.
        no_columns = []
        order = clib.ResultOrder.automatic
        time_range = (0, timestamp)
        return clib.SOMADataFrame.open(
            uri, clib_mode, str_config, no_columns, order, time_range
        )

    @property
    def count(self) -> int:
        """The underlying handle's ``count``, converted to ``int``."""
        raw_count = self._handle.count
        return int(raw_count)


class DenseNDArrayWrapper(SOMAArrayWrapper, Wrapper[clib.SOMADenseNDArray]):
    """Wrapper around a Pybind11 SOMADenseNDArray handle."""

    @classmethod
    def _opener(
        cls,
        uri: str,
        mode: options.OpenMode,
        context: SOMATileDBContext,
        timestamp: int,
    ) -> clib.SOMADenseNDArray:
        """Open ``uri`` via ``clib.SOMADenseNDArray.open`` and return the handle.

        Args:
            uri: Location of the dense array to open.
            mode: ``"r"`` selects ``clib.OpenMode.read``; any other value
                selects ``clib.OpenMode.write``.
            context: Supplies ``tiledb_config``; every value is passed to
                clib as a string.
            timestamp: Upper bound of the ``(0, timestamp)`` pair handed to
                clib.
        """
        if mode == "r":
            clib_mode = clib.OpenMode.read
        else:
            clib_mode = clib.OpenMode.write

        # clib expects a str -> str mapping, so stringify every config value.
        str_config = {}
        for key, value in context.tiledb_config.items():
            str_config[key] = str(value)

        # An empty column list and automatic result order are always used here.
        no_columns = []
        order = clib.ResultOrder.automatic
        time_range = (0, timestamp)
        return clib.SOMADenseNDArray.open(
            uri, clib_mode, str_config, no_columns, order, time_range
        )

    @property
    def shape(self) -> Tuple[int, ...]:
        """The underlying handle's ``shape``, converted to a tuple."""
        raw_shape = self._handle.shape
        return tuple(raw_shape)


class _DictMod(enum.Enum):
"""State machine to keep track of modifications to a dictionary.
Expand Down
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/_tiledb_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def open(
def __init__(
self,
# TODO DataFrameWrapper should be _WrapperType_co
handle: Union[_WrapperType_co, _tdb_handles.DataFrameWrapper],
handle: Union[_WrapperType_co, _tdb_handles.SOMAArrayWrapper],
*,
_dont_call_this_use_create_or_open_instead: str = "unset",
):
Expand Down Expand Up @@ -128,7 +128,7 @@ def __init__(

_wrapper_type: Type[_WrapperType_co]
_reader_wrapper_type: Union[
Type[_WrapperType_co], Type[_tdb_handles.DataFrameWrapper]
Type[_WrapperType_co], Type[_tdb_handles.SOMAArrayWrapper]
]
"""Class variable of the Wrapper class used to open this object type."""

Expand Down
13 changes: 13 additions & 0 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,23 @@ void load_soma_array(py::module &m) {
"result_order"_a = ResultOrder::automatic)

.def("reopen", py::overload_cast<OpenMode, std::optional<std::pair<uint64_t, uint64_t>>>(&SOMAArray::open))

.def("close", &SOMAArray::close)

.def_property_readonly("closed", [](SOMAArray& reader) -> bool {
return not reader.is_open();
})

.def_property_readonly("mode", [](SOMAArray& reader){
return reader.mode() == OpenMode::read ? "r" : "w";
})

.def_property_readonly("schema", [](SOMAArray& reader) -> py::object {
auto pa = py::module::import("pyarrow");
auto pa_schema_import = pa.attr("Schema").attr("_import_from_c");
return pa_schema_import(py::capsule(reader.arrow_schema().get()));
})

.def("config", [](SOMAArray& reader) -> py::dict {
return py::cast(reader.config());
})
Expand Down Expand Up @@ -568,6 +573,7 @@ void load_soma_array(py::module &m) {
throw TileDBSOMAError("Unsupported dtype for nonempty domain.");
}
})

.def("domain", [](SOMAArray& reader, std::string name, py::dtype dtype) {
switch (np_to_tdb_dtype(dtype)) {
case TILEDB_UINT64:
Expand Down Expand Up @@ -612,11 +618,16 @@ void load_soma_array(py::module &m) {
throw TileDBSOMAError("Unsupported dtype for Dimension's domain");
}
})

.def_property_readonly("dimension_names", &SOMAArray::dimension_names)

.def("set_metadata", &SOMAArray::set_metadata)

.def("delete_metadata", &SOMAArray::delete_metadata)

.def("get_metadata",
py::overload_cast<const std::string&>(&SOMAArray::get_metadata))

.def_property_readonly("meta", [](SOMAArray&soma_dataframe) -> py::dict {
py::dict results;

Expand All @@ -636,7 +647,9 @@ void load_soma_array(py::module &m) {
}
return results;
})

.def("has_metadata", &SOMAArray::has_metadata)

.def("metadata_num", &SOMAArray::metadata_num);
}
} // namespace tiledbsoma
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/soma_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ void load_soma_object(py::module &m) {

try{
auto obj = SOMAObject::open(uri, mode, config, timestamp);

if(!obj->type().has_value())
TPY_ERROR_LOC("Invalid SOMAObject");

if (obj->type() == "SOMADataFrame")
return py::cast(dynamic_cast<SOMADataFrame&>(*obj));
else if (obj->type() == "SOMADenseNDArray")
return py::cast(dynamic_cast<SOMADenseNDArray&>(*obj));
}
catch(...){
TPY_ERROR_LOC("SOMAObject not handled in Python API yet.");
Expand Down
8 changes: 8 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ def test_dataframe(tmp_path, arrow_schema):
assert sdf.count == 5
assert len(sdf) == 5

# Ensure read mode uses clib object
with soma.DataFrame.open(tmp_path.as_posix(), "r") as A:
assert isinstance(A._handle._handle, soma.pytiledbsoma.SOMADataFrame)

# Ensure write mode uses Python object
with soma.DataFrame.open(tmp_path.as_posix(), "w") as A:
assert isinstance(A._handle._handle, tiledb.Array)


def test_dataframe_with_float_dim(tmp_path, arrow_schema):
sdf = soma.DataFrame.create(
Expand Down
8 changes: 8 additions & 0 deletions apis/python/tests/test_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def test_dense_nd_array_create_ok(
with tiledb.open(tmp_path.as_posix()) as A:
assert not A.schema.sparse

# Ensure read mode uses clib object
with soma.DenseNDArray.open(tmp_path.as_posix(), "r") as A:
assert isinstance(A._handle._handle, soma.pytiledbsoma.SOMADenseNDArray)

# Ensure write mode uses Python object
with soma.DenseNDArray.open(tmp_path.as_posix(), "w") as A:
assert isinstance(A._handle._handle, tiledb.Array)


@pytest.mark.parametrize("shape", [(10,)])
@pytest.mark.parametrize("element_type", NDARRAY_ARROW_TYPES_NOT_SUPPORTED)
Expand Down
9 changes: 9 additions & 0 deletions libtiledbsoma/src/soma/soma_dense_ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ class SOMADenseNDArray : public SOMAArray {
timestamp) {
}

SOMADenseNDArray(const SOMAArray& other)
: SOMAArray(other) {

Check warning on line 164 in libtiledbsoma/src/soma/soma_dense_ndarray.h

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_dense_ndarray.h#L163-L164

Added lines #L163 - L164 were not covered by tests
}

SOMADenseNDArray() = delete;
SOMADenseNDArray(const SOMADenseNDArray&) = default;
SOMADenseNDArray(SOMADenseNDArray&&) = delete;
~SOMADenseNDArray() = default;

using SOMAArray::open;

/**
Expand Down
3 changes: 3 additions & 0 deletions libtiledbsoma/src/soma/soma_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "soma_array.h"
#include "soma_dataframe.h"
#include "soma_dense_ndarray.h"

namespace tiledbsoma {

Expand Down Expand Up @@ -35,6 +36,8 @@ std::unique_ptr<SOMAObject> SOMAObject::open(

if (*(array_->type()) == "SOMADataFrame")
return std::make_unique<SOMADataFrame>(*array_);
else if (*(array_->type()) == "SOMADenseNDArray")
return std::make_unique<SOMADenseNDArray>(*array_);

Check warning on line 40 in libtiledbsoma/src/soma/soma_object.cc

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_object.cc#L39-L40

Added lines #L39 - L40 were not covered by tests
else
throw TileDBSOMAError(
"Invalid SOMAObject passed to SOMAObject::open");
Expand Down

0 comments on commit 229e297

Please sign in to comment.