Skip to content

Commit

Permalink
[c++] Pass ColumnIndexInfo as ArrowTable (#2472)
Browse files Browse the repository at this point in the history
* Add ArrowAdapter::_create_dim
---------

Co-authored-by: Dirk Eddelbuettel <[email protected]>
  • Loading branch information
nguyenv and eddelbuettel authored Apr 24, 2024
1 parent 1a59abc commit abe428a
Show file tree
Hide file tree
Showing 17 changed files with 246 additions and 152 deletions.
24 changes: 12 additions & 12 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,17 @@ def create(
f"if domain is specified, it must have the same length as index_column_names; got {ndom} != {nidx}"
)

domains = []
extents = []
index_column_schema = []
index_column_data = {}

for index_column_name, slot_domain in zip(index_column_names, domain):
pa_type = schema.field(index_column_name).type
pa_field = schema.field(index_column_name)
dtype = _arrow_types.tiledb_type_from_arrow_type(
pa_type, is_indexed_column=True
pa_field.type, is_indexed_column=True
)

slot_domain = _fill_out_slot_domain(
slot_domain, index_column_name, pa_type, dtype
slot_domain, index_column_name, pa_field.type, dtype
)

extent = _find_extent_for_domain(
Expand All @@ -249,11 +250,12 @@ def create(
slot_domain,
)

domains.append(pa.array(slot_domain, type=pa_type))
extents.append(pa.array([extent], type=pa_type))
index_column_schema.append(pa_field)
index_column_data[pa_field.name] = [*slot_domain, extent]

domains = pa.StructArray.from_arrays(domains, names=index_column_names)
extents = pa.StructArray.from_arrays(extents, names=index_column_names)
index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
)

plt_cfg = None
if platform_config:
Expand Down Expand Up @@ -282,9 +284,7 @@ def create(
clib.SOMADataFrame.create(
uri,
schema=schema,
index_column_names=index_column_names,
domains=domains,
extents=extents,
index_column_info=index_column_info,
ctx=context.native_context,
platform_config=plt_cfg,
timestamp=(0, timestamp_ms),
Expand Down
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class NotCreateableError(SOMAError):
pass


def is_not_createable_error(e: tiledb.TileDBError) -> bool:
def is_not_createable_error(e: Union[SOMAError, tiledb.TileDBError]) -> bool:
"""Given a TileDBError, return true if it indicates the object cannot be created
Lifecycle: experimental
Expand Down Expand Up @@ -132,7 +132,7 @@ def is_not_createable_error(e: tiledb.TileDBError) -> bool:
return False


def is_duplicate_group_key_error(e: tiledb.TileDBError) -> bool:
def is_duplicate_group_key_error(e: Union[SOMAError, tiledb.TileDBError]) -> bool:
"""Given a TileDBError, return try if it indicates a duplicate member
add request in a tiledb.Group.
Expand Down
30 changes: 13 additions & 17 deletions apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ void load_soma_dataframe(py::module& m) {
"create",
[](std::string_view uri,
py::object py_schema,
std::vector<std::string> index_columns_names,
py::object py_domains,
py::object py_extents,
py::object index_column_info,
std::shared_ptr<SOMAContext> context,
std::optional<PlatformConfig> platform_config,
std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
Expand All @@ -80,22 +78,22 @@ void load_soma_dataframe(py::module& m) {
}
}

ArrowArray domains;
uintptr_t domains_ptr = (uintptr_t)(&domains);
py_domains.attr("_export_to_c")(domains_ptr);

ArrowArray extents;
uintptr_t extents_ptr = (uintptr_t)(&extents);
py_extents.attr("_export_to_c")(extents_ptr);
ArrowSchema index_column_schema;
ArrowArray index_column_array;
uintptr_t
index_column_schema_ptr = (uintptr_t)(&index_column_schema);
uintptr_t
index_column_array_ptr = (uintptr_t)(&index_column_array);
index_column_info.attr("_export_to_c")(
index_column_array_ptr, index_column_schema_ptr);

try {
SOMADataFrame::create(
uri,
std::make_unique<ArrowSchema>(schema),
ColumnIndexInfo(
index_columns_names,
std::make_shared<ArrowArray>(domains),
std::make_shared<ArrowArray>(extents)),
ArrowTable(
std::make_unique<ArrowArray>(index_column_array),
std::make_unique<ArrowSchema>(index_column_schema)),
context,
platform_config,
timestamp);
Expand All @@ -108,9 +106,7 @@ void load_soma_dataframe(py::module& m) {
"uri"_a,
py::kw_only(),
"schema"_a,
"index_column_names"_a,
"domains"_a,
"extents"_a,
"index_column_info"_a,
"ctx"_a,
"platform_config"_a,
"timestamp"_a = py::none())
Expand Down
27 changes: 21 additions & 6 deletions libtiledbsoma/src/soma/soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,15 @@ std::shared_ptr<SOMAExperiment> SOMACollection::add_new_experiment(
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::optional<PlatformConfig> platform_config) {
SOMAExperiment::create(
uri, std::move(schema), index_columns, ctx, platform_config);
uri,
std::move(schema),
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
ctx,
platform_config);
std::shared_ptr<SOMAExperiment> member = SOMAExperiment::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -128,8 +133,13 @@ std::shared_ptr<SOMAMeasurement> SOMACollection::add_new_measurement(
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns) {
SOMAMeasurement::create(uri, std::move(schema), index_columns, ctx);
ArrowTable index_columns) {
SOMAMeasurement::create(
uri,
std::move(schema),
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
ctx);
std::shared_ptr<SOMAMeasurement> member = SOMAMeasurement::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand All @@ -143,10 +153,15 @@ std::shared_ptr<SOMADataFrame> SOMACollection::add_new_dataframe(
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::optional<PlatformConfig> platform_config) {
SOMADataFrame::create(
uri, std::move(schema), index_columns, ctx, platform_config);
uri,
std::move(schema),
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
ctx,
platform_config);
std::shared_ptr<SOMADataFrame> member = SOMADataFrame::open(
uri, OpenMode::read, ctx);
this->set(std::string(uri), uri_type, std::string(key));
Expand Down
6 changes: 3 additions & 3 deletions libtiledbsoma/src/soma/soma_collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class SOMACollection : public SOMAGroup {
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::optional<PlatformConfig> platform_config = std::nullopt);

/**
Expand All @@ -175,7 +175,7 @@ class SOMACollection : public SOMAGroup {
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns);
ArrowTable index_columns);

/**
* Create and add a SOMADataFrame to the SOMACollection.
Expand All @@ -191,7 +191,7 @@ class SOMACollection : public SOMAGroup {
URIType uri_type,
std::shared_ptr<SOMAContext> ctx,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::optional<PlatformConfig> platform_config = std::nullopt);

/**
Expand Down
8 changes: 6 additions & 2 deletions libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@ using namespace tiledb;
void SOMADataFrame::create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config,
std::optional<TimestampRange> timestamp) {
auto tiledb_schema = ArrowAdapter::tiledb_schema_from_arrow_schema(
ctx->tiledb_ctx(), std::move(schema), index_columns, platform_config);
ctx->tiledb_ctx(),
std::move(schema),
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
platform_config);
SOMAArray::create(ctx, uri, tiledb_schema, "SOMADataFrame", timestamp);
}

Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/soma/soma_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class SOMADataFrame : public SOMAArray {
static void create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config = std::nullopt,
std::optional<TimestampRange> timestamp = std::nullopt);
Expand Down
5 changes: 3 additions & 2 deletions libtiledbsoma/src/soma/soma_experiment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ using namespace tiledb;
void SOMAExperiment::create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config,
std::optional<TimestampRange> timestamp) {
Expand All @@ -54,7 +54,8 @@ void SOMAExperiment::create(
SOMADataFrame::create(
exp_uri + "/obs",
std::move(schema),
index_columns,
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
ctx,
platform_config,
timestamp);
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/soma/soma_experiment.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class SOMAExperiment : public SOMACollection {
static void create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config = std::nullopt,
std::optional<TimestampRange> timestamp = std::nullopt);
Expand Down
5 changes: 3 additions & 2 deletions libtiledbsoma/src/soma/soma_measurement.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ using namespace tiledb;
void SOMAMeasurement::create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config,
std::optional<TimestampRange> timestamp) {
Expand All @@ -54,7 +54,8 @@ void SOMAMeasurement::create(
SOMADataFrame::create(
exp_uri + "/var",
std::move(schema),
index_columns,
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
ctx,
platform_config,
timestamp);
Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/src/soma/soma_measurement.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class SOMAMeasurement : public SOMACollection {
static void create(
std::string_view uri,
std::unique_ptr<ArrowSchema> schema,
ColumnIndexInfo index_columns,
ArrowTable index_columns,
std::shared_ptr<SOMAContext> ctx,
std::optional<PlatformConfig> platform_config = std::nullopt,
std::optional<TimestampRange> timestamp = std::nullopt);
Expand Down
Loading

0 comments on commit abe428a

Please sign in to comment.