diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 6a03aa5b2f..5a450e3e26 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -32,7 +32,7 @@ #include "arrow_adapter.h" #include "../soma/column_buffer.h" -#include "../utils/logger.h" +#include "logger.h" namespace tiledbsoma { @@ -417,29 +417,122 @@ Dimension ArrowAdapter::_create_dim( case TILEDB_DATETIME_SEC: case TILEDB_DATETIME_MS: case TILEDB_DATETIME_US: - case TILEDB_DATETIME_NS: - return Dimension::create( - *ctx, name, type, (uint64_t*)buff, (uint64_t*)buff + 2); - case TILEDB_INT8: + case TILEDB_DATETIME_NS: { + // Sadly we cannot put this in the centralized _create_dim_aux + // in the header file. That's because we need utils/logger.h + // -- which is a _fixed_ relative path from _this_ .cc file + // but a _varying_ relative path from all the places that + // #include arrow_adapter.h. Hence the code duplication in + // logging statements. 
:( + uint64_t* b = (uint64_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); + return Dimension::create(*ctx, name, type, b, b + 2); + } + case TILEDB_INT8: { + int8_t* b = (int8_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (int8_t*)buff); - case TILEDB_UINT8: + } + case TILEDB_UINT8: { + uint8_t* b = (uint8_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (uint8_t*)buff); - case TILEDB_INT16: + } + case TILEDB_INT16: { + int16_t* b = (int16_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (int16_t*)buff); - case TILEDB_UINT16: + } + case TILEDB_UINT16: { + uint16_t* b = (uint16_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (uint16_t*)buff); - case TILEDB_INT32: + } + case TILEDB_INT32: { + int32_t* b = (int32_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (int32_t*)buff); - case TILEDB_UINT32: + } + case TILEDB_UINT32: { + uint32_t* b = (uint32_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (uint32_t*)buff); - case TILEDB_INT64: + } + case TILEDB_INT64: { + int64_t* b = (int64_t*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (int64_t*)buff); - case TILEDB_UINT64: + } + case TILEDB_UINT64: { + uint64_t* b = (uint64_t*)buff; + 
LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (uint64_t*)buff); - case TILEDB_FLOAT32: + } + case TILEDB_FLOAT32: { + float* b = (float*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (float*)buff); - case TILEDB_FLOAT64: + } + case TILEDB_FLOAT64: { + double* b = (double*)buff; + LOG_DEBUG(fmt::format( + "_create_dim name={} b={} b1={} b2={}", + name, + b[0], + b[1], + b[2])); return ArrowAdapter::_create_dim_aux(ctx, name, (double*)buff); + } default: throw TileDBSOMAError(fmt::format( "ArrowAdapter: Unsupported TileDB dimension: {} ", diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index d82d880c4a..f8a3b5cdaa 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -31,14 +31,10 @@ */ #include "common.h" +#include "utils/logger.h" namespace helper { -static std::unique_ptr _create_index_cols_info_array( - int64_t dim_max, bool use_current_domain); -static std::unique_ptr _create_index_cols_info_schema( - std::string dim_name); - // This non-obvious number is: // * Something that fits into signed 32-bit integer for R-friendliness; // * Is a comfortable tile-extent distance away from 2^31-1 for default @@ -46,6 +42,13 @@ static std::unique_ptr _create_index_cols_info_schema( // array-creation error.) 
const int CORE_DOMAIN_MAX = 2147483646; +static std::unique_ptr _create_index_cols_info_schema( + const std::vector& dim_infos); +static std::unique_ptr _create_index_cols_info_array( + const std::vector& dim_infos); + +static std::string tdb_to_arrow_type(tiledb_datatype_t tiledb_dtype); + // Notes: // // * This is multi-purpose code used for generic SOMASparseNDArray, @@ -80,35 +83,47 @@ const int CORE_DOMAIN_MAX = 2147483646; // * The Python and R bindings prepare similar Arrow information when // passing a create-array request to libtiledbsoma. +// Create ArrowSchema for the entire SOMAArray -- dims and attrs both -- as well +// as index-column info std::pair, ArrowTable> create_arrow_schema_and_index_columns( - int64_t dim_max, bool use_current_domain) { - // Create ArrowSchema for the entire SOMAArray: dims and attrs both + const std::vector& dim_infos, + const std::vector& attr_infos) { + int ndim = dim_infos.size(); + int nattr = attr_infos.size(); + auto arrow_schema = std::make_unique(); arrow_schema->format = "+s"; - arrow_schema->n_children = 2; // non-leaf node + arrow_schema->n_children = ndim + nattr; // non-leaf node arrow_schema->dictionary = nullptr; arrow_schema->release = &ArrowAdapter::release_schema; arrow_schema->children = new ArrowSchema*[arrow_schema->n_children]; - ArrowSchema* dim = arrow_schema->children[0] = new ArrowSchema; - dim->format = "l"; - dim->name = "d0"; - dim->n_children = 0; // leaf node - dim->dictionary = nullptr; - dim->release = &ArrowAdapter::release_schema; - - ArrowSchema* attr = arrow_schema->children[1] = new ArrowSchema; - attr->format = "l"; - attr->name = "a0"; - attr->n_children = 0; // leaf node - attr->flags = 0; - attr->dictionary = nullptr; - attr->release = &ArrowAdapter::release_schema; - - auto index_cols_info_schema = _create_index_cols_info_schema("d0"); - auto index_cols_info_array = _create_index_cols_info_array( - dim_max, use_current_domain); + for (int i = 0; i < ndim; i++) { + const DimInfo& 
info = dim_infos[i]; + ArrowSchema* dim = new ArrowSchema; + dim->name = strdup(info.name.c_str()); + dim->format = strdup(tdb_to_arrow_type(info.tiledb_datatype).c_str()); + dim->n_children = 0; // leaf node + dim->dictionary = nullptr; + dim->release = &ArrowAdapter::release_schema; + arrow_schema->children[i] = dim; + } + + for (int i = 0; i < nattr; i++) { + const AttrInfo& info = attr_infos[i]; + ArrowSchema* attr = new ArrowSchema; + attr->name = strdup(info.name.c_str()); + attr->format = strdup(tdb_to_arrow_type(info.tiledb_datatype).c_str()); + attr->n_children = 0; // leaf node + attr->flags = 0; + attr->dictionary = nullptr; + attr->release = &ArrowAdapter::release_schema; + arrow_schema->children[ndim + i] = attr; + } + + auto index_cols_info_schema = _create_index_cols_info_schema(dim_infos); + auto index_cols_info_array = _create_index_cols_info_array(dim_infos); return std::pair( std::move(arrow_schema), @@ -117,71 +132,139 @@ create_arrow_schema_and_index_columns( std::move(index_cols_info_schema))); } -ArrowTable create_column_index_info(int64_t dim_max, bool use_current_domain) { - auto index_cols_info_schema = _create_index_cols_info_schema("soma_dim_0"); - auto index_cols_info_array = _create_index_cols_info_array( - dim_max, use_current_domain); +// Create index-column info only, no schema involving the attrs +ArrowTable create_column_index_info(const std::vector& dim_infos) { + for (auto info : dim_infos) { + LOG_DEBUG(fmt::format( + "create_column_index_info name={} type={} dim_max={} ucd={}", + info.name, + info.tiledb_datatype, + info.dim_max, + info.use_current_domain)); + } + + auto index_cols_info_schema = _create_index_cols_info_schema(dim_infos); + auto index_cols_info_array = _create_index_cols_info_array(dim_infos); return ArrowTable( std::move(index_cols_info_array), std::move(index_cols_info_schema)); } static std::unique_ptr _create_index_cols_info_schema( - std::string dim_name) { + const std::vector& dim_infos) { + int ndim = 
dim_infos.size(); + auto index_cols_info_schema = std::make_unique(); index_cols_info_schema->format = "+s"; - index_cols_info_schema->n_children = 1; // non-leaf node + index_cols_info_schema->n_children = ndim; // non-leaf node index_cols_info_schema->dictionary = nullptr; index_cols_info_schema->release = &ArrowAdapter::release_schema; index_cols_info_schema ->children = new ArrowSchema*[index_cols_info_schema->n_children]; - ArrowSchema* dim = index_cols_info_schema->children[0] = new ArrowSchema; - dim->format = "l"; - dim->name = strdup(dim_name.c_str()); - dim->n_children = 0; // leaf node - dim->dictionary = nullptr; - dim->release = &ArrowAdapter::release_schema; + for (int i = 0; i < ndim; i++) { + const DimInfo& info = dim_infos[i]; + ArrowSchema* dim_schema = new ArrowSchema; + dim_schema->format = strdup( + tdb_to_arrow_type(info.tiledb_datatype).c_str()); + dim_schema->name = strdup(info.name.c_str()); + dim_schema->n_children = 0; // leaf node + dim_schema->dictionary = nullptr; + dim_schema->release = &ArrowAdapter::release_schema; + index_cols_info_schema->children[i] = dim_schema; + } return index_cols_info_schema; } static std::unique_ptr _create_index_cols_info_array( - int64_t dim_max, bool use_current_domain) { + const std::vector& dim_infos) { + int ndim = dim_infos.size(); + auto index_cols_info_array = std::make_unique(); index_cols_info_array->length = 0; index_cols_info_array->null_count = 0; index_cols_info_array->offset = 0; index_cols_info_array->n_buffers = 0; index_cols_info_array->buffers = nullptr; - index_cols_info_array->n_children = 1; + index_cols_info_array->n_children = ndim; index_cols_info_array->release = &ArrowAdapter::release_array; - index_cols_info_array->children = new ArrowArray*[1]; - - int n = use_current_domain ? 
5 : 3; - - auto d0_info = index_cols_info_array->children[0] = new ArrowArray; - d0_info->length = n; - d0_info->null_count = 0; - d0_info->offset = 0; - d0_info->n_buffers = 2; - d0_info->release = &ArrowAdapter::release_array; - d0_info->buffers = new const void*[2]; - d0_info->buffers[0] = nullptr; - d0_info->buffers[1] = malloc(sizeof(int64_t) * n); - d0_info->n_children = 0; - - if (use_current_domain) { - // domain big; current_domain small - int64_t dom[] = {0, CORE_DOMAIN_MAX, 1, 0, dim_max}; - std::memcpy((void*)d0_info->buffers[1], &dom, sizeof(int64_t) * n); - } else { - // domain small; current_domain feature not being used - int64_t dom[] = {0, dim_max, 1}; - std::memcpy((void*)d0_info->buffers[1], &dom, sizeof(int64_t) * n); + index_cols_info_array->children = new ArrowArray*[ndim]; + + for (int i = 0; i < ndim; i++) { + const DimInfo& info = dim_infos[i]; + + int n = info.use_current_domain ? 5 : 3; + + auto dim_array = new ArrowArray; + dim_array->length = n; + dim_array->null_count = 0; + dim_array->offset = 0; + dim_array->n_buffers = 2; + dim_array->release = &ArrowAdapter::release_array; + dim_array->buffers = new const void*[2]; + dim_array->buffers[0] = nullptr; + dim_array->n_children = 0; // leaf node + index_cols_info_array->children[i] = dim_array; + + // The full user-level SOMA API supports many more index types. + // Here we support enough types to verify we've got variant-indexed + // SOMADataFrame objects baseline-tested in C++, then defer exhaustive + // loop-over-all-datatypes handling to Python and R. 
+ if (info.tiledb_datatype == TILEDB_INT64) { + size_t nbytes = n * sizeof(int64_t); + dim_array->buffers[1] = malloc(nbytes); + if (info.use_current_domain) { + // domain big; current_domain small + int64_t dom[] = {0, CORE_DOMAIN_MAX, 1, 0, info.dim_max}; + void* vsrc = (void*)&dom[0]; + std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); + } else { + // domain small; current_domain feature not being used + int64_t dom[] = {0, info.dim_max, 1}; + void* vsrc = (void*)&dom[0]; + std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); + } + + } else if (info.tiledb_datatype == TILEDB_UINT32) { + size_t nbytes = n * sizeof(uint32_t); + dim_array->buffers[1] = malloc(nbytes); + if (info.use_current_domain) { + // domain big; current_domain small + uint32_t dom[] = { + 0, (uint32_t)CORE_DOMAIN_MAX, 1, 0, (uint32_t)info.dim_max}; + void* vsrc = (void*)&dom[0]; + std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); + } else { + // domain small; current_domain feature not being used + uint32_t dom[] = {0, (uint32_t)info.dim_max, 1}; + void* vsrc = (void*)&dom[0]; + std::memcpy((void*)dim_array->buffers[1], vsrc, nbytes); + } + + } else if (info.tiledb_datatype == TILEDB_STRING_ASCII) { + // Domain specification for strings is not supported in core. See + // arrow_adapter for more info. We rely on arrow_adapter to also + // handle this case. + dim_array->buffers[1] = nullptr; + + } else { + throw TileDBSOMAError( + "Unsupported datatype encountered in unit test. 
You can add a " + "new type if you like!"); + } } return index_cols_info_array; } +// Just a keystroke-saver +static std::string tdb_to_arrow_type(tiledb_datatype_t tiledb_datatype) { + return std::string(ArrowAdapter::to_arrow_format(tiledb_datatype)); +} + +std::string to_arrow_format(tiledb_datatype_t tiledb_datatype) { + return std::string(ArrowAdapter::to_arrow_format(tiledb_datatype)); +} + } // namespace helper diff --git a/libtiledbsoma/test/common.h b/libtiledbsoma/test/common.h index da238e6ad6..e4ad5f8476 100644 --- a/libtiledbsoma/test/common.h +++ b/libtiledbsoma/test/common.h @@ -60,8 +60,30 @@ using namespace Catch::Matchers; static const std::string src_path = TILEDBSOMA_SOURCE_ROOT; namespace helper { + +// E.g. "d0" is of type TILEDB_INT64 with dim_max 1000 and current-domain +// feature enabled +struct DimInfo { + std::string name; + tiledb_datatype_t tiledb_datatype; + int64_t dim_max; + bool use_current_domain; +}; + +// E.g. "a0" is of type TILEDB_FLOAT64 +struct AttrInfo { + std::string name; + tiledb_datatype_t tiledb_datatype; +}; + std::pair, ArrowTable> -create_arrow_schema_and_index_columns(int64_t dim_max, bool use_current_domain); -ArrowTable create_column_index_info(int64_t dim_max, bool use_current_domain); +create_arrow_schema_and_index_columns( + const std::vector& dim_infos, + const std::vector& attr_infos); + +ArrowTable create_column_index_info(const std::vector& dim_infos); + +std::string to_arrow_format(tiledb_datatype_t tiledb_datatype); + } // namespace helper #endif diff --git a/libtiledbsoma/test/unit_column_buffer.cc b/libtiledbsoma/test/unit_column_buffer.cc index 79b04308dc..1ff087d542 100644 --- a/libtiledbsoma/test/unit_column_buffer.cc +++ b/libtiledbsoma/test/unit_column_buffer.cc @@ -98,4 +98,4 @@ TEST_CASE("ColumnBuffer: Create from array") { REQUIRE(buffers->is_var() == true); REQUIRE(buffers->is_nullable() == true); } -} \ No newline at end of file +} diff --git a/libtiledbsoma/test/unit_managed_query.cc 
b/libtiledbsoma/test/unit_managed_query.cc index ee708533c5..1413450245 100644 --- a/libtiledbsoma/test/unit_managed_query.cc +++ b/libtiledbsoma/test/unit_managed_query.cc @@ -65,17 +65,20 @@ auto create_array(const std::string& uri, Context& ctx) { vfs.remove_dir(uri); } + std::string dim_name = "d0"; + std::string attr_name = "a0"; + // Create schema ArraySchema schema(ctx, TILEDB_SPARSE); auto dim = Dimension::create( - ctx, "d0", TILEDB_STRING_ASCII, nullptr, nullptr); + ctx, dim_name, TILEDB_STRING_ASCII, nullptr, nullptr); dim.set_cell_val_num(TILEDB_VAR_NUM); Domain domain(ctx); domain.add_dimension(dim); schema.set_domain(domain); - auto attr = Attribute::create(ctx, "a0"); + auto attr = Attribute::create(ctx, attr_name); attr.set_nullable(true); schema.add_attribute(attr); schema.check(); @@ -97,11 +100,11 @@ auto create_array(const std::string& uri, Context& ctx) { // Write data to array and close the array Query query(ctx, array); query.set_layout(TILEDB_UNORDERED) - .set_data_buffer("d0", d0_data) - .set_offsets_buffer("d0", d0_offsets) - .set_data_buffer("a0", a0_data) - .set_offsets_buffer("a0", a0_offsets) - .set_validity_buffer("a0", a0_valids); + .set_data_buffer(dim_name, d0_data) + .set_offsets_buffer(dim_name, d0_offsets) + .set_data_buffer(attr_name, a0_data) + .set_offsets_buffer(attr_name, a0_offsets) + .set_validity_buffer(attr_name, a0_valids); query.submit(); array.close(); @@ -118,6 +121,9 @@ TEST_CASE("ManagedQuery: Basic execution test") { } std::string uri = "mem://unit-test-array"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + auto ctx = std::make_shared(); auto [array, d0, a0, _] = create_array(uri, *ctx); @@ -131,18 +137,21 @@ TEST_CASE("ManagedQuery: Basic execution test") { auto num_cells = mq.total_num_cells(); REQUIRE(num_cells == d0.size()); - REQUIRE_THAT(d0, Equals(mq.strings("d0"))); - REQUIRE_THAT(a0, Equals(mq.strings("a0"))); + REQUIRE_THAT(d0, Equals(mq.strings(dim_name))); + REQUIRE_THAT(a0, 
Equals(mq.strings(attr_name))); } TEST_CASE("ManagedQuery: Select test") { std::string uri = "mem://unit-test-array"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + auto ctx = std::make_shared(); auto [array, d0, a0, _] = create_array(uri, *ctx); auto mq = ManagedQuery(array, ctx); - mq.select_columns({"a0"}); - mq.select_points("d0", {"a"}); + mq.select_columns({attr_name}); + mq.select_points(dim_name, {"a"}); mq.setup_read(); mq.submit_read(); @@ -153,15 +162,18 @@ TEST_CASE("ManagedQuery: Select test") { REQUIRE(num_cells == 1); REQUIRE_THROWS(mq.data("a1")); - REQUIRE_THROWS(mq.strings("d0")); + REQUIRE_THROWS(mq.strings(dim_name)); REQUIRE_THROWS(mq.string_view("d1", 0)); REQUIRE_THAT( - std::string(a0[0]), Equals(std::string(mq.string_view("a0", 0)))); + std::string(a0[0]), Equals(std::string(mq.string_view(attr_name, 0)))); } TEST_CASE("ManagedQuery: Validity test") { std::string uri = "mem://unit-test-array"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + auto ctx = std::make_shared(); auto [array, d0, a0, a0_valids] = create_array(uri, *ctx); @@ -176,11 +188,11 @@ TEST_CASE("ManagedQuery: Validity test") { REQUIRE(num_cells == d0.size()); // Convert span to vector - auto valids = mq.validity("a0"); + auto valids = mq.validity(attr_name); std::vector a0_valids_actual; a0_valids_actual.assign(valids.begin(), valids.end()); - REQUIRE_THAT(d0, Equals(mq.strings("d0"))); - REQUIRE_THAT(a0, Equals(mq.strings("a0"))); + REQUIRE_THAT(d0, Equals(mq.strings(dim_name))); + REQUIRE_THAT(a0, Equals(mq.strings(attr_name))); REQUIRE_THAT(a0_valids, Equals(a0_valids_actual)); } diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index a1c000c415..29ee295596 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -44,6 +44,7 @@ #include #include +#include "common.h" #include "utils/util.h" using namespace tiledb; @@ -54,8 +55,6 @@ using namespace 
Catch::Matchers; #define TILEDBSOMA_SOURCE_ROOT "not_defined" #endif -const std::string src_path = TILEDBSOMA_SOURCE_ROOT; - namespace { std::tuple create_array( @@ -70,17 +69,22 @@ std::tuple create_array( vfs.remove_dir(uri); } + const char* dim_name = "d0"; + const char* attr_name = "a0"; + // Create schema ArraySchema schema(*ctx->tiledb_ctx(), TILEDB_SPARSE); auto dim = Dimension::create( - *ctx->tiledb_ctx(), "d0", {0, std::numeric_limits::max() - 1}); + *ctx->tiledb_ctx(), + dim_name, + {0, std::numeric_limits::max() - 1}); Domain domain(*ctx->tiledb_ctx()); domain.add_dimension(dim); schema.set_domain(domain); - auto attr = Attribute::create(*ctx->tiledb_ctx(), "a0"); + auto attr = Attribute::create(*ctx->tiledb_ctx(), attr_name); schema.add_attribute(attr); schema.set_allows_dups(allow_duplicates); schema.check(); @@ -115,6 +119,9 @@ std::tuple, std::vector> write_array( std::iota(frags.begin(), frags.end(), 0); std::shuffle(frags.begin(), frags.end(), std::random_device{}); + const char* dim_name = "d0"; + const char* attr_name = "a0"; + // Write to SOMAArray for (auto i = 0; i < num_fragments; ++i) { auto frag_num = frags[i]; @@ -140,8 +147,8 @@ std::tuple, std::vector> write_array( std::vector a0(num_cells_per_fragment, frag_num); // Write data to array - soma_array->set_column_data("a0", a0.size(), a0.data()); - soma_array->set_column_data("d0", d0.size(), d0.data()); + soma_array->set_column_data(attr_name, a0.size(), a0.data()); + soma_array->set_column_data(dim_name, d0.size(), d0.data()); soma_array->write(); soma_array->close(); } @@ -159,14 +166,14 @@ std::tuple, std::vector> write_array( Query query(*ctx->tiledb_ctx(), tiledb_array); query.set_layout(TILEDB_UNORDERED) - .set_data_buffer("d0", expected_d0) - .set_data_buffer("a0", expected_a0); + .set_data_buffer(dim_name, expected_d0) + .set_data_buffer(attr_name, expected_a0); query.submit(); tiledb_array.close(); - expected_d0.resize(query.result_buffer_elements()["d0"].second); - 
expected_a0.resize(query.result_buffer_elements()["a0"].second); + expected_d0.resize(query.result_buffer_elements()[dim_name].second); + expected_a0.resize(query.result_buffer_elements()[attr_name].second); return {expected_d0, expected_a0}; } @@ -180,6 +187,9 @@ TEST_CASE("SOMAArray: nnz") { int num_cells_per_fragment = 128; auto timestamp = 10; + const char* dim_name = "d0"; + const char* attr_name = "a0"; + // TODO this use to be formatted with fmt::format which is part of internal // header spd/log/fmt/fmt.h and should not be used. In C++20, this can be // replaced with std::format. @@ -230,11 +240,13 @@ TEST_CASE("SOMAArray: nnz") { // Check that data from SOMAArray::read_next matches expected data while (auto batch = soma_array->read_next()) { auto arrbuf = batch.value(); - REQUIRE(arrbuf->names() == std::vector({"d0", "a0"})); + REQUIRE( + arrbuf->names() == + std::vector({dim_name, attr_name})); REQUIRE(arrbuf->num_rows() == nnz); - auto d0span = arrbuf->at("d0")->data(); - auto a0span = arrbuf->at("a0")->data(); + auto d0span = arrbuf->at(dim_name)->data(); + auto a0span = arrbuf->at(attr_name)->data(); std::vector d0col(d0span.begin(), d0span.end()); std::vector a0col(a0span.begin(), a0span.end()); @@ -495,12 +507,15 @@ TEST_CASE("SOMAArray: Write and read back Boolean") { auto ctx = std::make_shared(); + const char* dim_name = "d0"; + const char* attr_name = "a0"; + ArraySchema schema(*ctx->tiledb_ctx(), TILEDB_SPARSE); - auto dim = Dimension::create(*ctx->tiledb_ctx(), "d0", {0, 7}); + auto dim = Dimension::create(*ctx->tiledb_ctx(), dim_name, {0, 7}); Domain domain(*ctx->tiledb_ctx()); domain.add_dimension(dim); schema.set_domain(domain); - auto attr = Attribute::create(*ctx->tiledb_ctx(), "a0"); + auto attr = Attribute::create(*ctx->tiledb_ctx(), attr_name); schema.add_attribute(attr); schema.set_allows_dups(true); @@ -514,14 +529,14 @@ TEST_CASE("SOMAArray: Write and read back Boolean") { arrow_schema->release = &ArrowAdapter::release_schema; 
arrow_schema->children = new ArrowSchema*[arrow_schema->n_children]; ArrowSchema* arrow_dim = arrow_schema->children[0] = new ArrowSchema; - arrow_dim->format = "l"; - arrow_dim->name = "d0"; + arrow_dim->format = strdup(helper::to_arrow_format(TILEDB_INT64).c_str()); + arrow_dim->name = dim_name; arrow_dim->n_children = 0; arrow_dim->dictionary = nullptr; arrow_dim->release = &ArrowAdapter::release_schema; ArrowSchema* arrow_att = arrow_schema->children[1] = new ArrowSchema; arrow_att->format = "b"; - arrow_att->name = "a0"; + arrow_att->name = attr_name; arrow_att->n_children = 0; arrow_att->dictionary = nullptr; arrow_att->release = &ArrowAdapter::release_schema; @@ -569,12 +584,12 @@ TEST_CASE("SOMAArray: Write and read back Boolean") { soma_array = SOMAArray::open(OpenMode::read, uri, ctx); auto arrbuf = soma_array->read_next().value(); - auto d0_span = arrbuf->at("d0")->data(); + auto d0_span = arrbuf->at(dim_name)->data(); REQUIRE( std::vector(d0_span.begin(), d0_span.end()) == std::vector(d0_data, d0_data + 8)); - auto a0_span = arrbuf->at("a0")->data(); + auto a0_span = arrbuf->at(attr_name)->data(); REQUIRE( std::vector(a0_span.begin(), a0_span.end()) == std::vector( diff --git a/libtiledbsoma/test/unit_soma_collection.cc b/libtiledbsoma/test/unit_soma_collection.cc index 4ffdb8ff26..903c9710f3 100644 --- a/libtiledbsoma/test/unit_soma_collection.cc +++ b/libtiledbsoma/test/unit_soma_collection.cc @@ -60,11 +60,19 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") { auto ctx = std::make_shared(); std::string base_uri = "mem://unit-test-add-sparse-ndarray"; std::string sub_uri = "mem://unit-test-add-sparse-ndarray/sub"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); SOMACollection::create(base_uri, ctx, ts); - auto index_columns = helper::create_column_index_info( - DIM_MAX, use_current_domain); + std::vector dim_infos( + {{.name = 
dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); std::map expected_map{ {"sparse_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; @@ -78,7 +86,7 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") { sub_uri, URIType::absolute, ctx, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second))); @@ -104,11 +112,19 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") { auto ctx = std::make_shared(); std::string base_uri = "mem://unit-test-add-dense-ndarray"; std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); SOMACollection::create(base_uri, ctx, ts); // TODO: add support for current domain in dense arrays once we have that // support from core - auto index_columns = helper::create_column_index_info(DIM_MAX, false); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = false}}); + auto index_columns = helper::create_column_index_info(dim_infos); std::map expected_map{ {"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}}; @@ -122,7 +138,7 @@ TEST_CASE("SOMACollection: add SOMADenseNDArray") { sub_uri, URIType::absolute, ctx, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second))); REQUIRE(soma_collection->members_map() == expected_map); @@ -149,11 +165,23 @@ TEST_CASE("SOMACollection: add SOMADataFrame") { auto ctx = std::make_shared(); std::string base_uri = "mem://unit-test-add-dataframe"; std::string sub_uri = "mem://unit-test-add-dataframe/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = 
helper::to_arrow_format(tiledb_datatype); SOMACollection::create(base_uri, ctx, ts); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = use_current_domain}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - DIM_MAX, use_current_domain); + dim_infos, attr_infos); std::map expected_map{ {"dataframe", SOMAGroupEntry(sub_uri, "SOMAArray")}}; @@ -175,7 +203,7 @@ TEST_CASE("SOMACollection: add SOMADataFrame") { REQUIRE(soma_dataframe->uri() == sub_uri); REQUIRE(soma_dataframe->ctx() == ctx); REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {"d0"}; + std::vector expected_index_column_names = {dim_name}; REQUIRE( soma_dataframe->index_column_names() == expected_index_column_names); @@ -190,27 +218,35 @@ TEST_CASE("SOMACollection: add SOMADataFrame") { } TEST_CASE("SOMACollection: add SOMACollection") { - auto ctx = std::make_shared(); - std::string base_uri = "mem://unit-test-add-collection"; - std::string sub_uri = "mem://unit-test-add-collection/sub"; + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + auto ctx = std::make_shared(); + std::string base_uri = "mem://unit-test-add-collection"; + std::string sub_uri = "mem://unit-test-add-collection/sub"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); - SOMACollection::create(base_uri, ctx); + SOMACollection::create(base_uri, ctx); - std::map expected_map{ - {"subcollection", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; + std::map expected_map{ + {"subcollection", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; - auto soma_collection = SOMACollection::open(base_uri, OpenMode::write, ctx); - auto 
soma_subcollection = soma_collection->add_new_collection( - "subcollection", sub_uri, URIType::absolute, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - REQUIRE(soma_subcollection->uri() == sub_uri); - REQUIRE(soma_subcollection->ctx() == ctx); - REQUIRE(soma_subcollection->type() == "SOMACollection"); - soma_collection->close(); + auto soma_collection = SOMACollection::open( + base_uri, OpenMode::write, ctx); + auto soma_subcollection = soma_collection->add_new_collection( + "subcollection", sub_uri, URIType::absolute, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + REQUIRE(soma_subcollection->uri() == sub_uri); + REQUIRE(soma_subcollection->ctx() == ctx); + REQUIRE(soma_subcollection->type() == "SOMACollection"); + soma_collection->close(); - soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); - REQUIRE(soma_collection->members_map() == expected_map); - soma_collection->close(); + soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx); + REQUIRE(soma_collection->members_map() == expected_map); + soma_collection->close(); + } } TEST_CASE("SOMACollection: add SOMAExperiment") { @@ -221,11 +257,23 @@ TEST_CASE("SOMACollection: add SOMAExperiment") { auto ctx = std::make_shared(); std::string base_uri = "mem://unit-test-add-experiment"; std::string sub_uri = "mem://unit-test-add-experiment/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); SOMACollection::create(base_uri, ctx); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = use_current_domain}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - DIM_MAX, use_current_domain); + dim_infos, attr_infos); 
std::map expected_map{ {"experiment", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; @@ -262,11 +310,23 @@ TEST_CASE("SOMACollection: add SOMAMeasurement") { auto ctx = std::make_shared(); std::string base_uri = "mem://unit-test-add-measurement"; std::string sub_uri = "mem://unit-test-add-measurement/sub"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); SOMACollection::create(base_uri, ctx); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = use_current_domain}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - DIM_MAX, use_current_domain); + dim_infos, attr_infos); std::map expected_map{ {"measurement", SOMAGroupEntry(sub_uri, "SOMAGroup")}}; @@ -296,56 +356,64 @@ TEST_CASE("SOMACollection: add SOMAMeasurement") { } TEST_CASE("SOMACollection: metadata") { - auto ctx = std::make_shared(); + auto use_current_domain = GENERATE(false, true); + std::ostringstream section; + section << "- use_current_domain=" << use_current_domain; + SECTION(section.str()) { + auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-collection"; - SOMACollection::create(uri, ctx, TimestampRange(0, 2)); - auto soma_collection = SOMACollection::open( - uri, OpenMode::write, ctx, std::pair(1, 1)); + std::string uri = "mem://unit-test-collection"; + SOMACollection::create(uri, ctx, TimestampRange(0, 2)); + auto soma_collection = SOMACollection::open( + uri, OpenMode::write, ctx, std::pair(1, 1)); - int32_t val = 100; - soma_collection->set_metadata("md", TILEDB_INT32, 1, &val); - soma_collection->close(); + int32_t val = 100; + soma_collection->set_metadata("md", TILEDB_INT32, 1, &val); + soma_collection->close(); - // Read metadata - 
soma_collection->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(soma_collection->metadata_num() == 3); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(soma_collection->has_metadata("md")); - auto mdval = soma_collection->get_metadata("md"); - REQUIRE(std::get(*mdval) == TILEDB_INT32); - REQUIRE(std::get(*mdval) == 1); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - soma_collection->close(); + // Read metadata + soma_collection->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); + auto mdval = soma_collection->get_metadata("md"); + REQUIRE(std::get(*mdval) == TILEDB_INT32); + REQUIRE(std::get(*mdval) == 1); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); + soma_collection->close(); - // md should not be available at (2, 2) - soma_collection->open(OpenMode::read, TimestampRange(2, 2)); - REQUIRE(soma_collection->metadata_num() == 2); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(!soma_collection->has_metadata("md")); - soma_collection->close(); + // md should not be available at (2, 2) + soma_collection->open(OpenMode::read, TimestampRange(2, 2)); + REQUIRE(soma_collection->metadata_num() == 2); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(!soma_collection->has_metadata("md")); + soma_collection->close(); - // Metadata should also be retrievable in write mode - soma_collection->open(OpenMode::write, TimestampRange(0, 2)); - REQUIRE(soma_collection->metadata_num() == 3); - REQUIRE(soma_collection->has_metadata("soma_object_type")); - 
REQUIRE(soma_collection->has_metadata("soma_encoding_version")); - REQUIRE(soma_collection->has_metadata("md")); - mdval = soma_collection->get_metadata("md"); - REQUIRE(*((const int32_t*)std::get(*mdval)) == 100); - - // Delete and have it reflected when reading metadata while in write mode - soma_collection->delete_metadata("md"); - mdval = soma_collection->get_metadata("md"); - REQUIRE(!mdval.has_value()); - soma_collection->close(); + // Metadata should also be retrievable in write mode + soma_collection->open(OpenMode::write, TimestampRange(0, 2)); + REQUIRE(soma_collection->metadata_num() == 3); + REQUIRE(soma_collection->has_metadata("soma_object_type")); + REQUIRE(soma_collection->has_metadata("soma_encoding_version")); + REQUIRE(soma_collection->has_metadata("md")); + mdval = soma_collection->get_metadata("md"); + REQUIRE( + *((const int32_t*)std::get(*mdval)) == 100); - // Confirm delete in read mode - soma_collection->open(OpenMode::read, TimestampRange(0, 2)); - REQUIRE(!soma_collection->has_metadata("md")); - REQUIRE(soma_collection->metadata_num() == 2); + // Delete and have it reflected when reading metadata while in write + // mode + soma_collection->delete_metadata("md"); + mdval = soma_collection->get_metadata("md"); + REQUIRE(!mdval.has_value()); + soma_collection->close(); + + // Confirm delete in read mode + soma_collection->open(OpenMode::read, TimestampRange(0, 2)); + REQUIRE(!soma_collection->has_metadata("md")); + REQUIRE(soma_collection->metadata_num() == 2); + } } TEST_CASE("SOMAExperiment: metadata") { @@ -356,9 +424,22 @@ TEST_CASE("SOMAExperiment: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-experiment"; + std::string dim_name = "soma_dim_0"; + std::string attr_name = "soma_data"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = 
DIM_MAX, + .use_current_domain = use_current_domain}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - DIM_MAX, use_current_domain); + dim_infos, attr_infos); + SOMAExperiment::create( uri, std::move(schema), @@ -434,9 +515,22 @@ TEST_CASE("SOMAMeasurement: metadata") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-measurement"; + std::string dim_name = "soma_dim_0"; + std::string attr_name = "soma_data"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = DIM_MAX, + .use_current_domain = use_current_domain}}); + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - DIM_MAX, use_current_domain); + dim_infos, attr_infos); + SOMAMeasurement::create( uri, std::move(schema), diff --git a/libtiledbsoma/test/unit_soma_dataframe.cc b/libtiledbsoma/test/unit_soma_dataframe.cc index 49974f0290..bd75350caa 100644 --- a/libtiledbsoma/test/unit_soma_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_dataframe.cc @@ -45,12 +45,26 @@ TEST_CASE("SOMADataFrame: basic") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dataframe-basic"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); REQUIRE(!SOMADataFrame::exists(uri, ctx)); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + 
auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - dim_max, use_current_domain); + dim_infos, attr_infos); + SOMADataFrame::create( uri, std::move(schema), @@ -67,7 +81,7 @@ TEST_CASE("SOMADataFrame: basic") { REQUIRE(soma_dataframe->uri() == uri); REQUIRE(soma_dataframe->ctx() == ctx); REQUIRE(soma_dataframe->type() == "SOMADataFrame"); - std::vector expected_index_column_names = {"d0"}; + std::vector expected_index_column_names = {dim_name}; REQUIRE( soma_dataframe->index_column_names() == expected_index_column_names); @@ -80,16 +94,16 @@ TEST_CASE("SOMADataFrame: basic") { std::vector a0(10, 1); soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); - soma_dataframe->set_column_data("a0", a0.size(), a0.data()); - soma_dataframe->set_column_data("d0", d0.size(), d0.data()); + soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); + soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); soma_dataframe->write(); soma_dataframe->close(); soma_dataframe = SOMADataFrame::open(uri, OpenMode::read, ctx); while (auto batch = soma_dataframe->read_next()) { auto arrbuf = batch.value(); - auto d0span = arrbuf->at("d0")->data(); - auto a0span = arrbuf->at("a0")->data(); + auto d0span = arrbuf->at(dim_name)->data(); + auto a0span = arrbuf->at(attr_name)->data(); REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); } @@ -161,6 +175,9 @@ TEST_CASE("SOMADataFrame: platform_config") { SECTION(section2.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dataframe-platform-config"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; PlatformConfig platform_config; platform_config.dataframe_dim_zstd_level = 6; @@ -171,9 +188,19 @@ TEST_CASE("SOMADataFrame: platform_config") { filter.first + R"(]}})"; } + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = 
tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); + auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - dim_max, use_current_domain); + dim_infos, attr_infos); + SOMADataFrame::create( uri, std::move(schema), @@ -194,7 +221,7 @@ TEST_CASE("SOMADataFrame: platform_config") { filter.second); auto dim_filter = sch->domain() - .dimension("d0") + .dimension(dim_name) .filter_list() .filter(0); REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); @@ -203,7 +230,7 @@ TEST_CASE("SOMADataFrame: platform_config") { if (filter.second != TILEDB_FILTER_WEBP) { REQUIRE( - sch->attribute("a0") + sch->attribute(attr_name) .filter_list() .filter(0) .filter_type() == filter.second); @@ -224,10 +251,24 @@ TEST_CASE("SOMADataFrame: metadata") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-collection"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = helper::create_arrow_schema_and_index_columns( - dim_max, use_current_domain); + dim_infos, attr_infos); + SOMADataFrame::create( uri, std::move(schema), @@ -302,9 +343,22 @@ TEST_CASE("SOMADataFrame: bounds-checking") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-bounds-checking"; + std::string dim_name = "d0"; + std::string attr_name = "a0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, 
+ .tiledb_datatype = tiledb_datatype, + .dim_max = old_max, + .use_current_domain = use_current_domain}}); + + std::vector attr_infos( + {{.name = attr_name, .tiledb_datatype = tiledb_datatype}}); auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns(100, use_current_domain); + helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); SOMADataFrame::create( uri, @@ -317,8 +371,8 @@ TEST_CASE("SOMADataFrame: bounds-checking") { std::vector d0({old_max + 1, old_max + 2}); std::vector a0({1.5, 2.5}); - soma_dataframe->set_column_data("d0", d0.size(), d0.data()); - soma_dataframe->set_column_data("a0", a0.size(), a0.data()); + soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); + soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); // Writing outside the current domain should fail REQUIRE_THROWS(soma_dataframe->write()); soma_dataframe->close(); @@ -328,8 +382,8 @@ TEST_CASE("SOMADataFrame: bounds-checking") { soma_dataframe->close(); soma_dataframe = SOMADataFrame::open(uri, OpenMode::write, ctx); - soma_dataframe->set_column_data("d0", d0.size(), d0.data()); - soma_dataframe->set_column_data("a0", a0.size(), a0.data()); + soma_dataframe->set_column_data(dim_name, d0.size(), d0.data()); + soma_dataframe->set_column_data(attr_name, a0.size(), a0.data()); // Writing after resize should succeed soma_dataframe->write(); diff --git a/libtiledbsoma/test/unit_soma_dense_ndarray.cc b/libtiledbsoma/test/unit_soma_dense_ndarray.cc index 5422975a9d..cbfd4783f4 100644 --- a/libtiledbsoma/test/unit_soma_dense_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_dense_ndarray.cc @@ -45,17 +45,26 @@ TEST_CASE("SOMADenseNDArray: basic") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dense-ndarray-basic"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); 
REQUIRE(!SOMADenseNDArray::exists(uri, ctx)); - auto index_columns = helper::create_column_index_info( - dim_max, use_current_domain); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + if (use_current_domain) { // Setting a current domain on a TileDB dense array is not (yet) // supported REQUIRE_THROWS(SOMADenseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -65,7 +74,7 @@ TEST_CASE("SOMADenseNDArray: basic") { } else { SOMADenseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -82,11 +91,11 @@ TEST_CASE("SOMADenseNDArray: basic") { REQUIRE(soma_dense->ctx() == ctx); REQUIRE(soma_dense->type() == "SOMADenseNDArray"); REQUIRE(soma_dense->is_sparse() == false); - REQUIRE(soma_dense->soma_data_type() == "l"); + REQUIRE(soma_dense->soma_data_type() == arrow_format); auto schema = soma_dense->tiledb_schema(); REQUIRE(schema->has_attribute("soma_data")); REQUIRE(schema->array_type() == TILEDB_DENSE); - REQUIRE(schema->domain().has_dimension("soma_dim_0")); + REQUIRE(schema->domain().has_dimension(dim_name)); REQUIRE(soma_dense->ndim() == 1); // Once we have support for current domain in dense arrays @@ -109,7 +118,7 @@ TEST_CASE("SOMADenseNDArray: basic") { soma_dense->open(OpenMode::write); soma_dense->set_column_data("soma_data", a0.size(), a0.data()); - soma_dense->set_column_data("soma_dim_0", d0.size(), d0.data()); + soma_dense->set_column_data(dim_name, d0.size(), d0.data()); soma_dense->write(); soma_dense->close(); @@ -135,18 +144,27 @@ TEST_CASE("SOMADenseNDArray: platform_config") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-dense-ndarray-platform-config"; + std::string dim_name = 
"soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); PlatformConfig platform_config; platform_config.dense_nd_array_dim_zstd_level = 6; - auto index_columns = helper::create_column_index_info( - dim_max, use_current_domain); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + if (use_current_domain) { // Setting a current domain on a TileDB dense array is not (yet) // supported REQUIRE_THROWS(SOMADenseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -156,7 +174,7 @@ TEST_CASE("SOMADenseNDArray: platform_config") { } else { SOMADenseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -166,7 +184,7 @@ TEST_CASE("SOMADenseNDArray: platform_config") { auto soma_dense = SOMADenseNDArray::open(uri, OpenMode::read, ctx); auto dim_filter = soma_dense->tiledb_schema() ->domain() - .dimension("soma_dim_0") + .dimension(dim_name) .filter_list() .filter(0); REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); @@ -188,14 +206,22 @@ TEST_CASE("SOMADenseNDArray: metadata") { section << "- use_current_domain=" << use_current_domain; SECTION(section.str()) { auto ctx = std::make_shared(); - std::string uri = "mem://unit-test-dense-ndarray"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); - auto index_columns = helper::create_column_index_info( 
- dim_max, use_current_domain); SOMASparseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), diff --git a/libtiledbsoma/test/unit_soma_group.cc b/libtiledbsoma/test/unit_soma_group.cc index 018d13bfaf..feaf5b3988 100644 --- a/libtiledbsoma/test/unit_soma_group.cc +++ b/libtiledbsoma/test/unit_soma_group.cc @@ -68,6 +68,9 @@ std::tuple create_array( bool allow_duplicates = false, uint64_t timestamp = 1, bool reuse_existing = false) { + std::string dim_name = "d0"; + std::string attr_name = "a0"; + // Create array, if not reusing the existing array if (!reuse_existing) { auto vfs = VFS(ctx); @@ -79,13 +82,13 @@ std::tuple create_array( ArraySchema schema(ctx, TILEDB_SPARSE); auto dim = Dimension::create( - ctx, "d0", {0, std::numeric_limits::max() - 1}); + ctx, dim_name, {0, std::numeric_limits::max() - 1}); Domain domain(ctx); domain.add_dimension(dim); schema.set_domain(domain); - auto attr = Attribute::create(ctx, "a0"); + auto attr = Attribute::create(ctx, attr_name); schema.add_attribute(attr); schema.set_allows_dups(allow_duplicates); schema.check(); @@ -120,8 +123,8 @@ std::tuple create_array( // Write data to array Query query(ctx, array); query.set_layout(TILEDB_UNORDERED) - .set_data_buffer("d0", d0) - .set_data_buffer("a0", a0); + .set_data_buffer(dim_name, d0) + .set_data_buffer(attr_name, a0); query.submit(); } diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index f664a8e563..added6b5b4 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -44,14 +44,23 @@ TEST_CASE("SOMASparseNDArray: basic") { SECTION(section.str()) { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-sparse-ndarray-basic"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = 
helper::to_arrow_format(tiledb_datatype); REQUIRE(!SOMASparseNDArray::exists(uri, ctx)); - auto index_columns = helper::create_column_index_info( - dim_max, use_current_domain); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + SOMASparseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -68,11 +77,11 @@ TEST_CASE("SOMASparseNDArray: basic") { REQUIRE(soma_sparse->ctx() == ctx); REQUIRE(soma_sparse->type() == "SOMASparseNDArray"); REQUIRE(soma_sparse->is_sparse() == true); - REQUIRE(soma_sparse->soma_data_type() == "l"); + REQUIRE(soma_sparse->soma_data_type() == arrow_format); auto schema = soma_sparse->tiledb_schema(); REQUIRE(schema->has_attribute("soma_data")); REQUIRE(schema->array_type() == TILEDB_SPARSE); - REQUIRE(schema->domain().has_dimension("soma_dim_0")); + REQUIRE(schema->domain().has_dimension(dim_name)); REQUIRE(soma_sparse->ndim() == 1); REQUIRE(soma_sparse->nnz() == 0); @@ -92,14 +101,14 @@ TEST_CASE("SOMASparseNDArray: basic") { soma_sparse->open(OpenMode::write); soma_sparse->set_column_data("soma_data", a0.size(), a0.data()); - soma_sparse->set_column_data("soma_dim_0", d0.size(), d0.data()); + soma_sparse->set_column_data(dim_name, d0.size(), d0.data()); soma_sparse->write(); soma_sparse->close(); soma_sparse->open(OpenMode::read); while (auto batch = soma_sparse->read_next()) { auto arrbuf = batch.value(); - auto d0span = arrbuf->at("soma_dim_0")->data(); + auto d0span = arrbuf->at(dim_name)->data(); auto a0span = arrbuf->at("soma_data")->data(); REQUIRE(d0 == std::vector(d0span.begin(), d0span.end())); REQUIRE(a0 == std::vector(a0span.begin(), a0span.end())); @@ -125,15 +134,24 @@ TEST_CASE("SOMASparseNDArray: platform_config") { SECTION(section.str()) { auto ctx = std::make_shared(); 
std::string uri = "mem://unit-test-dataframe-platform-config"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); PlatformConfig platform_config; platform_config.sparse_nd_array_dim_zstd_level = 6; - auto index_columns = helper::create_column_index_info( - dim_max, use_current_domain); + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); + SOMASparseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), @@ -143,7 +161,7 @@ TEST_CASE("SOMASparseNDArray: platform_config") { auto soma_dataframe = SOMASparseNDArray::open(uri, OpenMode::read, ctx); auto dim_filter = soma_dataframe->tiledb_schema() ->domain() - .dimension("soma_dim_0") + .dimension(dim_name) .filter_list() .filter(0); REQUIRE(dim_filter.filter_type() == TILEDB_FILTER_ZSTD); @@ -165,12 +183,21 @@ TEST_CASE("SOMASparseNDArray: metadata") { auto ctx = std::make_shared(); std::string uri = "mem://unit-test-sparse-ndarray"; + std::string dim_name = "soma_dim_0"; + tiledb_datatype_t tiledb_datatype = TILEDB_INT64; + std::string arrow_format = helper::to_arrow_format(tiledb_datatype); + + std::vector dim_infos( + {{.name = dim_name, + .tiledb_datatype = tiledb_datatype, + .dim_max = dim_max, + .use_current_domain = use_current_domain}}); + + auto index_columns = helper::create_column_index_info(dim_infos); - auto index_columns = helper::create_column_index_info( - dim_max, use_current_domain); SOMASparseNDArray::create( uri, - "l", + arrow_format, ArrowTable( std::move(index_columns.first), std::move(index_columns.second)),