From 1bf7b100cf233a07124b0ced8894de234840ad7b Mon Sep 17 00:00:00 2001
From: John Kerl
Date: Sun, 18 Aug 2024 18:06:33 -0400
Subject: [PATCH] [c++] Resize for variant-indexed `DataFrame` [WIP] [skip ci]

---
 libtiledbsoma/src/soma/soma_array.cc | 48 ++++++++++++++++++++++++++++
 libtiledbsoma/src/soma/soma_array.h  | 21 ++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc
index 092af65253..83d43a3226 100644
--- a/libtiledbsoma/src/soma/soma_array.cc
+++ b/libtiledbsoma/src/soma/soma_array.cc
@@ -1307,6 +1307,54 @@ void SOMAArray::resize(const std::vector<int64_t>& newshape) {
     schema_evolution.array_evolve(uri_);
 }
 
+void SOMAArray::resize_soma_joinid_if_dim(
+    const std::vector<int64_t>& newshape) {
+    if (mq_->query_type() != TILEDB_WRITE) {
+        throw TileDBSOMAError(
+            "[SOMAArray::resize_soma_joinid_if_dim] array must be opened in "
+            "write mode");
+    }
+
+    ArraySchema schema = arr_->schema();
+    Domain domain = schema.domain();
+    unsigned ndim = domain.ndim();
+    if (newshape.size() != 1) {
+        throw TileDBSOMAError(fmt::format(
+            "[SOMAArray::resize_soma_joinid_if_dim]: newshape has dimension "
+            "count {}; needed 1",
+            newshape.size()));
+    }
+
+    auto tctx = ctx_->tiledb_ctx();
+    CurrentDomain old_current_domain = ArraySchemaExperimental::current_domain(
+        *tctx, schema);
+    NDRectangle ndrect = old_current_domain.ndrectangle();
+
+    // Only soma_joinid is resized; every other dim keeps the range copied
+    // from the existing current domain.
+    bool found_soma_joinid = false;
+    for (unsigned i = 0; i < ndim; i++) {
+        const auto dim_name = domain.dimension(i).name();
+        if (dim_name == "soma_joinid") {
+            ndrect.set_range<int64_t>(dim_name, 0, newshape[0] - 1);
+            found_soma_joinid = true;
+        }
+    }
+
+    // soma_joinid is not a dim, so nothing needs to change: skip the schema
+    // evolution entirely rather than writing an unchanged current domain.
+    if (!found_soma_joinid) {
+        return;
+    }
+
+    CurrentDomain new_current_domain(*tctx);
+    new_current_domain.set_ndrectangle(ndrect);
+
+    ArraySchemaEvolution schema_evolution(*tctx);
+    schema_evolution.expand_current_domain(new_current_domain);
+    schema_evolution.array_evolve(uri_);
+}
+
 uint64_t SOMAArray::ndim() const {
     return tiledb_schema()->domain().ndim();
 }
diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h
index 47e272bd6e..dfd9f0d5ef 100644
--- a/libtiledbsoma/src/soma/soma_array.h
+++ b/libtiledbsoma/src/soma/soma_array.h
@@ -619,6 +619,27 @@ class SOMAArray : public SOMAObject {
      */
     void resize(const std::vector<int64_t>& newshape);
 
+    /**
+     * @brief Increases the tiledbsoma shape up to at most the maxshape,
+     * resizing the soma_joinid dimension if it is a dimension.
+     *
+     * While SOMA SparseNDArray and DenseNDArray, along with default-indexed
+     * DataFrame, have int64_t dims, non-default-indexed DataFrame objects need
+     * not: it is only required that they have a dim _or_ an attr called
+     * soma_joinid. If soma_joinid is one of the dims, it will be resized while
+     * the others will be preserved. If soma_joinid is not one of the dims,
+     * nothing will be changed, as nothing _needs_ to be changed.
+     *
+     * @param newshape Single-element vector holding the new soma_joinid
+     * shape; the soma_joinid range becomes [0, newshape[0] - 1].
+     *
+     * @return Throws if the array is not opened in write mode, or if newshape
+     * does not have exactly one element. Throws if the requested shape exceeds
+     * the array's create-time maxshape. Throws if the array does not have
+     * current-domain support.
+     */
+    void resize_soma_joinid_if_dim(const std::vector<int64_t>& newshape);
+
     /**
      * @brief Get the number of dimensions.
      *