Skip to content

Commit

Permalink
[c++] Integrate SOMAColumn: Update domain accessors inside `SOMAArr…
Browse files Browse the repository at this point in the history
…ay`, part 2 (#3407)

* SOMAColumn abstract class definition

* Remove fmt::format

* Remove unneeded methods and member variables

* Add concrete class wrapper for TileDB dimension

* Add minimal testing for dimensions

* Replace string_view with string when returning column name, add current domain checks, replace vector with span when selecting points

* Add concrete class wrapper for TileDB attribute

* Update CMake files

* Add minimal testing for dimensions

* Misc fixes

* Add read test case

* Remove current_domain flag

* Do not export soma column [skip ci]

* Migrate array creation to SOMAColumn

* Misc fixes

* [c++] SOMAColumn serialization/deserialization (#3599)

* Add minimal testing for dimensions

* Add minimal testing for dimensions

* Add read test case

* Remove current_domain flag

* Do not export soma column [skip ci]

* Replace string_view with string when returning column name, add current domain checks, replace vector with span when selecting points

* Add serialization/deserialization methods

* Serialize SOMAColumn on schema generation

* Update unit tests

* Generate columns on array open

* Add deserialization and default initialization on array open

* Write SOMAColumn metadata if array is open in `write` mode

* Write metadata directly to TileDB array

* Fix error in tests after rebase

* Handle addition and deletion of attributes

* Fix R tests

* [c++] Make `SOMAColumn` metadata required only for `GeometryDataframe` (#3621)

* Make SOMAColumn metadata only required by GeometryDataframe

* Update tests

* Fill SOMAColumn info on array open

* Migrate domain access methods to use SOMAColumns

* Add optional non empty domain method

* Replace optional non empty domain with the SOMAColumn implementation, update python bindings

* Add template-specialization guards

* Remove unsupported dimension datatypes

* Update old version of `fill_metadata_cache`

* Filter SOMAColumns when iterating to construct the domain

* Fix serialized columns order

* log type [skip ci]

* Specify LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION for clang
  • Loading branch information
XanthosXanthopoulos authored Jan 28, 2025
1 parent 77e4f1a commit 3224740
Show file tree
Hide file tree
Showing 15 changed files with 461 additions and 310 deletions.
5 changes: 5 additions & 0 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ def run(self):
if sys.platform == "darwin":
CXX_FLAGS.append("-mmacosx-version-min=13.3")

# This is necessary for clang to enable templated function calls
# between pybind modules and libtiledbsoma where dynamic_cast or
# std::any_cast is involved
CXX_FLAGS.append("-D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=2")

if os.name == "posix" and sys.platform != "darwin":
LIB_DIRS.append(str(tiledbsoma_dir / "lib" / "x86_64-linux-gnu"))
LIB_DIRS.append(str(tiledbsoma_dir / "lib64"))
Expand Down
6 changes: 4 additions & 2 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@ void load_soma_array(py::module& m) {
array.non_empty_domain_slot<float>(name));
case TILEDB_STRING_UTF8:
case TILEDB_STRING_ASCII:
return py::cast(array.non_empty_domain_slot_var(name));
return py::cast(
array.non_empty_domain_slot<std::string>(name));
default:
throw TileDBSOMAError(
"Unsupported dtype for nonempty domain.");
Expand Down Expand Up @@ -814,7 +815,8 @@ void load_soma_array(py::module& m) {
array.non_empty_domain_slot_opt<float>(name));
case TILEDB_STRING_UTF8:
case TILEDB_STRING_ASCII:
return py::cast(array.non_empty_domain_slot_var(name));
return py::cast(
array.non_empty_domain_slot_opt<std::string>(name));
default:
throw TileDBSOMAError(
"Unsupported dtype for nonempty domain.");
Expand Down
4 changes: 3 additions & 1 deletion libtiledbsoma/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) # Don't use GNU extensions
# Build with fPIC
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CMAKE_VERBOSE_MAKEFILE ON)

# Set default builds/configuration to be Release.
get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (is_multi_config)
Expand Down Expand Up @@ -221,7 +223,7 @@ if(MSVC)
)
else()

set(TILEDBSOMA_COMPILE_OPTIONS -Wall -Wextra -DSPDLOG_USE_STD_FORMAT)
set(TILEDBSOMA_COMPILE_OPTIONS -Wall -Wextra -DSPDLOG_USE_STD_FORMAT -D_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION=2)

if(${TILEDBSOMA_ENABLE_WERROR})
set(TILEDBSOMA_WERROR_OPTION -Werror)
Expand Down
152 changes: 46 additions & 106 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@

#include "soma_array.h"
#include <tiledb/array_experimental.h>
#include <ranges>
#include "../utils/logger.h"
#include "../utils/util.h"
#include "soma_attribute.h"
#include "soma_dimension.h"
#include "soma_geometry_column.h"

namespace tiledbsoma {
using namespace tiledb;
Expand Down Expand Up @@ -266,7 +270,8 @@ void SOMAArray::open(OpenMode mode, std::optional<TimestampRange> timestamp) {

validate(mode, name_, timestamp);
reset(column_names(), batch_size_, result_order_);
fill_metadata_cache(timestamp);
fill_metadata_cache(timestamp_);
fill_columns();
}

std::unique_ptr<SOMAArray> SOMAArray::reopen(
Expand Down Expand Up @@ -490,115 +495,24 @@ std::optional<TimestampRange> SOMAArray::timestamp() {
// The domainish enum simply lets us re-use code which is common across
// core domain, core current domain, and core non-empty domain.
ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) {
int array_ndim = this->ndim();
auto dimensions = tiledb_schema()->domain().dimensions();
int array_ndim = std::count_if(
columns_.begin(), columns_.end(), [](const auto& col) {
return col->isIndexColumn();
});

// Create the schema for the info we return
std::vector<std::string> names(array_ndim);
std::vector<tiledb_datatype_t> tiledb_datatypes(array_ndim);

for (int i = 0; i < (int)array_ndim; i++) {
const Dimension& core_dim = dimensions[i];
names[i] = core_dim.name();
tiledb_datatypes[i] = core_dim.type();
}

auto arrow_schema = ArrowAdapter::make_arrow_schema(
names, tiledb_datatypes);

// Create the data for the info we return
auto arrow_schema = ArrowAdapter::make_arrow_schema_parent(array_ndim);
auto arrow_array = ArrowAdapter::make_arrow_array_parent(array_ndim);

for (int i = 0; i < array_ndim; i++) {
auto core_dim = dimensions[i];
auto core_type_code = core_dim.type();
size_t child_index = 0;
for (const auto& column :
columns_ | std::views::filter(
[](const auto& col) { return col->isIndexColumn(); })) {
arrow_schema->children[child_index] = column->arrow_schema_slot(
*ctx_, *arr_);
arrow_array->children[child_index] = column->arrow_domain_slot(
*ctx_, *arr_, which_kind);

ArrowArray* child = nullptr;

switch (core_type_code) {
case TILEDB_INT64:
case TILEDB_DATETIME_YEAR:
case TILEDB_DATETIME_MONTH:
case TILEDB_DATETIME_WEEK:
case TILEDB_DATETIME_DAY:
case TILEDB_DATETIME_HR:
case TILEDB_DATETIME_MIN:
case TILEDB_DATETIME_SEC:
case TILEDB_DATETIME_MS:
case TILEDB_DATETIME_US:
case TILEDB_DATETIME_NS:
case TILEDB_DATETIME_PS:
case TILEDB_DATETIME_FS:
case TILEDB_DATETIME_AS:
case TILEDB_TIME_HR:
case TILEDB_TIME_MIN:
case TILEDB_TIME_SEC:
case TILEDB_TIME_MS:
case TILEDB_TIME_US:
case TILEDB_TIME_NS:
case TILEDB_TIME_PS:
case TILEDB_TIME_FS:
case TILEDB_TIME_AS:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<int64_t>(core_dim.name(), which_kind));
break;
case TILEDB_UINT64:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<uint64_t>(
core_dim.name(), which_kind));
break;
case TILEDB_INT32:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<int32_t>(core_dim.name(), which_kind));
break;
case TILEDB_UINT32:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<uint32_t>(
core_dim.name(), which_kind));
break;
case TILEDB_INT16:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<int16_t>(core_dim.name(), which_kind));
break;
case TILEDB_UINT16:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<uint16_t>(
core_dim.name(), which_kind));
break;
case TILEDB_INT8:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<int8_t>(core_dim.name(), which_kind));
break;
case TILEDB_UINT8:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<uint8_t>(core_dim.name(), which_kind));
break;

case TILEDB_FLOAT64:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<double>(core_dim.name(), which_kind));
break;
case TILEDB_FLOAT32:
child = ArrowAdapter::make_arrow_array_child(
_core_domainish_slot<float>(core_dim.name(), which_kind));
break;

case TILEDB_STRING_ASCII:
case TILEDB_CHAR:
case TILEDB_GEOM_WKB:
case TILEDB_GEOM_WKT:
child = ArrowAdapter::make_arrow_array_child_string(
_core_domainish_slot_string(core_dim.name(), which_kind));
break;

default:
throw TileDBSOMAError(std::format(
"SOMAArray::_get_core_domainish:dim {} has unhandled type "
"{}",
core_dim.name(),
tiledb::impl::type_to_str(core_type_code)));
}
arrow_array->children[i] = child;
++child_index;
}

return ArrowTable(std::move(arrow_array), std::move(arrow_schema));
Expand Down Expand Up @@ -1696,4 +1610,30 @@ void SOMAArray::_check_dims_are_int64() {
}
}

/**
 * @brief Look up a column of this array by name.
 *
 * Searches `columns_` in order and returns the first entry whose `name()`
 * compares equal to `name`.
 *
 * @param name Name of the column to find.
 * @return Shared pointer to the matching column.
 * @throws TileDBSOMAError if no column with that name is present.
 */
std::shared_ptr<SOMAColumn> SOMAArray::get_column(std::string_view name) const {
    // Take each element by const reference: accepting the shared_ptr by value
    // would copy it (and touch its reference count) for every column visited.
    const auto result = std::find_if(
        columns_.begin(), columns_.end(), [name](const auto& col) {
            return col->name() == name;
        });

    if (result == columns_.end()) {
        throw TileDBSOMAError(std::format(
            "[SOMAArray] internal coding error: No column named {} found",
            name));
    }

    return *result;
}

/**
 * @brief Look up a column of this array by its position in `columns_`.
 *
 * @param index Zero-based position of the requested column.
 * @return Shared pointer to the column stored at `index`.
 * @throws TileDBSOMAError if `index` is not less than the number of columns.
 */
std::shared_ptr<SOMAColumn> SOMAArray::get_column(std::size_t index) const {
    const auto column_count = columns_.size();

    // Happy path first: a valid index returns straight away.
    if (index < column_count) {
        return columns_[index];
    }

    throw TileDBSOMAError(std::format(
        "[SOMAArray] internal coding error: Column index outside of range. "
        "Requested {}, but {} exist.",
        index,
        column_count));
}

} // namespace tiledbsoma
Loading

0 comments on commit 3224740

Please sign in to comment.