Skip to content

Commit

Permalink
Do not accept array_t objects in the Python/C++ interface.
Browse files Browse the repository at this point in the history
This avoids problems with inadvertent casting to get the expected type/layout,
which would often result in a dangling view in the initialized tatami::Matrix.
Rather, we accept array objects and manually check that they're of the expected
type/layout, just in case they weren't casted correctly on the Python side.
  • Loading branch information
LTLA committed Dec 7, 2024
1 parent d530fac commit f59ba9a
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 78 deletions.
83 changes: 57 additions & 26 deletions lib/src/common.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "def.h"
#include "utils.h"

#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
Expand Down Expand Up @@ -174,9 +175,14 @@ pybind11::array_t<MatrixIndex> compute_column_nan_counts(const MatrixPointer& ma

/** Grouped stats **/

pybind11::array_t<MatrixValue> compute_row_sums_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
size_t ngroups = tatami_stats::total_groups(gptr, mat->ncol());
pybind11::array_t<MatrixValue> compute_row_sums_by_group(const MatrixPointer& mat, const pybind11::array& grouping, int num_threads) {
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t ncol = mat->ncol();
if (grouping.size() != ncol) {
throw std::runtime_error("'grouping' should have length equal to the number of columns");
}

size_t ngroups = tatami_stats::total_groups(gptr, ncol);
size_t nrow = mat->nrow();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ nrow, ngroups });

Expand All @@ -192,9 +198,14 @@ pybind11::array_t<MatrixValue> compute_row_sums_by_group(const MatrixPointer& ma
return output;
}

pybind11::array_t<MatrixValue> compute_column_sums_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
size_t ngroups = tatami_stats::total_groups(gptr, mat->nrow());
pybind11::array_t<MatrixValue> compute_column_sums_by_group(const MatrixPointer& mat, const pybind11::array& grouping, int num_threads) {
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t nrow = mat->nrow();
if (grouping.size() != nrow) {
throw std::runtime_error("'grouping' should have length equal to the number of rows");
}

size_t ngroups = tatami_stats::total_groups(gptr, nrow);
size_t ncol = mat->ncol();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ ncol, ngroups });

Expand All @@ -211,8 +222,13 @@ pybind11::array_t<MatrixValue> compute_column_sums_by_group(const MatrixPointer&
}

pybind11::array_t<MatrixValue> compute_row_variances_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
auto group_sizes = tatami_stats::tabulate_groups(gptr, mat->ncol());
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t ncol = mat->ncol();
if (grouping.size() != ncol) {
throw std::runtime_error("'grouping' should have length equal to the number of columns");
}

auto group_sizes = tatami_stats::tabulate_groups<MatrixIndex, MatrixIndex>(gptr, ncol);
size_t ngroups = group_sizes.size();
size_t nrow = mat->nrow();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ nrow, ngroups });
Expand All @@ -230,8 +246,13 @@ pybind11::array_t<MatrixValue> compute_row_variances_by_group(const MatrixPointe
}

pybind11::array_t<MatrixValue> compute_column_variances_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
auto group_sizes = tatami_stats::tabulate_groups(gptr, mat->ncol());
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t nrow = mat->nrow();
if (grouping.size() != nrow) {
throw std::runtime_error("'grouping' should have length equal to the number of rows");
}

auto group_sizes = tatami_stats::tabulate_groups<MatrixIndex, MatrixIndex>(gptr, nrow);
size_t ngroups = group_sizes.size();
size_t ncol = mat->ncol();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ ncol, ngroups });
Expand All @@ -249,8 +270,13 @@ pybind11::array_t<MatrixValue> compute_column_variances_by_group(const MatrixPoi
}

pybind11::array_t<MatrixValue> compute_row_medians_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
auto group_sizes = tatami_stats::tabulate_groups(gptr, mat->ncol());
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t ncol = mat->ncol();
if (grouping.size() != ncol) {
throw std::runtime_error("'grouping' should have length equal to the number of columns");
}

auto group_sizes = tatami_stats::tabulate_groups<MatrixIndex, MatrixIndex>(gptr, ncol);
size_t ngroups = group_sizes.size();
size_t nrow = mat->nrow();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ nrow, ngroups });
Expand All @@ -268,8 +294,13 @@ pybind11::array_t<MatrixValue> compute_row_medians_by_group(const MatrixPointer&
}

pybind11::array_t<MatrixValue> compute_column_medians_by_group(const MatrixPointer& mat, const pybind11::array_t<MatrixIndex>& grouping, int num_threads) {
auto gptr = static_cast<const MatrixIndex*>(grouping.request().ptr);
auto group_sizes = tatami_stats::tabulate_groups(gptr, mat->ncol());
auto gptr = check_numpy_array<MatrixIndex>(grouping);
size_t nrow = mat->nrow();
if (grouping.size() != nrow) {
throw std::runtime_error("'grouping' should have length equal to the number of rows");
}

auto group_sizes = tatami_stats::tabulate_groups<MatrixIndex, MatrixIndex>(gptr, nrow);
size_t ngroups = group_sizes.size();
size_t ncol = mat->ncol();
pybind11::array_t<MatrixValue, pybind11::array::f_style> output({ ncol, ngroups });
Expand All @@ -288,16 +319,16 @@ pybind11::array_t<MatrixValue> compute_column_medians_by_group(const MatrixPoint

/** Extraction **/

pybind11::array_t<MatrixValue> extract_dense_subset(MatrixPointer mat,
bool row_noop, const pybind11::array_t<MatrixIndex>& row_sub,
bool col_noop, const pybind11::array_t<MatrixIndex>& col_sub)
{
pybind11::array_t<MatrixValue> extract_dense_subset(MatrixPointer mat, bool row_noop, const pybind11::array& row_sub, bool col_noop, const pybind11::array& col_sub) {
if (!row_noop) {
auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView<MatrixIndex>(row_sub.data(), row_sub.size()));
auto rptr = check_numpy_array<MatrixIndex>(row_sub);
auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView<MatrixIndex>(rptr, row_sub.size()));
mat.swap(tmp);
}

if (!col_noop) {
auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView<MatrixIndex>(col_sub.data(), col_sub.size()));
auto cptr = check_numpy_array<MatrixIndex>(col_sub);
auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView<MatrixIndex>(cptr, col_sub.size()));
mat.swap(tmp);
}

Expand All @@ -308,16 +339,16 @@ pybind11::array_t<MatrixValue> extract_dense_subset(MatrixPointer mat,
return output;
}

pybind11::object extract_sparse_subset(MatrixPointer mat,
bool row_noop, const pybind11::array_t<MatrixIndex>& row_sub,
bool col_noop, const pybind11::array_t<MatrixIndex>& col_sub)
{
pybind11::object extract_sparse_subset(MatrixPointer mat, bool row_noop, const pybind11::array& row_sub, bool col_noop, const pybind11::array& col_sub) {
if (!row_noop) {
auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView<MatrixIndex>(row_sub.data(), row_sub.size()));
auto rptr = check_numpy_array<MatrixIndex>(row_sub);
auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView<MatrixIndex>(rptr, row_sub.size()));
mat.swap(tmp);
}

if (!col_noop) {
auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView<MatrixIndex>(col_sub.data(), col_sub.size()));
auto cptr = check_numpy_array<MatrixIndex>(col_sub);
auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView<MatrixIndex>(cptr, col_sub.size()));
mat.swap(tmp);
}

Expand Down
71 changes: 42 additions & 29 deletions lib/src/compressed_sparse_matrix.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "def.h"
#include "utils.h"

#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
Expand All @@ -8,65 +9,77 @@
#include <cstdint>

template<typename Data_, typename Index_>
MatrixPointer initialize_compressed_sparse_matrix_raw(MatrixIndex nr, MatrixValue nc, const Data_* dptr, const Index_* iptr, const pybind11::array_t<uint64_t>& indptr, bool byrow) {
size_t nz = indptr.at(indptr.size() - 1);
tatami::ArrayView<Data_> dview(dptr, nz);
tatami::ArrayView<Index_> iview(iptr, nz);
tatami::ArrayView<uint64_t> pview(static_cast<const uint64_t*>(indptr.request().ptr), indptr.size());
return MatrixPointer(new tatami::CompressedSparseMatrix<MatrixValue, MatrixIndex, decltype(dview), decltype(iview), decltype(pview)>(nr, nc, std::move(dview), std::move(iview), std::move(pview), byrow));
MatrixPointer initialize_compressed_sparse_matrix_raw(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) {
size_t expected = (byrow ? nr : nc);
if (indptr.size() != expected + 1) {
throw std::runtime_error("unexpected length for the 'indptr' array");
}
tatami::ArrayView<uint64_t> pview(check_numpy_array<uint64_t>(indptr), indptr.size());

size_t nz = pview[pview.size() - 1];
if (data.size() != nz) {
throw std::runtime_error("unexpected length for the 'data' array");
}
tatami::ArrayView<Data_> dview(check_contiguous_numpy_array<Data_>(data), nz);

if (data.size() != nz) {
throw std::runtime_error("unexpected length for the 'data' array");
}
tatami::ArrayView<Index_> iview(check_contiguous_numpy_array<Index_>(index), nz);

typedef tatami::CompressedSparseMatrix<MatrixValue, MatrixIndex, decltype(dview), decltype(iview), decltype(pview)> Spmat;
return MatrixPointer(new Spmat(nr, nc, std::move(dview), std::move(iview), std::move(pview), byrow));
}

template<typename Data_>
MatrixPointer initialize_compressed_sparse_matrix_itype(MatrixIndex nr, MatrixValue nc, const Data_* dptr, const pybind11::array& index, const pybind11::array_t<uint64_t>& indptr, bool byrow) {
MatrixPointer initialize_compressed_sparse_matrix_itype(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) {
auto dtype = index.dtype();
auto iptr = index.request().ptr;

if (dtype.is(pybind11::dtype::of<int64_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const int64_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, int64_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int32_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const int32_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, int32_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int16_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const int16_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, int16_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int8_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const int8_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, int8_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint64_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const uint64_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, uint64_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint32_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const uint32_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, uint32_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint16_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const uint16_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, uint16_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint8_t>())) {
return initialize_compressed_sparse_matrix_raw(nr, nc, dptr, static_cast<const uint8_t*>(iptr), indptr, byrow);
return initialize_compressed_sparse_matrix_raw<Data_, uint8_t>(nr, nc, data, index, indptr, byrow);
}

throw std::runtime_error("unrecognized index type '" + std::string(dtype.kind(), 1) + std::to_string(dtype.itemsize()) + "' for compressed sparse matrix initialization");
return MatrixPointer();
}

MatrixPointer initialize_compressed_sparse_matrix(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array_t<uint64_t>& indptr, bool byrow) {
MatrixPointer initialize_compressed_sparse_matrix(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) {
auto dtype = data.dtype();
auto dptr = data.request().ptr;

if (dtype.is(pybind11::dtype::of<double>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const double*>(dptr), index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<float>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const float*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< double>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<float>())) {
return initialize_compressed_sparse_matrix_itype< float>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int64_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const int64_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< int64_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int32_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const int32_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< int32_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int16_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const int16_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< int16_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<int8_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const int8_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< int8_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint64_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const uint64_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype<uint64_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint32_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const uint32_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype<uint32_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint16_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const uint16_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype<uint16_t>(nr, nc, data, index, indptr, byrow);
} else if (dtype.is(pybind11::dtype::of<uint8_t>())) {
return initialize_compressed_sparse_matrix_itype(nr, nc, reinterpret_cast<const uint8_t*>(dptr), index, indptr, byrow);
return initialize_compressed_sparse_matrix_itype< uint8_t>(nr, nc, data, index, indptr, byrow);
}

throw std::runtime_error("unrecognized data type '" + std::string(dtype.kind(), 1) + std::to_string(dtype.itemsize()) + "' for compressed sparse matrix initialization");
Expand Down
6 changes: 4 additions & 2 deletions lib/src/delayed_subset.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#include "def.h"
#include "utils.h"

#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"

#include <string>
#include <cstdint>

MatrixPointer initialize_delayed_subset(MatrixPointer mat, const pybind11::array_t<MatrixIndex>& subset, bool byrow) {
return tatami::make_DelayedSubset(std::move(mat), tatami::ArrayView<MatrixIndex>(static_cast<const MatrixIndex*>(subset.request().ptr), subset.size()), byrow);
MatrixPointer initialize_delayed_subset(MatrixPointer mat, const pybind11::array& subset, bool byrow) {
auto sptr = check_numpy_array<MatrixIndex>(subset);
return tatami::make_DelayedSubset(std::move(mat), tatami::ArrayView<MatrixIndex>(sptr, subset.size()), byrow);
}

void init_delayed_subset(pybind11::module& m) {
Expand Down
12 changes: 9 additions & 3 deletions lib/src/delayed_unary_isometric_operation_with_args.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "def.h"
#include "utils.h"

#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
Expand All @@ -9,8 +10,13 @@
#include <cstdint>

template<bool right_>
MatrixPointer initialize_delayed_unary_isometric_operation_with_vector_internal(MatrixPointer mat, const std::string& op, bool by_row, const pybind11::array_t<double>& arg) {
tatami::ArrayView<double> aview(static_cast<const double*>(arg.request().ptr), by_row ? mat->nrow() : mat->ncol());
MatrixPointer initialize_delayed_unary_isometric_operation_with_vector_internal(MatrixPointer mat, const std::string& op, bool by_row, const pybind11::array& arg) {
auto aptr = check_numpy_array<double>(arg);
size_t expected = by_row ? mat->nrow() : mat->ncol();
if (expected != arg.size()) {
throw std::runtime_error("unexpected length of array for isometric unary operation");
}
tatami::ArrayView<double> aview(aptr, expected);

if (op == "add") {
return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricAddVector(std::move(aview), by_row));
Expand Down Expand Up @@ -52,7 +58,7 @@ MatrixPointer initialize_delayed_unary_isometric_operation_with_vector_internal(
return MatrixPointer();
}

MatrixPointer initialize_delayed_unary_isometric_operation_with_vector(MatrixPointer mat, const std::string& op, bool right, bool by_row, const pybind11::array_t<double>& args) {
MatrixPointer initialize_delayed_unary_isometric_operation_with_vector(MatrixPointer mat, const std::string& op, bool right, bool by_row, const pybind11::array& args) {
if (right) {
return initialize_delayed_unary_isometric_operation_with_vector_internal<true>(std::move(mat), op, by_row, args);
} else {
Expand Down
Loading

0 comments on commit f59ba9a

Please sign in to comment.