diff --git a/.github/workflows/check-install.yaml b/.github/workflows/check-install.yaml
new file mode 100644
index 0000000..114513d
--- /dev/null
+++ b/.github/workflows/check-install.yaml
@@ -0,0 +1,36 @@
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+name: Check CMake install
+
+jobs:
+  install:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Get latest CMake
+      uses: lukka/get-cmake@latest
+
+    - name: Configure the build
+      run: cmake -S . -B build -DSCRAN_PCA_TESTS=OFF
+
+    - name: Install the library
+      run: sudo cmake --install build
+
+    - name: Test downstream usage
+      run: |
+        mkdir _downstream
+        touch _downstream/source.cpp
+        cat << EOF > _downstream/CMakeLists.txt
+        cmake_minimum_required(VERSION 3.24)
+        project(test_install)
+        add_executable(whee source.cpp)
+        find_package(libscran_scran_pca)
+        target_link_libraries(whee libscran::scran_pca)
+        EOF
+        cd _downstream && cmake -S . -B build
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 736ee34..d950acc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,20 +1,77 @@
 cmake_minimum_required(VERSION 3.14)
 
-project(scran_principal_component_analysis
+project(scran_pca
     VERSION 1.0.0
     DESCRIPTION "Principal components analysis for single-cell data"
     LANGUAGES CXX)
 
-add_library(scran_principal_component_analysis INTERFACE)
-target_compile_features(scran_principal_component_analysis INTERFACE cxx_std_17)
-target_include_directories(scran_principal_component_analysis INTERFACE include/scran)
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
 
-add_subdirectory(extern)
-target_link_libraries(scran_principal_component_analysis INTERFACE scran_core_utils tatami::tatami tatami::tatami_stats ltla::irlba Eigen3::Eigen)
+# Library
+add_library(scran_pca INTERFACE)
+add_library(libscran::scran_pca ALIAS scran_pca)
+target_include_directories(scran_pca INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/scran_pca>)
+target_compile_features(scran_pca INTERFACE cxx_std_17)
+
+# Dependencies
+option(SCRAN_PCA_FETCH_EXTERN "Automatically fetch scran_pca's external dependencies." ON)
+if(SCRAN_PCA_FETCH_EXTERN)
+    add_subdirectory(extern)
+else()
+    find_package(tatami_tatami 3.0.0 CONFIG REQUIRED)
+    find_package(tatami_tatami_stats 1.0.0 CONFIG REQUIRED)
+    find_package(libscran_scran_blocks 1.0.0 CONFIG REQUIRED)
+    find_package(ltla_irlba 2.0.0 CONFIG REQUIRED)
+    find_package(Eigen3 3.4.0 REQUIRED NO_MODULE)
+endif()
+
+target_link_libraries(
+    scran_pca INTERFACE
+    tatami::tatami
+    tatami::tatami_stats
+    libscran::scran_blocks
+    ltla::irlba
+    Eigen3::Eigen
+)
+
+# Tests
 if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
+    option(SCRAN_PCA_TESTS "Build scran_pca's test suite." ON)
+else()
+    option(SCRAN_PCA_TESTS "Build scran_pca's test suite." OFF)
+endif()
+
+if(SCRAN_PCA_TESTS)
     include(CTest)
     if(BUILD_TESTING)
         add_subdirectory(tests)
-    endif() 
+    endif()
 endif()
+
+# Install
+install(DIRECTORY include/
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/scran_pca)
+
+install(TARGETS scran_pca
+    EXPORT scran_pcaTargets)
+
+install(EXPORT scran_pcaTargets
+    FILE libscran_scran_pcaTargets.cmake
+    NAMESPACE libscran::
+    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libscran_scran_pca)
+
+configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Config.cmake.in
+    "${CMAKE_CURRENT_BINARY_DIR}/libscran_scran_pcaConfig.cmake"
+    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libscran_scran_pca)
+
+write_basic_package_version_file(
+    "${CMAKE_CURRENT_BINARY_DIR}/libscran_scran_pcaConfigVersion.cmake"
+    COMPATIBILITY SameMajorVersion)
+
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libscran_scran_pcaConfig.cmake"
+    "${CMAKE_CURRENT_BINARY_DIR}/libscran_scran_pcaConfigVersion.cmake"
+    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libscran_scran_pca)
diff --git a/README.md b/README.md
index 36c8ae0..deb1dfa 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # Principal components analysis, duh
 
-![Unit tests](https://github.com/libscran/principal_component_analysis/actions/workflows/run-tests.yaml/badge.svg)
-![Documentation](https://github.com/libscran/principal_component_analysis/actions/workflows/doxygenate.yaml/badge.svg)
-[![Codecov](https://codecov.io/gh/libscran/principal_component_analysis/graph/badge.svg?token=qklLZtJSE9)](https://codecov.io/gh/libscran/principal_component_analysis)
+![Unit tests](https://github.com/libscran/scran_pca/actions/workflows/run-tests.yaml/badge.svg)
+![Documentation](https://github.com/libscran/scran_pca/actions/workflows/doxygenate.yaml/badge.svg)
+[![Codecov](https://codecov.io/gh/libscran/scran_pca/graph/badge.svg?token=qklLZtJSE9)](https://codecov.io/gh/libscran/scran_pca)
 
 ## Overview
@@ -16,13 +16,13 @@ factored out into a separate C++ library for easier re-use.
-Given a [`tatami::Matrix`](https://github.com/tatami-inc/tatami), the `simple_pca::compute()` function will compute the PCA to obtain a low-dimensional representation of the cells:
+Given a [`tatami::Matrix`](https://github.com/tatami-inc/tatami), the `simple_pca()` function will compute the PCA to obtain a low-dimensional representation of the cells:
 
 ```cpp
-#include "scran/simple_pca.hpp"
+#include "scran_pca/scran_pca.hpp"
 
-tatami::Matrix* ptr = some_data_source();
+const tatami::Matrix<double, int>& mat = some_data_source();
 
 // Take the top 20 PCs:
-scran::simple_pca::Options opt;
-auto res = scran::simple_pca::compute(ptr, 20, opt);
+scran_pca::SimplePcaOptions opt;
+auto res = scran_pca::simple_pca(mat, 20, opt);
 
 res.components; // rows are PCs, columns are cells.
 res.rotation; // rows are genes, columns correspond to PCs.
@@ -45,8 +45,8 @@ This ensures that the inter-block differences do not contribute to the first few
 ```cpp
 std::vector<int> blocks = some_blocks();
 
-scran::blocked_pca::Options bopt;
-auto bres = scran::blocked_pca::compute(ptr, blocks.data(), 20, bopt);
+scran_pca::BlockedPcaOptions bopt;
+auto bres = scran_pca::blocked_pca(mat, blocks.data(), 20, bopt);
 
 bres.components; // rows are PCs, columns are cells.
 bres.center; // rows are blocks, columns are genes.
@@ -59,31 +59,59 @@ we can use `blocked_pca::compute()` to obtain an appropriate matrix that focuses
 ```cpp
 bopt.components_from_residuals = false;
-auto bres2 = scran::blocked_pca::compute(ptr, blocks.data(), 20, bopt);
+auto bres2 = scran_pca::blocked_pca(mat, blocks.data(), 20, bopt);
 ```
 
-Check out the [reference documentation](https://libscran.github.io/principal_component_analysis) for more details.
+Check out the [reference documentation](https://libscran.github.io/scran_pca) for more details.
 
 ## Building projects
 
-This repository is part of the broader [**libscran**](https://github.com/libscran/libscran) library,
-so users are recommended to use the latter in their projects.
-**libscran** developers should just use CMake with `FetchContent`:
+### CMake with `FetchContent`
+
+If you're using CMake, you just need to add something like this to your `CMakeLists.txt`:
 
 ```cmake
 include(FetchContent)
 
 FetchContent_Declare(
-  scran_principal_component_analysis
-  GIT_REPOSITORY https://github.com/libscran/principal_component_analysis
+  scran_pca
+  GIT_REPOSITORY https://github.com/libscran/scran_pca
   GIT_TAG master # or any version of interest
 )
 
-FetchContent_MakeAvailable(scran_principal_component_analysis)
+FetchContent_MakeAvailable(scran_pca)
+```
+
+Then you can link to **scran_pca** to make the headers available during compilation:
+```cmake
 # For executables:
-target_link_libraries(myexe scran_principal_component_analysis)
+target_link_libraries(myexe libscran::scran_pca)
 
-# For libaries
-target_link_libraries(mylib INTERFACE scran_principal_component_analysis)
+# For libraries
+target_link_libraries(mylib INTERFACE libscran::scran_pca)
+```
+
+### CMake with `find_package()`
+
+```cmake
+find_package(libscran_scran_pca CONFIG REQUIRED)
+target_link_libraries(mylib INTERFACE libscran::scran_pca)
+```
+
+To install the library, use:
+
+```sh
+mkdir build && cd build
+cmake .. -DSCRAN_PCA_TESTS=OFF
+cmake --build . --target install
 ```
 
+By default, this will use `FetchContent` to fetch all external dependencies.
+If you want to install them manually, use `-DSCRAN_PCA_FETCH_EXTERN=OFF`.
+See the tags in [`extern/CMakeLists.txt`](extern/CMakeLists.txt) to find compatible versions of each dependency.
+
+### Manual
+
+If you're not using CMake, the simple approach is to just copy the files in `include/` - either directly or with Git submodules - and include their path during compilation with, e.g., GCC's `-I`.
+This requires the external dependencies listed in [`extern/CMakeLists.txt`](extern/CMakeLists.txt), which also need to be made available during compilation.
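+For example, the compiler invocation might look something like the following sketch, where the `-I` paths and `my_analysis.cpp` are just placeholders for wherever each dependency's headers and your own source actually live:
+
+```sh
+# Illustrative only: point each -I flag at the real location of that dependency's headers.
+g++ -std=c++17 -O2 \
+    -I/path/to/scran_pca/include \
+    -I/path/to/tatami/include \
+    -I/path/to/tatami_stats/include \
+    -I/path/to/scran_blocks/include \
+    -I/path/to/irlba/include \
+    -I/path/to/eigen \
+    -c my_analysis.cpp
+```
+
+No extra linker flags should be needed, as all of these libraries are header-only.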
diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in new file mode 100644 index 0000000..5e2e334 --- /dev/null +++ b/cmake/Config.cmake.in @@ -0,0 +1,10 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(tatami_tatami 3.0.0 CONFIG REQUIRED) +find_dependency(tatami_tatami_stats 1.0.0 CONFIG REQUIRED) +find_dependency(libscran_scran_blocks 1.0.0 CONFIG REQUIRED) +find_dependency(ltla_irlba 2.0.0 CONFIG REQUIRED) +find_dependency(Eigen3 3.4.0 REQUIRED NO_MODULE) + +include("${CMAKE_CURRENT_LIST_DIR}/libscran_scran_pcaTargets.cmake") diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 97c8f7f..33aee6d 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -25,13 +25,13 @@ FetchContent_Declare( ) FetchContent_Declare( - scran_core_utils - GIT_REPOSITORY https://github.com/libscran/core_utils - GIT_TAG master + scran_blocks + GIT_REPOSITORY https://github.com/libscran/scran_blocks + GIT_TAG master # ^1.0.0 ) FetchContent_MakeAvailable(tatami) FetchContent_MakeAvailable(tatami_stats) FetchContent_MakeAvailable(irlba) FetchContent_MakeAvailable(eigen) -FetchContent_MakeAvailable(scran_core_utils) +FetchContent_MakeAvailable(scran_blocks) diff --git a/include/scran/scran.hpp b/include/scran/scran.hpp deleted file mode 100644 index e71314e..0000000 --- a/include/scran/scran.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef SCRAN_SCRAN_HPP -#define SCRAN_SCRAN_HPP - -/** - * @file scran.hpp - * @brief Methods for single-cell analysis. - */ - -/** - * @namespace scran - * @brief Methods for single-cell analysis. - */ -namespace scran {} - -#endif diff --git a/include/scran/blocked_pca.hpp b/include/scran_pca/blocked_pca.hpp similarity index 83% rename from include/scran/blocked_pca.hpp rename to include/scran_pca/blocked_pca.hpp index c9a5baa..5528bf3 100644 --- a/include/scran/blocked_pca.hpp +++ b/include/scran_pca/blocked_pca.hpp @@ -1,5 +1,5 @@ -#ifndef SCRAN_BLOCKED_PCA_HPP -#define SCRAN_BLOCKED_PCA_HPP +#ifndef SCRAN_PCA_BLOCKED_PCA_HPP +#define SCRAN_PCA_BLOCKED_PCA_HPP #include "tatami/tatami.hpp" #include "irlba/irlba.hpp" @@ -9,8 +9,8 @@ #include #include -#include "block_weights.hpp" -#include "pca_utils.hpp" +#include "scran_blocks/scran_blocks.hpp" +#include "utils.hpp" /** * @file blocked_pca.hpp @@ -18,47 +18,16 @@ * @brief Perform PCA on residuals after regressing out a blocking factor. */ -namespace scran { +namespace scran_pca { /** - * @namespace scran::blocked_pca - * @brief Perform PCA on residuals after regressing out a blocking factor. - * - * In the presence of a blocking factor (e.g., batches, samples), we want to ensure that the PCA is not driven by uninteresting differences between blocks. - * To achieve this, `blocked_pca::compute()` centers the expression of each gene in each blocking level and uses the residuals for PCA. - * The gene-gene covariance matrix will thus focus on variation within each batch, - * ensuring that the top rotation vectors/principal components capture biological heterogeneity instead of inter-block differences. - * Internally, `blocked_pca::compute()` defers the residual calculation until the matrix multiplication steps within [IRLBA](https://github.com/LTLA/CppIrlba). - * This yields the same results as the naive calculation of residuals but is much faster as it can take advantage of efficient sparse operations. - * - * By default, the principal components are computed from the (conceptual) matrix of residuals. 
- * This yields a low-dimensional space where all inter-block differences have been removed, - * assuming that all blocks have the same composition and the inter-block differences are consistent for all cell subpopulations. - * Under these assumptions, we could use these components for downstream analysis without any concern for block-wise effects. - * In practice, these assumptions do not hold and more sophisticated batch correction methods like [MNN correction](https://github.com/LTLA/CppMnnCorrect) are required. - * Some of these methods accept a low-dimensional embedding of cells as input, which can be created by `blocked_pca::compute()` with `Options::components_from_residuals = false`. - * In this mode, only the rotation vectors are computed from the residuals. - * The original expression values for each cell are then projected onto the associated subspace to obtain PC coordinates that can be used for further batch correction. - * This approach aims to preserve the benefits of blocking to focus on intra-block biology instead of inter-block differences, - * without making strong assumptions about the nature of those differences. - * - * If one batch has many more cells than the others, it will dominate the PCA by driving the axes of maximum variance. - * This may mask interesting aspects of variation in the smaller batches. - * To mitigate this, we scale each batch in inverse proportion to its size (see `Options::block_weight_policy`). - * This ensures that each batch contributes equally to the (conceptual) gene-gene covariance matrix and thus the rotation vectors. - * The vector of residuals for each cell (or the original expression values, if `Options::components_from_residuals = false`) - * is then projected to the subspace defined by these rotation vectors to obtain that cell's PC coordinates. - */ -namespace blocked_pca { - -/** - * @brief Options for `compute()`. + * @brief BlockedPcaOptions for `blocked_pca()`. */ -struct Options { +struct BlockedPcaOptions { /** * @cond */ - Options() { + BlockedPcaOptions() { irlba_options.cap_number = true; } /** @@ -80,13 +49,13 @@ struct Options { /** * Policy to use for weighting batches of different size. */ - block_weights::Policy block_weight_policy = block_weights::Policy::VARIABLE; + scran_blocks::WeightPolicy block_weight_policy = scran_blocks::WeightPolicy::VARIABLE; /** * Parameters for the variable block weights. - * Only used when `Options::block_weight_policy = block_weights::Policy::VARIABLE`. + * Only used when `BlockedPcaOptions::block_weight_policy = scran_blocks::WeightPolicy::VARIABLE`. */ - block_weights::VariableParameters variable_block_weight_parameters; + scran_blocks::VariableWeightParameters variable_block_weight_parameters; /** * Compute the principal components from the residuals. @@ -138,12 +107,12 @@ template BlockingDetails compute_blocking_details( Index_ ncells, const Block_* block, - block_weights::Policy block_weight_policy, - const block_weights::VariableParameters& variable_block_weight_parameters) + scran_blocks::WeightPolicy block_weight_policy, + const scran_blocks::VariableWeightParameters& variable_block_weight_parameters) { BlockingDetails output; output.block_size = tatami_stats::tabulate_groups(block, ncells); - if (block_weight_policy == block_weights::Policy::NONE) { + if (block_weight_policy == scran_blocks::WeightPolicy::NONE) { return output; } @@ -162,8 +131,8 @@ BlockingDetails compute_blocking_details( // 'compute_blockwise_mean_and_variance*()' functions. 
if (bsize) { typename EigenVector_::Scalar block_weight = 1; - if (block_weight_policy == block_weights::Policy::VARIABLE) { - block_weight = block_weights::compute_variable(bsize, variable_block_weight_parameters); + if (block_weight_policy == scran_blocks::WeightPolicy::VARIABLE) { + block_weight = scran_blocks::compute_variable_weight(bsize, variable_block_weight_parameters); } element_weight[i] = block_weight / bsize; @@ -198,7 +167,7 @@ BlockingDetails compute_blocking_details( *****************************************************************/ template -void compute_sparse_mean_and_variance( +void compute_sparse_mean_and_variance_blocked( Num_ num_nonzero, const Value_* values, const Index_* indices, @@ -257,12 +226,12 @@ void compute_sparse_mean_and_variance( // COMMENT ON DENOMINATOR: // If we're not dealing with weights, we compute the actual sample // variance for easy interpretation (and to match up with the - // per-PC calculations in pca_utils::clean_up). + // per-PC calculations in internal::clean_up). // // If we're dealing with weights, the concept of the sample variance // becomes somewhat weird, but we just use the same denominator for // consistency in clean_up_projected. Magnitude doesn't matter when - // scaling for pca_utils::process_scale_vector anyway. + // scaling for internal::process_scale_vector anyway. variance /= num_all - 1; } @@ -288,7 +257,7 @@ void compute_blockwise_mean_and_variance_realized_sparse( for (size_t c = start, end = start + length; c < end; ++c, mptr += nblocks) { auto offset = ptrs[c]; - compute_sparse_mean_and_variance( + compute_sparse_mean_and_variance_blocked( ptrs[c + 1] - offset, values.data() + offset, indices.data() + offset, @@ -304,7 +273,7 @@ void compute_blockwise_mean_and_variance_realized_sparse( } template -void compute_dense_mean_and_variance( +void compute_dense_mean_and_variance_blocked( Num_ number, const Value_* values, const Block_* block, @@ -363,14 +332,14 @@ void compute_blockwise_mean_and_variance_realized_dense( auto mptr = centers.data() + static_cast(start) * nblocks; // cast to avoid overflow. for (size_t c = start, end = start + length; c < end; ++c, ptr += ncells, mptr += nblocks) { - compute_dense_mean_and_variance(ncells, ptr, block, block_details, mptr, variances[c]); + compute_dense_mean_and_variance_blocked(ncells, ptr, block, block_details, mptr, variances[c]); } }, emat.cols(), nthreads); } template void compute_blockwise_mean_and_variance_tatami( - const tatami::Matrix* mat, // this should have genes in the rows! + const tatami::Matrix& mat, // this should have genes in the rows! const Block_* block, const BlockingDetails& block_details, EigenMatrix_& centers, @@ -379,10 +348,10 @@ void compute_blockwise_mean_and_variance_tatami( { const auto& block_size = block_details.block_size; size_t nblocks = block_size.size(); - Index_ ngenes = mat->nrow(); - Index_ ncells = mat->ncol(); + Index_ ngenes = mat.nrow(); + Index_ ncells = mat.ncol(); - if (mat->prefer_rows()) { + if (mat.prefer_rows()) { tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void { static_assert(!EigenMatrix_::IsRowMajor); auto mptr = centers.data() + static_cast(start) * nblocks; // cast to avoid overflow. 
@@ -390,18 +359,18 @@ void compute_blockwise_mean_and_variance_tatami( std::vector vbuffer(ncells); - if (mat->is_sparse()) { + if (mat.is_sparse()) { std::vector ibuffer(ncells); - auto ext = tatami::consecutive_extractor(mat, true, start, length); + auto ext = tatami::consecutive_extractor(&mat, true, start, length); for (Index_ r = start, end = start + length; r < end; ++r, mptr += nblocks) { auto range = ext->fetch(vbuffer.data(), ibuffer.data()); - compute_sparse_mean_and_variance(range.number, range.value, range.index, block, block_details, mptr, variances[r], block_copy, ncells); + compute_sparse_mean_and_variance_blocked(range.number, range.value, range.index, block, block_details, mptr, variances[r], block_copy, ncells); } } else { - auto ext = tatami::consecutive_extractor(mat, true, start, length); + auto ext = tatami::consecutive_extractor(&mat, true, start, length); for (Index_ r = start, end = start + length; r < end; ++r, mptr += nblocks) { auto ptr = ext->fetch(vbuffer.data()); - compute_dense_mean_and_variance(ncells, ptr, block, block_details, mptr, variances[r]); + compute_dense_mean_and_variance_blocked(ncells, ptr, block, block_details, mptr, variances[r]); } } }, ngenes, nthreads); @@ -433,7 +402,7 @@ void compute_blockwise_mean_and_variance_tatami( std::vector vbuffer(length); - if (mat->is_sparse()) { + if (mat.is_sparse()) { std::vector > running; running.reserve(nblocks); for (size_t b = 0; b < nblocks; ++b) { @@ -441,7 +410,7 @@ void compute_blockwise_mean_and_variance_tatami( } std::vector ibuffer(length); - auto ext = tatami::consecutive_extractor(mat, false, static_cast(0), ncells, start, length); + auto ext = tatami::consecutive_extractor(&mat, false, static_cast(0), ncells, start, length); for (Index_ c = 0; c < ncells; ++c) { auto range = ext->fetch(vbuffer.data(), ibuffer.data()); running[block[c]].add(range.value, range.index, range.number); @@ -458,7 +427,7 @@ void compute_blockwise_mean_and_variance_tatami( running.emplace_back(length, re_centers[b].data(), re_variances[b].data(), /* skip_nan = */ false); } - auto ext = tatami::consecutive_extractor(mat, false, static_cast(0), ncells, start, length); + auto ext = tatami::consecutive_extractor(&mat, false, static_cast(0), ncells, start, length); for (Index_ c = 0; c < ncells; ++c) { auto ptr = ext->fetch(vbuffer.data()); running[block[c]].add(ptr); @@ -563,18 +532,18 @@ inline void project_matrix_realized_sparse( template inline void project_matrix_transposed_tatami( - const tatami::Matrix* mat, // genes in rows, cells in columns + const tatami::Matrix& mat, // genes in rows, cells in columns EigenMatrix_& components, const EigenMatrix_& scaled_rotation, // genes in rows, dims in columns int nthreads) { size_t rank = scaled_rotation.cols(); - auto ngenes = mat->nrow(), ncells = mat->ncol(); + auto ngenes = mat.nrow(), ncells = mat.ncol(); typedef typename EigenMatrix_::Scalar Scalar; components.resize(rank, ncells); // used a transposed version for more cache efficiency. 
- if (mat->prefer_rows()) { + if (mat.prefer_rows()) { tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void { static_assert(!EigenMatrix_::IsRowMajor); auto vptr = scaled_rotation.data(); @@ -586,9 +555,9 @@ inline void project_matrix_transposed_tatami( local_buffers.emplace_back(length); } - if (mat->is_sparse()) { + if (mat.is_sparse()) { std::vector ibuffer(length); - auto ext = tatami::consecutive_extractor(mat, true, static_cast(0), ngenes, start, length); + auto ext = tatami::consecutive_extractor(&mat, true, static_cast(0), ngenes, start, length); for (Index_ g = 0; g < ngenes; ++g) { auto range = ext->fetch(vbuffer.data(), ibuffer.data()); for (size_t r = 0; r < rank; ++r) { @@ -601,7 +570,7 @@ inline void project_matrix_transposed_tatami( } } else { - auto ext = tatami::consecutive_extractor(mat, true, static_cast(0), ngenes, start, length); + auto ext = tatami::consecutive_extractor(&mat, true, static_cast(0), ngenes, start, length); for (Index_ g = 0; g < ngenes; ++g) { auto ptr = ext->fetch(vbuffer.data()); for (size_t r = 0; r < rank; ++r) { @@ -628,9 +597,9 @@ inline void project_matrix_transposed_tatami( static_assert(!EigenMatrix_::IsRowMajor); auto optr = components.data() + static_cast(start) * rank; // cast to avoid overflow. - if (mat->is_sparse()) { + if (mat.is_sparse()) { std::vector ibuffer(ngenes); - auto ext = tatami::consecutive_extractor(mat, false, start, length); + auto ext = tatami::consecutive_extractor(&mat, false, start, length); for (Index_ c = start, end = start + length; c < end; ++c, optr += rank) { auto range = ext->fetch(vbuffer.data(), ibuffer.data()); @@ -646,7 +615,7 @@ inline void project_matrix_transposed_tatami( } } else { - auto ext = tatami::consecutive_extractor(mat, false, start, length); + auto ext = tatami::consecutive_extractor(&mat, false, start, length); for (Index_ c = start, end = start + length; c < end; ++c, optr += rank) { auto ptr = ext->fetch(vbuffer.data()); static_assert(!EigenMatrix_::IsRowMajor); @@ -776,12 +745,12 @@ class ResidualWrapper { **************************/ template -void run_general( - const tatami::Matrix* mat, +void run_blocked( + const tatami::Matrix& mat, const Block_* block, const BlockingDetails& block_details, int rank, - const Options& options, + const BlockedPcaOptions& options, EigenMatrix_& components, EigenMatrix_& rotation, EigenVector_& variance_explained, @@ -789,22 +758,22 @@ void run_general( EigenVector_& scale_v, typename EigenVector_::Scalar& total_var) { - Index_ ngenes = mat->nrow(), ncells = mat->ncol(); + Index_ ngenes = mat.nrow(), ncells = mat.ncol(); auto emat = [&]{ if constexpr(!realize_matrix_) { - return pca_utils::TransposedTatamiWrapper(mat, options.num_threads); + return internal::TransposedTatamiWrapper(mat, options.num_threads); } else if constexpr(sparse_) { // 'extracted' contains row-major contents... but we implicitly transpose it to CSC with genes in columns. - auto extracted = tatami::retrieve_compressed_sparse_contents(mat, /* row = */ true, /* two_pass = */ false, /* threads = */ options.num_threads); + auto extracted = tatami::retrieve_compressed_sparse_contents(&mat, /* row = */ true, /* two_pass = */ false, /* threads = */ options.num_threads); return irlba::ParallelSparseMatrix(ncells, ngenes, std::move(extracted.value), std::move(extracted.index), std::move(extracted.pointers), true, options.num_threads); } else { // Perform an implicit transposition by performing a row-major extraction into a column-major transposed matrix. 
EigenMatrix_ emat(ncells, ngenes); static_assert(!EigenMatrix_::IsRowMajor); - tatami::convert_to_dense(mat, /* row_major = */ true, emat.data(), options.num_threads); + tatami::convert_to_dense(&mat, /* row_major = */ true, emat.data(), options.num_threads); return emat; } }(); @@ -819,7 +788,7 @@ void run_general( } else { compute_blockwise_mean_and_variance_realized_dense(emat, block, block_details, center_m, scale_v, options.num_threads); } - total_var = pca_utils::process_scale_vector(options.scale, scale_v); + total_var = internal::process_scale_vector(options.scale, scale_v); ResidualWrapper centered(emat, block, center_m); @@ -867,7 +836,7 @@ void run_general( } if (options.components_from_residuals) { - pca_utils::clean_up(mat->ncol(), components, variance_explained); + internal::clean_up(mat.ncol(), components, variance_explained); if (options.transpose) { components.adjointInPlace(); } @@ -892,54 +861,24 @@ void run_general( } } -template -void dispatch( - const tatami::Matrix* mat, - const Block_* block, - int rank, - const Options& options, - EigenMatrix_& components, - EigenMatrix_& rotation, - EigenVector_& variance_explained, - EigenMatrix_& center_m, - EigenVector_& scale_v, - double& total_var) -{ - irlba::EigenThreadScope t(options.num_threads); - auto bdetails = compute_blocking_details(mat->ncol(), block, options.block_weight_policy, options.variable_block_weight_parameters); - - if (mat->sparse()) { - if (options.realize_matrix) { - run_general(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); - } else { - run_general(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); - } - } else { - if (options.realize_matrix) { - run_general(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); - } else { - run_general(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); - } - } -} - } /** * @endcond */ /** - * @brief Results of the PCA on the residuals. + * @brief Results of `blocked_pca()`. * - * Instances should generally be constructed by the `compute()` function. + * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. + * @tparam EigenVector_ A floating-point `Eigen::Vector` class. */ template -struct Results { +struct BlockedPcaResults { /** * Matrix of principal components. * By default, each row corresponds to a PC while each column corresponds to a cell in the input matrix. - * If `Options::transpose = false`, rows are cells instead. - * The number of PCs is determined by the `rank` used in `compute()`. + * If `BlockedPcaOptions::transpose = false`, rows are cells instead. + * The number of PCs is determined by the `rank` used in `blocked_pca()`. */ EigenMatrix_ components; @@ -950,7 +889,7 @@ struct Results { EigenVector_ variance_explained; /** - * Total variance of the dataset (possibly after scaling, if `Options::scale = true`). + * Total variance of the dataset (possibly after scaling, if `BlockedPcaOptions::scale = true`). * This can be used to divide `variance_explained` to obtain the percentage of variance explained. */ typename EigenVector_::Scalar total_variance = 0; @@ -958,7 +897,7 @@ struct Results { /** * Rotation matrix. * Each row corresponds to a gene while each column corresponds to a PC. - * The number of PCs is determined by the `rank` used in `compute()`. 
+ * The number of PCs is determined by the `rank` used in `blocked_pca()`. */ EigenMatrix_ rotation; @@ -970,14 +909,89 @@ struct Results { EigenMatrix_ center; /** - * Scaling vector, only returned if `Options::scale = true`. - * Each entry corresponds to a row in the input matrix and contains the scaling factor used to divide that gene's values if `Options::scale = true`. + * Scaling vector, only returned if `BlockedPcaOptions::scale = true`. + * Each entry corresponds to a row in the input matrix and contains the scaling factor used to divide that gene's values if `BlockedPcaOptions::scale = true`. */ EigenVector_ scale; }; /** - * Run PCA on the residuals after regressing out a blocking factor from the rows of a gene-by-cell matrix. + * In the presence of a blocking factor (e.g., batches, samples), we want to ensure that the PCA is not driven by uninteresting differences between blocks. + * To achieve this, `blocked_pca()` centers the expression of each gene in each blocking level and uses the residuals for PCA. + * The gene-gene covariance matrix will thus focus on variation within each batch, + * ensuring that the top rotation vectors/principal components capture biological heterogeneity instead of inter-block differences. + * Internally, `blocked_pca()` defers the residual calculation until the matrix multiplication steps within [IRLBA](https://github.com/LTLA/CppIrlba). + * This yields the same results as the naive calculation of residuals but is much faster as it can take advantage of efficient sparse operations. + * + * By default, the principal components are computed from the (conceptual) matrix of residuals. + * This yields a low-dimensional space where all inter-block differences have been removed, + * assuming that all blocks have the same composition and the inter-block differences are consistent for all cell subpopulations. + * Under these assumptions, we could use these components for downstream analysis without any concern for block-wise effects. + * In practice, these assumptions do not hold and more sophisticated batch correction methods like [MNN correction](https://github.com/LTLA/CppMnnCorrect) are required. + * Some of these methods accept a low-dimensional embedding of cells as input, which can be created by `blocked_pca()` with `BlockedPcaOptions::components_from_residuals = false`. + * In this mode, only the rotation vectors are computed from the residuals. + * The original expression values for each cell are then projected onto the associated subspace to obtain PC coordinates that can be used for further batch correction. + * This approach aims to preserve the benefits of blocking to focus on intra-block biology instead of inter-block differences, + * without making strong assumptions about the nature of those differences. + * + * If one batch has many more cells than the others, it will dominate the PCA by driving the axes of maximum variance. + * This may mask interesting aspects of variation in the smaller batches. + * To mitigate this, we scale each batch in inverse proportion to its size (see `BlockedPcaOptions::block_weight_policy`). + * This ensures that each batch contributes equally to the (conceptual) gene-gene covariance matrix and thus the rotation vectors. + * The vector of residuals for each cell (or the original expression values, if `BlockedPcaOptions::components_from_residuals = false`) + * is then projected to the subspace defined by these rotation vectors to obtain that cell's PC coordinates. 
+ * + * @tparam Value_ Type of the matrix data. + * @tparam Index_ Integer type for the indices. + * @tparam Block_ Integer type for the blocking factor. + * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. + * @tparam EigenVector_ A floating-point `Eigen::Vector` class. + * + * @param[in] mat Input expression matrix. + * Columns should contain cells while rows should contain genes. + * @param[in] block Pointer to an array of length equal to the number of cells, + * containing the block assignment for each cell. + * Each assignment should be an integer in \f$[0, N)\f$ where \f$N\f$ is the number of blocks. + * @param rank Number of PCs to compute. + * This should be no greater than the maximum number of PCs, i.e., the smaller dimension of the input matrix; + * otherwise, only the maximum number of PCs will be reported in the results. + * @param options Further options. + * @param[out] output On output, the results of the PCA on the residuals. + * This can be re-used across multiple calls to `blocked_pca()`. + */ +template +void blocked_pca(const tatami::Matrix& mat, const Block_* block, int rank, const BlockedPcaOptions& options, BlockedPcaResults& output) { + irlba::EigenThreadScope t(options.num_threads); + auto bdetails = internal::compute_blocking_details(mat.ncol(), block, options.block_weight_policy, options.variable_block_weight_parameters); + + EigenMatrix_& components = output.components; + EigenMatrix_& rotation = output.rotation; + EigenVector_& variance_explained = output.variance_explained; + EigenMatrix_& center_m = output.center; + EigenVector_& scale_v = output.scale; + auto& total_var = output.total_variance; + + if (mat.sparse()) { + if (options.realize_matrix) { + internal::run_blocked(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); + } else { + internal::run_blocked(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); + } + } else { + if (options.realize_matrix) { + internal::run_blocked(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); + } else { + internal::run_blocked(mat, block, bdetails, rank, options, components, rotation, variance_explained, center_m, scale_v, total_var); + } + } + + if (!options.scale) { + output.scale = EigenVector_(); + } +} + +/** + * Overload of `blocked_pca()` that allocates memory for the output. * * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. * @tparam EigenVector_ A floating-point `Eigen::Vector` class. @@ -985,7 +999,7 @@ struct Results { * @tparam Index_ Integer type for the indices. * @tparam Block_ Integer type for the blocking factor. * - * @param[in] mat Pointer to the input matrix. + * @param[in] mat Input expression matrix. * Columns should contain cells while rows should contain genes. * @param[in] block Pointer to an array of length equal to the number of cells, * containing the block assignment for each cell. @@ -995,17 +1009,15 @@ struct Results { * otherwise, only the maximum number of PCs will be reported in the results. * @param options Further options. * - * @return The results of the PCA on the residuals. + * @return Results of the PCA on the residuals. 
*/ -template -Results compute(const tatami::Matrix* mat, const Block_* block, int rank, const Options& options) { - Results output; - internal::dispatch(mat, block, rank, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance); +template +BlockedPcaResults blocked_pca(const tatami::Matrix& mat, const Block_* block, int rank, const BlockedPcaOptions& options) { + BlockedPcaResults output; + blocked_pca(mat, block, rank, options, output); return output; } } -} - #endif diff --git a/include/scran_pca/scran_pca.hpp b/include/scran_pca/scran_pca.hpp new file mode 100644 index 0000000..aa22b3c --- /dev/null +++ b/include/scran_pca/scran_pca.hpp @@ -0,0 +1,18 @@ +#ifndef SCRAN_PCA_HPP +#define SCRAN_PCA_HPP + +#include "simple_pca.hpp" +#include "blocked_pca.hpp" + +/** + * @file scran_pca.hpp + * @brief Principal component analysis on single-cell data + */ + +/** + * @namespace scran_pca + * @brief Principal component analysis on single-cell data + */ +namespace scran_pca {} + +#endif diff --git a/include/scran/simple_pca.hpp b/include/scran_pca/simple_pca.hpp similarity index 73% rename from include/scran/simple_pca.hpp rename to include/scran_pca/simple_pca.hpp index d535294..8b19136 100644 --- a/include/scran/simple_pca.hpp +++ b/include/scran_pca/simple_pca.hpp @@ -1,5 +1,5 @@ -#ifndef SCRAN_SIMPLE_PCA_HPP -#define SCRAN_SIMPLE_PCA_HPP +#ifndef SCRAN_PCA_SIMPLE_PCA_HPP +#define SCRAN_PCA_SIMPLE_PCA_HPP #include "tatami/tatami.hpp" #include "tatami_stats/tatami_stats.hpp" @@ -10,34 +10,23 @@ #include #include -#include "pca_utils.hpp" +#include "utils.hpp" /** * @file simple_pca.hpp * @brief Perform a simple PCA on a gene-by-cell matrix. */ -namespace scran { +namespace scran_pca { /** - * @namespace scran::simple_pca - * @brief Perform a simple PCA on a gene-cell matrix. - * - * Principal components analysis (PCA) is a helpful technique for data compression and denoising. - * The idea is that the earlier PCs capture most of the systematic biological variation while the later PCs capture random technical noise. - * Thus, we can reduce the size of the data and eliminate noise by only using the earlier PCs for further analyses. - * Most practitioners will keep the first 10-50 PCs, though the exact choice is fairly arbitrary. - */ -namespace simple_pca { - -/** - * @brief Options for `compute()`. + * @brief Options for `simple_pca()`. 
*/ -struct Options { +struct SimplePcaOptions { /** * @cond */ - Options() { + SimplePcaOptions() { irlba_options.cap_number = true; } /** @@ -79,13 +68,13 @@ struct Options { namespace internal { template -void compute_row_means_and_variances(const tatami::Matrix* mat, int num_threads, EigenVector_& center_v, EigenVector_& scale_v) { - if (mat->prefer_rows()) { +void compute_row_means_and_variances(const tatami::Matrix& mat, int num_threads, EigenVector_& center_v, EigenVector_& scale_v) { + if (mat.prefer_rows()) { tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void { tatami::Options opt; opt.sparse_extract_index = false; - auto ext = tatami::consecutive_extractor(mat, true, start, length, opt); - auto ncells = mat->ncol(); + auto ext = tatami::consecutive_extractor(&mat, true, start, length, opt); + auto ncells = mat.ncol(); std::vector vbuffer(ncells); for (Index_ r = start, end = start + length; r < end; ++r) { @@ -101,13 +90,13 @@ void compute_row_means_and_variances(const tatami::Matrix* mat, center_v.coeffRef(r) = results.first; scale_v.coeffRef(r) = results.second; } - }, mat->nrow(), num_threads); + }, mat.nrow(), num_threads); } else { tatami::parallelize([&](size_t t, Index_ start, Index_ length) -> void { tatami::Options opt; - auto ncells = mat->ncol(); - auto ext = tatami::consecutive_extractor(mat, false, 0, ncells, start, length, opt); + auto ncells = mat.ncol(); + auto ext = tatami::consecutive_extractor(&mat, false, 0, ncells, start, length, opt); typedef typename EigenVector_::Scalar Scalar; tatami_stats::LocalOutputBuffer cbuffer(t, start, length, center_v.data()); @@ -136,7 +125,7 @@ void compute_row_means_and_variances(const tatami::Matrix* mat, running.finish(); cbuffer.transfer(); sbuffer.transfer(); - }, mat->nrow(), num_threads); + }, mat.nrow(), num_threads); } } @@ -144,7 +133,7 @@ template void run_irlba_deferred( const IrlbaMatrix_& mat, int rank, - const Options& options, + const SimplePcaOptions& options, EigenMatrix_& components, EigenMatrix_& rotation, EigenVector_& variance_explained, @@ -162,9 +151,9 @@ void run_irlba_deferred( template void run_sparse( - const tatami::Matrix* mat, + const tatami::Matrix& mat, int rank, - const Options& options, + const SimplePcaOptions& options, EigenMatrix_& components, EigenMatrix_& rotation, EigenVector_& variance_explained, @@ -172,21 +161,21 @@ void run_sparse( EigenVector_& scale_v, typename EigenVector_::Scalar& total_var) { - Index_ ngenes = mat->nrow(); + Index_ ngenes = mat.nrow(); center_v.resize(ngenes); scale_v.resize(ngenes); if (options.realize_matrix) { // 'extracted' contains row-major contents... auto extracted = tatami::retrieve_compressed_sparse_contents( - mat, + &mat, /* row = */ true, /* two_pass = */ false, /* threads = */ options.num_threads ); // But we effectively transpose it to CSC with genes in columns. 
- Index_ ncells = mat->ncol(); + Index_ ncells = mat.ncol(); irlba::ParallelSparseMatrix emat( ncells, ngenes, @@ -209,14 +198,14 @@ void run_sparse( } }, ngenes, options.num_threads); - total_var = pca_utils::process_scale_vector(options.scale, scale_v); + total_var = internal::process_scale_vector(options.scale, scale_v); run_irlba_deferred(emat, rank, options, components, rotation, variance_explained, center_v, scale_v); } else { compute_row_means_and_variances(mat, options.num_threads, center_v, scale_v); - total_var = pca_utils::process_scale_vector(options.scale, scale_v); + total_var = internal::process_scale_vector(options.scale, scale_v); run_irlba_deferred( - pca_utils::TransposedTatamiWrapper(mat, options.num_threads), + internal::TransposedTatamiWrapper(mat, options.num_threads), rank, options, components, @@ -230,9 +219,9 @@ void run_sparse( template void run_dense( - const tatami::Matrix* mat, + const tatami::Matrix& mat, int rank, - const Options& options, + const SimplePcaOptions& options, EigenMatrix_& components, EigenMatrix_& rotation, EigenVector_& variance_explained, @@ -240,18 +229,18 @@ void run_dense( EigenVector_& scale_v, typename EigenVector_::Scalar& total_var) { - Index_ ngenes = mat->nrow(); + Index_ ngenes = mat.nrow(); center_v.resize(ngenes); scale_v.resize(ngenes); if (options.realize_matrix) { // Create a matrix with genes in columns. - Index_ ncells = mat->ncol(); + Index_ ncells = mat.ncol(); EigenMatrix_ emat(ncells, ngenes); // If emat is row-major, we want to fill it with columns of 'mat', so row_major = false. // If emat is column-major, we want to fill it with rows of 'mat', so row_major = true. - tatami::convert_to_dense(mat, /* row_major = */ !emat.IsRowMajor, emat.data(), options.num_threads); + tatami::convert_to_dense(&mat, /* row_major = */ !emat.IsRowMajor, emat.data(), options.num_threads); center_v.array() = emat.array().colwise().sum(); if (ncells) { @@ -268,7 +257,7 @@ void run_dense( std::fill(scale_v.begin(), scale_v.end(), std::numeric_limits::quiet_NaN()); } - total_var = pca_utils::process_scale_vector(options.scale, scale_v); + total_var = internal::process_scale_vector(options.scale, scale_v); if (options.scale) { emat.array().rowwise() /= scale_v.adjoint().array(); } @@ -277,9 +266,9 @@ void run_dense( } else { compute_row_means_and_variances(mat, options.num_threads, center_v, scale_v); - total_var = pca_utils::process_scale_vector(options.scale, scale_v); + total_var = internal::process_scale_vector(options.scale, scale_v); run_irlba_deferred( - pca_utils::TransposedTatamiWrapper(mat, options.num_threads), + internal::TransposedTatamiWrapper(mat, options.num_threads), rank, options, components, @@ -291,51 +280,23 @@ void run_dense( } } -template -void dispatch( - const tatami::Matrix* mat, - int rank, - const Options& options, - EigenMatrix_& components, - EigenMatrix_& rotation, - EigenVector_& variance_explained, - EigenVector_& center_v, - EigenVector_& scale_v, - typename EigenVector_::Scalar& total_var) -{ - irlba::EigenThreadScope t(options.num_threads); - - if (mat->sparse()) { - run_sparse(mat, rank, options, components, rotation, variance_explained, center_v, scale_v, total_var); - } else { - run_dense(mat, rank, options, components, rotation, variance_explained, center_v, scale_v, total_var); - } - - pca_utils::clean_up(mat->ncol(), components, variance_explained); - if (options.transpose) { - components.adjointInPlace(); - } -} - } /** * @endcond */ /** - * @brief Container for the PCA results. 
+ * @brief Results of `simple_pca()`. * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. * @tparam EigenVector_ A floating-point `Eigen::Vector` class. - * - * Instances should be constructed by the `compute()` function. */ template -struct Results { +struct SimplePcaResults { /** * Matrix of principal components. * By default, each row corresponds to a PC while each column corresponds to a cell in the input matrix. - * If `Options::transpose = false`, rows are cells instead. - * The number of PCs is determined by the `rank` used in `compute()`. + * If `SimplePcaOptions::transpose = false`, rows are cells instead. + * The number of PCs is determined by the `rank` used in `simple_pca()`. */ EigenMatrix_ components; @@ -346,7 +307,7 @@ struct Results { EigenVector_ variance_explained; /** - * Total variance of the dataset (possibly after scaling, if `Options::scale = true`). + * Total variance of the dataset (possibly after scaling, if `SimplePcaOptions::scale = true`). * This can be used to divide `variance_explained` to obtain the percentage of variance explained. */ typename EigenVector_::Scalar total_variance = 0; @@ -354,7 +315,7 @@ struct Results { /** * Rotation matrix. * Each row corresponds to a feature while each column corresponds to a PC. - * The number of PCs is determined by the `rank` used in `compute()`. + * The number of PCs is determined by the `rank` used in `simple_pca()`. */ EigenMatrix_ rotation; @@ -365,41 +326,74 @@ struct Results { EigenVector_ center; /** - * Scaling vector, only returned if `Options::scale = true`. - * Each entry corresponds to a row in the matrix and contains the scaling factor used to divide the feature values if `Options::scale = true`. + * Scaling vector, only returned if `SimplePcaOptions::scale = true`. + * Each entry corresponds to a row in the matrix and contains the scaling factor used to divide the feature values if `SimplePcaOptions::scale = true`. */ EigenVector_ scale; }; /** - * Run PCA on an input gene-by-cell matrix. + * Principal components analysis (PCA) is a helpful technique for data compression and denoising. + * The idea is that the earlier PCs capture most of the systematic biological variation while the later PCs capture random technical noise. + * Thus, we can reduce the size of the data and eliminate noise by only using the earlier PCs for further analyses. + * Most practitioners will keep the first 10-50 PCs, though the exact choice is fairly arbitrary. * * - * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. - * @tparam EigenVector_ A floating-point `Eigen::Vector` class. * @tparam Value_ Type of the matrix data. * @tparam Index_ Integer type for the indices. + * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. + * @tparam EigenVector_ A floating-point `Eigen::Vector` class. * - * @param[in] mat Pointer to the input matrix. + * @param[in] mat The input expression matrix. * Columns should contain cells while rows should contain genes. * @param rank Number of PCs to compute. * This should be no greater than the maximum number of PCs, i.e., the smaller dimension of the input matrix; * otherwise, only the maximum number of PCs will be reported in the results. * @param options Further options. - * - * @return The results of the PCA on `mat`. + * @param[out] output On output, the results of the PCA on `mat`. + * This can be re-used across multiple calls to `simple_pca()`. 
*/ -template -Results compute(const tatami::Matrix* mat, int rank, const Options& options) { - Results output; - internal::dispatch(mat, rank, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance); +template +void simple_pca(const tatami::Matrix& mat, int rank, const SimplePcaOptions& options, SimplePcaResults& output) { + irlba::EigenThreadScope t(options.num_threads); + + if (mat.sparse()) { + internal::run_sparse(mat, rank, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance); + } else { + internal::run_dense(mat, rank, options, output.components, output.rotation, output.variance_explained, output.center, output.scale, output.total_variance); + } + + internal::clean_up(mat.ncol(), output.components, output.variance_explained); + if (options.transpose) { + output.components.adjointInPlace(); + } if (!options.scale) { output.scale = EigenVector_(); } - - return output; } +/** + * Overload of `simple_pca()` that allocates memory for the output. + * + * @tparam EigenMatrix_ A floating-point `Eigen::Matrix` class. + * @tparam EigenVector_ A floating-point `Eigen::Vector` class. + * @tparam Value_ Type of the matrix data. + * @tparam Index_ Integer type for the indices. + * + * @param[in] mat The input expression matrix. + * Columns should contain cells while rows should contain genes. + * @param rank Number of PCs to compute. + * This should be no greater than the maximum number of PCs, i.e., the smaller dimension of the input matrix; + * otherwise, only the maximum number of PCs will be reported in the results. + * @param options Further options. + * + * @return Results of the PCA. + */ +template +SimplePcaResults simple_pca(const tatami::Matrix& mat, int rank, const SimplePcaOptions& options) { + SimplePcaResults output; + simple_pca(mat, rank, options, output); + return output; } } diff --git a/include/scran/pca_utils.hpp b/include/scran_pca/utils.hpp similarity index 96% rename from include/scran/pca_utils.hpp rename to include/scran_pca/utils.hpp index 5f486a4..dc75fc5 100644 --- a/include/scran/pca_utils.hpp +++ b/include/scran_pca/utils.hpp @@ -9,9 +9,9 @@ #include "tatami/tatami.hpp" #include "tatami_stats/tatami_stats.hpp" -namespace scran { +namespace scran_pca { -namespace pca_utils { +namespace internal { template auto process_scale_vector(bool scale, EigenVector_& scale_v) { @@ -43,12 +43,12 @@ void clean_up(size_t NC, EigenMatrix_& U, EigenVector_& D) { template class TransposedTatamiWrapper { public: - TransposedTatamiWrapper(const tatami::Matrix* mat, int num_threads) : - my_mat(mat), - my_nrow(mat->nrow()), - my_ncol(mat->ncol()), - my_is_sparse(mat->is_sparse()), - my_prefer_rows(mat->prefer_rows()), + TransposedTatamiWrapper(const tatami::Matrix& mat, int num_threads) : + my_mat(&mat), + my_nrow(mat.nrow()), + my_ncol(mat.ncol()), + my_is_sparse(mat.is_sparse()), + my_prefer_rows(mat.prefer_rows()), my_num_threads(num_threads) {} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 766d06d..57b42fc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,20 +1,10 @@ include(FetchContent) FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/1d17ea141d2c11b8917d2c7d029f1c4e2b9769b2.zip -) - -FetchContent_Declare( - scran_test_utils - GIT_REPOSITORY https://github.com/libscran/test_utils + scran_tests + GIT_REPOSITORY https://github.com/libscran/scran_tests 
GIT_TAG master ) - -# For Windows: Prevent overriding the parent project's compiler/linker settings -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_MakeAvailable(googletest) - -FetchContent_MakeAvailable(scran_test_utils) +FetchContent_MakeAvailable(scran_tests) enable_testing() @@ -35,9 +25,8 @@ macro(create_test name) target_link_libraries( ${name} - gtest_main - scran_principal_component_analysis - scran_test_utils + scran_pca + scran_tests ) target_compile_options(${name} PRIVATE -Wall -Wpedantic -Wextra) diff --git a/tests/src/blocked_pca.cpp b/tests/src/blocked_pca.cpp index 4522458..6032486 100644 --- a/tests/src/blocked_pca.cpp +++ b/tests/src/blocked_pca.cpp @@ -2,12 +2,12 @@ #include "compare_pcs.h" #include "utils.h" -#include "simulate_vector.h" +#include "scran_tests/scran_tests.hpp" #include "tatami/tatami.hpp" -#include "blocked_pca.hpp" -#include "simple_pca.hpp" +#include "scran_pca/blocked_pca.hpp" +#include "scran_pca/simple_pca.hpp" TEST(ResidualWrapperTest, EigenDense) { size_t NR = 30, NC = 10, NB = 3; @@ -29,7 +29,7 @@ TEST(ResidualWrapperTest, EigenDense) { } } - scran::blocked_pca::internal::ResidualWrapper blocked(thing, block.data(), centers); + scran_pca::internal::ResidualWrapper blocked(thing, block.data(), centers); auto realized = blocked.template realize(); // Trying in the normal orientation. @@ -101,12 +101,12 @@ TEST(ResidualWrapperTest, CustomSparse) { } irlba::ParallelSparseMatrix thing(NR, NC, std::move(values), std::move(indices), std::move(ptrs), /* column_major = */ true, 1); - scran::blocked_pca::internal::ResidualWrapper blocked(thing, block.data(), centers); + scran_pca::internal::ResidualWrapper blocked(thing, block.data(), centers); auto realized = blocked.template realize(); // Checking that the dense reference matches up. { - scran::blocked_pca::internal::ResidualWrapper blockedref(thing, block.data(), centers); + scran_pca::internal::ResidualWrapper blockedref(thing, block.data(), centers); auto realizedref = blockedref.template realize(); for (Eigen::Index i = 0; i < realizedref.cols(); ++i) { @@ -155,7 +155,14 @@ class BlockedPcaTestCore { static void assemble() { size_t nr = 121, nc = 155; - auto vec = simulate_sparse_vector(nr * nc, 0.1, /* lower = */ -10, /* upper = */ 10, /* seed = */ 123456); + auto vec = scran_tests::simulate_vector(nr * nc, [&]{ + scran_tests::SimulationParameters sparams; + sparams.density = 0.1; + sparams.lower = -10; + sparams.upper = 10; + sparams.seed = 123456; + return sparams; + }()); dense_row.reset(new tatami::DenseRowMatrix(nr, nc, std::move(vec))); return; } @@ -185,11 +192,11 @@ TEST_P(BlockedPcaBasicTest, BasicConsistency) { auto block = generate_blocks(dense_row->ncol(), nblocks); - scran::blocked_pca::Options opts; + scran_pca::BlockedPcaOptions opts; opts.scale = scale; opts.components_from_residuals = use_resids; - opts.block_weight_policy = scran::block_weights::Policy::NONE; - auto ref = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts); + opts.block_weight_policy = scran_blocks::WeightPolicy::NONE; + auto ref = scran_pca::blocked_pca(*dense_row, block.data(), rank, opts); if (nthreads == 1) { EXPECT_EQ(ref.components.rows(), rank); @@ -218,7 +225,7 @@ TEST_P(BlockedPcaBasicTest, BasicConsistency) { } else { opts.num_threads = nthreads; - auto res1 = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts); + auto res1 = scran_pca::blocked_pca(*dense_row, block.data(), rank, opts); // Results should be EXACTLY the same with parallelization. 
EXPECT_EQ(ref.components, res1.components); @@ -227,39 +234,39 @@ TEST_P(BlockedPcaBasicTest, BasicConsistency) { } // Checking that we get more-or-less the same results. - auto res2 = scran::blocked_pca::compute(dense_column.get(), block.data(), rank, opts); + auto res2 = scran_pca::blocked_pca(*dense_column, block.data(), rank, opts); expect_equal_pcs(ref.components, res2.components); expect_equal_vectors(ref.variance_explained, res2.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, res2.total_variance); - auto res3 = scran::blocked_pca::compute(sparse_row.get(), block.data(), rank, opts); + auto res3 = scran_pca::blocked_pca(*sparse_row, block.data(), rank, opts); expect_equal_pcs(ref.components, res3.components); expect_equal_vectors(ref.variance_explained, res3.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, res3.total_variance); - auto res4 = scran::blocked_pca::compute(sparse_column.get(), block.data(), rank, opts); + auto res4 = scran_pca::blocked_pca(*sparse_column, block.data(), rank, opts); expect_equal_pcs(ref.components, res4.components); expect_equal_vectors(ref.variance_explained, res4.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, res4.total_variance); // Checking that we get more-or-less the same results. opts.realize_matrix = false; - auto tres1 = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts); + auto tres1 = scran_pca::blocked_pca(*dense_row, block.data(), rank, opts); expect_equal_pcs(ref.components, tres1.components); expect_equal_vectors(ref.variance_explained, tres1.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, tres1.total_variance); - auto tres2 = scran::blocked_pca::compute(dense_column.get(), block.data(), rank, opts); + auto tres2 = scran_pca::blocked_pca(*dense_column, block.data(), rank, opts); expect_equal_pcs(ref.components, tres2.components); expect_equal_vectors(ref.variance_explained, tres2.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, tres2.total_variance); - auto tres3 = scran::blocked_pca::compute(sparse_row.get(), block.data(), rank, opts); + auto tres3 = scran_pca::blocked_pca(*sparse_row, block.data(), rank, opts); expect_equal_pcs(ref.components, tres3.components); expect_equal_vectors(ref.variance_explained, tres3.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, tres3.total_variance); - auto tres4 = scran::blocked_pca::compute(sparse_column.get(), block.data(), rank, opts); + auto tres4 = scran_pca::blocked_pca(*sparse_column, block.data(), rank, opts); expect_equal_pcs(ref.components, tres4.components); expect_equal_vectors(ref.variance_explained, tres4.variance_explained); EXPECT_FLOAT_EQ(ref.total_variance, tres4.total_variance); @@ -275,11 +282,11 @@ TEST_P(BlockedPcaBasicTest, WeightedConsistency) { auto block = generate_blocks(dense_row->ncol(), nblocks); - scran::blocked_pca::Options opts; + scran_pca::BlockedPcaOptions opts; opts.scale = scale; opts.components_from_residuals = use_resids; - opts.block_weight_policy = scran::block_weights::Policy::EQUAL; - auto ref = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts); + opts.block_weight_policy = scran_blocks::WeightPolicy::EQUAL; + auto ref = scran_pca::blocked_pca(*dense_row, block.data(), rank, opts); if (nthreads == 1) { are_pcs_centered(ref.components); @@ -304,7 +311,7 @@ TEST_P(BlockedPcaBasicTest, WeightedConsistency) { } else { opts.num_threads = nthreads; - auto res1 = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts); + auto res1 = 

         // Results should be EXACTLY the same with parallelization.
         EXPECT_EQ(ref.components, res1.components);
@@ -313,39 +320,39 @@ TEST_P(BlockedPcaBasicTest, WeightedConsistency) {
     }

     // Checking that we get more-or-less the same results.
-    auto res2 = scran::blocked_pca::compute(dense_column.get(), block.data(), rank, opts);
+    auto res2 = scran_pca::blocked_pca(*dense_column, block.data(), rank, opts);
     expect_equal_pcs(ref.components, res2.components);
     expect_equal_vectors(ref.variance_explained, res2.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res2.total_variance);

-    auto res3 = scran::blocked_pca::compute(sparse_row.get(), block.data(), rank, opts);
+    auto res3 = scran_pca::blocked_pca(*sparse_row, block.data(), rank, opts);
     expect_equal_pcs(ref.components, res3.components);
     expect_equal_vectors(ref.variance_explained, res3.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res3.total_variance);

-    auto res4 = scran::blocked_pca::compute(sparse_column.get(), block.data(), rank, opts);
+    auto res4 = scran_pca::blocked_pca(*sparse_column, block.data(), rank, opts);
     expect_equal_pcs(ref.components, res4.components);
     expect_equal_vectors(ref.variance_explained, res4.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res4.total_variance);

     // Checking that we get more-or-less the same results.
     opts.realize_matrix = false;

-    auto tres1 = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opts);
+    auto tres1 = scran_pca::blocked_pca(*dense_row, block.data(), rank, opts);
     expect_equal_pcs(ref.components, tres1.components);
     expect_equal_vectors(ref.variance_explained, tres1.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres1.total_variance);

-    auto tres2 = scran::blocked_pca::compute(dense_column.get(), block.data(), rank, opts);
+    auto tres2 = scran_pca::blocked_pca(*dense_column, block.data(), rank, opts);
     expect_equal_pcs(ref.components, tres2.components);
     expect_equal_vectors(ref.variance_explained, tres2.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres2.total_variance);

-    auto tres3 = scran::blocked_pca::compute(sparse_row.get(), block.data(), rank, opts);
+    auto tres3 = scran_pca::blocked_pca(*sparse_row, block.data(), rank, opts);
     expect_equal_pcs(ref.components, tres3.components);
     expect_equal_vectors(ref.variance_explained, tres3.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres3.total_variance);

-    auto tres4 = scran::blocked_pca::compute(sparse_column.get(), block.data(), rank, opts);
+    auto tres4 = scran_pca::blocked_pca(*sparse_column, block.data(), rank, opts);
     expect_equal_pcs(ref.components, tres4.components);
     expect_equal_vectors(ref.variance_explained, tres4.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres4.total_variance);
@@ -380,19 +387,19 @@ TEST_P(BlockedPcaMoreTest, VersusSimple) {
     int nblocks = std::get<3>(param);

     auto block = generate_blocks(dense_row->ncol(), nblocks);

-    scran::blocked_pca::Options opt;
+    scran_pca::BlockedPcaOptions opt;
     opt.scale = scale;
     opt.components_from_residuals = use_resids;
-    opt.block_weight_policy = scran::block_weights::Policy::NONE;
-    auto res1 = scran::blocked_pca::compute(dense_row.get(), block.data(), rank, opt);
+    opt.block_weight_policy = scran_blocks::WeightPolicy::NONE;
+    auto res1 = scran_pca::blocked_pca(*dense_row, block.data(), rank, opt);

-    scran::simple_pca::Options refopt;
+    scran_pca::SimplePcaOptions refopt;
     refopt.scale = scale;

     if (nblocks == 1) {
         // Checking that we get more-or-less the same results
         // from the vanilla PCA algorithm in the absence of blocks.
-        auto res2 = scran::simple_pca::compute(dense_row.get(), rank, refopt);
+        auto res2 = scran_pca::simple_pca(*dense_row, rank, refopt);

         expect_equal_pcs(res1.components, res2.components);
         expect_equal_vectors(res1.variance_explained, res2.variance_explained);
@@ -432,7 +439,7 @@ TEST_P(BlockedPcaMoreTest, VersusSimple) {
         }

         tatami::DenseColumnMatrix refmat(nr, nc, std::move(regressed));
-        auto res2 = scran::simple_pca::compute(&refmat, rank, refopt);
+        auto res2 = scran_pca::simple_pca(refmat, rank, refopt);

         expect_equal_rotation(res1.rotation, res2.rotation);
         expect_equal_vectors(res1.variance_explained, res2.variance_explained);
@@ -486,21 +493,27 @@ TEST_P(BlockedPcaWeightedTest, VersusReference) {
     std::vector blocking;
     size_t nr = 80, nc = 50;
     for (int b = 0; b < nblocks; ++b) {
-        auto vec = simulate_vector(nr * nc, /* lower = */ -10.0, /* upper = */ 10.0, /* seed = */ b + 100);
+        auto vec = scran_tests::simulate_vector(nr * nc, [&]{
+            scran_tests::SimulationParameters sparams;
+            sparams.lower = -10;
+            sparams.upper = 10;
+            sparams.seed = b + 100;
+            return sparams;
+        }());
         components.emplace_back(new tatami::DenseRowMatrix(nr, nc, std::move(vec)));
         blocking.insert(blocking.end(), nc, b);
     }

-    scran::blocked_pca::Options base_opt;
+    scran_pca::BlockedPcaOptions base_opt;
     base_opt.scale = scale;
     base_opt.components_from_residuals = use_resids;
     base_opt.num_threads = nthreads;

     auto combined = tatami::make_DelayedBind(components, false);
-    auto ref = scran::blocked_pca::compute(combined.get(), blocking.data(), rank, [&]{
+    auto ref = scran_pca::blocked_pca(*combined, blocking.data(), rank, [&]{
         auto opt = base_opt;
         opt.num_threads = 1; // using a single thread for a consistent reference.
-        opt.block_weight_policy = scran::block_weights::Policy::NONE;
+        opt.block_weight_policy = scran_blocks::WeightPolicy::NONE;
         return opt;
     }());
@@ -512,9 +525,9 @@ TEST_P(BlockedPcaWeightedTest, VersusReference) {
     // Checking that we get more-or-less the same results with equiweighting
     // when all blocks are of the same size.
     auto opt = base_opt;
-    opt.block_weight_policy = scran::block_weights::Policy::EQUAL;
+    opt.block_weight_policy = scran_blocks::WeightPolicy::EQUAL;

-    auto res1 = scran::blocked_pca::compute(combined.get(), blocking.data(), rank, opt);
+    auto res1 = scran_pca::blocked_pca(*combined, blocking.data(), rank, opt);
     res1.components.array() /= res1.components.norm();

     expect_equal_pcs(ref.components, res1.components);
@@ -536,12 +549,12 @@ TEST_P(BlockedPcaWeightedTest, VersusReference) {
     // which is equivalent to the total absence of re-weighting.
     {
         auto opt = base_opt;
-        opt.block_weight_policy = scran::block_weights::Policy::VARIABLE;
+        opt.block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
         opt.variable_block_weight_parameters.upper_bound = 1000000;
-        auto res2 = scran::blocked_pca::compute(expanded.get(), expanded_block.data(), rank, opt);
+        auto res2 = scran_pca::blocked_pca(*expanded, expanded_block.data(), rank, opt);

-        opt.block_weight_policy = scran::block_weights::Policy::NONE;
-        auto ref2 = scran::blocked_pca::compute(expanded.get(), expanded_block.data(), rank, opt);
+        opt.block_weight_policy = scran_blocks::WeightPolicy::NONE;
+        auto ref2 = scran_pca::blocked_pca(*expanded, expanded_block.data(), rank, opt);

         ref2.components.array() /= ref2.components.norm();
         res2.components.array() /= res2.components.norm();
@@ -557,9 +570,9 @@ TEST_P(BlockedPcaWeightedTest, VersusReference) {
     // when the PCA was performed with batches of equal size.
     {
         auto opt = base_opt;
-        opt.block_weight_policy = scran::block_weights::Policy::VARIABLE;
+        opt.block_weight_policy = scran_blocks::WeightPolicy::VARIABLE;
         opt.variable_block_weight_parameters.upper_bound = 0;
-        auto res2 = scran::blocked_pca::compute(expanded.get(), expanded_block.data(), rank, opt);
+        auto res2 = scran_pca::blocked_pca(*expanded, expanded_block.data(), rank, opt);

         // Mocking up the expected results.
         Eigen::MatrixXd expanded_pcs(rank, expanded->ncol());
diff --git a/tests/src/compare_pcs.h b/tests/src/compare_pcs.h
index 0aff5b4..c2df66c 100644
--- a/tests/src/compare_pcs.h
+++ b/tests/src/compare_pcs.h
@@ -5,7 +5,7 @@
 #include

-#include "compare_almost_equal.h"
+#include "scran_tests/scran_tests.hpp"

 #include "Eigen/Dense"
 #include "tatami/tatami.hpp"
@@ -35,7 +35,7 @@ inline void expect_equal_pcs(const Eigen::MatrixXd& left, const Eigen::MatrixXd&
             auto aleft = std::abs(left(i, j));
             auto aright = std::abs(right(i, j));
             if (relative) {
-                compare_almost_equal(aleft, aright, tol);
+                scran_tests::compare_almost_equal(aleft, aright, tol);
             } else if (std::abs(aleft - aright) > tol) {
                 EXPECT_TRUE(false) << "mismatch in almost-equal floats (expected " << aleft << ", got " << aright << ")";
             }
@@ -45,7 +45,6 @@ inline void expect_equal_pcs(const Eigen::MatrixXd& left, const Eigen::MatrixXd&
         EXPECT_TRUE(std::abs(left.row(i).sum()) < tol);
         EXPECT_TRUE(std::abs(right.row(i).sum()) < tol);
     }
-    return;
 }

 inline void expect_equal_rotation(const Eigen::MatrixXd& left, const Eigen::MatrixXd& right, double tol=1e-8) {
@@ -57,17 +56,16 @@ inline void expect_equal_rotation(const Eigen::MatrixXd& left, const Eigen::Matr
         for (int j = 0; j < ngenes; ++j) {
             auto aleft = std::abs(left(i, j));
             auto aright = std::abs(right(i, j));
-            compare_almost_equal(aleft, aright, tol);
+            scran_tests::compare_almost_equal(aleft, aright, tol);
         }
     }
-    return;
 }

 inline void expect_equal_vectors(const Eigen::VectorXd& left, const Eigen::VectorXd& right, double tol=1e-8) {
     int n = left.size();
     ASSERT_EQ(n, right.size());
     for (int i = 0; i < n; ++i) {
-        compare_almost_equal(left[i], right[i], tol);
+        scran_tests::compare_almost_equal(left[i], right[i], tol);
     }
 }

@@ -78,7 +76,7 @@ inline void compare_almost_equal(const Eigen::MatrixXd& left, const Eigen::Matri
     for (int c = 0; c < ngenes; ++c) {
         for (int r = 0; r < ndims; ++r) {
-            compare_almost_equal(left(r, c), right(r, c), tol);
+            scran_tests::compare_almost_equal(left(r, c), right(r, c), tol);
         }
     }
 }
diff --git a/tests/src/simple_pca.cpp b/tests/src/simple_pca.cpp
index 706aeef..954e1c9 100644
--- a/tests/src/simple_pca.cpp
+++ b/tests/src/simple_pca.cpp
@@ -1,11 +1,11 @@
 #include

-#include "simulate_vector.h"
 #include "compare_pcs.h"
+#include "scran_tests/scran_tests.hpp"

 #include "tatami/tatami.hpp"

-#include "simple_pca.hpp"
+#include "scran_pca/simple_pca.hpp"

 class SimplePcaTestCore {
 protected:
@@ -17,7 +17,15 @@ class SimplePcaTestCore {
         }

         size_t nr = 199, nc = 165;
-        auto vec = simulate_sparse_vector(nr * nc, 0.1, /* lower = */ -10, /* upper = */ 10, /* seed = */ 69);
+        auto vec = scran_tests::simulate_vector(nr * nc, [&]() {
+            scran_tests::SimulationParameters sparams;
+            sparams.density = 0.1;
+            sparams.lower = -10;
+            sparams.upper = 10;
+            sparams.seed = 69;
+            return sparams;
+        }());
+
         dense_row.reset(new tatami::DenseRowMatrix(nr, nc, std::move(vec)));
         dense_column = tatami::convert_to_dense(dense_row.get(), false);
         sparse_row = tatami::convert_to_compressed_sparse(dense_row.get(), true);
@@ -40,9 +48,9 @@ TEST_P(SimplePcaBasicTest, Test) {
     int rank = std::get<1>(param);
     int threads = std::get<2>(param);

-    scran::simple_pca::Options opt;
+    scran_pca::SimplePcaOptions opt;
     opt.scale = scale;
-    auto ref = scran::simple_pca::compute(dense_row.get(), rank, opt);
+    auto ref = scran_pca::simple_pca(*dense_row, rank, opt);

     if (threads == 1) {
         EXPECT_EQ(ref.variance_explained.size(), rank);
@@ -81,46 +89,46 @@ TEST_P(SimplePcaBasicTest, Test) {
     } else {
         // Results should be EXACTLY the same with parallelization.
         opt.num_threads = threads;
-        auto res1 = scran::simple_pca::compute(dense_row.get(), rank, opt);
+        auto res1 = scran_pca::simple_pca(*dense_row, rank, opt);
         EXPECT_EQ(ref.components, res1.components);
         EXPECT_EQ(ref.variance_explained, res1.variance_explained);
         EXPECT_EQ(ref.total_variance, res1.total_variance);
     }

     // Checking that we get more-or-less the same results.
-    auto res2 = scran::simple_pca::compute(dense_column.get(), rank, opt);
+    auto res2 = scran_pca::simple_pca(*dense_column, rank, opt);
     expect_equal_pcs(ref.components, res2.components);
     expect_equal_vectors(ref.variance_explained, res2.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res2.total_variance);

-    auto res3 = scran::simple_pca::compute(sparse_row.get(), rank, opt);
+    auto res3 = scran_pca::simple_pca(*sparse_row, rank, opt);
     expect_equal_pcs(ref.components, res3.components);
     expect_equal_vectors(ref.variance_explained, res3.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res3.total_variance);

-    auto res4 = scran::simple_pca::compute(sparse_column.get(), rank, opt);
+    auto res4 = scran_pca::simple_pca(*sparse_column, rank, opt);
     expect_equal_pcs(ref.components, res4.components);
     expect_equal_vectors(ref.variance_explained, res4.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, res4.total_variance);

     // Checking that we get more-or-less the same results.
     opt.realize_matrix = false;

-    auto tres1 = scran::simple_pca::compute(dense_row.get(), rank, opt);
+    auto tres1 = scran_pca::simple_pca(*dense_row, rank, opt);
     expect_equal_pcs(ref.components, tres1.components);
     expect_equal_vectors(ref.variance_explained, tres1.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres1.total_variance);

-    auto tres2 = scran::simple_pca::compute(dense_column.get(), rank, opt);
+    auto tres2 = scran_pca::simple_pca(*dense_column, rank, opt);
     expect_equal_pcs(ref.components, tres2.components);
     expect_equal_vectors(ref.variance_explained, tres2.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres2.total_variance);

-    auto tres3 = scran::simple_pca::compute(sparse_row.get(), rank, opt);
+    auto tres3 = scran_pca::simple_pca(*sparse_row, rank, opt);
     expect_equal_pcs(ref.components, tres3.components);
     expect_equal_vectors(ref.variance_explained, tres3.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres3.total_variance);

-    auto tres4 = scran::simple_pca::compute(sparse_column.get(), rank, opt);
+    auto tres4 = scran_pca::simple_pca(*sparse_column, rank, opt);
     expect_equal_pcs(ref.components, tres4.components);
     expect_equal_vectors(ref.variance_explained, tres4.variance_explained);
     EXPECT_FLOAT_EQ(ref.total_variance, tres4.total_variance);
@@ -146,32 +154,46 @@ TEST_P(SimplePcaMoreTest, ZeroVariance) {
     int rank = std::get<1>(param);

     size_t nr = 109, nc = 153;
-    auto vec = simulate_sparse_vector(nr * nc, 0.1, /* lower = */ -10, /* upper = */ 10, /* seed = */ scale + rank + 100);
+    auto vec = scran_tests::simulate_vector(nr * nc, [&]() {
+        scran_tests::SimulationParameters sparams;
+        sparams.density = 0.1;
+        sparams.lower = -10;
+        sparams.upper = 10;
+        sparams.seed = scale * 100 + rank;
+        return sparams;
+    }());

     auto copy = vec;
     size_t last_row = (nr - 1) * nc;
-    std::fill(copy.begin() + last_row, copy.begin() + last_row + nc, 0);
+    std::fill_n(copy.begin() + last_row, nc, 0);
     tatami::DenseRowMatrix has_zero(nr, nc, std::move(copy));

     std::vector removed(vec.begin(), vec.begin() + last_row);
     tatami::DenseRowMatrix leftovers(nr - 1, nc, std::move(removed));

-    scran::simple_pca::Options opt;
+    scran_pca::SimplePcaOptions opt;
     opt.scale = scale;

     // The initial vector is slightly different when we lose a feature, so we manually force our own random initialization.
-    auto raw_init = simulate_vector(nr, /* lower = */ -2, /* upper = */ 2, /* seed = */ scale + rank + 10);
+    auto raw_init = scran_tests::simulate_vector(nr, [&]() {
+        scran_tests::SimulationParameters sparams;
+        sparams.lower = -2;
+        sparams.upper = 2;
+        sparams.seed = scale * 10 + rank;
+        return sparams;
+    }());
+
     raw_init.back() = 0;

     Eigen::VectorXd init(nr - 1);
     std::copy_n(raw_init.begin(), nr - 1, init.data());
     opt.irlba_options.initial = &init;
-    auto ref = scran::simple_pca::compute(&leftovers, rank, opt);
+    auto ref = scran_pca::simple_pca(leftovers, rank, opt);

     Eigen::VectorXd init2(nr);
     std::copy_n(raw_init.begin(), nr, init2.data());
     opt.irlba_options.initial = &init2;
-    auto out = scran::simple_pca::compute(&has_zero, rank, opt);
+    auto out = scran_pca::simple_pca(has_zero, rank, opt);

     expect_equal_pcs(ref.components, out.components);
     expect_equal_vectors(ref.variance_explained, out.variance_explained);
@@ -179,7 +201,7 @@ TEST_P(SimplePcaMoreTest, ZeroVariance) {

     // Same behavior with sparse representation.
     auto sparse_zero = tatami::convert_to_compressed_sparse(&has_zero, true);
-    auto spout = scran::simple_pca::compute(sparse_zero.get(), rank, opt);
+    auto spout = scran_pca::simple_pca(*sparse_zero, rank, opt);
     expect_equal_pcs(spout.components, out.components);
     expect_equal_vectors(spout.variance_explained, out.variance_explained);