Here's a step-by-step tutorial on how to add new functionality from scratch. We'll be adding a new solver; if you're adding a new data structure, metric, utility etc., just substitute "solver" in names and paths appropriately, using common sense.
Add a header file in src/include
or an appropriate subfolder. This header file should expose only what you want the user-facing API to expose (methods, structures etc.) and nothing more. For example, for a new solver add:
#pragma once
/**
 * Parameters shared by all solver_name_* entry points.
 *
 * The dimensions describe the matrices the solver builds from the raw
 * pointers: data is n x m and centroids is k x m. The iteration limit and
 * the threshold control the fitting loop.
 */
typedef struct solver_params {
  int n;             /* number of rows in data */
  int m;             /* number of columns in data and in centroids */
  int k;             /* number of centroid rows */
  int iterations;    /* upper bound on fitting iterations */
  double threshold;  /* fitting continues while the total centroid movement exceeds this */
} solver_params;
* Documentation goes here
* \param[in] data
* \param[out] labels
* \param[in,out] centroids
void solver_name_fit_float(const float *data, float *labels, float *centroids, solver_params _params);
void solver_name_fit_double(const double *data, double *labels, double *centroids, solver_params _params);
* Documentation goes here
* \param[in] data
* \param[in] centroids
* \param[out] labels
void solver_name_predict_float(const float *data, const float *centroids, float *labels, solver_params _params);
void solver_name_predict_double(const double *data, const double *centroids, double *labels, solver_params _params);
Add the common C/CPP code in src/common
. Keep the code as DRY as possible and extract only CPU/GPU-specific code into separate files (as shown in the following sections), e.g.:
#include "../include/solver/solver_name.h"
#include "../include/solver/solver_name_api.h"
void initializeCentroids(Matrix<T> &X, Matrix<T> &centroids) {
// copy k rows from X to centroids
void calculateDistance(Matrix<T> &distances, Matrix<T> &X, Matrix<T> &centroids) {
// calculate the distance between each row in X and each row in centroids, put result in distances
template<typename T>
void solver_name_fit(const T *data, T *labels, T *centroids, solver_params _params) {
Matrix<T> X(data, _params.n, _params.m);
Matrix<T> centroids(centroids, _params.k, _params.m);
Matrix<int> labels_matrix(_params.n, 1);
initializeCentroids(X, centroids);
Matrix<T> distances(_params.n, _params.k);
for(int i = 0; i < _params.iterations && totalMoved > _params.threshold; i++) {
calculateDistance(distances, X, centroids);
double totalMoved = recalculateCentroids(centroids, distances);
relabel(labels_matrix, distances);
template<typename T>
void solver_name_predict(const T *data, T *labels, T *centroids, solver_params _params) {
Matrix<T> X(data, _params.n, _params.m);
Matrix<T> centroids(centroids, _params.k, _params.m);
Matrix<int> labels_matrix(_params.n, 1);
Matrix<T> distances(_params.n, _params.k);
calculateDistance(distances, X, centroids);
relabel(labels_matrix, distances);
void solver_name_fit<float>(const float *data, float *labels, float *centroids, solver_params _params);
void solver_name_fit<double>(const double *data, double *labels, double *centroids, solver_params _params);
void solver_name_predict<float>(const float *data, float *labels, float *centroids, solver_params _params);
void solver_name_predict<double>(const double *data, double *labels, double *centroids, solver_params _params);
void solver_name_fit_float(const float *data, float *labels, float *centroids, solver_params _params) {
solver_name_fit(data, labels, centroids, _params);
void solver_name_fit_double(const double *data, double *labels, double *centroids, solver_params _params) {
solver_name_fit(data, labels, centroids, _params);
void solver_name_predict_float(const float *data, const float *centroids, float *labels, solver_params _params) {
solver_name_predict(data, labels, centroids, _params);
void solver_name_predict_double(const double *data, const double *centroids, double *labels, solver_params _params) {
solver_name_predict(data, labels, centroids, _params);
Add headers for components that will require separate CPU/GPU implementations, for example:
Since we will need a data structure for CPU and GPU.
#pragma once
* \class Matrix
* \brief Matrix type. Doc goes here.
template <typename T>
class Matrix {
size_t _n;
size_t _m;
T* _data;
Matrix(size_t n, size_t m);
Matrix(T* data, size_t n, size_t m);
void copy_to(T* dst);
Since parts of the code will require cuBLAS/thrust calls on GPU and parts BLAS/std on CPU etc.
#pragma once
#include "matrix/matrix.h"
* Documentation goes here
* \param[in,out] labels
* \param[in] distances
void relabel(Matrix<T> &labels, const Matrix<T> &distances);
Add CPU/GPU specific implementations of the above headers.
CPU code goes in src/cpu/*
- Sample Matrix class CPP impl used in our solver:
#include "../include/matrix/matrix.h"
template <typename T>
Matrix<T>::Matrix(T* data, size_t n, size_t m) {
_n = n;
_m = m;
_data = data;
template <typename T>
void Matrix<T>::copy_to(T* dst) {
// CPU specific impl goes here
template class Matrix<float>;
template class Matrix<double>;
- Sample solver specific method for CPU:
#include "../include/solver/solver_name.h"
void relabel(Matrix<T> &labels, const Matrix<T> &distances) {
// iterate using standard CPP methods over distances, find min index for each row
// set it as new label in the corresponding position in labels
GPU code goes in src/gpu/*
- Sample Matrix class CUDA impl used in our solver:
#include "../include/matrix/matrix.h"
template <typename T>
Matrix<T>::Matrix(T* data, size_t n, size_t m) {
_n = n;
_m = m;
safe_cuda(cudaMalloc(&_data, _n * _m * sizeof(T)));
thrust::copy(data, data + n * m, thrust::device_pointer_cast(_data));
template <typename T>
void Matrix<T>::copy_to(T* dst) {
// GPU specific impl goes here
template class Matrix<float>;
template class Matrix<double>;
- Sample solver specific method for GPU:
#include "../include/solver/solver_name.h"
void relabel(Matrix<T> &labels, const Matrix<T> &distances) {
// iterate using CUDA methods (for example Thrust) over distances, find min index for each row
// set it as new label in the corresponding position in labels
Add SWIG interface file:
/* File : solver_name.i */
#include "../../include/solver/solver_name.h"
/* In case of 1D arrays use _ARRAY1 variants */
/* For other mappings consult our other interface files, the SWIG docs or the numpy.i docs */
%apply (float *IN_ARRAY2) {float *data};
%apply (float *OUT_ARRAY2) {float *labels};
%apply (float *INPLACE_ARRAY2) {float *centroids};
%apply (double *IN_ARRAY2) {double *data};
%apply (double *OUT_ARRAY2) {double *labels};
%apply (double *INPLACE_ARRAY2) {double *centroids};
%include "../../include/solver/solver_name.h"
Include the interface file in one or both of the following:
- src/swig/ch2o4gpu_cpu.i - if you provide a CPU implementation
- src/swig/ch2o4gpu_gpu.i - if you provide a GPU implementation
%include "solver/solver_name.i"
Add Python wrapper files.
# If your solver is not implemented by scikit-learn, or it implements the scikit-learn version 100%, drop the H2O from the name
class solver_nameH2O(object):
"""Doc goes here.
param_name: type, default=default_val
Doc goes here
def __init__(self, k, n_gpus = -1):
self.param_name = param_name
from ..libs.lib_utils import get_lib
from ..util.gpu import device_count
n_gpus, devices = device_count(n_gpus)
self.lib = get_lib(n_gpus, devices) # Grabs the SWIG generated wrapper which exposes C methods
self.k = k
self.centroids = None
self.labels = None
def fit(self, X, y=None):
:param: X {array-like}, shape (n_samples, n_features)
Doc goes here.
:param y
:returns self : object
params = lib.solver_params()
params.n = X.shape[0]
params.m = X.shape[1]
params.k = self.k
c_method = lib.solver_name_fit_float if X.dtype == np.float32 else lib.solver_name_fit_double
self.centroids = np.empty((params.k, params.m), dtype=X.dtype)
self.labels = np.empty(params.n, dtype=np.int32)
c_method(X, self.labels, self.centroids, params)
return self
def predict(self, X):
# TODO implement
# If you are implementing a solver which is also implemented by scikit-learn and your implementation doesn't handle certain parameters that scikit-learn does, write a wrapper which detects this and chooses either your implementation or scikit-learn's
class solver_name(object):
Register your solver in solvers/
from ..solvers.solver_name import solver_nameH2O
Add Java wrapper files - coming soon.
Add tests! Currently adding Python tests in tests/python/open_data
is the easiest way. C/C++/CUDA tests coming soon.