Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use KMeans from Raft #4713

Merged
merged 53 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
9589e05
Automatically clone raft when branch pin changes
cjnolet Feb 19, 2022
8746a42
Updating output dir for python
cjnolet Feb 19, 2022
6bdadaf
Starting to use the provided raft package
cjnolet Feb 20, 2022
2fc0ab0
iRemoving raft include test
cjnolet Feb 20, 2022
fcb82db
Adding pyraft as dependency
cjnolet Feb 20, 2022
3862ace
Fixing copyright years
cjnolet Feb 20, 2022
15a2569
Merge branch 'build-2204-clone_raft_on_pin' into use_installed_pyraft
cjnolet Feb 20, 2022
87b1d2c
Fixing style
cjnolet Feb 21, 2022
9f454ea
Adding pyraft to gpu build dependencies
cjnolet Feb 21, 2022
9c2cfcb
Adding pyraft to conda deps
cjnolet Feb 21, 2022
154370b
Fixing typo
cjnolet Feb 22, 2022
d2091ec
Merge remote-tracking branch 'rapidsai/branch-22.04' into use_install…
cjnolet Feb 22, 2022
a4971d2
using CPM_DOWNLOAD_raft (and testing pin)
cjnolet Feb 22, 2022
57e6765
Using Robert's cpm branch
cjnolet Feb 22, 2022
7f38c58
Updating header file extensions
cjnolet Feb 22, 2022
a56ca03
Updating copyrights
cjnolet Feb 22, 2022
0ef3599
Ignoring already installed includes for now
cjnolet Feb 22, 2022
34614ee
Merge branch 'use_installed_pyraft' into update_cuml_raft_header_exte…
cjnolet Feb 23, 2022
a1ff988
Removing cmake pin
cjnolet Feb 24, 2022
e9c3fca
Merge remote-tracking branch 'rapidsai/branch-22.04' into update_cuml…
cjnolet Feb 24, 2022
24086a8
POint branch back at raft release
cjnolet Mar 1, 2022
25c9dc1
Merge remote-tracking branch 'rapidsai/branch-22.04' into update_cuml…
cjnolet Mar 17, 2022
696439d
Test kmeans
lowener Apr 12, 2022
f1589f1
Add final API of raft KMeans
lowener Apr 26, 2022
10a9740
Change KMeans MG to raft functions
lowener May 2, 2022
7d314ac
Fix compilation issues
lowener May 3, 2022
2f33c66
Fix comparison types
lowener May 3, 2022
58e735e
Merge branch 'branch-22.08' into 22.06kmeansmg_raft
lowener Jun 21, 2022
05605cd
Add calls to raft kmeans. Remove unused functions
lowener Jun 27, 2022
d4e1406
Add changes to kmeans struct in python
lowener Jun 27, 2022
919c4ca
KMeans cython update to follow raft
lowener Jul 5, 2022
2d057bf
Fix size bug
lowener Jul 11, 2022
d6caa0a
Merge branch 'branch-22.08' into 22.06-kmeans-raft
lowener Jul 11, 2022
46077bd
Revert benchmark kmeans changes
lowener Jul 11, 2022
faf572b
Fix copyright
lowener Jul 11, 2022
0e0bd8e
Fix MNMG kmeans oversampling assert
lowener Jul 12, 2022
4c007c4
Fix style
lowener Jul 12, 2022
3b01dc3
Merge branch 'branch-22.08' into 22.06-kmeans-raft
lowener Jul 25, 2022
543d0ae
Update KMeansMG for latest raft mdarray update
lowener Jul 27, 2022
08aed6b
Merge remote-tracking branch 'rapidsai/branch-22.12' into 22.06-kmean…
cjnolet Oct 14, 2022
34be57c
Merge remote-tracking branch 'rapidsai/branch-22.12' into update_cuml…
cjnolet Oct 14, 2022
52c2b32
Fixing style
cjnolet Oct 15, 2022
197cd88
Using raft::cluster::KMeansParams
cjnolet Oct 15, 2022
8be7aa6
Importing proper raft include
cjnolet Oct 15, 2022
7111e71
Merge branch 'branch-22.12' into update_cuml_raft_header_extensions
cjnolet Oct 19, 2022
2fdb159
Fixing some build errors
cjnolet Oct 20, 2022
ac06adf
Merge branch 'update_cuml_raft_header_extensions' into 22.06-kmeans-raft
cjnolet Oct 20, 2022
a055fdc
Merge branch 'branch-22.12' into 22.06-kmeans-raft
cjnolet Oct 20, 2022
5059ccc
Fixing kmeans
cjnolet Oct 20, 2022
0b13da9
Fix kmeans MG and add C++ gtest
lowener Oct 31, 2022
5fca3b3
Fix raft kmeans call
lowener Nov 7, 2022
92bd585
Fix includes
lowener Nov 8, 2022
e316b57
Fix style
lowener Nov 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ if hasArg clean; then
CLEAN=1
fi

if hasArg cpp-mgtests; then
BUILD_CUML_MG_TESTS=ON
fi

# Long arguments
LONG_ARGUMENT_LIST=(
Expand Down
9 changes: 6 additions & 3 deletions cpp/bench/sg/kmeans.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include "benchmark.cuh"
#include <cuml/cluster/kmeans.hpp>
#include <cuml/common/logger.hpp>
#include <raft/cluster/specializations.cuh>
#include <raft/distance/distance_type.hpp>
#include <raft/random/rng_state.hpp>
#include <utility>

namespace ML {
Expand Down Expand Up @@ -86,9 +89,9 @@ std::vector<Params> getInputs()
p.kmeans.init = ML::kmeans::KMeansParams::InitMethod(0);
p.kmeans.max_iter = 300;
p.kmeans.tol = 1e-4;
p.kmeans.verbosity = CUML_LEVEL_INFO;
p.kmeans.seed = int(p.blobs.seed);
p.kmeans.metric = 0; // L2
p.kmeans.verbosity = RAFT_LEVEL_INFO;
p.kmeans.metric = raft::distance::DistanceType::L2Expanded;
p.kmeans.rng_state = raft::random::RngState(p.blobs.seed);
p.kmeans.inertia_check = true;
std::vector<std::pair<int, int>> rowcols = {
{160000, 64},
Expand Down
5 changes: 2 additions & 3 deletions cpp/examples/kmeans/kmeans_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@

#include <cuda_runtime.h>

#include <raft/core/handle.hpp>

#include <cuml/cluster/kmeans.hpp>
#include <raft/core/handle.hpp>

#ifndef CUDA_RT_CALL
#define CUDA_RT_CALL(call) \
Expand Down Expand Up @@ -112,7 +111,7 @@ int main(int argc, char* argv[])
params.max_iter = 300;
params.tol = 0.05;
}
params.metric = 1;
params.metric = raft::distance::DistanceType::L2SqrtExpanded;
params.init = ML::kmeans::KMeansParams::InitMethod::Random;

// Inputs copied from kmeans_test.cu
Expand Down
56 changes: 3 additions & 53 deletions cpp/include/cuml/cluster/kmeans.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#pragma once

#include <cuml/common/log_levels.hpp>
#include <raft/cluster/kmeans_types.hpp>

namespace raft {
class handle_t;
Expand All @@ -26,54 +27,7 @@ namespace ML {

namespace kmeans {

struct KMeansParams {
enum InitMethod { KMeansPlusPlus, Random, Array };

// The number of clusters to form as well as the number of centroids to
// generate (default:8).
int n_clusters = 8;

/*
* Method for initialization, defaults to k-means++:
* - InitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm
* to select the initial cluster centers.
* - InitMethod::Random (random): Choose 'n_clusters' observations (rows) at
* random from the input data for the initial centroids.
* - InitMethod::Array (ndarray): Use 'centroids' as initial cluster centers.
*/
InitMethod init = KMeansPlusPlus;

// Maximum number of iterations of the k-means algorithm for a single run.
int max_iter = 300;

// Relative tolerance with regards to inertia to declare convergence.
double tol = 1e-4;

// verbosity level.
int verbosity = CUML_LEVEL_INFO;

// Seed to the random number generator.
int seed = 0;

// Metric to use for distance computation. Any metric from
// raft::distance::DistanceType can be used
int metric = 0;

// Number of times the k-means algorithm will be run with different seeds.
int n_init = 1;

// Oversampling factor for use in the k-means|| algorithm.
double oversampling_factor = 2.0;

// batch_samples and batch_centroids are used to tile 1NN computation which is
// useful to optimize/control the memory footprint
// Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0
// then don't tile the centroids
int batch_samples = 1 << 15;
int batch_centroids = 0; // if 0 then batch_centroids = n_clusters

bool inertia_check = false;
};
using KMeansParams = raft::cluster::KMeansParams;

/**
* @brief Compute k-means clustering and predicts cluster index for each sample
Expand Down Expand Up @@ -222,8 +176,6 @@ void predict(const raft::handle_t& handle,
* @param[in] n_features Number of features or the dimensions of each
* sample in 'X' (it should be same as the dimension for each cluster centers in
* 'centroids').
* @param[in] metric Metric to use for distance computation. Any
* metric from raft::distance::DistanceType can be used
* @param[out] X_new X transformed in the new space.
*/
void transform(const raft::handle_t& handle,
Expand All @@ -232,7 +184,6 @@ void transform(const raft::handle_t& handle,
const float* X,
int n_samples,
int n_features,
int metric,
float* X_new);

void transform(const raft::handle_t& handle,
Expand All @@ -241,7 +192,6 @@ void transform(const raft::handle_t& handle,
const double* X,
int n_samples,
int n_features,
int metric,
double* X_new);

}; // end namespace kmeans
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cuml/cluster/kmeans_mg.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,13 +16,14 @@

#pragma once

#include <cuml/cluster/kmeans.hpp>

namespace raft {
class handle_t;
}

namespace ML {
namespace kmeans {
struct KMeansParams;
namespace opg {

/**
Expand Down
185 changes: 0 additions & 185 deletions cpp/src/common/tensor.hpp

This file was deleted.

Loading