Skip to content

Commit

Permalink
Unify use of common functors (#1049)
Browse files Browse the repository at this point in the history
The same functors are redefined in many different files. I believe this change can help with compilation times, executable size, and consistency.

Authors:
  - Louis Sugy (https://github.com/Nyrio)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)
  - Artem M. Chirkin (https://github.com/achirkin)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1049
  • Loading branch information
Nyrio authored Dec 14, 2022
1 parent c954297 commit 0039f33
Show file tree
Hide file tree
Showing 194 changed files with 2,028 additions and 1,222 deletions.
3 changes: 2 additions & 1 deletion cpp/bench/linalg/norm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <raft/linalg/matrix_vector_op.cuh>
#include <raft/linalg/norm.cuh>
#include <raft/random/rng.cuh>
#include <raft/util/cuda_utils.cuh>
#include <raft/util/itertools.hpp>

#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -60,7 +61,7 @@ struct rowNorm : public fixture {
output_view,
raft::linalg::L2Norm,
raft::linalg::Apply::ALONG_ROWS,
raft::SqrtOp<T>());
raft::sqrt_op());
});
}

Expand Down
117 changes: 45 additions & 72 deletions cpp/include/raft/cluster/detail/kmeans.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <raft/core/kvp.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/mdarray.hpp>
#include <raft/core/operators.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/linalg/map_then_reduce.cuh>
#include <raft/linalg/matrix_vector_op.cuh>
Expand Down Expand Up @@ -197,16 +198,15 @@ void kmeansPlusPlus(const raft::handle_t& handle,
// Outputs minDistanceBuf[n_trials x n_samples] where minDistance[i, :] contains updated
// minClusterDistance that includes candidate-i
auto minDistBuf = distBuffer.view();
raft::linalg::matrixVectorOp(
minDistBuf.data_handle(),
pwd.data_handle(),
minClusterDistance.data_handle(),
pwd.extent(1),
pwd.extent(0),
true,
true,
[=] __device__(DataT mat, DataT vec) { return vec <= mat ? vec : mat; },
stream);
raft::linalg::matrixVectorOp(minDistBuf.data_handle(),
pwd.data_handle(),
minClusterDistance.data_handle(),
pwd.extent(1),
pwd.extent(0),
true,
true,
raft::min_op{},
stream);

// Calculate costPerCandidate[n_trials] where costPerCandidate[i] is the cluster cost when using
// centroid candidate-i
Expand Down Expand Up @@ -326,21 +326,15 @@ void update_centroids(const raft::handle_t& handle,
// weight_per_cluster[n_clusters] - 1D array, weight_per_cluster[i] contains sum of weights in
// cluster-i.
// Note - when weight_per_cluster[i] is 0, new_centroids[i] is reset to 0
raft::linalg::matrixVectorOp(
new_centroids.data_handle(),
new_centroids.data_handle(),
weight_per_cluster.data_handle(),
new_centroids.extent(1),
new_centroids.extent(0),
true,
false,
[=] __device__(DataT mat, DataT vec) {
if (vec == 0)
return DataT(0);
else
return mat / vec;
},
handle.get_stream());
raft::linalg::matrixVectorOp(new_centroids.data_handle(),
new_centroids.data_handle(),
weight_per_cluster.data_handle(),
new_centroids.extent(1),
new_centroids.extent(0),
true,
false,
raft::div_checkzero_op{},
handle.get_stream());

// copy centroids[i] to new_centroids[i] when weight_per_cluster[i] is 0
cub::ArgIndexInputIterator<DataT*> itr_wt(weight_per_cluster.data_handle());
Expand All @@ -356,9 +350,7 @@ void update_centroids(const raft::handle_t& handle,
// copy when the sum of weights in the cluster is 0
return map.value == 0;
},
[=] __device__(raft::KeyValuePair<ptrdiff_t, DataT> map) { // map
return map.key;
},
raft::key_op{},
handle.get_stream());
}

Expand Down Expand Up @@ -399,7 +391,7 @@ void kmeans_fit_main(const raft::handle_t& handle,
// resource
auto wtInCluster = raft::make_device_vector<DataT, IndexT>(handle, n_clusters);

rmm::device_scalar<raft::KeyValuePair<IndexT, DataT>> clusterCostD(stream);
rmm::device_scalar<DataT> clusterCostD(stream);

// L2 norm of X: ||x||^2
auto L2NormX = raft::make_device_vector<DataT, IndexT>(handle, n_samples);
Expand Down Expand Up @@ -470,16 +462,12 @@ void kmeans_fit_main(const raft::handle_t& handle,
// compute the squared norm between the newCentroids and the original
// centroids, destructor releases the resource
auto sqrdNorm = raft::make_device_scalar(handle, DataT(0));
raft::linalg::mapThenSumReduce(
sqrdNorm.data_handle(),
newCentroids.size(),
[=] __device__(const DataT a, const DataT b) {
DataT diff = a - b;
return diff * diff;
},
stream,
centroids.data_handle(),
newCentroids.data_handle());
raft::linalg::mapThenSumReduce(sqrdNorm.data_handle(),
newCentroids.size(),
raft::sqdiff_op{},
stream,
centroids.data_handle(),
newCentroids.data_handle());

DataT sqrdNormError = 0;
raft::copy(&sqrdNormError, sqrdNorm.data_handle(), sqrdNorm.size(), stream);
Expand All @@ -494,18 +482,11 @@ void kmeans_fit_main(const raft::handle_t& handle,
minClusterAndDistance.view(),
workspace,
raft::make_device_scalar_view(clusterCostD.data()),
[] __device__(const raft::KeyValuePair<IndexT, DataT>& a,
const raft::KeyValuePair<IndexT, DataT>& b) {
raft::KeyValuePair<IndexT, DataT> res;
res.key = 0;
res.value = a.value + b.value;
return res;
});

DataT curClusteringCost = 0;
raft::copy(&curClusteringCost, &(clusterCostD.data()->value), 1, stream);

handle.sync_stream(stream);
raft::value_op{},
raft::add_op{});

DataT curClusteringCost = clusterCostD.value(stream);

ASSERT(curClusteringCost != (DataT)0.0,
"Too few points and centroids being found is getting 0 cost from "
"centers");
Expand Down Expand Up @@ -558,15 +539,10 @@ void kmeans_fit_main(const raft::handle_t& handle,
minClusterAndDistance.view(),
workspace,
raft::make_device_scalar_view(clusterCostD.data()),
[] __device__(const raft::KeyValuePair<IndexT, DataT>& a,
const raft::KeyValuePair<IndexT, DataT>& b) {
raft::KeyValuePair<IndexT, DataT> res;
res.key = 0;
res.value = a.value + b.value;
return res;
});
raft::value_op{},
raft::add_op{});

raft::copy(inertia.data_handle(), &(clusterCostD.data()->value), 1, stream);
inertia[0] = clusterCostD.value(stream);

RAFT_LOG_DEBUG("KMeans.fit: completed after %d iterations with %f inertia[0] ",
n_iter[0] > params.max_iter ? n_iter[0] - 1 : n_iter[0],
Expand Down Expand Up @@ -678,7 +654,8 @@ void initScalableKMeansPlusPlus(const raft::handle_t& handle,
minClusterDistanceVec.view(),
workspace,
raft::make_device_scalar_view(clusterCost.data()),
[] __device__(const DataT& a, const DataT& b) { return a + b; });
raft::identity_op{},
raft::add_op{});

auto psi = clusterCost.value(stream);

Expand Down Expand Up @@ -710,7 +687,8 @@ void initScalableKMeansPlusPlus(const raft::handle_t& handle,
minClusterDistanceVec.view(),
workspace,
raft::make_device_scalar_view<DataT>(clusterCost.data()),
[] __device__(const DataT& a, const DataT& b) { return a + b; });
raft::identity_op{},
raft::add_op{});

psi = clusterCost.value(stream);

Expand Down Expand Up @@ -1079,7 +1057,7 @@ void kmeans_predict(handle_t const& handle,
workspace);

// calculate cluster cost phi_x(C)
rmm::device_scalar<raft::KeyValuePair<IndexT, DataT>> clusterCostD(stream);
rmm::device_scalar<DataT> clusterCostD(stream);
// TODO: add different templates for InType of binaryOp to avoid thrust transform
thrust::transform(handle.get_thrust_policy(),
minClusterAndDistance.data_handle(),
Expand All @@ -1097,21 +1075,16 @@ void kmeans_predict(handle_t const& handle,
minClusterAndDistance.view(),
workspace,
raft::make_device_scalar_view(clusterCostD.data()),
[] __device__(const raft::KeyValuePair<IndexT, DataT>& a,
const raft::KeyValuePair<IndexT, DataT>& b) {
raft::KeyValuePair<IndexT, DataT> res;
res.key = 0;
res.value = a.value + b.value;
return res;
});

raft::copy(inertia.data_handle(), &(clusterCostD.data()->value), 1, stream);
raft::value_op{},
raft::add_op{});

thrust::transform(handle.get_thrust_policy(),
minClusterAndDistance.data_handle(),
minClusterAndDistance.data_handle() + minClusterAndDistance.size(),
labels.data_handle(),
[=] __device__(raft::KeyValuePair<IndexT, DataT> pair) { return pair.key; });
raft::key_op{});

inertia[0] = clusterCostD.value(stream);
}

template <typename DataT, typename IndexT = int>
Expand Down
75 changes: 36 additions & 39 deletions cpp/include/raft/cluster/detail/kmeans_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <raft/core/kvp.hpp>
#include <raft/core/logger.hpp>
#include <raft/core/mdarray.hpp>
#include <raft/core/operators.hpp>
#include <raft/distance/distance.cuh>
#include <raft/distance/distance_types.hpp>
#include <raft/distance/fused_l2_nn.cuh>
Expand Down Expand Up @@ -155,12 +156,11 @@ void checkWeight(const raft::handle_t& handle,
n_samples);

auto scale = static_cast<DataT>(n_samples) / wt_sum;
raft::linalg::unaryOp(
weight.data_handle(),
weight.data_handle(),
n_samples,
[=] __device__(const DataT& wt) { return wt * scale; },
stream);
raft::linalg::unaryOp(weight.data_handle(),
weight.data_handle(),
n_samples,
raft::mul_const_op<DataT>{scale},
stream);
}
}

Expand All @@ -178,33 +178,42 @@ IndexT getCentroidsBatchSize(int batch_centroids, IndexT n_local_clusters)
return (minVal == 0) ? n_local_clusters : minVal;
}

template <typename DataT, typename ReductionOpT, typename IndexT = int>
template <typename InputT,
typename OutputT,
typename MainOpT,
typename ReductionOpT,
typename IndexT = int>
void computeClusterCost(const raft::handle_t& handle,
raft::device_vector_view<DataT, IndexT> minClusterDistance,
raft::device_vector_view<InputT, IndexT> minClusterDistance,
rmm::device_uvector<char>& workspace,
raft::device_scalar_view<DataT> clusterCost,
raft::device_scalar_view<OutputT> clusterCost,
MainOpT main_op,
ReductionOpT reduction_op)
{
cudaStream_t stream = handle.get_stream();
cudaStream_t stream = handle.get_stream();

cub::TransformInputIterator<OutputT, MainOpT, InputT*> itr(minClusterDistance.data_handle(),
main_op);

size_t temp_storage_bytes = 0;
RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(nullptr,
temp_storage_bytes,
minClusterDistance.data_handle(),
itr,
clusterCost.data_handle(),
minClusterDistance.size(),
reduction_op,
DataT(),
OutputT(),
stream));

workspace.resize(temp_storage_bytes, stream);

RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(workspace.data(),
temp_storage_bytes,
minClusterDistance.data_handle(),
itr,
clusterCost.data_handle(),
minClusterDistance.size(),
reduction_op,
DataT(),
OutputT(),
stream));
}

Expand Down Expand Up @@ -266,9 +275,7 @@ void sampleCentroids(const raft::handle_t& handle,
sampledMinClusterDistance.data_handle(),
nPtsSampledInRank,
inRankCp.data(),
[=] __device__(raft::KeyValuePair<ptrdiff_t, DataT> val) { // MapTransformOp
return val.key;
},
raft::key_op{},
stream);
}

Expand Down Expand Up @@ -463,10 +470,8 @@ void minClusterAndDistanceCompute(
pair.value = val;
return pair;
},
[=] __device__(raft::KeyValuePair<IndexT, DataT> a, raft::KeyValuePair<IndexT, DataT> b) {
return (b.value < a.value) ? b : a;
},
[=] __device__(raft::KeyValuePair<IndexT, DataT> pair) { return pair; });
raft::argmin_op{},
raft::identity_op{});
}
}
}
Expand Down Expand Up @@ -541,7 +546,6 @@ void minClusterDistanceCompute(const raft::handle_t& handle,
if (is_fused) {
workspace.resize((sizeof(IndexT)) * ns, stream);

// todo(lsugy): remove cIdx
raft::distance::fusedL2NNMinReduce<DataT, DataT, IndexT>(
minClusterDistanceView.data_handle(),
datasetView.data_handle(),
Expand Down Expand Up @@ -576,23 +580,16 @@ void minClusterDistanceCompute(const raft::handle_t& handle,
pairwise_distance_kmeans<DataT, IndexT>(
handle, datasetView, centroidsView, pairwiseDistanceView, workspace, metric);

raft::linalg::coalescedReduction(
minClusterDistanceView.data_handle(),
pairwiseDistanceView.data_handle(),
pairwiseDistanceView.extent(1),
pairwiseDistanceView.extent(0),
std::numeric_limits<DataT>::max(),
stream,
true,
[=] __device__(DataT val, IndexT i) { // MainLambda
return val;
},
[=] __device__(DataT a, DataT b) { // ReduceLambda
return (b < a) ? b : a;
},
[=] __device__(DataT val) { // FinalLambda
return val;
});
raft::linalg::coalescedReduction(minClusterDistanceView.data_handle(),
pairwiseDistanceView.data_handle(),
pairwiseDistanceView.extent(1),
pairwiseDistanceView.extent(0),
std::numeric_limits<DataT>::max(),
stream,
true,
raft::identity_op{},
raft::min_op{},
raft::identity_op{});
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/raft/cluster/kmeans.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <raft/cluster/kmeans_types.hpp>
#include <raft/core/kvp.hpp>
#include <raft/core/mdarray.hpp>
#include <raft/core/operators.hpp>

namespace raft::cluster::kmeans {

Expand Down Expand Up @@ -313,7 +314,8 @@ void cluster_cost(const raft::handle_t& handle,
raft::device_scalar_view<DataT> clusterCost,
ReductionOpT reduction_op)
{
detail::computeClusterCost(handle, minClusterDistance, workspace, clusterCost, reduction_op);
detail::computeClusterCost(
handle, minClusterDistance, workspace, clusterCost, raft::identity_op{}, reduction_op);
}

/**
Expand Down
Loading

0 comments on commit 0039f33

Please sign in to comment.