Skip to content

Commit

Permalink
Further additions to distance membership
Browse files Browse the repository at this point in the history
  • Loading branch information
tarang-jain committed Jul 5, 2022
1 parent 971b150 commit b823a4b
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 44 deletions.
25 changes: 18 additions & 7 deletions cpp/src/hdbscan/detail/kernels/soft_clustering.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,29 @@ namespace ML {
namespace HDBSCAN {
namespace detail {
namespace Membership {

/**
 * @brief For every (point, selected cluster) pair, lower min_dist to the
 *        smallest distance between the point and any exemplar of that cluster.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per (point, cluster) pair,
 * i.e. at least m * n_selected_clusters threads in total. Threads past that
 * count exit immediately. No shared memory is used.
 *
 * @tparam value_idx integer index type
 * @tparam value_t   distance value type
 * @tparam tpb       threads per block intended for the launch (not read by
 *                   the kernel body)
 *
 * @param[in]    dist                   point-to-exemplar distances.
 *                                      NOTE(review): indexed as row-major
 *                                      m x n_exemplars, matching how the host
 *                                      code sizes and reads this buffer --
 *                                      confirm against the caller.
 * @param[in]    m                      number of points (rows of dist)
 * @param[in]    n_selected_clusters    number of clusters (columns of min_dist)
 * @param[in]    exemplar_label_offsets n_selected_clusters + 1 offsets; the
 *                                      exemplars of cluster c occupy the
 *                                      half-open range
 *                                      [offsets[c], offsets[c + 1])
 * @param[inout] min_dist               row-major m x n_selected_clusters.
 *                                      Entries are only ever lowered, so the
 *                                      caller must pre-fill the buffer with a
 *                                      large sentinel value.
 */
template <typename value_idx, typename value_t, int tpb = 256>
__global__ void min_dist_to_exemplar_kernel(value_t* dist,
                                            value_idx m,
                                            value_idx n_selected_clusters,
                                            value_idx* exemplar_label_offsets,
                                            value_t* min_dist)
{
  value_idx idx = blockDim.x * blockIdx.x + threadIdx.x;

  // The grid rarely divides the problem evenly; surplus threads do nothing.
  if (idx >= m * n_selected_clusters) return;

  auto row = idx / n_selected_clusters;
  auto col = idx % n_selected_clusters;

  auto start = exemplar_label_offsets[col];
  auto end   = exemplar_label_offsets[col + 1];

  // NOTE(review): the row stride of dist is taken from the final offset,
  // assuming the offsets start at 0 and the last entry equals the total number
  // of exemplars -- confirm that the producer of exemplar_label_offsets
  // guarantees this.
  auto n_exemplars = exemplar_label_offsets[n_selected_clusters];

  const value_t* dist_row = dist + row * n_exemplars;

  // Accumulate in a register and store once instead of re-reading and
  // re-writing global memory on every iteration.
  value_t best = min_dist[idx];
  for (value_idx i = start; i < end; i++) {
    value_t candidate = dist_row[i];
    if (candidate < best) { best = candidate; }
  }
  min_dist[idx] = best;
}

Expand Down
159 changes: 124 additions & 35 deletions cpp/src/hdbscan/detail/soft_clustering.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ value_idx get_exemplars(const raft::handle_t& handle,
Common::CondensedHierarchy<value_idx, value_t>& condensed_tree,
const value_idx* labels,
value_idx n_selected_clusters,
value_idx* exemplar_idx)
// value_idx* exemplar_offsets)
value_idx* exemplar_idx,
value_idx* exemplar_label_offsets)
{
auto stream = handle.get_stream();
auto exec_policy = handle.get_thrust_policy();
Expand Down Expand Up @@ -183,46 +183,135 @@ value_idx get_exemplars(const raft::handle_t& handle,
exemplar_idx,
[] __device__(auto idx) { return idx >= 0; });

auto n_exemplar_indices = exemplar_idx_end_ptr - exemplar_idx;

// rmm::device_uvector<value_idx>exemplar_labels(n_exemplar_indices, stream);

// thrust::transform(
// exec_policy,
// exemplar_idx,
// exemplar_idx + n_exemplar_indices,
// exemplar_labels.data(),
// [labels] __device__(auto idx) { return labels[idx]; });

// rmm::device_uvector<value_idx> exemplar_label_offsets(n_exemplar_indices + 1, stream);
// thrust::unique_by_key_copy(exec_policy,
// exemplar_labels.data(),
// exemplar_labels.data() + n_exemplar_indices,
// thrust::make_counting_iterator(0),
// thrust::make_discard_iterator(),
// exemplar_label_offsets.begin());
// exemplar_label_offsets.set_element(n_exemplar_indices, n_exemplar_indices, stream);
return n_exemplar_indices;
auto n_exemplars = exemplar_idx_end_ptr - exemplar_idx;

rmm::device_uvector<value_idx>exemplar_labels(n_exemplars, stream);

thrust::transform(
exec_policy,
exemplar_idx,
exemplar_idx + n_exemplars,
exemplar_labels.data(),
[labels] __device__(auto idx) { return labels[idx]; });

thrust::unique_by_key_copy(exec_policy,
exemplar_labels.data(),
exemplar_labels.data() + n_exemplars,
thrust::make_counting_iterator(0),
thrust::make_discard_iterator(),
exemplar_label_offsets.begin());
exemplar_label_offsets.set_element(n_exemplars, n_exemplars, stream);
for(int i = 0; i < n_exemplars + 1; i++){
CUML_LOG_DEBUG("%d", exemplar_label_offsets.element(i, stream));
}
return n_exemplars;
}

template <typename value_idx, typename value_t, int tpb = 256>
value_idx dist_membership_vector(const raft::handle_t& handle,
Common::CondensedHierarchy<value_idx, value_t>& condensed_tree,
const value_t* X,
value_idx* exemplar_idx)
{
raft::matrix::copyRows<value_t, value_idx, size_t>(
X,
index.m,
index.n,
index.get_R(),
R_1nn_cols2.data(),
index.n_landmarks,
handle.get_stream(),
true);
size_t m,
size_t n,
size_t n_exemplars,
size_t n_selected_clusters,
value_idx* exemplar_idx,
value_idx* exemplar_label_offsets,
bool softmax)
{
auto stream = handle.get_stream();

rmm::device_uvector<value_t> exemplars_dense(n_exemplars * n, stream);

raft::matrix::copyRows<value_t, value_idx, size_t>(X,
n_exemplars,
n,
exemplars_dense.data(),
exemplar_idx,
n_exemplars,
stream,
true);

rmm::device_uvector<value_t> dist(m * n_exemplars, stream);
raft::distance::distance<metric, value_idx, value_idx, value_idx, int>(
x, y, dist, m, n, k, handle.get_stream(), isRowMajor);
X, exemplars_dense.data(), dist.data(), m, n_exemplars, n, stream, true);

rmm::device_uvector<value_t> min_dist(m * n_selected_clusters, stream);
thrust::fill(exec_policy, min_dist.begin(), min_dist.end(), FLT_MAX);

auto reduction_op =
[dist = dist.data(),
n_selected_clusters,
exemplar_label_offsets,
min_dist = min_dist.data()]
__device__(auto idx) {
auto col = idx % n_selected_clusters;
auto row = idx / n_selected_clusters;
auto start = exemplar_label_offsets[col];
auto end = exemplar_label_offsets[col + 1];

for(value_idx i = start; i < end; i++){
if dist[row * n_exemplars + i] < min_dist[row * n_selected_clusters + col]{
min_dist[row * n_selected_clusters + col] = dist[row * n_exemplars + i];
}
}
return;
};

thrust::for_each(
exec_policy,
counting,
counting + m * n_selected_clusters,
reduction_op
)

rmm::device_uvector<value_t> dist_membership_vec(m * n_selected_clusters, stream);
thrust::fill(exec_policy, dist_membership_vec.begin(), dist_membership_vec.end(), 0.0f);

thrust::transform(
exec_policy,
min_dist.data(),
min_dist.data() + m * n_selected_clusters,
dist_membership_vec.data(),
[=] __device__(value_t val){
return value_t(1.0/val);
}
)

thrust::

if (softmax){
auto softmax_op =
[min_dist = min_dist.data(),
n_selected_clusters,
exemplar_label_offsets,
min_dist = min_dist.data()]
__device__(auto idx) {
auto col = idx % n_selected_clusters;
auto row = idx / n_selected_clusters;
auto start = exemplar_label_offsets[col];
auto end = exemplar_label_offsets[col + 1];

for(value_idx i = start; i < end; i++){
if dist[row * n_exemplars + i] < min_dist[row * n_selected_clusters + col]{
min_dist[row * n_selected_clusters + col] = dist[row * n_exemplars + i];
}
}
return;
};

thrust::for_each(
exec_policy,
counting,
counting + m * n_selected_clusters,
reduction_op
)
}
for i in range(vector.shape[0]):
result[i] = 1.0 / vector[i]
result = np.exp(result - np.nanmax(result))
sum = np.sum(result)


}; // namespace Membership
}; // namespace detail
}; // namespace HDBSCAN
Expand Down
24 changes: 22 additions & 2 deletions cpp/test/sg/hdbscan_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -303,13 +303,33 @@ class ClusterSelectionTest : public ::testing::TestWithParam<ClusterSelectionInp
handle.sync_stream(handle.get_stream());

rmm::device_uvector<int> exemplar_indices(params.n_row, handle.get_stream());
rmm::device_uvector<int> exemplar_label_offsets(n_selected_clusters + 1, handle.get_stream());

int n_exemplars = ML::HDBSCAN::detail::Membership::get_exemplars<IdxT, T, 256>(
int n_exemplars = ML::HDBSCAN::detail::Membership::get_exemplars<IdxT, T>(
handle,
condensed_tree,
labels.data(),
n_selected_clusters,
exemplar_indices.data()
exemplar_indices.data(),
exemplar_label_offsets.data()
);

ML::HDBSCAN::detail::Membership::get_exemplars<IdxT, T>(
handle,
condensed_tree,
labels.data(),
n_selected_clusters,
exemplar_indices.data(),
exemplar_label_offsets.data()
);

ML::HDBSCAN::detail::Membership::dist_membership_vector<IdxT, T, 256>(
handle,
data.data(),
params.n_row,
params.n_col,
exemplar_indices.data(),
exemplar_label_offsets.data()
);

handle.sync_stream(handle.get_stream());
Expand Down

0 comments on commit b823a4b

Please sign in to comment.