Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Internal library to share headers between test and bench #1162

Merged
merged 7 commits into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ The folder structure mirrors other RAPIDS repos, with the following folders:
- `solver`: Sparse solvers for optimization and approximation
- `stats`: Moments, summary statistics, model performance measures
- `util`: Various reusable tools and utilities for accelerated algorithm development
- `internal`: A private header-only component that hosts the code shared between benchmarks.
- `scripts`: Helpful scripts for development
- `src`: Compiled APIs and template specializations for the shared libraries
- `test`: Googletests source code
Expand Down
7 changes: 7 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,13 @@ raft_export(
distance distributed nn DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string
)

# ##################################################################################################
# * shared test/bench headers ------------------------------------------------

if(BUILD_TESTS OR BUILD_BENCH)
include(internal/CMakeLists.txt)
endif()

# ##################################################################################################
# * build test executable ----------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ function(ConfigureBench)
target_link_libraries(
${BENCH_NAME}
PRIVATE raft::raft
raft_internal
$<$<BOOL:${ConfigureBench_DIST}>:raft::distance>
$<$<BOOL:${ConfigureBench_NN}>:raft::nn>
GTest::gtest
benchmark::benchmark
Threads::Threads
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
Expand Down
7 changes: 1 addition & 6 deletions cpp/bench/matrix/select_k.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,7 @@
* limitations under the License.
*/

/**
* TODO: reconsider how to organize shared test+bench files better
* Related Issue: https://github.com/rapidsai/raft/issues/1153
* (although this header does not depend on any gtest headers)
*/
#include "../../test/matrix/select_k.cuh"
#include <raft_internal/matrix/select_k.cuh>

#include <common/benchmark.hpp>

Expand Down
11 changes: 5 additions & 6 deletions cpp/bench/neighbors/refine.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,15 +14,16 @@
* limitations under the License.
*/

#include <common/benchmark.hpp>
#include <raft_internal/neighbors/refine_helper.cuh>

#include <raft/random/rng.cuh>
#include <common/benchmark.hpp>

#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/neighbors/detail/refine.cuh>
#include <raft/neighbors/refine.cuh>
#include <raft/random/rng.cuh>

#if defined RAFT_DISTANCE_COMPILED
#include <raft/distance/specializations.cuh>
Expand All @@ -36,12 +37,10 @@
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include "../../test/neighbors/refine_helper.cuh"

#include <iostream>
#include <sstream>

using namespace raft::neighbors::detail;
using namespace raft::neighbors;

namespace raft::bench::neighbors {

Expand Down
21 changes: 21 additions & 0 deletions cpp/internal/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

if(BUILD_TESTS OR BUILD_BENCH)
add_library(raft_internal INTERFACE)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm particularly not sure and welcome any suggestions regarding the naming of the component. Currently:

  1. The name is raft_internal, because I wanted to call it internal, but it may be a reserved word in cmake.
  2. All headers are placed into double-nested internal/internal. The first internal is for cmake to isolate it from the other components, the second is for includes, so that all includes from this component are easily identifiable, e.g.:
#include <internal/neighbors/naive_knn.cuh>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking through this a little bit- what do you think about keeping the outer directory cpp/internal as-is but nesting everything inside cpp/internal/raft instead of cpp/internal/internal. This would effectively allows us to include the cpp/internal directory in the build for the bench and test source files whilst making the include paths themselves look the same as the other raft includes. So for example, this would still allow us to include the naive_knn.cuh file as #include <raft/neighbors/naive_knn.cuh> but the file would be coming from cpp/internal instead. This assumes we wouldn't have name collisions there, which I highly doubt we would.

Another option could be to do something like cpp/internal/raft/internal but I kind of prefer the option above more. What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I'd prefer to have raft in the name is so that we don't have to worry about potential header collisions if, for example, a package named internal is ever shipped for linux. Separating the namespace by including raft makes it very clear where our scope ends.

Copy link
Contributor Author

@achirkin achirkin Jan 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, it turns out we have name collisions already! In branch-23.02 we have include/raft/matrix/select_k.cuh and its corresponding test test/raft/matrix/select_k.cuh; the latter is included using quotes syntax, so it's not a problem so far. In this PR I moved it to the internal component and changed the include syntax, so it would be a problem if we change the folder name to raft. So I see two options:

  1. rename test/raft/matrix/select_k.cuh to something like select_k_helpers.cuh or select_k_utils.cuh
  2. rename the root folder internal to something different from raft, yet not potentially clashing with third-party libraries.

In the new commit I opted for the option (2) with raft_internal as an experiment. Though I don't have a strong preference in this question. I only think that the trailing internal in the folder structure is not the best option, because it feels somewhat harder to maintain.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I can live w/ raft_internal in the meantime. We can always revisit if we find this is presenting maintenance challenges.

target_include_directories(
raft_internal INTERFACE "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/internal>"
)
target_compile_features(raft_internal INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
endif()
126 changes: 126 additions & 0 deletions cpp/internal/raft_internal/neighbors/naive_knn.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/distance/distance_types.hpp>
#include <raft/matrix/detail/select_k.cuh>
#include <raft/spatial/knn/detail/ann_utils.cuh>
#include <raft/util/cuda_utils.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

namespace raft::neighbors {

template <typename EvalT, typename DataT, typename IdxT>
__global__ void naive_distance_kernel(EvalT* dist,
const DataT* x,
const DataT* y,
IdxT m,
IdxT n,
IdxT k,
raft::distance::DistanceType metric)
{
IdxT midx = IdxT(threadIdx.x) + IdxT(blockIdx.x) * IdxT(blockDim.x);
if (midx >= m) return;
IdxT grid_size = IdxT(blockDim.y) * IdxT(gridDim.y);
for (IdxT nidx = threadIdx.y + blockIdx.y * blockDim.y; nidx < n; nidx += grid_size) {
EvalT acc = EvalT(0);
for (IdxT i = 0; i < k; ++i) {
IdxT xidx = i + midx * k;
IdxT yidx = i + nidx * k;
auto xv = EvalT(x[xidx]);
auto yv = EvalT(y[yidx]);
switch (metric) {
case raft::distance::DistanceType::InnerProduct: {
acc += xv * yv;
} break;
case raft::distance::DistanceType::L2SqrtExpanded:
case raft::distance::DistanceType::L2SqrtUnexpanded:
case raft::distance::DistanceType::L2Expanded:
case raft::distance::DistanceType::L2Unexpanded: {
auto diff = xv - yv;
acc += diff * diff;
} break;
default: break;
}
}
switch (metric) {
case raft::distance::DistanceType::L2SqrtExpanded:
case raft::distance::DistanceType::L2SqrtUnexpanded: {
acc = raft::sqrt(acc);
} break;
default: break;
}
dist[midx * n + nidx] = acc;
}
}

/**
* Naive, but flexible bruteforce KNN search.
*
* TODO: either replace this with brute_force_knn or with distance+select_k
* when either distance or brute_force_knn support 8-bit int inputs.
*/
template <typename EvalT, typename DataT, typename IdxT>
void naive_knn(EvalT* dist_topk,
IdxT* indices_topk,
const DataT* x,
const DataT* y,
size_t n_inputs,
size_t input_len,
size_t dim,
uint32_t k,
raft::distance::DistanceType type,
rmm::cuda_stream_view stream)
{
rmm::mr::device_memory_resource* mr = nullptr;
auto pool_guard = raft::get_pool_memory_resource(mr, 1024 * 1024);

dim3 block_dim(16, 32, 1);
// maximum reasonable grid size in `y` direction
auto grid_y =
static_cast<uint16_t>(std::min<size_t>(raft::ceildiv<size_t>(input_len, block_dim.y), 32768));

// bound the memory used by this function
size_t max_batch_size =
std::min<size_t>(n_inputs, raft::ceildiv<size_t>(size_t(1) << size_t(27), input_len));
rmm::device_uvector<EvalT> dist(max_batch_size * input_len, stream, mr);

for (size_t offset = 0; offset < n_inputs; offset += max_batch_size) {
size_t batch_size = std::min(max_batch_size, n_inputs - offset);
dim3 grid_dim(raft::ceildiv<size_t>(batch_size, block_dim.x), grid_y, 1);

naive_distance_kernel<EvalT, DataT, IdxT><<<grid_dim, block_dim, 0, stream>>>(
dist.data(), x + offset * dim, y, batch_size, input_len, dim, type);

matrix::detail::select_k<EvalT, IdxT>(dist.data(),
nullptr,
batch_size,
input_len,
static_cast<int>(k),
dist_topk + offset * k,
indices_topk + offset * k,
type != raft::distance::DistanceType::InnerProduct,
stream,
mr);
}
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
}

} // namespace raft::neighbors
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,8 @@
*/
#pragma once

#include "ann_utils.cuh"
#include <raft_internal/neighbors/naive_knn.cuh>

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
Expand All @@ -25,9 +26,9 @@
#include <raft/random/rng.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>

namespace raft::neighbors::detail {
namespace raft::neighbors {

template <typename IdxT>
struct RefineInputs {
Expand Down Expand Up @@ -66,16 +67,16 @@ class RefineHelper {
{
candidates = raft::make_device_matrix<IdxT, IdxT>(handle_, p.n_queries, p.k0);
rmm::device_uvector<DistanceT> distances_tmp(p.n_queries * p.k0, stream_);
raft::neighbors::naiveBfKnn<DistanceT, DataT, IdxT>(distances_tmp.data(),
candidates.data_handle(),
queries.data_handle(),
dataset.data_handle(),
p.n_queries,
p.n_rows,
p.dim,
p.k0,
p.metric,
stream_);
naive_knn<DistanceT, DataT, IdxT>(distances_tmp.data(),
candidates.data_handle(),
queries.data_handle(),
dataset.data_handle(),
p.n_queries,
p.n_rows,
p.dim,
p.k0,
p.metric,
stream_);
handle_.sync_stream(stream_);
}

Expand All @@ -98,16 +99,16 @@ class RefineHelper {
{
rmm::device_uvector<DistanceT> distances_dev(p.n_queries * p.k, stream_);
rmm::device_uvector<IdxT> indices_dev(p.n_queries * p.k, stream_);
raft::neighbors::naiveBfKnn<DistanceT, DataT, IdxT>(distances_dev.data(),
indices_dev.data(),
queries.data_handle(),
dataset.data_handle(),
p.n_queries,
p.n_rows,
p.dim,
p.k,
p.metric,
stream_);
naive_knn<DistanceT, DataT, IdxT>(distances_dev.data(),
indices_dev.data(),
queries.data_handle(),
dataset.data_handle(),
p.n_queries,
p.n_rows,
p.dim,
p.k,
p.metric,
stream_);
true_refined_distances_host.resize(p.n_queries * p.k);
true_refined_indices_host.resize(p.n_queries * p.k);
raft::copy(true_refined_indices_host.data(), indices_dev.data(), indices_dev.size(), stream_);
Expand Down Expand Up @@ -137,4 +138,4 @@ class RefineHelper {
std::vector<IdxT> true_refined_indices_host;
std::vector<DistanceT> true_refined_distances_host;
};
} // namespace raft::neighbors::detail
} // namespace raft::neighbors
1 change: 1 addition & 0 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ function(ConfigureTest)
target_link_libraries(
${TEST_NAME}
PRIVATE raft::raft
raft_internal
$<$<BOOL:${ConfigureTest_DIST}>:raft::distance>
$<$<BOOL:${ConfigureTest_NN}>:raft::nn>
GTest::gtest
Expand Down
5 changes: 3 additions & 2 deletions cpp/test/matrix/select_k.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,8 @@
*/

#include "../test_utils.cuh"
#include "select_k.cuh"

#include <raft_internal/matrix/select_k.cuh>

#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>
Expand Down
22 changes: 12 additions & 10 deletions cpp/test/neighbors/ann_ivf_flat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "../test_utils.cuh"
#include "ann_utils.cuh"

#include <raft_internal/neighbors/naive_knn.cuh>

#include <raft/core/device_mdspan.hpp>
#include <raft/core/logger.hpp>
#include <raft/distance/distance_types.hpp>
Expand Down Expand Up @@ -78,16 +80,16 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
{
rmm::device_uvector<T> distances_naive_dev(queries_size, stream_);
rmm::device_uvector<IdxT> indices_naive_dev(queries_size, stream_);
naiveBfKnn<T, DataT, IdxT>(distances_naive_dev.data(),
indices_naive_dev.data(),
search_queries.data(),
database.data(),
ps.num_queries,
ps.num_db_vecs,
ps.dim,
ps.k,
ps.metric,
stream_);
naive_knn<T, DataT, IdxT>(distances_naive_dev.data(),
indices_naive_dev.data(),
search_queries.data(),
database.data(),
ps.num_queries,
ps.num_db_vecs,
ps.dim,
ps.k,
ps.metric,
stream_);
update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
handle_.sync_stream(stream_);
Expand Down
Loading