Merge branch 'branch-24.04' into test-cuda-12.2
jameslamb committed Jan 24, 2024
2 parents 8ea3858 + c73fd0d commit d6767d1
Showing 79 changed files with 4,071 additions and 1,265 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -424,6 +424,7 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/neighbors/cagra_build.cu
src/raft_runtime/neighbors/cagra_search.cu
src/raft_runtime/neighbors/cagra_serialize.cu
src/raft_runtime/neighbors/eps_neighborhood.cu
src/raft_runtime/neighbors/ivf_flat_build.cu
src/raft_runtime/neighbors/ivf_flat_search.cu
src/raft_runtime/neighbors/ivf_flat_serialize.cu
@@ -443,6 +444,7 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
18 changes: 8 additions & 10 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -287,11 +287,11 @@ void bench_search(::benchmark::State& state,
std::make_shared<buf<std::size_t>>(current_algo_props->query_memory_type, k * query_set_size);

cuda_timer gpu_timer;
auto start = std::chrono::high_resolution_clock::now();
{
nvtx_case nvtx{state.name()};

    auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
auto start = std::chrono::high_resolution_clock::now();
for (auto _ : state) {
[[maybe_unused]] auto ntx_lap = nvtx.lap();
[[maybe_unused]] auto gpu_lap = gpu_timer.lap();
@@ -314,17 +314,15 @@ void bench_search(::benchmark::State& state,

queries_processed += n_queries;
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert(
{"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}});

state.SetItemsProcessed(queries_processed);
if (cudart.found()) {
double gpu_time_per_iteration = gpu_timer.total_time() / (double)state.iterations();
state.counters.insert({"GPU", {gpu_time_per_iteration, benchmark::Counter::kAvgThreads}});
state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
}

// This will be the total number of queries across all threads
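For context on the counter change above: `benchmark::Counter::kAvgIterations` asks google-benchmark to divide the stored value by the iteration count when reporting, so the new code can record the raw `duration` total instead of dividing by `state.iterations()` by hand (the GPU timer total gets the same treatment). A minimal self-contained sketch of that counter behavior, independent of the RAFT harness — the benchmark name is illustrative:

#include <benchmark/benchmark.h>
#include <chrono>

static void BM_counter_demo(benchmark::State& state)
{
  auto start = std::chrono::high_resolution_clock::now();
  for (auto _ : state) {
    benchmark::DoNotOptimize(start);  // stand-in for the measured work
  }
  auto end      = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
  // kAvgIterations: the framework divides the stored total by the number of
  // iterations, so "Latency" is reported as seconds per iteration.
  state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
BENCHMARK(BM_counter_demo);
BENCHMARK_MAIN();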
27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,13 +49,6 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -112,21 +105,5 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -53,13 +53,6 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -110,21 +103,5 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
32 changes: 31 additions & 1 deletion cpp/include/raft/core/math.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,12 +49,42 @@ RAFT_INLINE_FUNCTION auto abs(T x)
template <typename T>
constexpr RAFT_INLINE_FUNCTION auto abs(T x)
-> std::enable_if_t<!std::is_same_v<float, T> && !std::is_same_v<double, T> &&
#if defined(_RAFT_HAS_CUDA)
!std::is_same_v<__half, T> && !std::is_same_v<nv_bfloat16, T> &&
#endif
!std::is_same_v<int, T> && !std::is_same_v<long int, T> &&
!std::is_same_v<long long int, T>,
T>
{
return x < T{0} ? -x : x;
}
#if defined(_RAFT_HAS_CUDA)
template <typename T>
RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t<std::is_same_v<T, __half>, __half> abs(T x)
{
#if (__CUDA_ARCH__ >= 530)
return ::__habs(x);
#else
// Fail during template instantiation if the compute capability doesn't support this operation
static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530");
return T{};
#endif
}

template <typename T>
RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t<std::is_same_v<T, nv_bfloat16>, nv_bfloat16>
abs(T x)
{
#if (__CUDA_ARCH__ >= 800)
return ::__habs(x);
#else
// Fail during template instantiation if the compute capability doesn't support this operation
static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800");
return T{};
#endif
}
#endif
/** @} */

/** Inverse cosine */
template <typename T>
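The `sizeof(T) != sizeof(T)` conditions in the new overloads are the usual dependent-false trick: because the expression depends on the template parameter, the `static_assert` is only evaluated when the overload is actually instantiated for an unsupported compute capability, rather than rejecting the header outright. A standalone sketch of the pattern — the function name below is illustrative, not from RAFT:

template <typename T>
T half_only_op(T x)
{
  // Dependent-false: sizeof(T) != sizeof(T) can never hold, but because it
  // depends on T the compiler defers evaluation until instantiation. Merely
  // declaring this template compiles; calling it triggers the error.
  static_assert(sizeof(T) != sizeof(T), "half_only_op is not supported for this T");
  return x;
}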
6 changes: 3 additions & 3 deletions cpp/include/raft/core/resource/cublas_handle.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -60,8 +60,8 @@ class cublas_resource_factory : public resource_factory {
*/

/**
* Load a cublasres_t from raft res if it exists, otherwise
* add it and return it.
* Load a `cublasHandle_t` from raft res if it exists, otherwise add it and return it.
*
* @param[in] res the raft resources object
* @return cublas handle
*/
68 changes: 68 additions & 0 deletions cpp/include/raft/core/resource/cublaslt_handle.hpp
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cublasLt.h>
#include <raft/core/cublas_macros.hpp>
#include <raft/core/resource/resource_types.hpp>
#include <raft/core/resources.hpp>

#include <memory>

namespace raft::resource {

class cublaslt_resource : public resource {
public:
cublaslt_resource() { RAFT_CUBLAS_TRY(cublasLtCreate(&handle_)); }
~cublaslt_resource() noexcept override { RAFT_CUBLAS_TRY_NO_THROW(cublasLtDestroy(handle_)); }
auto get_resource() -> void* override { return &handle_; }

private:
cublasLtHandle_t handle_;
};

/** Factory that knows how to construct a specific raft::resource to populate the res_t. */
class cublaslt_resource_factory : public resource_factory {
public:
auto get_resource_type() -> resource_type override { return resource_type::CUBLASLT_HANDLE; }
auto make_resource() -> resource* override { return new cublaslt_resource(); }
};

/**
* @defgroup resource_cublaslt cuBLASLt handle resource functions
* @{
*/

/**
* Load a `cublasLtHandle_t` from raft res if it exists, otherwise add it and return it.
*
* @param[in] res the raft resources object
* @return cublasLt handle
*/
inline auto get_cublaslt_handle(resources const& res) -> cublasLtHandle_t
{
if (!res.has_resource_factory(resource_type::CUBLASLT_HANDLE)) {
res.add_resource_factory(std::make_shared<cublaslt_resource_factory>());
}
auto ret = *res.get_resource<cublasLtHandle_t>(resource_type::CUBLASLT_HANDLE);
return ret;
};

/**
* @}
*/

} // namespace raft::resource
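A hypothetical usage sketch for the new accessor (the surrounding function is illustrative, not part of this commit): the cublasLt handle is created lazily on first access and then cached inside the `resources` object.

#include <raft/core/resource/cublaslt_handle.hpp>
#include <raft/core/resources.hpp>

void lt_example()
{
  raft::resources res;
  // First call registers the factory and creates the cublasLt handle;
  // subsequent calls on the same `res` return the cached handle.
  cublasLtHandle_t lt = raft::resource::get_cublaslt_handle(res);
  (void)lt;  // would be passed to cublasLt routines such as cublasLtMatmul
}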
93 changes: 93 additions & 0 deletions cpp/include/raft/core/resource/custom_resource.hpp
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <raft/core/resource/resource_types.hpp>
#include <raft/core/resources.hpp>

#include <algorithm>
#include <memory>
#include <typeindex>

namespace raft::resource {

class custom_resource : public resource {
public:
custom_resource() = default;
~custom_resource() noexcept override = default;
auto get_resource() -> void* override { return this; }

template <typename ResourceT>
auto load() -> ResourceT*
{
std::lock_guard<std::mutex> _(lock_);
auto key = std::type_index{typeid(ResourceT)};
auto pos = std::lower_bound(store_.begin(), store_.end(), kv{key, {nullptr}});
if ((pos != store_.end()) && std::get<0>(*pos) == key) {
return reinterpret_cast<ResourceT*>(std::get<1>(*pos).get());
}
auto store_ptr = new ResourceT{};
store_.insert(pos, kv{key, std::shared_ptr<void>(store_ptr, [](void* ptr) {
delete reinterpret_cast<ResourceT*>(ptr);
})});
return store_ptr;
}

private:
using kv = std::tuple<std::type_index, std::shared_ptr<void>>;
std::mutex lock_{};
std::vector<kv> store_{};
};

/** Factory that knows how to construct a specific raft::resource to populate the res_t. */
class custom_resource_factory : public resource_factory {
public:
auto get_resource_type() -> resource_type override { return resource_type::CUSTOM; }
auto make_resource() -> resource* override { return new custom_resource(); }
};

/**
* @defgroup resource_custom custom resource functions
* @{
*/

/**
* Get the custom default-constructible resource if it exists, create it otherwise.
*
* Note: in contrast to the other, hard-coded resources, there's no information about the custom
* resources at compile time. Hence, custom resources are kept in a type-indexed lookup table
* searched at runtime, which makes access slightly slower.
*
* @tparam ResourceT the type of the resource; it must be complete and default-constructible.
*
* @param[in] res the raft resources object
* @return a pointer to the custom resource.
*/
template <typename ResourceT>
auto get_custom_resource(resources const& res) -> ResourceT*
{
static_assert(std::is_default_constructible_v<ResourceT>);
if (!res.has_resource_factory(resource_type::CUSTOM)) {
res.add_resource_factory(std::make_shared<custom_resource_factory>());
}
return res.get_resource<custom_resource>(resource_type::CUSTOM)->load<ResourceT>();
};

/**
* @}
*/

} // namespace raft::resource
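A hypothetical usage sketch for `get_custom_resource` (the `my_counters` type and function below are illustrative): any complete, default-constructible type can be attached to a `resources` object this way, and the same instance is shared by everything that receives the same `res`.

#include <raft/core/resource/custom_resource.hpp>
#include <raft/core/resources.hpp>

struct my_counters {  // hypothetical user-defined resource
  int hits = 0;
};

void counting_example(raft::resources const& res)
{
  // Default-constructed on first use; every later call with the same `res`
  // returns a pointer to the same instance.
  my_counters* c = raft::resource::get_custom_resource<my_counters>(res);
  c->hits += 1;
}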
4 changes: 3 additions & 1 deletion cpp/include/raft/core/resource/resource_types.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,6 +43,8 @@ enum resource_type {
// CUDA-free builds
THRUST_POLICY, // thrust execution policy
WORKSPACE_RESOURCE, // rmm device memory resource
CUBLASLT_HANDLE, // cublasLt handle
CUSTOM, // runtime-shared default-constructible resource

LAST_KEY // reserved for the last key
};