Skip to content

Commit

Permalink
Merge pull request #473 from rapidsai/branch-24.10
Browse files Browse the repository at this point in the history
Forward-merge branch-24.10 into branch-24.12
  • Loading branch information
GPUtester authored Sep 25, 2024
2 parents f42c36c + 44b3f97 commit 638d53d
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 34 deletions.
4 changes: 3 additions & 1 deletion cpp/include/kvikio/cufile_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <filesystem>
#include <string>

#include <kvikio/utils.hpp>

namespace kvikio {
namespace detail {

Expand All @@ -39,7 +41,7 @@ namespace detail {
*
* @return The filepath to the cufile.json file or the empty string if it isn't found.
*/
[[nodiscard]] inline const std::string& config_path()
[[nodiscard]] KVIKIO_EXPORT inline const std::string& config_path()
{
static const std::string ret = detail::lookup_config_path();
return ret;
Expand Down
1 change: 0 additions & 1 deletion cpp/include/kvikio/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>

#include <BS_thread_pool.hpp>

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ class FileHandle {
*
* @return The number of bytes
*/
[[nodiscard]] inline std::size_t nbytes() const
[[nodiscard]] std::size_t nbytes() const
{
if (closed()) { return 0; }
if (_nbytes == 0) { _nbytes = detail::get_file_size(_fd_direct_off); }
Expand Down
62 changes: 31 additions & 31 deletions cpp/include/kvikio/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@
#include <kvikio/error.hpp>
#include <kvikio/shim/cuda.hpp>

// Macros used for defining symbol visibility, only GLIBC is supported.
// Since KvikIO is header-only, we rely on the linker to disambiguate inline functions
// that have (or return) static references. To do this, the relevant function must have
// `__attribute__((visibility("default")))`. If not, then if KvikIO is used in two
// different DSOs, the function will appear twice, and there will be two static objects.
// See <https://github.com/rapidsai/kvikio/issues/442>.
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) && !defined(__MINGW64__)
#define KVIKIO_EXPORT __attribute__((visibility("default")))
#define KVIKIO_HIDDEN __attribute__((visibility("hidden")))
#else
#define KVIKIO_EXPORT
#define KVIKIO_HIDDEN
#endif

namespace kvikio {

// cuFile defines a page size to 4 KiB
Expand Down Expand Up @@ -101,33 +115,6 @@ constexpr bool is_host_memory(const void* ptr) { return true; }
return ret;
}

/**
* @brief RAII wrapper for a CUDA primary context
*/
class CudaPrimaryContext {
public:
CUdevice dev{};
CUcontext ctx{};

CudaPrimaryContext(int device_ordinal)
{
CUDA_DRIVER_TRY(cudaAPI::instance().DeviceGet(&dev, device_ordinal));
CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRetain(&ctx, dev));
}
CudaPrimaryContext(const CudaPrimaryContext&) = delete;
CudaPrimaryContext& operator=(CudaPrimaryContext const&) = delete;
CudaPrimaryContext(CudaPrimaryContext&&) = delete;
CudaPrimaryContext&& operator=(CudaPrimaryContext&&) = delete;
~CudaPrimaryContext()
{
try {
CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRelease(dev), CUfileException);
} catch (const CUfileException& e) {
std::cerr << e.what() << std::endl;
}
}
};

/**
* @brief Given a device ordinal, return the primary context of the device.
*
Expand All @@ -136,11 +123,24 @@ class CudaPrimaryContext {
* @param ordinal Device ordinal - an integer between 0 and the number of CUDA devices
* @return Primary CUDA context
*/
[[nodiscard]] inline CUcontext get_primary_cuda_context(int ordinal)
[[nodiscard]] KVIKIO_EXPORT inline CUcontext get_primary_cuda_context(int ordinal)
{
static std::map<int, CudaPrimaryContext> _primary_contexts;
_primary_contexts.try_emplace(ordinal, ordinal);
return _primary_contexts.at(ordinal).ctx;
static std::map<int, CUcontext> _cache;
static std::mutex _mutex;
std::lock_guard const lock(_mutex);

if (_cache.find(ordinal) == _cache.end()) {
CUdevice dev{};
CUcontext ctx{};
CUDA_DRIVER_TRY(cudaAPI::instance().DeviceGet(&dev, ordinal));

// Notice, we let the primary context leak at program exit. We do this because `_cache`
// is static and we are not allowed to call `cuDevicePrimaryCtxRelease()` after main:
// <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRetain(&ctx, dev));
_cache.emplace(ordinal, ctx);
}
return _cache.at(ordinal);
}

/**
Expand Down

0 comments on commit 638d53d

Please sign in to comment.