Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into nvcomp-4.1.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk authored Dec 5, 2024
2 parents ae30f8a + 3d2ab00 commit e78962f
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 54 deletions.
17 changes: 16 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ if(KvikIO_CUDA_SUPPORT)
else()
set(cuFile_FOUND 1)

# Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND)
# Check API support
try_compile(
cuFile_BATCH_API_FOUND SOURCE_FROM_CONTENT
batch.cpp
Expand Down Expand Up @@ -109,6 +109,20 @@ if(KvikIO_CUDA_SUPPORT)
OUTPUT_VARIABLE stream_output
)
message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}")
# Probe for `cuFileGetVersion()`: try to compile and link a minimal program that
# calls it. The boolean outcome is stored in cuFile_VERSION_API_FOUND and the
# build output of the probe is captured in `version_output`.
try_compile(
cuFile_VERSION_API_FOUND SOURCE_FROM_CONTENT
version.cpp
[[#include <cufile.h>
int main() {
int version;
cuFileGetVersion(&version);
return 0;
}
]]
LINK_LIBRARIES CUDA::cuFile rt ${CMAKE_DL_LIBS}
OUTPUT_VARIABLE version_output
)
message(STATUS "Found cuFile Version API: ${cuFile_VERSION_API_FOUND}")
endif()
endif()

Expand Down Expand Up @@ -154,6 +168,7 @@ target_compile_definitions(
$<$<BOOL:${cuFile_FOUND}>:KVIKIO_CUFILE_FOUND>
$<$<BOOL:${cuFile_BATCH_API_FOUND}>:KVIKIO_CUFILE_BATCH_API_FOUND>
$<$<BOOL:${cuFile_STREAM_API_FOUND}>:KVIKIO_CUFILE_STREAM_API_FOUND>
$<$<BOOL:${cuFile_VERSION_API_FOUND}>:KVIKIO_CUFILE_VERSION_API_FOUND>
)

set_target_properties(
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ int main()
cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads()
<< " threads): " << read << endl;
}
if (kvikio::is_batch_and_stream_available() && !kvikio::defaults::is_compat_mode_preferred()) {
if (kvikio::is_batch_api_available() && !kvikio::defaults::is_compat_mode_preferred()) {
std::cout << std::endl;
Timer timer;
// Here we use the batch API to read "/tmp/test-file" into `b_dev` by
Expand Down
29 changes: 13 additions & 16 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,20 @@ class FileHandle {
*/
bool is_compat_mode_preferred_for_async(CompatMode requested_compat_mode)
{
if (!defaults::is_compat_mode_preferred(requested_compat_mode)) {
if (!is_batch_and_stream_available()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile batch or stream library symbol.");
}

// When checking for availability, we also check if cuFile's config file exist. This is
// because even when the stream API is available, it doesn't work if no config file exist.
if (config_path().empty()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile configuration file.");
}

return false;
if (defaults::is_compat_mode_preferred(requested_compat_mode)) { return true; }

if (!is_stream_api_available()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing the cuFile stream api.");
}

return true;
// When checking for availability, we also check if cuFile's config file exists. This is
// because even when the stream API is available, it doesn't work if no config file exists.
if (config_path().empty()) {
if (requested_compat_mode == CompatMode::AUTO) { return true; }
throw std::runtime_error("Missing cuFile configuration file.");
}
return false;
}

public:
Expand Down Expand Up @@ -670,7 +667,7 @@ class FileHandle {
*/
[[nodiscard]] bool is_compat_mode_preferred_for_async() const noexcept
{
static bool is_extra_symbol_available = is_batch_and_stream_available();
static bool is_extra_symbol_available = is_stream_api_available();
static bool is_config_path_empty = config_path().empty();
return is_compat_mode_preferred() || !is_extra_symbol_available || is_config_path_empty;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/kvikio/remote_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace detail {
* @note Is not thread-safe.
*/
class BounceBufferH2D {
CUstream _stream; // The CUDA steam to use.
CUstream _stream; // The CUDA stream to use.
CUdeviceptr _dev; // The output device buffer.
AllocRetain::Alloc _host_buffer; // The host buffer to bounce data on.
std::ptrdiff_t _dev_offset{0}; // Number of bytes written to `_dev`.
Expand Down
99 changes: 66 additions & 33 deletions cpp/include/kvikio/shim/cufile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ class cuFileAPI {
decltype(cuFileDriverOpen)* DriverOpen{nullptr};
decltype(cuFileDriverClose)* DriverClose{nullptr};

// Don't call `GetVersion` directly, use `cuFileAPI::instance().version`.
decltype(cuFileGetVersion)* GetVersion{nullptr};

public:
bool stream_available = false;
int version{0};

private:
#ifdef KVIKIO_CUFILE_FOUND
Expand Down Expand Up @@ -88,33 +91,39 @@ class cuFileAPI {
get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize));
get_symbol(DriverSetMaxPinnedMemSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxPinnedMemSize));

#ifdef KVIKIO_CUFILE_BATCH_API_FOUND
get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp));
get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit));
get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus));
get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel));
get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy));
#endif

#ifdef KVIKIO_CUFILE_STREAM_API_FOUND
get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync));
get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync));
get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister));
get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister));
#ifdef KVIKIO_CUFILE_VERSION_API_FOUND
try {
void* s{};
get_symbol(s, lib, "cuFileReadAsync");
stream_available = true;
} catch (const std::runtime_error&) {
get_symbol(GetVersion, lib, KVIKIO_STRINGIFY(cuFileGetVersion));
int ver;
CUfileError_t const error = GetVersion(&ver);
if (error.err == CU_FILE_SUCCESS) { version = ver; }
} catch (std::runtime_error const&) {
}
#endif

// Some symbols were introduced in later versions, so version guards are required.
// Note: `version` is 0 for cuFile versions prior to v1.8 because `cuFileGetVersion`
// did not exist. As a result, neither the batch API (introduced in v1.6) nor the
// stream API (introduced in v1.7) is loaded for cuFile versions older than v1.8,
// even though the library provides them. This trade-off is made for improved robustness.
if (version >= 1060) {
get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp));
get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit));
get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus));
get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel));
get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy));
}
if (version >= 1070) {
get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync));
get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync));
get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister));
get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister));
}

// cuFile is supposed to open and close the driver automatically but
// because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults:
// <https://github.com/rapidsai/kvikio/issues/159>.
// We use the stream API as a version indicator of cuFile since it was introduced
// in cuFile v1.7 (CUDA v12.2).
if (!stream_available) { driver_open(); }
if (version < 1050) { driver_open(); }
}

// Notice, we have to close the driver at program exit (if we opened it) even though we are
Expand All @@ -124,7 +133,7 @@ class cuFileAPI {
// [1] <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
~cuFileAPI()
{
if (!stream_available) { driver_close(); }
if (version < 1050) { driver_close(); }
}
#else
cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); }
Expand Down Expand Up @@ -205,25 +214,49 @@ inline bool is_cufile_available()
}

/**
* @brief Check if cuFile's batch and stream API is available
* @brief Get cufile version (or zero if older than v1.8).
*
* Technically, the batch API is available in CUDA 12.1 but since there is no good
* way to check CUDA version using the driver API, we check for the existing of the
* `cuFileReadAsync` symbol, which is defined in CUDA 12.2+.
* The version is returned as (1000*major + 10*minor). E.g., cufile v1.8.0 would
* be represented by 1080.
*
* @return The boolean answer
* Notice, this is not the version of the CUDA toolkit. cufile is part of the
* toolkit but follows its own version scheme.
*
* @return The version (1000*major + 10*minor) or zero if older than 1080.
*/
#if defined(KVIKIO_CUFILE_STREAM_API_FOUND) && defined(KVIKIO_CUFILE_STREAM_API_FOUND)
inline bool is_batch_and_stream_available() noexcept
#ifdef KVIKIO_CUFILE_FOUND
inline int cufile_version()
{
try {
return is_cufile_available() && cuFileAPI::instance().stream_available;
} catch (const std::runtime_error&) {
return false;
return cuFileAPI::instance().version;
} catch (std::runtime_error const&) {
return 0;
}
}
#else
constexpr bool is_batch_and_stream_available() { return false; }
constexpr int cufile_version() { return 0; }
#endif

/**
 * @brief Tell whether the cuFile batch API can be used.
 *
 * The decision is based solely on the version reported by `cufile_version()`.
 * Because `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3),
 * the reported version is zero for anything older, so this function returns false
 * for those versions even though the batch API became available in v1.6.
 *
 * @return True if the reported cufile version is at least v1.6, false otherwise.
 */
inline bool is_batch_api_available() noexcept
{
  // cufile v1.6 encoded as (1000*major + 10*minor).
  constexpr int batch_api_min_version = 1060;
  return cufile_version() >= batch_api_min_version;
}

/**
 * @brief Tell whether the cuFile stream (async) API can be used.
 *
 * The decision is based solely on the version reported by `cufile_version()`.
 * Because `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3),
 * the reported version is zero for anything older, so this function returns false
 * for those versions even though the stream API became available in v1.7.
 *
 * @return True if the reported cufile version is at least v1.7, false otherwise.
 */
inline bool is_stream_api_available() noexcept
{
  // cufile v1.7 encoded as (1000*major + 10*minor).
  constexpr int stream_api_min_version = 1070;
  return cufile_version() >= stream_api_min_version;
}

} // namespace kvikio
7 changes: 5 additions & 2 deletions cpp/include/kvikio/shim/cufile_h_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ CUfileError_t cuFileDriverSetMaxPinnedMemSize(...);

#endif

// If the Batch API isn't defined, we define some of the data types here.
// If some cufile APIs aren't defined, we define some of the data types here.
// Notice, this doesn't need to be ABI compatible with the cufile definitions. The
// lack of definitions is not a problem either: the linker never looks for these
// symbols because the "real" function calls are made through the shim instance.
Expand Down Expand Up @@ -105,10 +105,13 @@ CUfileError_t cuFileBatchIOCancel(...);
CUfileError_t cuFileBatchIODestroy(...);
#endif

// If the Stream API isn't defined, we define some of the data types here.
#ifndef KVIKIO_CUFILE_STREAM_API_FOUND
CUfileError_t cuFileReadAsync(...);
CUfileError_t cuFileWriteAsync(...);
CUfileError_t cuFileStreamRegister(...);
CUfileError_t cuFileStreamDeregister(...);
#endif

#ifndef KVIKIO_CUFILE_VERSION_API_FOUND
CUfileError_t cuFileGetVersion(...);
#endif
5 changes: 5 additions & 0 deletions python/kvikio/kvikio/_lib/cufile_driver.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@ from libcpp cimport bool


cdef extern from "<kvikio/shim/cufile.hpp>" nogil:
cdef int cpp_libcufile_version "kvikio::cufile_version"() except +
cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except +
cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except +


def libcufile_version() -> int:
    """Return the integer reported by the C++ function ``kvikio::cufile_version``."""
    encoded_version = cpp_libcufile_version()
    return encoded_version


def driver_open():
cpp_driver_open()

Expand Down
6 changes: 6 additions & 0 deletions python/kvikio/kvikio/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def drop_vm_cache() -> None:
def pprint_sys_info() -> None:
"""Pretty print system information"""

version = kvikio.cufile_driver.libcufile_version()
props = kvikio.cufile_driver.DriverProperties()
try:
import pynvml
Expand All @@ -41,6 +42,10 @@ def pprint_sys_info() -> None:
gpu_name = f"{pynvml.nvmlDeviceGetName(dev)} (dev #0)"
mem_total = format_bytes(pynvml.nvmlDeviceGetMemoryInfo(dev).total)
bar1_total = format_bytes(pynvml.nvmlDeviceGetBAR1MemoryInfo(dev).bar1Total)
if version == (0, 0):
libcufile_version = "unknown (earlier than cuFile 1.8)"
else:
libcufile_version = f"{version[0]}.{version[1]}"
gds_version = "N/A (Compatibility Mode)"
if props.is_gds_available:
gds_version = f"v{props.major_version}.{props.minor_version}"
Expand All @@ -61,6 +66,7 @@ def pprint_sys_info() -> None:
print(f"GPU | {gpu_name}")
print(f"GPU Memory Total | {mem_total}")
print(f"BAR1 Memory Total | {bar1_total}")
print(f"libcufile version | {libcufile_version}")
print(f"GDS driver | {gds_version}")
print(f"GDS config.json | {gds_config_json_path}")

Expand Down
21 changes: 21 additions & 0 deletions python/kvikio/kvikio/cufile_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See file LICENSE for terms.

import atexit
from typing import Tuple

from kvikio._lib import cufile_driver # type: ignore

Expand All @@ -10,6 +11,26 @@
DriverProperties = cufile_driver.DriverProperties


def libcufile_version() -> Tuple[int, int]:
    """Report the version of libcufile as a (major, minor) pair.

    For cuFile versions prior to v1.8 the result is (0, 0).

    Notes
    -----
    The value is unrelated to the CUDA toolkit version: cufile ships with
    the toolkit but follows its own version scheme.

    Returns
    -------
    The version as a tuple (MAJOR, MINOR).
    """
    encoded = cufile_driver.libcufile_version()
    # The integer is laid out as 1000*major + 10*minor, e.g. 1080 -> (1, 8).
    major = encoded // 1000
    minor = (encoded % 1000) // 10
    return (major, minor)


def driver_open() -> None:
"""Open the cuFile driver
Expand Down
6 changes: 6 additions & 0 deletions python/kvikio/tests/test_cufile_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import kvikio.cufile_driver


def test_version():
    """Both components of the reported libcufile version are non-negative."""
    version = kvikio.cufile_driver.libcufile_version()
    major, minor = version
    assert major >= 0
    assert minor >= 0


@pytest.mark.cufile
def test_open_and_close():
kvikio.cufile_driver.driver_open()
Expand Down

0 comments on commit e78962f

Please sign in to comment.