From e418c8ccfce19fa87c68ad121a3af10c8e8cd863 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 24 Apr 2024 13:48:30 +0200 Subject: [PATCH 1/6] posix: use default stream --- cpp/include/kvikio/file_handle.hpp | 12 +++++++----- cpp/include/kvikio/posix_io.hpp | 25 +++++++++++++++++-------- cpp/include/kvikio/shim/cuda.hpp | 10 +++++----- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 9cd74b2319..55a16da575 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -308,7 +308,8 @@ class FileHandle { std::size_t devPtr_offset) { if (_compat_mode) { - return posix_device_read(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset); + return posix_device_read( + _fd_direct_off, devPtr_base, size, file_offset, devPtr_offset, nullptr); } #ifdef KVIKIO_CUFILE_FOUND ssize_t ret = cuFileAPI::instance().Read( @@ -358,7 +359,8 @@ class FileHandle { _nbytes = 0; // Invalidate the computed file size if (_compat_mode) { - return posix_device_write(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset); + return posix_device_write( + _fd_direct_off, devPtr_base, size, file_offset, devPtr_offset, nullptr); } #ifdef KVIKIO_CUFILE_FOUND ssize_t ret = cuFileAPI::instance().Write( @@ -420,7 +422,7 @@ class FileHandle { if (size < gds_threshold) { auto task = [this, ctx, buf, size, file_offset]() -> std::size_t { PushAndPopContext c(ctx); - return posix_device_read(_fd_direct_off, buf, size, file_offset, 0); + return posix_device_read(_fd_direct_off, buf, size, file_offset, 0, nullptr); }; return std::async(std::launch::deferred, task); } @@ -481,7 +483,7 @@ class FileHandle { if 
(size < gds_threshold) { auto task = [this, ctx, buf, size, file_offset]() -> std::size_t { PushAndPopContext c(ctx); - return posix_device_write(_fd_direct_off, buf, size, file_offset, 0); + return posix_device_write(_fd_direct_off, buf, size, file_offset, 0, nullptr); }; return std::async(std::launch::deferred, task); } diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index a6786747f3..78f36898b9 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,6 +158,7 @@ ssize_t posix_host_io(int fd, const void* buf, size_t count, off_t offset, bool * @param size Number of bytes to read or write. * @param file_offset Byte offset to the start of the file. * @param devPtr_offset Byte offset to the start of the device pointer. + * @param stream CUDA stream in which to enqueue the operation. * @return Number of bytes read or written. 
*/ template <bool IsReadOperation> @@ -165,7 +166,8 @@ std::size_t posix_device_io(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset) + std::size_t devPtr_offset, + CUstream stream) { auto alloc = manager.get(); CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset; @@ -178,9 +180,12 @@ std::size_t posix_device_io(int fd, ssize_t nbytes_got = nbytes_requested; if constexpr (IsReadOperation) { nbytes_got = posix_host_io(fd, alloc.get(), nbytes_requested, cur_file_offset, true); - CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoD(devPtr, alloc.get(), nbytes_got)); + CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoDAsync(devPtr, alloc.get(), nbytes_got, stream)); + CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); } else { // Is a write operation - CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyDtoH(alloc.get(), devPtr, nbytes_requested)); + CUDA_DRIVER_TRY( + cudaAPI::instance().MemcpyDtoHAsync(alloc.get(), devPtr, nbytes_requested, stream)); + CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); posix_host_io(fd, alloc.get(), nbytes_requested, cur_file_offset, false); } cur_file_offset += nbytes_got; @@ -241,15 +246,17 @@ inline std::size_t posix_host_write( * @param size Size in bytes to read. * @param file_offset Offset in the file to read from. * @param devPtr_offset Offset relative to the `devPtr_base` pointer to read into. + * @param stream CUDA stream in which to enqueue the operation. * @return Size of bytes that were successfully read.
*/ inline std::size_t posix_device_read(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset) + std::size_t devPtr_offset, + CUstream stream) { - return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset); + return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset, stream); } /** @@ -263,15 +270,17 @@ inline std::size_t posix_device_read(int fd, * @param size Size in bytes to write. * @param file_offset Offset in the file to write to. * @param devPtr_offset Offset relative to the `devPtr_base` pointer to write into. + * @param stream CUDA stream in which to enqueue the operation. * @return Size of bytes that were successfully written. */ inline std::size_t posix_device_write(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset) + std::size_t devPtr_offset, + CUstream stream) { - return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset); + return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset, stream); } } // namespace kvikio diff --git a/cpp/include/kvikio/shim/cuda.hpp b/cpp/include/kvikio/shim/cuda.hpp index 7d4b08d9d8..ffb71731fe 100644 --- a/cpp/include/kvikio/shim/cuda.hpp +++ b/cpp/include/kvikio/shim/cuda.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -33,8 +33,8 @@ class cudaAPI { decltype(cuInit)* Init{nullptr}; decltype(cuMemHostAlloc)* MemHostAlloc{nullptr}; decltype(cuMemFreeHost)* MemFreeHost{nullptr}; - decltype(cuMemcpyHtoD)* MemcpyHtoD{nullptr}; - decltype(cuMemcpyDtoH)* MemcpyDtoH{nullptr}; + decltype(cuMemcpyHtoDAsync)* MemcpyHtoDAsync{nullptr}; + decltype(cuMemcpyDtoHAsync)* MemcpyDtoHAsync{nullptr}; decltype(cuPointerGetAttribute)* PointerGetAttribute{nullptr}; decltype(cuPointerGetAttributes)* PointerGetAttributes{nullptr}; decltype(cuCtxPushCurrent)* CtxPushCurrent{nullptr}; @@ -58,8 +58,8 @@ class cudaAPI { // the name of the symbol through cude.h. get_symbol(MemHostAlloc, lib, KVIKIO_STRINGIFY(cuMemHostAlloc)); get_symbol(MemFreeHost, lib, KVIKIO_STRINGIFY(cuMemFreeHost)); - get_symbol(MemcpyHtoD, lib, KVIKIO_STRINGIFY(cuMemcpyHtoD)); - get_symbol(MemcpyDtoH, lib, KVIKIO_STRINGIFY(cuMemcpyDtoH)); + get_symbol(MemcpyHtoDAsync, lib, KVIKIO_STRINGIFY(cuMemcpyHtoDAsync)); + get_symbol(MemcpyDtoHAsync, lib, KVIKIO_STRINGIFY(cuMemcpyDtoHAsync)); get_symbol(PointerGetAttribute, lib, KVIKIO_STRINGIFY(cuPointerGetAttribute)); get_symbol(PointerGetAttributes, lib, KVIKIO_STRINGIFY(cuPointerGetAttributes)); get_symbol(CtxPushCurrent, lib, KVIKIO_STRINGIFY(cuCtxPushCurrent)); From 0153d06e3fa1af3b4d77a27e4cd96df365d07d6c Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Wed, 24 Apr 2024 16:37:54 +0200 Subject: [PATCH 2/6] async: use posix IO with stream --- cpp/include/kvikio/file_handle.hpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 55a16da575..d9a31570cd 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -548,10 +548,14 @@ class FileHandle { return; } #endif - CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); - *bytes_read_p = - static_cast(read(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); + if (_compat_mode) { + *bytes_read_p = static_cast(posix_device_read( + _fd_direct_off, devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p, stream)); + } else { + *bytes_read_p = + static_cast(read(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); + } } /** @@ -641,10 +645,14 @@ class FileHandle { return; } #endif - CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); - *bytes_written_p = - static_cast(write(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); + if (_compat_mode) { + *bytes_written_p = static_cast(posix_device_write( + _fd_direct_off, devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p, stream)); + } else { + *bytes_written_p = + static_cast(write(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); + } } /** From 6908b363f15e2abaa61cc16cb2664397a7687da3 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Fri, 26 Apr 2024 14:41:12 +0200 Subject: [PATCH 3/6] AllocRetain: use instance --- cpp/include/kvikio/posix_io.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index 78f36898b9..f3ca1453f3 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -93,6 +93,12 @@ class AllocRetain { } } + static AllocRetain& instance() + { + static AllocRetain _instance; + return _instance; + } + AllocRetain(const AllocRetain&) = delete; AllocRetain& operator=(AllocRetain const&) = delete; AllocRetain(AllocRetain&& o) = delete; @@ -100,8 +106,6 @@ class AllocRetain { ~AllocRetain() noexcept = default; }; -inline AllocRetain manager; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) - /** * @brief Read or write host memory to or from disk using POSIX * @@ -169,7 +173,7 @@ std::size_t posix_device_io(int fd, std::size_t devPtr_offset, CUstream stream) { - auto alloc = manager.get(); + auto alloc = AllocRetain::instance().get(); CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset; off_t cur_file_offset = convert_size2off(file_offset); off_t byte_remaining = convert_size2off(size); From 439a304480ef8278ff8d49b834d6c534818136a8 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 29 Apr 2024 09:49:37 +0200 Subject: [PATCH 4/6] StreamsByThread --- cpp/include/kvikio/posix_io.hpp | 60 +++++++++++++++++++++++++++++++- cpp/include/kvikio/shim/cuda.hpp | 4 +++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index f3ca1453f3..02bc394c6a 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -18,8 +18,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -33,7 +35,60 @@ inline constexpr std::size_t posix_bounce_buffer_size = 2 << 23; // 16 MiB namespace detail { /** - * @brief Class to retain host memory allocations + * @brief Singleton class to retrieve a CUDA stream for device-host copying + * + * Call `AllocRetain::get` to get the CUDA stream assigned to the current + * CUDA context and thread. + */ +class StreamsByThread { + private: + std::map<std::pair<CUcontext, std::thread::id>, CUstream> _streams; + + public: + StreamsByThread() = default; + ~StreamsByThread() noexcept + { + for (auto& [_, stream] : _streams) { + try { + CUDA_DRIVER_TRY(cudaAPI::instance().StreamDestroy(stream)); + } catch (const CUfileException& e) { + std::cerr << e.what() << std::endl; + } + } + } + + static CUstream get(CUcontext ctx, std::thread::id thd_id) + { + static StreamsByThread _instance; + + // It no current context, we return the null/default stream + if (ctx == nullptr) { return nullptr; } + auto key = std::make_pair(ctx, thd_id); + + // Create new stream if `ctx` doesn't have one.
+ if (_instance._streams.find(key) == _instance._streams.end()) { + CUstream stream{}; + CUDA_DRIVER_TRY(cudaAPI::instance().StreamCreate(&stream, CU_STREAM_DEFAULT)); + _instance._streams[key] = stream; + } + return _instance._streams.at(key); + } + + static CUstream get() + { + CUcontext ctx{nullptr}; + CUDA_DRIVER_TRY(cudaAPI::instance().CtxGetCurrent(&ctx)); + return get(ctx, std::this_thread::get_id()); + } + + StreamsByThread(const StreamsByThread&) = delete; + StreamsByThread& operator=(StreamsByThread const&) = delete; + StreamsByThread(StreamsByThread&& o) = delete; + StreamsByThread& operator=(StreamsByThread&& o) = delete; +}; + +/** + * @brief Singleton class to retain host memory allocations * * Call `AllocRetain::get` to get an allocation that will be retained when it * goes out of scope (RAII). The size of all allocations are `posix_bounce_buffer_size`. @@ -179,6 +234,9 @@ std::size_t posix_device_io(int fd, off_t byte_remaining = convert_size2off(size); const off_t chunk_size2 = convert_size2off(posix_bounce_buffer_size); + // Get a stream if none were given by the caller + if (stream == nullptr) { stream = StreamsByThread::get(); } + while (byte_remaining > 0) { const off_t nbytes_requested = std::min(chunk_size2, byte_remaining); ssize_t nbytes_got = nbytes_requested; diff --git a/cpp/include/kvikio/shim/cuda.hpp b/cpp/include/kvikio/shim/cuda.hpp index ffb71731fe..e01df4643e 100644 --- a/cpp/include/kvikio/shim/cuda.hpp +++ b/cpp/include/kvikio/shim/cuda.hpp @@ -47,6 +47,8 @@ class cudaAPI { decltype(cuDevicePrimaryCtxRetain)* DevicePrimaryCtxRetain{nullptr}; decltype(cuDevicePrimaryCtxRelease)* DevicePrimaryCtxRelease{nullptr}; decltype(cuStreamSynchronize)* StreamSynchronize{nullptr}; + decltype(cuStreamCreate)* StreamCreate{nullptr}; + decltype(cuStreamDestroy)* StreamDestroy{nullptr}; private: cudaAPI() @@ -72,6 +74,8 @@ class cudaAPI { get_symbol(DevicePrimaryCtxRetain, lib, KVIKIO_STRINGIFY(cuDevicePrimaryCtxRetain)); 
get_symbol(DevicePrimaryCtxRelease, lib, KVIKIO_STRINGIFY(cuDevicePrimaryCtxRelease)); get_symbol(StreamSynchronize, lib, KVIKIO_STRINGIFY(cuStreamSynchronize)); + get_symbol(StreamCreate, lib, KVIKIO_STRINGIFY(cuStreamCreate)); + get_symbol(StreamDestroy, lib, KVIKIO_STRINGIFY(cuStreamDestroy)); } public: From f83475e2068a9003f235f58ea3302a1b5b70d524 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 29 Apr 2024 10:04:29 +0200 Subject: [PATCH 5/6] posix: roll back stream argument --- cpp/include/kvikio/file_handle.hpp | 32 ++++++++++-------------------- cpp/include/kvikio/posix_io.hpp | 22 ++++++++------------ 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index d9a31570cd..9cd74b2319 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -308,8 +308,7 @@ class FileHandle { std::size_t devPtr_offset) { if (_compat_mode) { - return posix_device_read( - _fd_direct_off, devPtr_base, size, file_offset, devPtr_offset, nullptr); + return posix_device_read(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset); } #ifdef KVIKIO_CUFILE_FOUND ssize_t ret = cuFileAPI::instance().Read( @@ -359,8 +358,7 @@ class FileHandle { _nbytes = 0; // Invalidate the computed file size if (_compat_mode) { - return posix_device_write( - _fd_direct_off, devPtr_base, size, file_offset, devPtr_offset, nullptr); + return posix_device_write(_fd_direct_off, devPtr_base, size, file_offset, devPtr_offset); } #ifdef KVIKIO_CUFILE_FOUND ssize_t ret = cuFileAPI::instance().Write( @@ -422,7 +420,7 @@ class FileHandle { if (size < gds_threshold) { auto task = [this, ctx, buf, size, file_offset]() -> std::size_t { PushAndPopContext c(ctx); - return posix_device_read(_fd_direct_off, buf, size, file_offset, 0, nullptr); + return posix_device_read(_fd_direct_off, buf, size, file_offset, 0); }; return std::async(std::launch::deferred, task); } @@ -483,7 +481,7 @@ class FileHandle { if (size < gds_threshold) { auto task = [this, ctx, buf, size, file_offset]() -> std::size_t { PushAndPopContext c(ctx); - return posix_device_write(_fd_direct_off, buf, size, file_offset, 0, nullptr); + return posix_device_write(_fd_direct_off, buf, size, file_offset, 0); }; return std::async(std::launch::deferred, task); } @@ -548,14 +546,10 @@ class FileHandle { return; } #endif + CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); - if (_compat_mode) { - *bytes_read_p = static_cast(posix_device_read( - _fd_direct_off, devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p, stream)); - } else { - *bytes_read_p = - static_cast(read(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); - } + *bytes_read_p = + static_cast(read(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); } /** @@ -645,14 +639,10 @@ class FileHandle { return; 
} #endif + CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream)); - if (_compat_mode) { - *bytes_written_p = static_cast(posix_device_write( - _fd_direct_off, devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p, stream)); - } else { - *bytes_written_p = - static_cast(write(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); - } + *bytes_written_p = + static_cast(write(devPtr_base, *size_p, *file_offset_p, *devPtr_offset_p)); } /** diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index 02bc394c6a..560d266655 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -37,7 +37,7 @@ namespace detail { /** * @brief Singleton class to retrieve a CUDA stream for device-host copying * - * Call `AllocRetain::get` to get the CUDA stream assigned to the current + * Call `StreamsByThread::get` to get the CUDA stream assigned to the current * CUDA context and thread. */ class StreamsByThread { @@ -217,7 +217,6 @@ ssize_t posix_host_io(int fd, const void* buf, size_t count, off_t offset, bool * @param size Number of bytes to read or write. * @param file_offset Byte offset to the start of the file. * @param devPtr_offset Byte offset to the start of the device pointer. - * @param stream CUDA stream in which to enqueue the operation. * @return Number of bytes read or written. 
*/ template <bool IsReadOperation> @@ -225,8 +224,7 @@ std::size_t posix_device_io(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset, - CUstream stream) + std::size_t devPtr_offset) { auto alloc = AllocRetain::instance().get(); CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset; @@ -234,8 +232,8 @@ std::size_t posix_device_io(int fd, off_t byte_remaining = convert_size2off(size); const off_t chunk_size2 = convert_size2off(posix_bounce_buffer_size); - // Get a stream if none were given by the caller - if (stream == nullptr) { stream = StreamsByThread::get(); } + // Get a stream for the current CUDA context and thread + CUstream stream = StreamsByThread::get(); while (byte_remaining > 0) { const off_t nbytes_requested = std::min(chunk_size2, byte_remaining); @@ -308,17 +306,15 @@ inline std::size_t posix_host_write( * @param size Size in bytes to read. * @param file_offset Offset in the file to read from. * @param devPtr_offset Offset relative to the `devPtr_base` pointer to read into. - * @param stream CUDA stream in which to enqueue the operation. * @return Size of bytes that were successfully read. */ inline std::size_t posix_device_read(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset, - CUstream stream) + std::size_t devPtr_offset) { - return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset, stream); + return detail::posix_device_io<true>(fd, devPtr_base, size, file_offset, devPtr_offset); } /** @@ -332,17 +328,15 @@ inline std::size_t posix_device_read(int fd, * @param size Size in bytes to write. * @param file_offset Offset in the file to write to. * @param devPtr_offset Offset relative to the `devPtr_base` pointer to write into. - * @param stream CUDA stream in which to enqueue the operation. * @return Size of bytes that were successfully written.
*/ inline std::size_t posix_device_write(int fd, const void* devPtr_base, std::size_t size, std::size_t file_offset, - std::size_t devPtr_offset, - CUstream stream) + std::size_t devPtr_offset) { - return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset, stream); + return detail::posix_device_io<false>(fd, devPtr_base, size, file_offset, devPtr_offset); } } // namespace kvikio From fc6c530ab3c6b7d403e44ba549ee15ef41095479 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 6 May 2024 12:44:10 +0200 Subject: [PATCH 6/6] typo --- cpp/include/kvikio/posix_io.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/kvikio/posix_io.hpp b/cpp/include/kvikio/posix_io.hpp index 560d266655..9f2bbd4fd6 100644 --- a/cpp/include/kvikio/posix_io.hpp +++ b/cpp/include/kvikio/posix_io.hpp @@ -61,11 +61,11 @@ class StreamsByThread { { static StreamsByThread _instance; - // It no current context, we return the null/default stream + // If no current context, we return the null/default stream if (ctx == nullptr) { return nullptr; } auto key = std::make_pair(ctx, thd_id); - // Create new stream if `ctx` doesn't have one. + // Create a new stream if `ctx` doesn't have one. if (_instance._streams.find(key) == _instance._streams.end()) { CUstream stream{}; CUDA_DRIVER_TRY(cudaAPI::instance().StreamCreate(&stream, CU_STREAM_DEFAULT));