diff --git a/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp index 33f4c33d4e0..4f0c52c5954 100644 --- a/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp +++ b/cpp/include/cudf/detail/utilities/cuda_memcpy.hpp @@ -24,15 +24,11 @@ namespace CUDF_EXPORT cudf { namespace detail { -namespace impl { - enum class host_memory_kind : uint8_t { PINNED, PAGEABLE }; -void cuda_memcpy_async( +void cuda_memcpy_async_impl( void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream); -} // namespace impl - /** * @brief Asynchronously copies data from host to device memory. * @@ -47,12 +43,11 @@ void cuda_memcpy_async(device_span dst, host_span src, rmm::cuda_str { CUDF_EXPECTS(dst.size() == src.size(), "Mismatched sizes in cuda_memcpy_async"); auto const is_pinned = src.is_device_accessible(); - impl::cuda_memcpy_async( - dst.data(), - src.data(), - src.size_bytes(), - is_pinned ? impl::host_memory_kind::PINNED : impl::host_memory_kind::PAGEABLE, - stream); + cuda_memcpy_async_impl(dst.data(), + src.data(), + src.size_bytes(), + is_pinned ? host_memory_kind::PINNED : host_memory_kind::PAGEABLE, + stream); } /** @@ -69,12 +64,11 @@ void cuda_memcpy_async(host_span dst, device_span src, rmm::cuda_str { CUDF_EXPECTS(dst.size() == src.size(), "Mismatched sizes in cuda_memcpy_async"); auto const is_pinned = dst.is_device_accessible(); - impl::cuda_memcpy_async( - dst.data(), - src.data(), - src.size_bytes(), - is_pinned ? impl::host_memory_kind::PINNED : impl::host_memory_kind::PAGEABLE, - stream); + cuda_memcpy_async_impl(dst.data(), + src.data(), + src.size_bytes(), + is_pinned ? host_memory_kind::PINNED : host_memory_kind::PAGEABLE, + stream); } /** diff --git a/cpp/src/utilities/cuda_memcpy.cu b/cpp/src/utilities/cuda_memcpy.cu index e8193243229..c0af27a1748 100644 --- a/cpp/src/utilities/cuda_memcpy.cu +++ b/cpp/src/utilities/cuda_memcpy.cu @@ -25,12 +25,12 @@ #include -namespace cudf::detail::impl { +namespace cudf::detail { namespace { // Simple kernel to copy between device buffers -CUDF_KERNEL void copy_kernel(char const* src, char* dst, size_t n) +CUDF_KERNEL void copy_kernel(char const* __restrict__ src, char* __restrict__ dst, size_t n) { auto const idx = cudf::detail::grid_1d::global_thread_id(); if (idx < n) { dst[idx] = src[idx]; } @@ -61,7 +61,7 @@ void copy_pageable(void* dst, void const* src, std::size_t size, rmm::cuda_strea }; // namespace -void cuda_memcpy_async( +void cuda_memcpy_async_impl( void* dst, void const* src, size_t size, host_memory_kind kind, rmm::cuda_stream_view stream) { if (kind == host_memory_kind::PINNED) { @@ -73,4 +73,4 @@ void cuda_memcpy_async( } } -} // namespace cudf::detail::impl +} // namespace cudf::detail