From dc3f7deafcbeeb0a9a327fc62e3ce95ba8aa9c52 Mon Sep 17 00:00:00 2001
From: Robert Maynard
Date: Thu, 27 Oct 2022 09:02:38 -0400
Subject: [PATCH] Move Thrust's pinned_allocator into cudf

Thrust 1.17 removes experimental/pinned_allocator. Thrust does offer a
replacement in `thrust::system::cuda::universal_host_pinned_memory_resource`,
but adopting it would require converting the consumers into CUDA sources,
which would negatively impact our compile times. Instead we move Thrust's
removed pinned_allocator into cudf and continue to use it.
---
 cpp/benchmarks/io/text/multibyte_split.cpp    |   5 +-
 cpp/include/cudf/utilities/pinned_allocator.h | 214 ++++++++++++++++++
 cpp/src/io/text/bgzip_data_chunk_source.cu    |   4 +-
 .../io/text/data_chunk_source_factories.cpp   |   4 +-
 cpp/src/io/utilities/hostdevice_vector.hpp    |   4 +-
 5 files changed, 222 insertions(+), 9 deletions(-)
 create mode 100644 cpp/include/cudf/utilities/pinned_allocator.h

diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index 380766fee46..d93107ce5a6 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -31,9 +31,9 @@
 #include
 #include
 #include
+#include <cudf/utilities/pinned_allocator.h>
 
 #include
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 #include
 #include
@@ -135,8 +135,7 @@ static void bench_multibyte_split(nvbench::state& state,
   auto const delim_factor = static_cast<double>(delim_percent) / 100;
   auto device_input       = create_random_input(file_size_approx, delim_factor, 0.05, delim);
   auto host_input         = std::vector<char>{};
-  auto host_pinned_input =
-    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>{};
+  auto host_pinned_input = thrust::host_vector<char, cudf::pinned_allocator<char>>{};
 
   if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file ||
       source_type == data_chunk_source_type::file_bgzip) {
diff --git a/cpp/include/cudf/utilities/pinned_allocator.h b/cpp/include/cudf/utilities/pinned_allocator.h
new file mode 100644
index 00000000000..18e6947b3b2
--- /dev/null
+++ b/cpp/include/cudf/utilities/pinned_allocator.h
@@ -0,0 +1,214 @@
+/*
+ *  Copyright 2008-2022 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+/*! \file thrust/system/cuda/experimental/pinned_allocator.h
+ *  \brief An allocator which creates new elements in "pinned" memory with \p cudaMallocHost
+ */
+
+#pragma once
+
+#include <thrust/detail/config.h>
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+#include <cuda_runtime_api.h>
+#include <cstddef>
+#include <limits>
+#include <new>
+
+
+namespace cudf
+{
+
+/*! \p pinned_allocator is a CUDA-specific host memory allocator
+ *  that employs \c cudaMallocHost for allocation.
+ *
+ *  This implementation is ported from the experimental/pinned_allocator
+ *  that Thrust used to provide.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <typename T> class pinned_allocator;
+
+template <>
+  class pinned_allocator<void>
+{
+  public:
+    typedef void           value_type;
+    typedef void*          pointer;
+    typedef const void*    const_pointer;
+    typedef std::size_t    size_type;
+    typedef std::ptrdiff_t difference_type;
+
+    // convert a pinned_allocator<void> to pinned_allocator<U>
+    template <typename U>
+      struct rebind
+    {
+      typedef pinned_allocator<U> other;
+    };  // end rebind
+};  // end pinned_allocator
+
+
+template <typename T>
+  class pinned_allocator
+{
+  public:
+    //! \{
+    typedef T              value_type;
+    typedef T*             pointer;
+    typedef const T*       const_pointer;
+    typedef T&             reference;
+    typedef const T&       const_reference;
+    typedef std::size_t    size_type;
+    typedef std::ptrdiff_t difference_type;
+    //! \}
+
+    // convert a pinned_allocator<T> to pinned_allocator<U>
+    template <typename U>
+      struct rebind
+    {
+      typedef pinned_allocator<U> other;
+    };  // end rebind
+
+    /*! \p pinned_allocator's null constructor does nothing.
+     */
+    __host__ __device__
+    inline pinned_allocator() {}
+
+    /*! \p pinned_allocator's null destructor does nothing.
+     */
+    __host__ __device__
+    inline ~pinned_allocator() {}
+
+    /*! \p pinned_allocator's copy constructor does nothing.
+     */
+    __host__ __device__
+    inline pinned_allocator(pinned_allocator const&) {}
+
+    /*! This version of \p pinned_allocator's copy constructor
+     *  is templated on the \c value_type of the \p pinned_allocator
+     *  to copy from. It is provided merely for convenience; it
+     *  does nothing.
+     */
+    template <typename U>
+    __host__ __device__
+    inline pinned_allocator(pinned_allocator<U> const&) {}
+
+    /*! This method returns the address of a \c reference of
+     *  interest.
+     *
+     *  \p r The \c reference of interest.
+     *  \return \c r's address.
+     */
+    __host__ __device__
+    inline pointer address(reference r) { return &r; }
+
+    /*! This method returns the address of a \c const_reference
+     *  of interest.
+     *
+     *  \p r The \c const_reference of interest.
+     *  \return \c r's address.
+     */
+    __host__ __device__
+    inline const_pointer address(const_reference r) { return &r; }
+
+    /*! This method allocates storage for objects in pinned host
+     *  memory.
+     *
+     *  \p cnt The number of objects to allocate.
+     *  \return a \c pointer to the newly allocated objects.
+     *  \note This method does not invoke \p value_type's constructor.
+     *        It is the responsibility of the caller to initialize the
+     *        objects at the returned \c pointer.
+     */
+    __host__
+    inline pointer allocate(size_type cnt,
+                            const_pointer = 0)
+    {
+      if(cnt > this->max_size())
+      {
+        throw std::bad_alloc();
+      }  // end if
+
+      pointer result(0);
+      cudaError_t error = cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type));
+
+      if(error)
+      {
+        cudaGetLastError();  // Clear global CUDA error state.
+        throw std::bad_alloc();
+      }  // end if
+
+      return result;
+    }  // end allocate()
+
+    /*! This method deallocates pinned host memory previously allocated
+     *  with this \c pinned_allocator.
+     *
+     *  \p p A \c pointer to the previously allocated memory.
+     *  \p cnt The number of objects previously allocated at
+     *         \p p.
+     *  \note This method does not invoke \p value_type's destructor.
+     *        It is the responsibility of the caller to destroy
+     *        the objects stored at \p p.
+     */
+    __host__
+    inline void deallocate(pointer p, size_type /*cnt*/)
+    {
+      cudaError_t error = cudaFreeHost(p);
+
+      cudaGetLastError();  // Clear global CUDA error state.
+
+      if(error)
+      {
+        cudaGetLastError();  // Clear global CUDA error state.
+        throw thrust::system_error(error, thrust::cuda_category());
+      }  // end if
+    }  // end deallocate()
+
+    /*! This method returns the maximum size of the \c cnt parameter
+     *  accepted by the \p allocate() method.
+     *
+     *  \return The maximum number of objects that may be allocated
+     *          by a single call to \p allocate().
+     */
+    inline size_type max_size() const
+    {
+      return (std::numeric_limits<size_type>::max)() / sizeof(T);
+    }  // end max_size()
+
+    /*! This method tests this \p pinned_allocator for equality to
+     *  another.
+     *
+     *  \param x The other \p pinned_allocator of interest.
+     *  \return This method always returns \c true.
+     */
+    __host__ __device__
+    inline bool operator==(pinned_allocator const& x) const { return true; }
+
+    /*! This method tests this \p pinned_allocator for inequality
+     *  to another.
+     *
+     *  \param x The other \p pinned_allocator of interest.
+     *  \return This method always returns \c false.
+     */
+    __host__ __device__
+    inline bool operator!=(pinned_allocator const& x) const { return !operator==(x); }
+};  // end pinned_allocator
+
+/*! \}
+ */
+
+}  // end cudf
diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index 7f1f6688bec..02af44f075e 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <cudf/utilities/pinned_allocator.h>
 
 #include
 #include
@@ -30,7 +31,6 @@
 #include
 #include
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 #include
 
 #include
@@ -66,7 +66,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
  private:
   template <typename T>
   using pinned_host_vector =
-    thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>>;
+    thrust::host_vector<T, cudf::pinned_allocator<T>>;
 
   template <typename T>
   static void copy_to_device(const pinned_host_vector<T>& host,
diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp
index 9a549951d66..4e12f9f8fdf 100644
--- a/cpp/src/io/text/data_chunk_source_factories.cpp
+++ b/cpp/src/io/text/data_chunk_source_factories.cpp
@@ -18,11 +18,11 @@
 
 #include
 #include
+#include <cudf/utilities/pinned_allocator.h>
 
 #include
 #include
 
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 #include
 
@@ -37,7 +37,7 @@ namespace {
 class istream_data_chunk_reader : public data_chunk_reader {
   struct host_ticket {
     cudaEvent_t event;
-    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
+    thrust::host_vector<char, cudf::pinned_allocator<char>> buffer;
   };
 
  public:
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index 6e34d862ed4..80d2427f41a 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -19,12 +19,12 @@
 #include
 #include
 #include
+#include <cudf/utilities/pinned_allocator.h>
 
 #include
 #include
 #include
 
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 /**
  * @brief A helper class that wraps fixed-length device memory for the GPU, and
@@ -126,7 +126,7 @@ class hostdevice_vector {
   }
 
  private:
-  thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>> h_data;
+  thrust::host_vector<T, cudf::pinned_allocator<T>> h_data;
   rmm::device_uvector<T> d_data;
 };
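
Note for reviewers: the snippet below is a minimal usage sketch, not part of the patch. It shows how a plain C++ (non-CUDA) translation unit can declare pinned host staging memory with the ported allocator, mirroring the aliases used in the updated sources; the `pinned_host_vector` name, buffer size, and `main` function here are illustrative only.

#include <cudf/utilities/pinned_allocator.h>

#include <thrust/host_vector.h>

// A host_vector whose storage is page-locked ("pinned") memory obtained via
// cudaMallocHost, so async copies to and from the device can use it directly.
template <typename T>
using pinned_host_vector = thrust::host_vector<T, cudf::pinned_allocator<T>>;

int main()
{
  pinned_host_vector<char> staging(1 << 20);  // 1 MiB of pinned host memory
  staging[0] = 'x';                           // behaves like any other host_vector
  return 0;
}  // pinned memory is released via cudaFreeHost when `staging` is destroyed

Because the allocator only needs the CUDA runtime API (cudaMallocHost/cudaFreeHost), this compiles as a host-only source, which is the point of keeping the ported allocator instead of switching to the memory-resource replacement.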