Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port thrust's pinned_allocator to cudf, since Thrust 1.17 removes the type #12004

Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,10 @@ outputs:
- test -f $PREFIX/include/cudf/detail/unary.hpp
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/default_stream.hpp
- test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/pinned_allocator.hpp
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
Expand Down
5 changes: 2 additions & 3 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand All @@ -33,7 +34,6 @@
#include <cudf/utilities/default_stream.hpp>

#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <thrust/transform.h>

#include <nvbench/nvbench.cuh>
Expand Down Expand Up @@ -135,8 +135,7 @@ static void bench_multibyte_split(nvbench::state& state,
auto const delim_factor = static_cast<double>(delim_percent) / 100;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input =
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>{};
auto host_pinned_input = thrust::host_vector<char, cudf::detail::pinned_allocator<char>>{};

if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file ||
source_type == data_chunk_source_type::file_bgzip) {
Expand Down
202 changes: 202 additions & 0 deletions cpp/include/cudf/detail/utilities/pinned_allocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*
* Copyright 2008-2022 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstddef>
#include <limits>
#include <new> // for bad_alloc

#include <cudf/utilities/error.hpp>

namespace cudf::detail {

/*! \p pinned_allocator is a CUDA-specific host memory allocator
* that employs \c cudaMallocHost for allocation.
*
* This implementation is ported from the experimental/pinned_allocator
* that Thrust used to provide.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <typename T>
class pinned_allocator;

/*! \p pinned_allocator is a CUDA-specific host memory allocator
* that employs \c cudaMallocHost for allocation.
*
* This implementation is ported from the experimental/pinned_allocator
* that Thrust used to provide.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <>
class pinned_allocator<void> {
public:
using value_type = void; ///< The type of the elements in the allocator
using pointer = void*; ///< The type returned by address() / allocate()
using const_pointer = const void*; ///< The type returned by address()
using size_type = std::size_t; ///< The type used for the size of the allocation
using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers

/**
* @brief converts a `pinned_allocator<void>` to `pinned_allocator<U>`
*/
template <typename U>
struct rebind {
using other = pinned_allocator<U>; ///< The rebound type
};
};

/*! \p pinned_allocator is a CUDA-specific host memory allocator
* that employs \c cudaMallocHost for allocation.
*
* This implementation is ported from the experimental/pinned_allocator
* that Thrust used to provide.
*
* \see https://en.cppreference.com/w/cpp/memory/allocator
*/
template <typename T>
class pinned_allocator {
public:
using value_type = T; ///< The type of the elements in the allocator
using pointer = T*; ///< The type returned by address() / allocate()
using const_pointer = const T*; ///< The type returned by address()
using reference = T&; ///< The parameter type for address()
using const_reference = const T&; ///< The parameter type for address()
using size_type = std::size_t; ///< The type used for the size of the allocation
using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers

/**
* @brief converts a `pinned_allocator<T>` to `pinned_allocator<U>`
*/
template <typename U>
struct rebind {
using other = pinned_allocator<U>; ///< The rebound type
};

/**
* @brief pinned_allocator's null constructor does nothing.
*/
__host__ __device__ inline pinned_allocator() {}

/**
* @brief pinned_allocator's null destructor does nothing.
*/
__host__ __device__ inline ~pinned_allocator() {}

/**
* @brief pinned_allocator's copy constructor does nothing.
*/
__host__ __device__ inline pinned_allocator(pinned_allocator const&) {}

/**
* @brief pinned_allocator's copy constructor does nothing.
*
* This version of pinned_allocator's copy constructor
* is templated on the \c value_type of the pinned_allocator
* to copy from. It is provided merely for convenience; it
* does nothing.
*/
template <typename U>
__host__ __device__ inline pinned_allocator(pinned_allocator<U> const&)
{
}

/**
* @brief This method returns the address of a \c reference of
* interest.
*
* @param r The \c reference of interest.
* @return \c r's address.
*/
__host__ __device__ inline pointer address(reference r) { return &r; }

/**
* @brief This method returns the address of a \c const_reference
* of interest.
*
* @param r The \c const_reference of interest.
* @return \c r's address.
*/
__host__ __device__ inline const_pointer address(const_reference r) { return &r; }

/**
* @brief This method allocates storage for objects in pinned host
* memory.
*
* @param cnt The number of objects to allocate.
* @return a \c pointer to the newly allocated objects.
* @note The second parameter to this function is meant as a
* hint pointer to a nearby memory location, but is
* not used by this allocator.
* @note This method does not invoke \p value_type's constructor.
* It is the responsibility of the caller to initialize the
* objects at the returned \c pointer.
*/
__host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0)
{
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if

pointer result(0);
CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
return result;
}

/**
* @brief This method deallocates pinned host memory previously allocated
* with this \c pinned_allocator.
*
* @param p A \c pointer to the previously allocated memory.
* @note The second parameter is the number of objects previously allocated
* but is ignored by this allocator.
* @note This method does not invoke \p value_type's destructor.
* It is the responsibility of the caller to destroy
* the objects stored at \p p.
*/
__host__ inline void deallocate(pointer p, size_type /*cnt*/) { CUDF_CUDA_TRY(cudaFreeHost(p)); }

/**
* @brief This method returns the maximum size of the \c cnt parameter
* accepted by the \p allocate() method.
*
* @return The maximum number of objects that may be allocated
* by a single call to \p allocate().
*/
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }

/**
* @brief This method tests this \p pinned_allocator for equality to
* another.
*
* @param x The other \p pinned_allocator of interest.
* @return This method always returns \c true.
*/
__host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; }

/**
* @brief This method tests this \p pinned_allocator for inequality
* to another.
*
* @param x The other \p pinned_allocator of interest.
* @return This method always returns \c false.
*/
__host__ __device__ inline bool operator!=(pinned_allocator const& x) const
{
return !operator==(x);
}
};
} // namespace cudf::detail
4 changes: 2 additions & 2 deletions cpp/include/cudf/utilities/span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
std::declval<C&>().data()))> (*)[],
T (*)[]>>* = nullptr>
constexpr host_span(C& in) : base(in.data(), in.size())
constexpr host_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
{
}

Expand All @@ -239,7 +239,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
std::declval<C&>().data()))> (*)[],
T (*)[]>>* = nullptr>
constexpr host_span(C const& in) : base(in.data(), in.size())
constexpr host_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
{
}

Expand Down
5 changes: 2 additions & 3 deletions cpp/src/io/text/bgzip_data_chunk_source.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "io/utilities/config_utils.hpp"

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -30,7 +31,6 @@

#include <thrust/host_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <thrust/transform.h>

#include <fstream>
Expand Down Expand Up @@ -65,8 +65,7 @@ struct bgzip_nvcomp_transform_functor {
class bgzip_data_chunk_reader : public data_chunk_reader {
private:
template <typename T>
using pinned_host_vector =
thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>>;
using pinned_host_vector = thrust::host_vector<T, cudf::detail::pinned_allocator<T>>;

template <typename T>
static void copy_to_device(const pinned_host_vector<T>& host,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/text/data_chunk_source_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
#include "io/text/device_data_chunks.hpp"

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>

#include <rmm/device_buffer.hpp>

#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>

#include <fstream>

Expand All @@ -37,7 +37,7 @@ namespace {
class istream_data_chunk_reader : public data_chunk_reader {
struct host_ticket {
cudaEvent_t event;
thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
thrust::host_vector<char, cudf::detail::pinned_allocator<char>> buffer;
};

public:
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/utilities/hostdevice_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
Expand All @@ -24,7 +25,6 @@
#include <rmm/device_buffer.hpp>

#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>

/**
* @brief A helper class that wraps fixed-length device memory for the GPU, and
Expand Down Expand Up @@ -126,7 +126,7 @@ class hostdevice_vector {
}

private:
thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>> h_data;
thrust::host_vector<T, cudf::detail::pinned_allocator<T>> h_data;
rmm::device_uvector<T> d_data;
};

Expand Down