Skip to content

Commit

Permalink
Fix merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Jul 24, 2024
2 parents 70b6b2f + 8bba6df commit a9c20bc
Show file tree
Hide file tree
Showing 95 changed files with 3,755 additions and 763 deletions.
5 changes: 3 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ add_library(
src/interop/arrow_utilities.cpp
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/to_arrow_host.cu
src/interop/from_arrow_device.cu
src/interop/from_arrow_host.cu
src/interop/from_arrow_stream.cu
Expand Down Expand Up @@ -671,9 +672,9 @@ add_library(
src/unary/null_ops.cu
src/utilities/cuda_memcpy.cu
src/utilities/default_stream.cpp
src/utilities/host_memory.cpp
src/utilities/linked_column.cpp
src/utilities/logger.cpp
src/utilities/pinned_memory.cpp
src/utilities/prefetch.cpp
src/utilities/stacktrace.cpp
src/utilities/stream_pool.cpp
Expand Down Expand Up @@ -807,7 +808,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS_thread_pool>
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
)
Expand Down
16 changes: 12 additions & 4 deletions cpp/benchmarks/io/fst.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ void BM_FST_JSON(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -134,7 +136,9 @@ void BM_FST_JSON_no_outidx(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -171,7 +175,9 @@ void BM_FST_JSON_no_out(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down Expand Up @@ -209,7 +215,9 @@ void BM_FST_JSON_no_str(nvbench::state& state)
auto parser = cudf::io::fst::detail::make_fst(
cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
cudf::io::fst::detail::make_transition_table(pda_state_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
cudf::io::fst::detail::make_translation_table<max_translation_table_size,
min_translated_out,
max_translated_out>(pda_out_tt),
stream);

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ function(find_and_configure_nanoarrow)
# Currently we need to always build nanoarrow so we don't pick up a previously installed version
set(CPM_DOWNLOAD_nanoarrow ON)
rapids_cpm_find(
nanoarrow 0.5.0
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 11e73a8c85b45e3d49c8c541b4e1497a649fe03c
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
)
Expand Down
20 changes: 7 additions & 13 deletions cpp/cmake/thirdparty/get_thread_pool.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,14 @@
# the License.
# =============================================================================

# This function finds rmm and sets any additional necessary environment variables.
# We need to call rapids_cpm_bs_thread_pool so that we can use an already-installed
# thread-pool, and also support installing it ourselves
function(find_and_configure_thread_pool)
rapids_cpm_find(
BS_thread_pool 4.1.0
CPM_ARGS
GIT_REPOSITORY https://github.com/bshoshany/thread-pool.git
GIT_TAG 097aa718f25d44315cadb80b407144ad455ee4f9
GIT_SHALLOW TRUE
)
if(NOT TARGET BS_thread_pool)
add_library(BS_thread_pool INTERFACE)
target_include_directories(BS_thread_pool INTERFACE ${BS_thread_pool_SOURCE_DIR}/include)
target_compile_definitions(BS_thread_pool INTERFACE "BS_THREAD_POOL_ENABLE_PAUSE=1")
endif()
include(${rapids-cmake-dir}/cpm/bs_thread_pool.cmake)

# Find or install thread-pool
rapids_cpm_bs_thread_pool(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)

endfunction()

find_and_configure_thread_pool()
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ void gather_bitmask(table_view const& source,
}

// Make device array of target bitmask pointers
std::vector<bitmask_type*> target_masks(target.size());
auto target_masks = make_host_vector<bitmask_type*>(target.size(), stream);
std::transform(target.begin(), target.end(), target_masks.begin(), [](auto const& col) {
return col->mutable_view().null_mask();
});
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ std::vector<size_type> segmented_count_bits(bitmask_type const* bitmask,
if (num_segments == 0) { return std::vector<size_type>{}; }

// Construct a contiguous host buffer of indices and copy to device.
auto const h_indices = std::vector<size_type>(indices_begin, indices_end);
auto h_indices = make_empty_host_vector<typename std::iterator_traits<IndexIterator>::value_type>(
std::distance(indices_begin, indices_end), stream);
std::copy(indices_begin, indices_end, std::back_inserter(h_indices));
auto const d_indices =
make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource());

Expand Down
51 changes: 51 additions & 0 deletions cpp/include/cudf/detail/utilities/host_memory.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/detail/utilities/host_vector.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/pinned_memory.hpp>

#include <rmm/resource_ref.hpp>

#include <cstddef>

namespace cudf::detail {
/**
* @brief Get the memory resource to be used for pageable memory allocations.
*
* @return Reference to the pageable memory resource
*/
CUDF_EXPORT rmm::host_async_resource_ref get_pageable_memory_resource();

/**
 * @brief Get the allocator to be used for the host memory allocation.
 *
 * @param size The number of elements of type T to allocate
 * @param stream The stream to use for the allocation
 * @return The allocator to be used for the host memory allocation
 */
template <typename T>
rmm_host_allocator<T> get_host_allocator(std::size_t size, rmm::cuda_stream_view stream)
{
  // Allocations at or below the configured threshold are served from the pinned
  // resource; larger requests fall back to the pageable host resource.
  auto const alloc_bytes = size * sizeof(T);
  if (alloc_bytes > get_allocate_host_as_pinned_threshold()) {
    return {get_pageable_memory_resource(), stream};
  }
  return {get_pinned_memory_resource(), stream};
}

} // namespace cudf::detail
24 changes: 21 additions & 3 deletions cpp/include/cudf/detail/utilities/host_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ class rmm_host_allocator<void> {
};
};

template <class DesiredProperty, class... Properties>
inline constexpr bool contains_property =
(cuda::std::is_same_v<DesiredProperty, Properties> || ... || false);

/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c `rmm::host_async_resource_ref` for allocation.
*
Expand Down Expand Up @@ -100,8 +104,12 @@ class rmm_host_allocator {
/**
* @brief Construct from a `cudf::host_async_resource_ref`
*/
rmm_host_allocator(rmm::host_async_resource_ref _mr, rmm::cuda_stream_view _stream)
: mr(_mr), stream(_stream)
template <class... Properties>
rmm_host_allocator(cuda::mr::async_resource_ref<cuda::mr::host_accessible, Properties...> _mr,
rmm::cuda_stream_view _stream)
: mr(_mr),
stream(_stream),
_is_device_accessible{contains_property<cuda::mr::device_accessible, Properties...>}
{
}

Expand Down Expand Up @@ -173,15 +181,25 @@ class rmm_host_allocator {
*/
inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); }

bool is_device_accessible() const { return _is_device_accessible; }

private:
rmm::host_async_resource_ref mr;
rmm::cuda_stream_view stream;
bool _is_device_accessible;
};

/**
* @brief A vector class with rmm host memory allocator
*/
template <typename T>
using host_vector = thrust::host_vector<T, rmm_host_allocator<T>>;
class host_vector : public thrust::host_vector<T, rmm_host_allocator<T>> {
public:
using base = thrust::host_vector<T, rmm_host_allocator<T>>;

host_vector(rmm_host_allocator<T> const& alloc) : base(alloc) {}

host_vector(size_t size, rmm_host_allocator<T> const& alloc) : base(size, alloc) {}
};

} // namespace cudf::detail
Loading

0 comments on commit a9c20bc

Please sign in to comment.