diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 54a4c4ea023..bb17f13db53 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -255,6 +255,7 @@ add_library(cudf src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/to_arrow.cu + src/interop/detail/arrow_allocator.cpp src/io/avro/avro.cpp src/io/avro/avro_gpu.cu src/io/avro/reader_impl.cu diff --git a/cpp/src/interop/detail/arrow_allocator.cpp b/cpp/src/interop/detail/arrow_allocator.cpp new file mode 100644 index 00000000000..cb67c893573 --- /dev/null +++ b/cpp/src/interop/detail/arrow_allocator.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +namespace cudf { +namespace detail { + +std::unique_ptr allocate_arrow_buffer(const int64_t size, arrow::MemoryPool* ar_mr) +{ + /* + nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer + and `ValueOrDie` are used inside a CUDA compilation unit. + + To work around this issue we compile an allocation shim in C++ and use + that from our cuda sources + */ + auto result = arrow::AllocateBuffer(size, ar_mr); + CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer"); + return std::move(result).ValueOrDie(); +} + +std::shared_ptr allocate_arrow_bitmap(const int64_t size, arrow::MemoryPool* ar_mr) +{ + /* + nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer + and `ValueOrDie` are used inside a CUDA compilation unit. + + To work around this issue we compile an allocation shim in C++ and use + that from our cuda sources + */ + auto result = arrow::AllocateBitmap(size, ar_mr); + CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow bitmap"); + return std::move(result).ValueOrDie(); +} + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/interop/detail/arrow_allocator.hpp b/cpp/src/interop/detail/arrow_allocator.hpp new file mode 100644 index 00000000000..20099f91afa --- /dev/null +++ b/cpp/src/interop/detail/arrow_allocator.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace detail { + +// unique_ptr because that is what AllocateBuffer returns +std::unique_ptr allocate_arrow_buffer(const int64_t size, arrow::MemoryPool* ar_mr); + +// shared_ptr because that is what AllocateBitmap returns +std::shared_ptr allocate_arrow_bitmap(const int64_t size, arrow::MemoryPool* ar_mr); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 3cd515e9981..3271804bf39 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -34,6 +34,8 @@ #include #include +#include "detail/arrow_allocator.hpp" + namespace cudf { namespace detail { namespace { @@ -48,10 +50,7 @@ std::shared_ptr fetch_data_buffer(column_view input_view, { const int64_t data_size_in_bytes = sizeof(T) * input_view.size(); - auto result = arrow::AllocateBuffer(data_size_in_bytes, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer for data"); - - std::shared_ptr data_buffer = std::move(result.ValueOrDie()); + auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), input_view.data(), @@ -59,7 +58,7 @@ std::shared_ptr fetch_data_buffer(column_view input_view, cudaMemcpyDeviceToHost, stream.value())); - return data_buffer; + return std::move(data_buffer); } /** @@ -72,9 +71,7 @@ std::shared_ptr fetch_mask_buffer(column_view input_view, const int64_t mask_size_in_bytes = cudf::bitmask_allocation_size_bytes(input_view.size()); if (input_view.has_nulls()) { - auto result = arrow::AllocateBitmap(static_cast(input_view.size()), ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer for mask"); - std::shared_ptr mask_buffer = std::move(result.ValueOrDie()); + auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); CUDA_TRY(cudaMemcpyAsync( mask_buffer->mutable_data(), (input_view.offset() > 0) ? cudf::copy_bitmask(input_view).data() : input_view.null_mask(), @@ -163,10 +160,7 @@ std::shared_ptr dispatch_to_arrow::operator()( }); auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); - auto result = arrow::AllocateBuffer(buf_size_in_bytes, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer for data"); - - std::shared_ptr data_buffer = std::move(result.ValueOrDie()); + auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), buf.data(), @@ -176,7 +170,7 @@ std::shared_ptr dispatch_to_arrow::operator()( auto type = arrow::decimal(18, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, data_buffer}; + auto buffers = std::vector>{mask, std::move(data_buffer)}; auto data = std::make_shared(type, input.size(), buffers); return std::make_shared(data); @@ -191,10 +185,7 @@ std::shared_ptr dispatch_to_arrow::operator()(column_view in { auto bitmask = bools_to_mask(input, stream); - auto result = arrow::AllocateBuffer(static_cast(bitmask.first->size()), ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer for data"); - - std::shared_ptr data_buffer = std::move(result.ValueOrDie()); + auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), bitmask.first->data(), @@ -203,7 +194,7 @@ std::shared_ptr dispatch_to_arrow::operator()(column_view in stream.value())); return to_arrow_array(id, static_cast(input.size()), - data_buffer, + std::move(data_buffer), fetch_mask_buffer(input, ar_mr, stream), static_cast(input.null_count())); } @@ -225,19 +216,13 @@ std::shared_ptr dispatch_to_arrow::operator()( column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; auto child_arrays = fetch_child_array(input_view, {{}, {}}, ar_mr, stream); if (child_arrays.empty()) { - arrow::Result> result; - // Empty string will have only one value in offset of 4 bytes - result = arrow::AllocateBuffer(4, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate buffer"); - std::shared_ptr tmp_offset_buffer = std::move(result.ValueOrDie()); - tmp_offset_buffer->mutable_data()[0] = 0; - - result = arrow::AllocateBuffer(0, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate buffer"); - std::shared_ptr tmp_data_buffer = std::move(result.ValueOrDie()); + auto tmp_offset_buffer = allocate_arrow_buffer(4, ar_mr); + auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr); + tmp_offset_buffer->mutable_data()[0] = 0; - return std::make_shared(0, tmp_offset_buffer, tmp_data_buffer); + return std::make_shared( + 0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer)); } auto offset_buffer = child_arrays[0]->data()->buffers[1]; auto data_buffer = child_arrays[1]->data()->buffers[1];