From 37916689d58757333f5ec9249de6680f47bf287e Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 2 Apr 2021 10:10:51 -0400 Subject: [PATCH 1/7] Remove nvstrdesc_s --- cpp/CMakeLists.txt | 1 + cpp/src/io/avro/avro_common.h | 3 + cpp/src/io/avro/avro_gpu.cu | 14 +-- cpp/src/io/avro/avro_gpu.h | 9 +- cpp/src/io/avro/reader_impl.cu | 16 +-- cpp/src/io/avro/reader_impl.hpp | 2 +- cpp/src/io/orc/orc_gpu.h | 33 +++--- cpp/src/io/orc/stripe_data.cu | 8 +- cpp/src/io/orc/stripe_enc.cu | 6 +- cpp/src/io/parquet/page_data.cu | 18 ++-- cpp/src/io/parquet/page_hdr.cu | 6 +- cpp/src/io/parquet/parquet_gpu.hpp | 12 +-- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/utilities/block_utils.cuh | 55 ---------- cpp/src/io/utilities/column_buffer.cpp | 143 +++++++++++++++++++++++++ cpp/src/io/utilities/column_buffer.hpp | 122 +-------------------- 16 files changed, 207 insertions(+), 243 deletions(-) create mode 100644 cpp/src/io/utilities/column_buffer.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 61cb13d3445..ed32e91bb91 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -250,6 +250,7 @@ add_library(cudf src/io/parquet/reader_impl.cu src/io/parquet/writer_impl.cu src/io/statistics/column_stats.cu + src/io/utilities/column_buffer.cpp src/io/utilities/data_sink.cpp src/io/utilities/datasource.cpp src/io/utilities/file_io_utilities.cpp diff --git a/cpp/src/io/avro/avro_common.h b/cpp/src/io/avro/avro_common.h index 509eca41e61..64680b9caf0 100644 --- a/cpp/src/io/avro/avro_common.h +++ b/cpp/src/io/avro/avro_common.h @@ -18,6 +18,7 @@ #include #include +#include namespace cudf { namespace io { @@ -56,6 +57,8 @@ enum type_kind_e { type_array, }; +using string_pair = cudf::io::detail::column_buffer::str_pair; + } // namespace avro } // namespace io } // namespace cudf diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index 321f5ee8963..7fd30395dbd 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -72,7 +72,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, size_t max_rows, const uint8_t *cur, const uint8_t *end, - device_span global_dictionary) + device_span global_dictionary) { uint32_t array_start = 0, array_repeat_count = 0; int array_children = 0; @@ -123,8 +123,8 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, if (kind == type_enum) { // dictionary size_t idx = schema[i].count + v; if (idx < global_dictionary.size()) { - ptr = global_dictionary[idx].ptr; - count = global_dictionary[idx].count; + ptr = global_dictionary[idx].first; + count = global_dictionary[idx].second; } } else if (v >= 0 && cur + v <= end) { // string ptr = reinterpret_cast(cur); @@ -132,8 +132,8 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, cur += count; } if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row].ptr = ptr; - static_cast(dataptr)[row].count = count; + static_cast(dataptr)[row].first = ptr; + static_cast(dataptr)[row].second = count; } } } break; @@ -230,7 +230,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) gpuDecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema_g, - device_span global_dictionary, + device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, @@ -313,7 +313,7 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) */ void DecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema, - device_span global_dictionary, + device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index 95b6e13d3f6..890b6da4d7e 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -25,13 +25,6 @@ namespace cudf { namespace io { namespace avro { namespace gpu { -/** - * @brief Struct to describe the output of a string datatype - */ -struct nvstrdesc_s { - const char *ptr; - size_t count; -}; /** * @brief Struct to describe the avro schema @@ -59,7 +52,7 @@ struct schemadesc_s { */ void DecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema, - cudf::device_span global_dictionary, + cudf::device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 42035687750..c9591b24a6e 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -235,7 +235,7 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ void reader::impl::decode_data(const rmm::device_buffer &block_data, const std::vector> &dict, - device_span global_dictionary, + device_span global_dictionary, size_t num_rows, std::vector> selection, std::vector &out_buffers, @@ -393,10 +393,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, for (const auto &sym : col_schema.symbols) { dictionary_data_size += sym.length(); } } - rmm::device_uvector d_global_dict(total_dictionary_entries, stream); + rmm::device_uvector d_global_dict(total_dictionary_entries, stream); rmm::device_uvector d_global_dict_data(dictionary_data_size, stream); if (total_dictionary_entries > 0) { - std::vector h_global_dict(total_dictionary_entries); + std::vector h_global_dict(total_dictionary_entries); std::vector h_global_dict_data(dictionary_data_size); size_t dict_pos = 0; for (size_t i = 0; i < column_types.size(); ++i) { @@ -406,10 +406,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, for (size_t j = 0; j < dict[i].second; j++) { auto const &symbols = col_schema.symbols[j]; - auto const data_dst = h_global_dict_data.data() + dict_pos; - auto const len = symbols.length(); - col_dict_entries[j].ptr = data_dst; - col_dict_entries[j].count = len; + auto const data_dst = h_global_dict_data.data() + dict_pos; + auto const len = symbols.length(); + col_dict_entries[j].first = data_dst; + col_dict_entries[j].second = len; std::copy(symbols.c_str(), symbols.c_str() + len, data_dst); dict_pos += len; @@ -418,7 +418,7 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, CUDA_TRY(cudaMemcpyAsync(d_global_dict.data(), h_global_dict.data(), - h_global_dict.size() * sizeof(gpu::nvstrdesc_s), + h_global_dict.size() * sizeof(string_pair), cudaMemcpyDefault, stream.value())); CUDA_TRY(cudaMemcpyAsync(d_global_dict_data.data(), diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp index 22fa1aaa760..715b757b800 100644 --- a/cpp/src/io/avro/reader_impl.hpp +++ b/cpp/src/io/avro/reader_impl.hpp @@ -97,7 +97,7 @@ class reader::impl { */ void decode_data(const rmm::device_buffer &block_data, const std::vector> &dict, - cudf::device_span global_dictionary, + cudf::device_span global_dictionary, size_t num_rows, std::vector> columns, std::vector &out_buffers, diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 55df0adf95b..c50131fbf14 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include @@ -30,12 +32,15 @@ namespace cudf { namespace io { namespace orc { namespace gpu { + +using cudf::detail::device_2dspan; + struct CompressedStreamInfo { CompressedStreamInfo() = default; explicit constexpr CompressedStreamInfo(const uint8_t *compressed_data_, size_t compressed_size_) : compressed_data(compressed_data_), - compressed_data_size(compressed_size_), uncompressed_data(nullptr), + compressed_data_size(compressed_size_), decctl(nullptr), decstatus(nullptr), copyctl(nullptr), @@ -67,14 +72,6 @@ enum StreamIndexType { CI_NUM_STREAMS }; -/** - * @brief Struct to describe the output of a string datatype - */ -struct nvstrdesc_s { - const char *ptr; - size_t count; -}; - /** * @brief Struct to describe a single entry in the global dictionary */ @@ -292,8 +289,8 @@ void DecodeOrcColumnData(ColumnDesc const *chunks, * @param[in, out] streams chunk streams device array [column][rowgroup] * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void EncodeOrcColumnData(detail::device_2dspan chunks, - detail::device_2dspan streams, +void EncodeOrcColumnData(device_2dspan chunks, + device_2dspan streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -307,10 +304,10 @@ void EncodeOrcColumnData(detail::device_2dspan chunks, * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ void EncodeStripeDictionaries(StripeDictionary *stripes, - detail::device_2dspan chunks, + device_2dspan chunks, uint32_t num_string_columns, uint32_t num_stripes, - detail::device_2dspan enc_streams, + device_2dspan enc_streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -321,7 +318,7 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ void set_chunk_columns(const table_device_view &view, - detail::device_2dspan chunks, + device_2dspan chunks, rmm::cuda_stream_view stream); /** @@ -331,8 +328,8 @@ void set_chunk_columns(const table_device_view &view, * @param[in,out] enc_streams chunk streams device array [column][rowgroup] * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void CompactOrcDataStreams(detail::device_2dspan strm_desc, - detail::device_2dspan enc_streams, +void CompactOrcDataStreams(device_2dspan strm_desc, + device_2dspan enc_streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -352,8 +349,8 @@ void CompressOrcDataStreams(uint8_t *compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, - detail::device_2dspan strm_desc, - detail::device_2dspan enc_streams, + device_2dspan strm_desc, + device_2dspan enc_streams, gpu_inflate_input_s *comp_in, gpu_inflate_status_s *comp_out, rmm::cuda_stream_view stream = rmm::cuda_stream_default); diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 6206d98773f..f3bc44b38c8 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -25,6 +25,8 @@ namespace io { namespace orc { namespace gpu { +using string_pair = cudf::io::detail::column_buffer::str_pair; + // Must be able to handle 512x 8-byte values. These values are base 128 encoded // so 8 byte value is expanded to 10 bytes. constexpr int bytestream_buffer_size = 512 * 8 * 2; @@ -1683,7 +1685,7 @@ __global__ void __launch_bounds__(block_size) case BINARY: case VARCHAR: case CHAR: { - nvstrdesc_s *strdesc = &static_cast(data_out)[row]; + string_pair *strdesc = &static_cast(data_out)[row]; void const *ptr = nullptr; uint32_t count = 0; if (is_dictionary(s->chunk.encoding_kind)) { @@ -1703,8 +1705,8 @@ __global__ void __launch_bounds__(block_size) count = secondary_val; } } - strdesc->ptr = static_cast(ptr); - strdesc->count = count; + strdesc->first = static_cast(ptr); + strdesc->second = count; break; } case TIMESTAMP: { diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 10932d36309..6ed9071f5b7 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -27,7 +27,7 @@ namespace io { namespace orc { namespace gpu { -using detail::device_2dspan; +using cudf::detail::device_2dspan; constexpr int scratch_buffer_size = 512 * 4; @@ -1226,8 +1226,8 @@ void CompressOrcDataStreams(uint8_t *compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, - detail::device_2dspan strm_desc, - detail::device_2dspan enc_streams, + device_2dspan strm_desc, + device_2dspan enc_streams, gpu_inflate_input_s *comp_in, gpu_inflate_status_s *comp_out, rmm::cuda_stream_view stream) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 538e238b5ea..2fad6039e0f 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -519,12 +519,12 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo if (s->dict_base) { // String dictionary uint32_t dict_pos = (s->dict_bits > 0) - ? s->dict_idx[src_pos & (non_zero_buffer_size - 1)] * sizeof(nvstrdesc_s) + ? s->dict_idx[src_pos & (non_zero_buffer_size - 1)] * sizeof(string_pair) : 0; if (dict_pos < (uint32_t)s->dict_size) { - const nvstrdesc_s *src = reinterpret_cast(s->dict_base + dict_pos); - ptr = src->ptr; - len = src->count; + const string_pair *src = reinterpret_cast(s->dict_base + dict_pos); + ptr = src->first; + len = src->second; } } else { // Plain encoding @@ -539,9 +539,9 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo *static_cast(dstv) = device_str2hash32(ptr, len); } else { // Output string descriptor - nvstrdesc_s *dst = static_cast(dstv); - dst->ptr = ptr; - dst->count = len; + string_pair *dst = static_cast(dstv); + dst->first = ptr; + dst->second = len; } } @@ -1010,7 +1010,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, // Fall through to DOUBLE case DOUBLE: s->dtype_len = 8; break; case INT96: s->dtype_len = 12; break; - case BYTE_ARRAY: s->dtype_len = sizeof(nvstrdesc_s); break; + case BYTE_ARRAY: s->dtype_len = sizeof(string_pair); break; default: // FIXED_LEN_BYTE_ARRAY: s->dtype_len = dtype_len_out; s->error |= (s->dtype_len <= 0); @@ -1094,7 +1094,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, if (((s->col.data_type & 7) == BYTE_ARRAY) && (s->col.str_dict_index)) { // String dictionary: use index s->dict_base = reinterpret_cast(s->col.str_dict_index); - s->dict_size = s->col.page_info[0].num_input_values * sizeof(nvstrdesc_s); + s->dict_size = s->col.page_info[0].num_input_values * sizeof(string_pair); } else { s->dict_base = s->col.page_info[0].page_data; // dictionary is always stored in the first page diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 34f5ee6fb1a..3dc02342371 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -447,7 +447,7 @@ extern "C" __global__ void __launch_bounds__(128) if (chunk >= num_chunks) { return; } if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) { // Data type to describe a string - nvstrdesc_s *dict_index = ck->str_dict_index; + string_pair *dict_index = ck->str_dict_index; const uint8_t *dict = ck->page_info[0].page_data; int dict_size = ck->page_info[0].uncompressed_page_size; int num_entries = ck->page_info[0].num_input_values; @@ -464,8 +464,8 @@ extern "C" __global__ void __launch_bounds__(128) } } // TODO: Could store 8 entries in shared mem, then do a single warp-wide store - dict_index[i].ptr = reinterpret_cast(dict + pos + 4); - dict_index[i].count = len; + dict_index[i].first = reinterpret_cast(dict + pos + 4); + dict_index[i].second = len; } } } diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 555259c443d..1e69f8c9a7b 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -39,6 +39,8 @@ namespace cudf { namespace io { namespace parquet { +using string_pair = cudf::io::detail::column_buffer::str_pair; + /** * @brief Struct representing an input column in the file. */ @@ -70,14 +72,6 @@ enum level_type { NUM_LEVEL_TYPES }; -/** - * @brief Struct to describe the output of a string datatype - */ -struct nvstrdesc_s { - const char *ptr; - size_t count; -}; - /** * @brief Nesting information */ @@ -211,7 +205,7 @@ struct ColumnChunkDesc { int32_t max_num_pages; // size of page_info array PageInfo *page_info; // output page info for up to num_dict_pages + // num_data_pages (dictionary pages first) - nvstrdesc_s *str_dict_index; // index for string dictionary + string_pair *str_dict_index; // index for string dictionary uint32_t **valid_map_base; // base pointers of valid bit map for this column void **column_data_base; // base pointers of column data int8_t codec; // compressed codec enum diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 698eb1569cb..b7415ccc9a9 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1199,7 +1199,7 @@ void reader::impl::decode_page_data(hostdevice_vector &chu // Build index for string dictionaries since they can't be indexed // directly due to variable-sized elements - rmm::device_vector str_dict_index; + rmm::device_vector str_dict_index; if (total_str_dict_indexes > 0) { str_dict_index.resize(total_str_dict_indexes); } // TODO (dm): hd_vec should have begin and end iterator members diff --git a/cpp/src/io/utilities/block_utils.cuh b/cpp/src/io/utilities/block_utils.cuh index 4c03f9a9ca0..19d2cd1ba03 100644 --- a/cpp/src/io/utilities/block_utils.cuh +++ b/cpp/src/io/utilities/block_utils.cuh @@ -197,60 +197,5 @@ inline __device__ void memcpy_block(void *dstv, const void *srcv, uint32_t len, } } -/** - * @brief Compares two strings - */ -template -inline __device__ T nvstr_compare(const char *as, uint32_t alen, const char *bs, uint32_t blen) -{ - uint32_t len = min(alen, blen); - uint32_t i = 0; - if (len >= 4) { - uint32_t align_a = 3 & reinterpret_cast(as); - uint32_t align_b = 3 & reinterpret_cast(bs); - const uint32_t *as32 = reinterpret_cast(as - align_a); - const uint32_t *bs32 = reinterpret_cast(bs - align_b); - uint32_t ofsa = align_a * 8; - uint32_t ofsb = align_b * 8; - do { - uint32_t a = *as32++; - uint32_t b = *bs32++; - if (ofsa) a = __funnelshift_r(a, *as32, ofsa); - if (ofsb) b = __funnelshift_r(b, *bs32, ofsb); - if (a != b) { - return (lesser == greater || __byte_perm(a, 0, 0x0123) < __byte_perm(b, 0, 0x0123)) - ? lesser - : greater; - } - i += 4; - } while (i + 4 <= len); - } - while (i < len) { - uint8_t a = as[i]; - uint8_t b = bs[i]; - if (a != b) { return (a < b) ? lesser : greater; } - ++i; - } - return (alen == blen) ? equal : (alen < blen) ? lesser : greater; -} - -inline __device__ bool nvstr_is_lesser(const char *as, uint32_t alen, const char *bs, uint32_t blen) -{ - return nvstr_compare(as, alen, bs, blen); -} - -inline __device__ bool nvstr_is_greater(const char *as, - uint32_t alen, - const char *bs, - uint32_t blen) -{ - return nvstr_compare(as, alen, bs, blen); -} - -inline __device__ bool nvstr_is_equal(const char *as, uint32_t alen, const char *bs, uint32_t blen) -{ - return nvstr_compare(as, alen, bs, blen); -} - } // namespace io } // namespace cudf diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp new file mode 100644 index 00000000000..88051912ccc --- /dev/null +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file column_buffer.cpp + * @brief cuDF-IO column_buffer class implementation + */ + +#include + +namespace cudf { +namespace io { +namespace detail { + +void column_buffer::create(size_type _size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + size = _size; + + switch (type.id()) { + case type_id::STRING: + _strings = std::make_unique>(size, stream); + cudaMemsetAsync(_strings->data(), 0, size * sizeof(str_pair), stream.value()); + break; + + // list columns store a buffer of int32's as offsets to represent + // their individual rows + case type_id::LIST: _data = create_data(data_type{type_id::INT32}, size, stream, mr); break; + + // struct columns store no data themselves. just validity and children. + case type_id::STRUCT: break; + + default: _data = create_data(type, size, stream, mr); break; + } + if (is_nullable) { + _null_mask = + cudf::detail::create_null_mask(size, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); + } +} + +/** + * @brief Creates a column from an existing set of device memory buffers. + * + * @throws std::bad_alloc if device memory allocation fails + * + * @param buffer Column buffer descriptors + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + * + * @return `std::unique_ptr` Column from the existing device data + */ +std::unique_ptr make_column(column_buffer& buffer, + column_name_info* schema_info, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (schema_info != nullptr) { schema_info->name = buffer.name; } + + switch (buffer.type.id()) { + case type_id::STRING: + if (schema_info != nullptr) { + schema_info->children.push_back(column_name_info{"offsets"}); + schema_info->children.push_back(column_name_info{"chars"}); + } + return make_strings_column(*buffer._strings, stream, mr); + + case type_id::LIST: { + // make offsets column + auto offsets = + std::make_unique(data_type{type_id::INT32}, buffer.size, std::move(buffer._data)); + + column_name_info* child_info = nullptr; + if (schema_info != nullptr) { + schema_info->children.push_back(column_name_info{"offsets"}); + schema_info->children.push_back(column_name_info{""}); + child_info = &schema_info->children.back(); + } + + // make child column + CUDF_EXPECTS(buffer.children.size() > 0, "Encountered malformed column_buffer"); + auto child = make_column(buffer.children[0], child_info, stream, mr); + + // make the final list column (note : size is the # of offsets, so our actual # of rows is 1 + // less) + return make_lists_column(buffer.size - 1, + std::move(offsets), + std::move(child), + buffer._null_count, + std::move(buffer._null_mask), + stream, + mr); + } break; + + case type_id::STRUCT: { + std::vector> output_children; + output_children.reserve(buffer.children.size()); + std::transform(buffer.children.begin(), + buffer.children.end(), + std::back_inserter(output_children), + [&](column_buffer& col) { + column_name_info* child_info = nullptr; + if (schema_info != nullptr) { + schema_info->children.push_back(column_name_info{""}); + child_info = &schema_info->children.back(); + } + return make_column(col, child_info, stream, mr); + }); + + return make_structs_column(buffer.size, + std::move(output_children), + buffer._null_count, + std::move(buffer._null_mask), + stream, + mr); + } break; + + default: { + return std::make_unique(buffer.type, + buffer.size, + std::move(buffer._data), + std::move(buffer._null_mask), + buffer._null_count); + } + } +} + +} // namespace detail +} // namespace io +} // namespace cudf diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 88444d41206..c82edc3b517 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -67,11 +67,6 @@ inline rmm::device_buffer create_data( * will be used to create a column. */ struct column_buffer { - // there is a potential bug here. In the decoding step, the buffer of - // data holding these pairs is cast to an nvstrdesc_s, which is a struct - // containing . So there is a mismatch between the - // size_type and the size_t. I believe this works because the str_pair is - // aligned out to 8 bytes anyway. using str_pair = thrust::pair; column_buffer() = default; @@ -86,7 +81,7 @@ struct column_buffer { bool _is_nullable = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : type(_type), is_nullable(_is_nullable), _null_count(0) + : _null_count(0), type(_type), is_nullable(_is_nullable) { create(_size, stream, mr); } @@ -103,30 +98,7 @@ struct column_buffer { // preprocessing steps such as in the Parquet reader void create(size_type _size, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - { - size = _size; - - switch (type.id()) { - case type_id::STRING: - _strings = std::make_unique>(size, stream); - cudaMemsetAsync(_strings->data(), 0, size * sizeof(str_pair), stream.value()); - break; - - // list columns store a buffer of int32's as offsets to represent - // their individual rows - case type_id::LIST: _data = create_data(data_type{type_id::INT32}, size, stream, mr); break; - - // struct columns store no data themselves. just validity and children. - case type_id::STRUCT: break; - - default: _data = create_data(type, size, stream, mr); break; - } - if (is_nullable) { - _null_mask = cudf::detail::create_null_mask( - size, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr); - } - } + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); auto data() { return _strings ? _strings->data() : _data.data(); } auto data_size() const { return _strings ? _strings->size() : _data.size(); } @@ -145,105 +117,19 @@ struct column_buffer { rmm::device_buffer _null_mask{}; size_type _null_count{0}; - bool is_nullable{false}; data_type type{type_id::EMPTY}; + bool is_nullable{false}; size_type size{0}; std::vector children; uint32_t user_data{0}; // arbitrary user data std::string name; }; -namespace { -/** - * @brief Creates a column from an existing set of device memory buffers. - * - * @throws std::bad_alloc if device memory allocation fails - * - * @param buffer Column buffer descriptors - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory - * - * @return `std::unique_ptr` Column from the existing device data - */ std::unique_ptr make_column( column_buffer& buffer, column_name_info* schema_info = nullptr, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - using str_pair = thrust::pair; - - if (schema_info != nullptr) { schema_info->name = buffer.name; } - - switch (buffer.type.id()) { - case type_id::STRING: - if (schema_info != nullptr) { - schema_info->children.push_back(column_name_info{"offsets"}); - schema_info->children.push_back(column_name_info{"chars"}); - } - return make_strings_column(*buffer._strings, stream, mr); - - case type_id::LIST: { - // make offsets column - auto offsets = - std::make_unique(data_type{type_id::INT32}, buffer.size, std::move(buffer._data)); - - column_name_info* child_info = nullptr; - if (schema_info != nullptr) { - schema_info->children.push_back(column_name_info{"offsets"}); - schema_info->children.push_back(column_name_info{""}); - child_info = &schema_info->children.back(); - } - - // make child column - CUDF_EXPECTS(buffer.children.size() > 0, "Encountered malformed column_buffer"); - auto child = make_column(buffer.children[0], child_info, stream, mr); - - // make the final list column (note : size is the # of offsets, so our actual # of rows is 1 - // less) - return make_lists_column(buffer.size - 1, - std::move(offsets), - std::move(child), - buffer._null_count, - std::move(buffer._null_mask), - stream, - mr); - } break; - - case type_id::STRUCT: { - std::vector> output_children; - output_children.reserve(buffer.children.size()); - std::transform(buffer.children.begin(), - buffer.children.end(), - std::back_inserter(output_children), - [&](column_buffer& col) { - column_name_info* child_info = nullptr; - if (schema_info != nullptr) { - schema_info->children.push_back(column_name_info{""}); - child_info = &schema_info->children.back(); - } - return make_column(col, child_info, stream, mr); - }); - - return make_structs_column(buffer.size, - std::move(output_children), - buffer._null_count, - std::move(buffer._null_mask), - stream, - mr); - } break; - - default: { - return std::make_unique(buffer.type, - buffer.size, - std::move(buffer._data), - std::move(buffer._null_mask), - buffer._null_count); - } - } -} - -} // namespace + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail } // namespace io From 197bf73303b2f8184bc43bba22a6be26ee418d26 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 2 Apr 2021 10:57:56 -0400 Subject: [PATCH 2/7] Replace relative paths with absolute paths --- cpp/src/io/avro/avro.cpp | 2 +- cpp/src/io/avro/avro.h | 2 +- cpp/src/io/avro/avro_gpu.cu | 2 +- cpp/src/io/avro/avro_gpu.h | 2 +- cpp/src/io/avro/reader_impl.cu | 2 +- cpp/src/io/avro/reader_impl.hpp | 4 ++-- cpp/src/io/comp/brotli_dict.cpp | 2 +- cpp/src/io/comp/cpu_unbz2.cpp | 4 ++-- cpp/src/io/comp/debrotli.cu | 6 +++--- cpp/src/io/comp/gpuinflate.cu | 4 ++-- cpp/src/io/comp/snap.cu | 2 +- cpp/src/io/comp/uncomp.cpp | 4 ++-- cpp/src/io/comp/unsnap.cu | 2 +- cpp/src/io/csv/csv.h | 2 +- cpp/src/io/csv/csv_gpu.cu | 6 +++--- cpp/src/io/csv/datetime.cuh | 4 ++-- cpp/src/io/csv/reader_impl.cu | 2 +- cpp/src/io/csv/reader_impl.hpp | 4 ++-- cpp/src/io/csv/writer_impl.cu | 2 +- cpp/src/io/csv/writer_impl.hpp | 4 ++-- cpp/src/io/json/json.h | 2 +- cpp/src/io/json/json_gpu.cu | 4 ++-- cpp/src/io/json/reader_impl.cu | 2 +- cpp/src/io/json/reader_impl.hpp | 4 ++-- cpp/src/io/orc/dict_enc.cu | 4 ++-- cpp/src/io/orc/orc.h | 2 +- cpp/src/io/orc/orc_gpu.h | 2 +- cpp/src/io/orc/reader_impl.cu | 4 ++-- cpp/src/io/orc/reader_impl.hpp | 4 ++-- cpp/src/io/orc/stats_enc.cu | 4 ++-- cpp/src/io/orc/stripe_data.cu | 4 ++-- cpp/src/io/orc/stripe_enc.cu | 4 ++-- cpp/src/io/orc/stripe_init.cu | 4 ++-- cpp/src/io/orc/timezone.cpp | 2 +- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/orc/writer_impl.hpp | 4 ++-- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/io/statistics/column_stats.cu | 2 +- 39 files changed, 60 insertions(+), 60 deletions(-) diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 8f0599cdd5b..122f3b2208e 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "avro.h" +#include #include #include diff --git a/cpp/src/io/avro/avro.h b/cpp/src/io/avro/avro.h index 13f5e4ecb3c..5ae9f56135b 100644 --- a/cpp/src/io/avro/avro.h +++ b/cpp/src/io/avro/avro.h @@ -16,7 +16,7 @@ #pragma once -#include "avro_common.h" +#include #include #include diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index 7fd30395dbd..b112d8a3e14 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "avro_gpu.h" +#include #include diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index 890b6da4d7e..3830d069a73 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -15,7 +15,7 @@ */ #pragma once -#include "avro_common.h" +#include #include diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index c9591b24a6e..18f48b6d720 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO Avro reader class implementation */ -#include "reader_impl.hpp" +#include #include diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp index 715b757b800..10a46bdb0e7 100644 --- a/cpp/src/io/avro/reader_impl.hpp +++ b/cpp/src/io/avro/reader_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include "avro.h" -#include "avro_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/comp/brotli_dict.cpp b/cpp/src/io/comp/brotli_dict.cpp index b493ebd6bfb..5429f35da0c 100644 --- a/cpp/src/io/comp/brotli_dict.cpp +++ b/cpp/src/io/comp/brotli_dict.cpp @@ -48,7 +48,7 @@ THE SOFTWARE. */ -#include "brotli_dict.h" +#include #include namespace cudf { diff --git a/cpp/src/io/comp/cpu_unbz2.cpp b/cpp/src/io/comp/cpu_unbz2.cpp index 28d7394e485..3755b823770 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -82,8 +82,8 @@ For more information on these sources, see the manual. #include #include #include -#include "io_uncomp.h" -#include "unbz2.h" +#include +#include namespace cudf { namespace io { diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 953872ab7ed..7c524d9fb77 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -54,8 +54,8 @@ THE SOFTWARE. */ -#include "brotli_dict.h" -#include "gpuinflate.h" +#include +#include #include @@ -90,7 +90,7 @@ inline __device__ uint32_t brev8(uint32_t x) } #define CONSTANT static const __device__ __constant__ -#include "brotli_tables.h" +#include /* typeof(MODE) == ContextType; returns ContextLut */ __inline__ __device__ int brotli_context_lut(int mode) { return (mode << 9); } diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index a31cf1717e7..99ad24328c4 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -43,8 +43,8 @@ misrepresented as being the original software. Mark Adler madler@alumni.caltech.edu */ -#include "gpuinflate.h" -#include "io_uncomp.h" +#include +#include #include diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 999d02e3a50..0f3fbe92256 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "gpuinflate.h" +#include #include diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index d5166b76892..7fb8b2b216e 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "io_uncomp.h" -#include "unbz2.h" // bz2 uncompress +#include +#include // bz2 uncompress #include #include diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 2b799b5e1bf..2bcb7b6fea2 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "gpuinflate.h" +#include #include diff --git a/cpp/src/io/csv/csv.h b/cpp/src/io/csv/csv.h index b20ca4222b2..3ec5cdb5a02 100644 --- a/cpp/src/io/csv/csv.h +++ b/cpp/src/io/csv/csv.h @@ -16,4 +16,4 @@ #pragma once -#include "csv_common.h" +#include diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 44acc7fc55f..64ff180cbe8 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "csv_common.h" -#include "csv_gpu.h" -#include "datetime.cuh" +#include +#include +#include #include #include diff --git a/cpp/src/io/csv/datetime.cuh b/cpp/src/io/csv/datetime.cuh index 7f3c2ab4942..4e4ddd09a9f 100644 --- a/cpp/src/io/csv/datetime.cuh +++ b/cpp/src/io/csv/datetime.cuh @@ -16,7 +16,7 @@ #pragma once -#include "thrust/reduce.h" +#include #include #include @@ -435,4 +435,4 @@ __inline__ __device__ int64_t to_time_delta(char const* begin, char const* end) } } // namespace io -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 76580122fe6..67d1539a3f3 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO CSV reader class implementation */ -#include "reader_impl.hpp" +#include #include #include diff --git a/cpp/src/io/csv/reader_impl.hpp b/cpp/src/io/csv/reader_impl.hpp index 2764eb0980c..2b8c1c4aae7 100644 --- a/cpp/src/io/csv/reader_impl.hpp +++ b/cpp/src/io/csv/reader_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include "csv.h" -#include "csv_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index f7e153d71f4..772c261d132 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO CSV writer class implementation */ -#include "writer_impl.hpp" +#include #include diff --git a/cpp/src/io/csv/writer_impl.hpp b/cpp/src/io/csv/writer_impl.hpp index 9c42a3666fb..e54a09013f6 100644 --- a/cpp/src/io/csv/writer_impl.hpp +++ b/cpp/src/io/csv/writer_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include "csv.h" -#include "csv_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/json/json.h b/cpp/src/io/json/json.h index 0c2309d9d64..771185b1b12 100644 --- a/cpp/src/io/json/json.h +++ b/cpp/src/io/json/json.h @@ -16,4 +16,4 @@ #pragma once -#include "json_common.h" +#include diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 75910ae6b5b..b4e3f313e6a 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "json_common.h" -#include "json_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 1a1fa8d0602..84690a16bcd 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO JSON reader class implementation */ -#include "reader_impl.hpp" +#include #include #include diff --git a/cpp/src/io/json/reader_impl.hpp b/cpp/src/io/json/reader_impl.hpp index ffd3dc58fe7..248744d283a 100644 --- a/cpp/src/io/json/reader_impl.hpp +++ b/cpp/src/io/json/reader_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include "json.h" -#include "json_gpu.h" +#include +#include #include diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index e69a61bde66..b994048b008 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "orc_common.h" -#include "orc_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index c8c5c3919d0..38e9f8f331e 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -28,7 +28,7 @@ #include #include #include -#include "orc_common.h" +#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index c50131fbf14..8e93dfd8e8c 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -16,7 +16,7 @@ #pragma once -#include "timezone.cuh" +#include #include #include diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 9f88c6584ce..639ba80425c 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -19,8 +19,8 @@ * @brief cuDF-IO ORC reader class implementation */ -#include "reader_impl.hpp" -#include "timezone.cuh" +#include +#include #include #include diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 3a2913c5548..925673061a7 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include "orc.h" -#include "orc_gpu.h" +#include +#include #include #include diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 56a55bd0a4d..ff99ae547bf 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "orc_common.h" -#include "orc_gpu.h" +#include +#include #include diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index f3bc44b38c8..1a47efea2d3 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -17,8 +17,8 @@ #include #include #include -#include "orc_common.h" -#include "orc_gpu.h" +#include +#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 6ed9071f5b7..8cc685c0077 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -19,8 +19,8 @@ #include #include #include -#include "orc_common.h" -#include "orc_gpu.h" +#include +#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 61917403b41..839b9588482 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "orc_common.h" -#include "orc_gpu.h" +#include +#include #include diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/io/orc/timezone.cpp index 81ffa954c1a..ac05dc59481 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/io/orc/timezone.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "timezone.cuh" +#include #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 10050806552..6f820e9555e 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO ORC writer class implementation */ -#include "writer_impl.hpp" +#include #include diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 352cb11440f..82e6880911e 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include "orc.h" -#include "orc_gpu.h" +#include +#include #include diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index b7415ccc9a9..b726af9084f 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO Parquet reader class implementation */ -#include "reader_impl.hpp" +#include #include diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 1e8a6920ea4..e270d746255 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO parquet writer class implementation */ -#include "writer_impl.hpp" +#include #include #include diff --git a/cpp/src/io/statistics/column_stats.cu b/cpp/src/io/statistics/column_stats.cu index 52f21f0a9ad..3090f5b7294 100644 --- a/cpp/src/io/statistics/column_stats.cu +++ b/cpp/src/io/statistics/column_stats.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "column_stats.h" +#include #include From 5ccbf3f383fbd29e9e7d732af762b79e0bbde8a7 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 2 Apr 2021 11:07:54 -0400 Subject: [PATCH 3/7] Style fix --- cpp/src/io/comp/cpu_unbz2.cpp | 4 ++-- cpp/src/io/orc/orc.h | 2 +- cpp/src/io/orc/stripe_data.cu | 4 ++-- cpp/src/io/orc/stripe_enc.cu | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/comp/cpu_unbz2.cpp b/cpp/src/io/comp/cpu_unbz2.cpp index 3755b823770..67d6e397b60 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -79,11 +79,11 @@ Jon L. Bentley For more information on these sources, see the manual. --*/ +#include +#include #include #include #include -#include -#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 38e9f8f331e..16119cd581f 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -26,9 +26,9 @@ #include #include +#include #include #include -#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 1a47efea2d3..8ef971c3bf6 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -14,11 +14,11 @@ * limitations under the License. */ +#include +#include #include #include #include -#include -#include namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 8cc685c0077..96f159c8122 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -14,13 +14,13 @@ * limitations under the License. */ +#include +#include #include #include #include #include #include -#include -#include namespace cudf { namespace io { From f0a5b47b24ff85a81a142714b2746e6de6d30c75 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Sat, 3 Apr 2021 21:07:38 -0400 Subject: [PATCH 4/7] PR review fixes --- cpp/src/io/avro/avro_common.h | 2 +- cpp/src/io/avro/avro_gpu.cu | 10 +++++----- cpp/src/io/avro/avro_gpu.h | 2 +- cpp/src/io/avro/reader_impl.cu | 8 ++++---- cpp/src/io/avro/reader_impl.hpp | 2 +- cpp/src/io/json/json_gpu.cu | 6 +++--- cpp/src/io/orc/stripe_data.cu | 8 ++++---- cpp/src/io/parquet/page_data.cu | 23 ++++++++++++----------- cpp/src/io/parquet/page_hdr.cu | 8 ++++---- cpp/src/io/parquet/parquet_gpu.hpp | 4 ++-- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/utilities/column_buffer.cpp | 4 ++-- cpp/src/io/utilities/column_buffer.hpp | 8 ++++---- 13 files changed, 44 insertions(+), 43 deletions(-) diff --git a/cpp/src/io/avro/avro_common.h b/cpp/src/io/avro/avro_common.h index 64680b9caf0..3ef36863cd2 100644 --- a/cpp/src/io/avro/avro_common.h +++ b/cpp/src/io/avro/avro_common.h @@ -57,7 +57,7 @@ enum type_kind_e { type_array, }; -using string_pair = cudf::io::detail::column_buffer::str_pair; +using cudf::io::detail::string_index_pair; } // namespace avro } // namespace io diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index b112d8a3e14..a4d17d51f56 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -72,7 +72,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, size_t max_rows, const uint8_t *cur, const uint8_t *end, - device_span global_dictionary) + device_span global_dictionary) { uint32_t array_start = 0, array_repeat_count = 0; int array_children = 0; @@ -132,8 +132,8 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, cur += count; } if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row].first = ptr; - static_cast(dataptr)[row].second = count; + static_cast(dataptr)[row].first = ptr; + static_cast(dataptr)[row].second = count; } } } break; @@ -230,7 +230,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) gpuDecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema_g, - device_span global_dictionary, + device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, @@ -313,7 +313,7 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) */ void DecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema, - device_span global_dictionary, + device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index 3830d069a73..9239919b0b9 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -52,7 +52,7 @@ struct schemadesc_s { */ void DecodeAvroColumnData(block_desc_s *blocks, schemadesc_s *schema, - cudf::device_span global_dictionary, + cudf::device_span global_dictionary, const uint8_t *avro_data, uint32_t num_blocks, uint32_t schema_len, diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 18f48b6d720..b954a65c071 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -235,7 +235,7 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ void reader::impl::decode_data(const rmm::device_buffer &block_data, const std::vector> &dict, - device_span global_dictionary, + device_span global_dictionary, size_t num_rows, std::vector> selection, std::vector &out_buffers, @@ -393,10 +393,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, for (const auto &sym : col_schema.symbols) { dictionary_data_size += sym.length(); } } - rmm::device_uvector d_global_dict(total_dictionary_entries, stream); + rmm::device_uvector d_global_dict(total_dictionary_entries, stream); rmm::device_uvector d_global_dict_data(dictionary_data_size, stream); if (total_dictionary_entries > 0) { - std::vector h_global_dict(total_dictionary_entries); + std::vector h_global_dict(total_dictionary_entries); std::vector h_global_dict_data(dictionary_data_size); size_t dict_pos = 0; for (size_t i = 0; i < column_types.size(); ++i) { @@ -418,7 +418,7 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, CUDA_TRY(cudaMemcpyAsync(d_global_dict.data(), h_global_dict.data(), - h_global_dict.size() * sizeof(string_pair), + h_global_dict.size() * sizeof(string_index_pair), cudaMemcpyDefault, stream.value())); CUDA_TRY(cudaMemcpyAsync(d_global_dict_data.data(), diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp index 10a46bdb0e7..d103d573e5e 100644 --- a/cpp/src/io/avro/reader_impl.hpp +++ b/cpp/src/io/avro/reader_impl.hpp @@ -97,7 +97,7 @@ class reader::impl { */ void decode_data(const rmm::device_buffer &block_data, const std::vector> &dict, - cudf::device_span global_dictionary, + cudf::device_span global_dictionary, size_t num_rows, std::vector> columns, std::vector &out_buffers, diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index b4e3f313e6a..6ce251881cf 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -46,7 +46,7 @@ namespace json { namespace gpu { using namespace ::cudf; -using string_pair = std::pair; +using string_index_pair = std::pair; namespace { /** @@ -516,7 +516,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, if (!serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) { // Type dispatcher does not handle strings if (column_types[desc.column].id() == type_id::STRING) { - auto str_list = static_cast(output_columns[desc.column]); + auto str_list = static_cast(output_columns[desc.column]); str_list[rec_id].first = desc.value_begin; str_list[rec_id].second = value_len; @@ -537,7 +537,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, } } } else if (column_types[desc.column].id() == type_id::STRING) { - auto str_list = static_cast(output_columns[desc.column]); + auto str_list = static_cast(output_columns[desc.column]); str_list[rec_id].first = nullptr; str_list[rec_id].second = 0; } diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 8ef971c3bf6..daf6dae1357 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -25,7 +25,7 @@ namespace io { namespace orc { namespace gpu { -using string_pair = cudf::io::detail::column_buffer::str_pair; +using cudf::io::detail::string_index_pair; // Must be able to handle 512x 8-byte values. These values are base 128 encoded // so 8 byte value is expanded to 10 bytes. @@ -1685,9 +1685,9 @@ __global__ void __launch_bounds__(block_size) case BINARY: case VARCHAR: case CHAR: { - string_pair *strdesc = &static_cast(data_out)[row]; - void const *ptr = nullptr; - uint32_t count = 0; + string_index_pair *strdesc = &static_cast(data_out)[row]; + void const *ptr = nullptr; + uint32_t count = 0; if (is_dictionary(s->chunk.encoding_kind)) { auto const dict_idx = s->vals.u32[t + vals_skipped]; if (dict_idx < s->chunk.dict_len) { diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 2fad6039e0f..08e71bbfa4a 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -518,13 +518,14 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo if (s->dict_base) { // String dictionary - uint32_t dict_pos = (s->dict_bits > 0) - ? s->dict_idx[src_pos & (non_zero_buffer_size - 1)] * sizeof(string_pair) - : 0; + uint32_t dict_pos = (s->dict_bits > 0) ? s->dict_idx[src_pos & (non_zero_buffer_size - 1)] * + sizeof(string_index_pair) + : 0; if (dict_pos < (uint32_t)s->dict_size) { - const string_pair *src = reinterpret_cast(s->dict_base + dict_pos); - ptr = src->first; - len = src->second; + const string_index_pair *src = + reinterpret_cast(s->dict_base + dict_pos); + ptr = src->first; + len = src->second; } } else { // Plain encoding @@ -539,9 +540,9 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo *static_cast(dstv) = device_str2hash32(ptr, len); } else { // Output string descriptor - string_pair *dst = static_cast(dstv); - dst->first = ptr; - dst->second = len; + string_index_pair *dst = static_cast(dstv); + dst->first = ptr; + dst->second = len; } } @@ -1010,7 +1011,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, // Fall through to DOUBLE case DOUBLE: s->dtype_len = 8; break; case INT96: s->dtype_len = 12; break; - case BYTE_ARRAY: s->dtype_len = sizeof(string_pair); break; + case BYTE_ARRAY: s->dtype_len = sizeof(string_index_pair); break; default: // FIXED_LEN_BYTE_ARRAY: s->dtype_len = dtype_len_out; s->error |= (s->dtype_len <= 0); @@ -1094,7 +1095,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, if (((s->col.data_type & 7) == BYTE_ARRAY) && (s->col.str_dict_index)) { // String dictionary: use index s->dict_base = reinterpret_cast(s->col.str_dict_index); - s->dict_size = s->col.page_info[0].num_input_values * sizeof(string_pair); + s->dict_size = s->col.page_info[0].num_input_values * sizeof(string_index_pair); } else { s->dict_base = s->col.page_info[0].page_data; // dictionary is always stored in the first page diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 3dc02342371..c66bfb68503 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -447,10 +447,10 @@ extern "C" __global__ void __launch_bounds__(128) if (chunk >= num_chunks) { return; } if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) { // Data type to describe a string - string_pair *dict_index = ck->str_dict_index; - const uint8_t *dict = ck->page_info[0].page_data; - int dict_size = ck->page_info[0].uncompressed_page_size; - int num_entries = ck->page_info[0].num_input_values; + string_index_pair *dict_index = ck->str_dict_index; + const uint8_t *dict = ck->page_info[0].page_data; + int dict_size = ck->page_info[0].uncompressed_page_size; + int num_entries = ck->page_info[0].num_input_values; int pos = 0, cur = 0; for (int i = 0; i < num_entries; i++) { int len = 0; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 1e69f8c9a7b..e9b76b425a1 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -39,7 +39,7 @@ namespace cudf { namespace io { namespace parquet { -using string_pair = cudf::io::detail::column_buffer::str_pair; +using cudf::io::detail::string_index_pair; /** * @brief Struct representing an input column in the file. @@ -205,7 +205,7 @@ struct ColumnChunkDesc { int32_t max_num_pages; // size of page_info array PageInfo *page_info; // output page info for up to num_dict_pages + // num_data_pages (dictionary pages first) - string_pair *str_dict_index; // index for string dictionary + string_index_pair *str_dict_index; // index for string dictionary uint32_t **valid_map_base; // base pointers of valid bit map for this column void **column_data_base; // base pointers of column data int8_t codec; // compressed codec enum diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index b726af9084f..9262551a774 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1199,7 +1199,7 @@ void reader::impl::decode_page_data(hostdevice_vector &chu // Build index for string dictionaries since they can't be indexed // directly due to variable-sized elements - rmm::device_vector str_dict_index; + rmm::device_vector str_dict_index; if (total_str_dict_indexes > 0) { str_dict_index.resize(total_str_dict_indexes); } // TODO (dm): hd_vec should have begin and end iterator members diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 88051912ccc..45cfe3828b0 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -33,8 +33,8 @@ void column_buffer::create(size_type _size, switch (type.id()) { case type_id::STRING: - _strings = std::make_unique>(size, stream); - cudaMemsetAsync(_strings->data(), 0, size * sizeof(str_pair), stream.value()); + _strings = std::make_unique>(size, stream); + cudaMemsetAsync(_strings->data(), 0, size * sizeof(string_index_pair), stream.value()); break; // list columns store a buffer of int32's as offsets to represent diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index c82edc3b517..f2b1b71107d 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -62,13 +62,13 @@ inline rmm::device_buffer create_data( return data; } +using string_index_pair = thrust::pair; + /** * @brief Class for holding device memory buffers to column data that eventually * will be used to create a column. */ struct column_buffer { - using str_pair = thrust::pair; - column_buffer() = default; // construct without a known size. call create() later to actually @@ -81,7 +81,7 @@ struct column_buffer { bool _is_nullable = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : _null_count(0), type(_type), is_nullable(_is_nullable) + : type(_type), is_nullable(_is_nullable) { create(_size, stream, mr); } @@ -112,7 +112,7 @@ struct column_buffer { auto& null_count() { return _null_count; } - std::unique_ptr> _strings; + std::unique_ptr> _strings; rmm::device_buffer _data{}; rmm::device_buffer _null_mask{}; size_type _null_count{0}; From 4276c3aefa285bf1f1fb19b9e997758caa754ca7 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Sat, 3 Apr 2021 21:15:58 -0400 Subject: [PATCH 5/7] Fix json --- cpp/src/io/json/json_common.h | 2 ++ cpp/src/io/json/json_gpu.cu | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/json/json_common.h b/cpp/src/io/json/json_common.h index 0bcd4e95f9a..f33435c1673 100644 --- a/cpp/src/io/json/json_common.h +++ b/cpp/src/io/json/json_common.h @@ -17,6 +17,8 @@ #pragma once #include +#include #include class SerialTrieNode; +using cudf::io::detail::string_index_pair; diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 6ce251881cf..3209a245801 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -46,8 +46,6 @@ namespace json { namespace gpu { using namespace ::cudf; -using string_index_pair = std::pair; - namespace { /** * @brief CUDA Kernel that adjusts the row range to exclude the character outside of the top level From 6e7992220b4c517a577985e3cadc29e1bb6a4c83 Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Wed, 7 Apr 2021 13:42:40 -0400 Subject: [PATCH 6/7] PR review fixes --- cpp/src/io/avro/avro.cpp | 2 +- cpp/src/io/avro/avro.h | 2 +- cpp/src/io/avro/avro_gpu.cu | 2 +- cpp/src/io/avro/avro_gpu.h | 2 +- cpp/src/io/avro/reader_impl.cu | 2 +- cpp/src/io/avro/reader_impl.hpp | 4 ++-- cpp/src/io/comp/brotli_dict.cpp | 2 +- cpp/src/io/comp/cpu_unbz2.cpp | 4 ++-- cpp/src/io/comp/debrotli.cu | 6 +++--- cpp/src/io/comp/gpuinflate.cu | 4 ++-- cpp/src/io/comp/snap.cu | 2 +- cpp/src/io/comp/uncomp.cpp | 4 ++-- cpp/src/io/comp/unsnap.cu | 2 +- cpp/src/io/csv/csv.h | 19 ------------------- cpp/src/io/csv/csv_gpu.cu | 6 +++--- cpp/src/io/csv/reader_impl.cu | 2 +- cpp/src/io/csv/reader_impl.hpp | 4 ++-- cpp/src/io/csv/writer_impl.cu | 2 +- cpp/src/io/csv/writer_impl.hpp | 4 ++-- cpp/src/io/json/json.h | 19 ------------------- cpp/src/io/json/json_gpu.cu | 4 ++-- cpp/src/io/json/json_gpu.h | 2 +- cpp/src/io/json/reader_impl.cu | 2 +- cpp/src/io/json/reader_impl.hpp | 4 ++-- cpp/src/io/orc/dict_enc.cu | 4 ++-- cpp/src/io/orc/orc.cpp | 6 +++--- cpp/src/io/orc/orc.h | 2 +- cpp/src/io/orc/orc_field_reader.hpp | 2 +- cpp/src/io/orc/orc_field_writer.hpp | 2 +- cpp/src/io/orc/orc_gpu.h | 4 ++-- cpp/src/io/orc/reader_impl.cu | 6 +++--- cpp/src/io/orc/reader_impl.hpp | 4 ++-- cpp/src/io/orc/stats_enc.cu | 4 ++-- cpp/src/io/orc/stripe_data.cu | 4 ++-- cpp/src/io/orc/stripe_enc.cu | 4 ++-- cpp/src/io/orc/stripe_init.cu | 4 ++-- cpp/src/io/orc/timezone.cpp | 2 +- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/orc/writer_impl.hpp | 4 ++-- .../io/parquet/compact_protocol_writer.cpp | 2 +- .../io/parquet/compact_protocol_writer.hpp | 4 ++-- cpp/src/io/parquet/page_data.cu | 2 +- cpp/src/io/parquet/page_dict.cu | 2 +- cpp/src/io/parquet/page_enc.cu | 2 +- cpp/src/io/parquet/page_hdr.cu | 2 +- cpp/src/io/parquet/parquet.cpp | 2 +- cpp/src/io/parquet/parquet.hpp | 2 +- cpp/src/io/parquet/parquet_gpu.hpp | 2 +- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/parquet/reader_impl.hpp | 4 ++-- cpp/src/io/parquet/writer_impl.cu | 4 ++-- cpp/src/io/parquet/writer_impl.hpp | 4 ++-- cpp/src/io/statistics/column_stats.cu | 2 +- cpp/src/io/utilities/column_buffer.cpp | 2 +- cpp/src/io/utilities/data_sink.cpp | 2 +- cpp/src/io/utilities/datasource.cpp | 2 +- cpp/src/io/utilities/file_io_utilities.cpp | 2 +- cpp/src/io/utilities/parsing_utils.cuh | 2 +- 58 files changed, 84 insertions(+), 122 deletions(-) delete mode 100644 cpp/src/io/csv/csv.h delete mode 100644 cpp/src/io/json/json.h diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 122f3b2208e..8f0599cdd5b 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "avro.h" #include #include diff --git a/cpp/src/io/avro/avro.h b/cpp/src/io/avro/avro.h index 5ae9f56135b..13f5e4ecb3c 100644 --- a/cpp/src/io/avro/avro.h +++ b/cpp/src/io/avro/avro.h @@ -16,7 +16,7 @@ #pragma once -#include +#include "avro_common.h" #include #include diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index a4d17d51f56..ebd7f51a08a 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include "avro_gpu.h" #include diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index 9239919b0b9..a82d3604d02 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -15,7 +15,7 @@ */ #pragma once -#include +#include "avro_common.h" #include diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index b954a65c071..600633f0ed8 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO Avro reader class implementation */ -#include +#include "reader_impl.hpp" #include diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp index d103d573e5e..8e09da03563 100644 --- a/cpp/src/io/avro/reader_impl.hpp +++ b/cpp/src/io/avro/reader_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include -#include +#include "avro.h" +#include "avro_gpu.h" #include #include diff --git a/cpp/src/io/comp/brotli_dict.cpp b/cpp/src/io/comp/brotli_dict.cpp index 5429f35da0c..b493ebd6bfb 100644 --- a/cpp/src/io/comp/brotli_dict.cpp +++ b/cpp/src/io/comp/brotli_dict.cpp @@ -48,7 +48,7 @@ THE SOFTWARE. */ -#include +#include "brotli_dict.h" #include namespace cudf { diff --git a/cpp/src/io/comp/cpu_unbz2.cpp b/cpp/src/io/comp/cpu_unbz2.cpp index 67d6e397b60..28d7394e485 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -79,11 +79,11 @@ Jon L. Bentley For more information on these sources, see the manual. --*/ -#include -#include #include #include #include +#include "io_uncomp.h" +#include "unbz2.h" namespace cudf { namespace io { diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 7c524d9fb77..953872ab7ed 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -54,8 +54,8 @@ THE SOFTWARE. */ -#include -#include +#include "brotli_dict.h" +#include "gpuinflate.h" #include @@ -90,7 +90,7 @@ inline __device__ uint32_t brev8(uint32_t x) } #define CONSTANT static const __device__ __constant__ -#include +#include "brotli_tables.h" /* typeof(MODE) == ContextType; returns ContextLut */ __inline__ __device__ int brotli_context_lut(int mode) { return (mode << 9); } diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 99ad24328c4..a31cf1717e7 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -43,8 +43,8 @@ misrepresented as being the original software. Mark Adler madler@alumni.caltech.edu */ -#include -#include +#include "gpuinflate.h" +#include "io_uncomp.h" #include diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 0f3fbe92256..999d02e3a50 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "gpuinflate.h" #include diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 7fb8b2b216e..d5166b76892 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include -#include // bz2 uncompress +#include "io_uncomp.h" +#include "unbz2.h" // bz2 uncompress #include #include diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 2bcb7b6fea2..2b799b5e1bf 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "gpuinflate.h" #include diff --git a/cpp/src/io/csv/csv.h b/cpp/src/io/csv/csv.h deleted file mode 100644 index 3ec5cdb5a02..00000000000 --- a/cpp/src/io/csv/csv.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 64ff180cbe8..44acc7fc55f 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include -#include -#include +#include "csv_common.h" +#include "csv_gpu.h" +#include "datetime.cuh" #include #include diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 67d1539a3f3..76580122fe6 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO CSV reader class implementation */ -#include +#include "reader_impl.hpp" #include #include diff --git a/cpp/src/io/csv/reader_impl.hpp b/cpp/src/io/csv/reader_impl.hpp index 2b8c1c4aae7..d61c2847b7e 100644 --- a/cpp/src/io/csv/reader_impl.hpp +++ b/cpp/src/io/csv/reader_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include -#include +#include "csv_common.h" +#include "csv_gpu.h" #include #include diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 772c261d132..f7e153d71f4 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO CSV writer class implementation */ -#include +#include "writer_impl.hpp" #include diff --git a/cpp/src/io/csv/writer_impl.hpp b/cpp/src/io/csv/writer_impl.hpp index e54a09013f6..965c036dc75 100644 --- a/cpp/src/io/csv/writer_impl.hpp +++ b/cpp/src/io/csv/writer_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include -#include +#include "csv_common.h" +#include "csv_gpu.h" #include #include diff --git a/cpp/src/io/json/json.h b/cpp/src/io/json/json.h deleted file mode 100644 index 771185b1b12..00000000000 --- a/cpp/src/io/json/json.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 3209a245801..b9ced355107 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include -#include +#include "json_common.h" +#include "json_gpu.h" #include #include diff --git a/cpp/src/io/json/json_gpu.h b/cpp/src/io/json/json_gpu.h index fb8d7b2c7ab..4a68ce48f20 100644 --- a/cpp/src/io/json/json_gpu.h +++ b/cpp/src/io/json/json_gpu.h @@ -16,8 +16,8 @@ #pragma once -#include #include +#include "json_common.h" #include diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 84690a16bcd..1a1fa8d0602 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO JSON reader class implementation */ -#include +#include "reader_impl.hpp" #include #include diff --git a/cpp/src/io/json/reader_impl.hpp b/cpp/src/io/json/reader_impl.hpp index 248744d283a..e6df503619f 100644 --- a/cpp/src/io/json/reader_impl.hpp +++ b/cpp/src/io/json/reader_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include -#include +#include "json_common.h" +#include "json_gpu.h" #include diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index b994048b008..e69a61bde66 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include -#include +#include "orc_common.h" +#include "orc_gpu.h" #include #include diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index e1b6c3ace6c..63162e28120 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -#include -#include -#include +#include "orc.h" #include +#include "orc_field_reader.hpp" +#include "orc_field_writer.hpp" namespace cudf { namespace io { diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 16119cd581f..c8c5c3919d0 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -26,9 +26,9 @@ #include #include -#include #include #include +#include "orc_common.h" namespace cudf { namespace io { diff --git a/cpp/src/io/orc/orc_field_reader.hpp b/cpp/src/io/orc/orc_field_reader.hpp index 9bb1ff4310b..8e9bca44340 100644 --- a/cpp/src/io/orc/orc_field_reader.hpp +++ b/cpp/src/io/orc/orc_field_reader.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include "orc.h" /** * @file orc_field_reader.hpp diff --git a/cpp/src/io/orc/orc_field_writer.hpp b/cpp/src/io/orc/orc_field_writer.hpp index c60e5cbd23c..13c7befa3a1 100644 --- a/cpp/src/io/orc/orc_field_writer.hpp +++ b/cpp/src/io/orc/orc_field_writer.hpp @@ -15,9 +15,9 @@ */ #pragma once -#include #include #include +#include "orc.h" /** * @file orc_field_writer.hpp diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 8e93dfd8e8c..dadc8a06281 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -16,15 +16,15 @@ #pragma once -#include +#include "timezone.cuh" #include -#include #include #include #include #include #include +#include "orc_common.h" #include diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 639ba80425c..07d3bdff8e3 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -19,11 +19,11 @@ * @brief cuDF-IO ORC reader class implementation */ -#include -#include +#include "reader_impl.hpp" +#include "timezone.cuh" #include -#include +#include "orc.h" #include #include diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 925673061a7..3a2913c5548 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include -#include +#include "orc.h" +#include "orc_gpu.h" #include #include diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index ff99ae547bf..56a55bd0a4d 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include -#include +#include "orc_common.h" +#include "orc_gpu.h" #include diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index daf6dae1357..cd031af0dfb 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include -#include #include #include #include +#include "orc_common.h" +#include "orc_gpu.h" namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 96f159c8122..6ed9071f5b7 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include #include #include #include #include #include +#include "orc_common.h" +#include "orc_gpu.h" namespace cudf { namespace io { diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 839b9588482..61917403b41 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include -#include +#include "orc_common.h" +#include "orc_gpu.h" #include diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/io/orc/timezone.cpp index ac05dc59481..81ffa954c1a 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/io/orc/timezone.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include "timezone.cuh" #include #include diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 6f820e9555e..10050806552 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO ORC writer class implementation */ -#include +#include "writer_impl.hpp" #include diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 82e6880911e..352cb11440f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -16,8 +16,8 @@ #pragma once -#include -#include +#include "orc.h" +#include "orc_gpu.h" #include diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index ddb5006098d..a9b8eb0ac6b 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "compact_protocol_writer.hpp" namespace cudf { namespace io { diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index 680ea078a2f..2ce9245490e 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -16,8 +16,8 @@ #pragma once -#include -#include +#include "parquet.hpp" +#include "parquet_common.hpp" #include #include diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 08e71bbfa4a..dfd9c1384c5 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include #include #include +#include "parquet_gpu.hpp" #include #include diff --git a/cpp/src/io/parquet/page_dict.cu b/cpp/src/io/parquet/page_dict.cu index 2676f30474d..30842820448 100644 --- a/cpp/src/io/parquet/page_dict.cu +++ b/cpp/src/io/parquet/page_dict.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include #include +#include "parquet_gpu.hpp" #include diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 51ec0013f1a..6c31605887a 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include +#include "parquet_gpu.hpp" #include #include diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index c66bfb68503..bc10fd92566 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -15,8 +15,8 @@ */ #include -#include #include +#include "parquet_gpu.hpp" #include diff --git a/cpp/src/io/parquet/parquet.cpp b/cpp/src/io/parquet/parquet.cpp index 40ce222825b..2a1bd0d5a18 100644 --- a/cpp/src/io/parquet/parquet.cpp +++ b/cpp/src/io/parquet/parquet.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ +#include "parquet.hpp" #include -#include namespace cudf { namespace io { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 6c1c6209266..eefff518a9a 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include "parquet_common.hpp" #include #include diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index e9b76b425a1..a7698ea8a78 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -18,9 +18,9 @@ #include #include -#include #include #include +#include "parquet_common.hpp" #include #include diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 9262551a774..1e7c63b3ce6 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -19,7 +19,7 @@ * @brief cuDF-IO Parquet reader class implementation */ -#include +#include "reader_impl.hpp" #include diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index ca200936134..ffd8975a8d2 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include -#include +#include "parquet.hpp" +#include "parquet_gpu.hpp" #include #include diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index e270d746255..bc8a37525be 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -19,10 +19,10 @@ * @brief cuDF-IO parquet writer class implementation */ -#include +#include "writer_impl.hpp" -#include #include +#include "compact_protocol_writer.hpp" #include #include diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index b8532d755eb..e5103122033 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -21,8 +21,8 @@ #pragma once -#include -#include +#include "parquet.hpp" +#include "parquet_gpu.hpp" #include #include diff --git a/cpp/src/io/statistics/column_stats.cu b/cpp/src/io/statistics/column_stats.cu index 3090f5b7294..52f21f0a9ad 100644 --- a/cpp/src/io/statistics/column_stats.cu +++ b/cpp/src/io/statistics/column_stats.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "column_stats.h" #include diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 45cfe3828b0..2bcc8124538 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -19,7 +19,7 @@ * @brief cuDF-IO column_buffer class implementation */ -#include +#include "column_buffer.hpp" namespace cudf { namespace io { diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 10af7bcb0bd..d133d813ab3 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include "file_io_utilities.hpp" #include diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 8f2a5389b4d..ac8deccd078 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -21,7 +21,7 @@ #include #include -#include +#include "file_io_utilities.hpp" namespace cudf { namespace io { diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 322296715fc..abf3a3fdef0 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include "file_io_utilities.hpp" #include diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index b7719cba580..9cfa46aaf11 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include "column_type_histogram.hpp" #include From 7e5efd7e6ded437546a19ac365f6f4ce8f16744a Mon Sep 17 00:00:00 2001 From: Kumar Aatish Date: Fri, 16 Apr 2021 17:01:01 -0400 Subject: [PATCH 7/7] PR review fix --- cpp/src/io/utilities/column_buffer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 2bcc8124538..9170a9016c4 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -20,6 +20,7 @@ */ #include "column_buffer.hpp" +#include namespace cudf { namespace io { @@ -33,8 +34,8 @@ void column_buffer::create(size_type _size, switch (type.id()) { case type_id::STRING: - _strings = std::make_unique>(size, stream); - cudaMemsetAsync(_strings->data(), 0, size * sizeof(string_index_pair), stream.value()); + _strings = std::make_unique>( + cudf::detail::make_zeroed_device_uvector_async(size, stream)); break; // list columns store a buffer of int32's as offsets to represent