Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove nvstrdesc_s from cuio #7841

Merged
merged 8 commits into from
Apr 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ add_library(cudf
src/io/parquet/reader_impl.cu
src/io/parquet/writer_impl.cu
src/io/statistics/column_stats.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/io/avro/avro_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <stdint.h>
#include <stdio.h>
#include <io/utilities/column_buffer.hpp>

namespace cudf {
namespace io {
Expand Down Expand Up @@ -56,6 +57,8 @@ enum type_kind_e {
type_array,
};

using cudf::io::detail::string_index_pair;

} // namespace avro
} // namespace io
} // namespace cudf
14 changes: 7 additions & 7 deletions cpp/src/io/avro/avro_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
size_t max_rows,
const uint8_t *cur,
const uint8_t *end,
device_span<nvstrdesc_s> global_dictionary)
device_span<string_index_pair> global_dictionary)
{
uint32_t array_start = 0, array_repeat_count = 0;
int array_children = 0;
Expand Down Expand Up @@ -123,17 +123,17 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
if (kind == type_enum) { // dictionary
size_t idx = schema[i].count + v;
if (idx < global_dictionary.size()) {
ptr = global_dictionary[idx].ptr;
count = global_dictionary[idx].count;
ptr = global_dictionary[idx].first;
count = global_dictionary[idx].second;
}
} else if (v >= 0 && cur + v <= end) { // string
ptr = reinterpret_cast<const char *>(cur);
count = (size_t)v;
cur += count;
}
if (dataptr != nullptr && row < max_rows) {
static_cast<nvstrdesc_s *>(dataptr)[row].ptr = ptr;
static_cast<nvstrdesc_s *>(dataptr)[row].count = count;
static_cast<string_index_pair *>(dataptr)[row].first = ptr;
static_cast<string_index_pair *>(dataptr)[row].second = count;
}
}
} break;
Expand Down Expand Up @@ -230,7 +230,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
extern "C" __global__ void __launch_bounds__(num_warps * 32, 2)
gpuDecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema_g,
device_span<nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down Expand Up @@ -313,7 +313,7 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2)
*/
void DecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema,
device_span<nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down
9 changes: 1 addition & 8 deletions cpp/src/io/avro/avro_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,6 @@ namespace cudf {
namespace io {
namespace avro {
namespace gpu {
/**
* @brief Struct to describe the output of a string datatype
*/
struct nvstrdesc_s {
const char *ptr;
size_t count;
};

/**
* @brief Struct to describe the avro schema
Expand Down Expand Up @@ -59,7 +52,7 @@ struct schemadesc_s {
*/
void DecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema,
cudf::device_span<nvstrdesc_s> global_dictionary,
cudf::device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/io/avro/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_

void reader::impl::decode_data(const rmm::device_buffer &block_data,
const std::vector<std::pair<uint32_t, uint32_t>> &dict,
device_span<gpu::nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
size_t num_rows,
std::vector<std::pair<int, std::string>> selection,
std::vector<column_buffer> &out_buffers,
Expand Down Expand Up @@ -393,10 +393,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,
for (const auto &sym : col_schema.symbols) { dictionary_data_size += sym.length(); }
}

rmm::device_uvector<gpu::nvstrdesc_s> d_global_dict(total_dictionary_entries, stream);
rmm::device_uvector<string_index_pair> d_global_dict(total_dictionary_entries, stream);
rmm::device_uvector<char> d_global_dict_data(dictionary_data_size, stream);
if (total_dictionary_entries > 0) {
std::vector<gpu::nvstrdesc_s> h_global_dict(total_dictionary_entries);
std::vector<string_index_pair> h_global_dict(total_dictionary_entries);
std::vector<char> h_global_dict_data(dictionary_data_size);
size_t dict_pos = 0;
for (size_t i = 0; i < column_types.size(); ++i) {
Expand All @@ -406,10 +406,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,
for (size_t j = 0; j < dict[i].second; j++) {
auto const &symbols = col_schema.symbols[j];

auto const data_dst = h_global_dict_data.data() + dict_pos;
auto const len = symbols.length();
col_dict_entries[j].ptr = data_dst;
col_dict_entries[j].count = len;
auto const data_dst = h_global_dict_data.data() + dict_pos;
auto const len = symbols.length();
col_dict_entries[j].first = data_dst;
col_dict_entries[j].second = len;

std::copy(symbols.c_str(), symbols.c_str() + len, data_dst);
dict_pos += len;
Expand All @@ -418,7 +418,7 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,

CUDA_TRY(cudaMemcpyAsync(d_global_dict.data(),
h_global_dict.data(),
h_global_dict.size() * sizeof(gpu::nvstrdesc_s),
h_global_dict.size() * sizeof(string_index_pair),
cudaMemcpyDefault,
stream.value()));
CUDA_TRY(cudaMemcpyAsync(d_global_dict_data.data(),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/avro/reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class reader::impl {
*/
void decode_data(const rmm::device_buffer &block_data,
const std::vector<std::pair<uint32_t, uint32_t>> &dict,
cudf::device_span<gpu::nvstrdesc_s> global_dictionary,
cudf::device_span<string_index_pair> global_dictionary,
size_t num_rows,
std::vector<std::pair<int, std::string>> columns,
std::vector<column_buffer> &out_buffers,
Expand Down
19 changes: 0 additions & 19 deletions cpp/src/io/csv/csv.h

This file was deleted.

4 changes: 2 additions & 2 deletions cpp/src/io/csv/datetime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include "thrust/reduce.h"
#include <thrust/reduce.h>

#include <cudf/wrappers/durations.hpp>
#include <io/utilities/parsing_utils.cuh>
Expand Down Expand Up @@ -435,4 +435,4 @@ __inline__ __device__ int64_t to_time_delta(char const* begin, char const* end)
}

} // namespace io
} // namespace cudf
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/io/csv/reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include "csv.h"
#include "csv_common.h"
#include "csv_gpu.h"

#include <cudf/detail/utilities/trie.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/csv/writer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include "csv.h"
#include "csv_common.h"
#include "csv_gpu.h"

#include <cudf/strings/strings_column_view.hpp>
Expand Down
19 changes: 0 additions & 19 deletions cpp/src/io/json/json.h

This file was deleted.

2 changes: 2 additions & 0 deletions cpp/src/io/json/json_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#pragma once

#include <cudf/types.hpp>
#include <io/utilities/column_buffer.hpp>
#include <io/utilities/column_type_histogram.hpp>

class SerialTrieNode;
using cudf::io::detail::string_index_pair;
6 changes: 2 additions & 4 deletions cpp/src/io/json/json_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ namespace json {
namespace gpu {
using namespace ::cudf;

using string_pair = std::pair<const char *, size_t>;

namespace {
/**
* @brief CUDA Kernel that adjusts the row range to exclude the character outside of the top level
Expand Down Expand Up @@ -516,7 +514,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts,
if (!serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) {
// Type dispatcher does not handle strings
if (column_types[desc.column].id() == type_id::STRING) {
auto str_list = static_cast<string_pair *>(output_columns[desc.column]);
auto str_list = static_cast<string_index_pair *>(output_columns[desc.column]);
str_list[rec_id].first = desc.value_begin;
str_list[rec_id].second = value_len;

Expand All @@ -537,7 +535,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts,
}
}
} else if (column_types[desc.column].id() == type_id::STRING) {
auto str_list = static_cast<string_pair *>(output_columns[desc.column]);
auto str_list = static_cast<string_index_pair *>(output_columns[desc.column]);
str_list[rec_id].first = nullptr;
str_list[rec_id].second = 0;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/json_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#pragma once

#include <io/json/json_common.h>
#include <io/utilities/parsing_utils.cuh>
#include "json_common.h"

#include <hash/concurrent_unordered_map.cuh>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#pragma once

#include "json.h"
#include "json_common.h"
#include "json_gpu.h"

#include <io/utilities/column_buffer.hpp>
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/orc/orc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
* limitations under the License.
*/

#include <io/orc/orc.h>
#include <io/orc/orc_field_reader.hpp>
#include <io/orc/orc_field_writer.hpp>
#include "orc.h"
#include <string>
#include "orc_field_reader.hpp"
#include "orc_field_writer.hpp"

namespace cudf {
namespace io {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/orc/orc_field_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include <io/orc/orc.h>
#include <string>
#include "orc.h"

/**
* @file orc_field_reader.hpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/orc/orc_field_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
*/
#pragma once

#include <io/orc/orc.h>
#include <numeric>
#include <string>
#include "orc.h"

/**
* @file orc_field_writer.hpp
Expand Down
Loading