Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove nvstrdesc_s from cuio #7841

Merged
merged 8 commits into from
Apr 17, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ add_library(cudf
src/io/parquet/reader_impl.cu
src/io/parquet/writer_impl.cu
src/io/statistics/column_stats.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/avro/avro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "avro.h"
#include <io/avro/avro.h>
#include <string.h>
#include <unordered_map>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/avro/avro.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include "avro_common.h"
#include <io/avro/avro_common.h>

#include <stddef.h>
#include <stdint.h>
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/io/avro/avro_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <stdint.h>
#include <stdio.h>
#include <io/utilities/column_buffer.hpp>

namespace cudf {
namespace io {
Expand Down Expand Up @@ -56,6 +57,8 @@ enum type_kind_e {
type_array,
};

using cudf::io::detail::string_index_pair;

} // namespace avro
} // namespace io
} // namespace cudf
16 changes: 8 additions & 8 deletions cpp/src/io/avro/avro_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "avro_gpu.h"
#include <io/avro/avro_gpu.h>

#include <io/utilities/block_utils.cuh>

Expand Down Expand Up @@ -72,7 +72,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
size_t max_rows,
const uint8_t *cur,
const uint8_t *end,
device_span<nvstrdesc_s> global_dictionary)
device_span<string_index_pair> global_dictionary)
{
uint32_t array_start = 0, array_repeat_count = 0;
int array_children = 0;
Expand Down Expand Up @@ -123,17 +123,17 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
if (kind == type_enum) { // dictionary
size_t idx = schema[i].count + v;
if (idx < global_dictionary.size()) {
ptr = global_dictionary[idx].ptr;
count = global_dictionary[idx].count;
ptr = global_dictionary[idx].first;
count = global_dictionary[idx].second;
}
} else if (v >= 0 && cur + v <= end) { // string
ptr = reinterpret_cast<const char *>(cur);
count = (size_t)v;
cur += count;
}
if (dataptr != nullptr && row < max_rows) {
static_cast<nvstrdesc_s *>(dataptr)[row].ptr = ptr;
static_cast<nvstrdesc_s *>(dataptr)[row].count = count;
static_cast<string_index_pair *>(dataptr)[row].first = ptr;
static_cast<string_index_pair *>(dataptr)[row].second = count;
}
}
} break;
Expand Down Expand Up @@ -230,7 +230,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
extern "C" __global__ void __launch_bounds__(num_warps * 32, 2)
gpuDecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema_g,
device_span<nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down Expand Up @@ -313,7 +313,7 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2)
*/
void DecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema,
device_span<nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down
11 changes: 2 additions & 9 deletions cpp/src/io/avro/avro_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/
#pragma once

#include "avro_common.h"
#include <io/avro/avro_common.h>

#include <cudf/utilities/span.hpp>

Expand All @@ -25,13 +25,6 @@ namespace cudf {
namespace io {
namespace avro {
namespace gpu {
/**
* @brief Struct to describe the output of a string datatype
*/
struct nvstrdesc_s {
const char *ptr;
size_t count;
};

/**
* @brief Struct to describe the avro schema
Expand Down Expand Up @@ -59,7 +52,7 @@ struct schemadesc_s {
*/
void DecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema,
cudf::device_span<nvstrdesc_s> global_dictionary,
cudf::device_span<string_index_pair> global_dictionary,
const uint8_t *avro_data,
uint32_t num_blocks,
uint32_t schema_len,
Expand Down
18 changes: 9 additions & 9 deletions cpp/src/io/avro/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* @brief cuDF-IO Avro reader class implementation
*/

#include "reader_impl.hpp"
#include <io/avro/reader_impl.hpp>

#include <io/comp/gpuinflate.h>

Expand Down Expand Up @@ -235,7 +235,7 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_

void reader::impl::decode_data(const rmm::device_buffer &block_data,
const std::vector<std::pair<uint32_t, uint32_t>> &dict,
device_span<gpu::nvstrdesc_s> global_dictionary,
device_span<string_index_pair> global_dictionary,
size_t num_rows,
std::vector<std::pair<int, std::string>> selection,
std::vector<column_buffer> &out_buffers,
Expand Down Expand Up @@ -393,10 +393,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,
for (const auto &sym : col_schema.symbols) { dictionary_data_size += sym.length(); }
}

rmm::device_uvector<gpu::nvstrdesc_s> d_global_dict(total_dictionary_entries, stream);
rmm::device_uvector<string_index_pair> d_global_dict(total_dictionary_entries, stream);
rmm::device_uvector<char> d_global_dict_data(dictionary_data_size, stream);
if (total_dictionary_entries > 0) {
std::vector<gpu::nvstrdesc_s> h_global_dict(total_dictionary_entries);
std::vector<string_index_pair> h_global_dict(total_dictionary_entries);
std::vector<char> h_global_dict_data(dictionary_data_size);
size_t dict_pos = 0;
for (size_t i = 0; i < column_types.size(); ++i) {
Expand All @@ -406,10 +406,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,
for (size_t j = 0; j < dict[i].second; j++) {
auto const &symbols = col_schema.symbols[j];

auto const data_dst = h_global_dict_data.data() + dict_pos;
auto const len = symbols.length();
col_dict_entries[j].ptr = data_dst;
col_dict_entries[j].count = len;
auto const data_dst = h_global_dict_data.data() + dict_pos;
auto const len = symbols.length();
col_dict_entries[j].first = data_dst;
col_dict_entries[j].second = len;

std::copy(symbols.c_str(), symbols.c_str() + len, data_dst);
dict_pos += len;
Expand All @@ -418,7 +418,7 @@ table_with_metadata reader::impl::read(avro_reader_options const &options,

CUDA_TRY(cudaMemcpyAsync(d_global_dict.data(),
h_global_dict.data(),
h_global_dict.size() * sizeof(gpu::nvstrdesc_s),
h_global_dict.size() * sizeof(string_index_pair),
cudaMemcpyDefault,
stream.value()));
CUDA_TRY(cudaMemcpyAsync(d_global_dict_data.data(),
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/avro/reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

#pragma once

#include "avro.h"
#include "avro_gpu.h"
#include <io/avro/avro.h>
#include <io/avro/avro_gpu.h>

#include <cudf/utilities/span.hpp>
#include <io/utilities/column_buffer.hpp>
Expand Down Expand Up @@ -97,7 +97,7 @@ class reader::impl {
*/
void decode_data(const rmm::device_buffer &block_data,
const std::vector<std::pair<uint32_t, uint32_t>> &dict,
cudf::device_span<gpu::nvstrdesc_s> global_dictionary,
cudf::device_span<string_index_pair> global_dictionary,
size_t num_rows,
std::vector<std::pair<int, std::string>> columns,
std::vector<column_buffer> &out_buffers,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/comp/brotli_dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ THE SOFTWARE.

*/

#include "brotli_dict.h"
#include <io/comp/brotli_dict.h>
kaatish marked this conversation as resolved.
Show resolved Hide resolved
#include <stdint.h>

namespace cudf {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/comp/cpu_unbz2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ Jon L. Bentley
For more information on these sources, see the manual.
--*/

#include <io/comp/io_uncomp.h>
#include <io/comp/unbz2.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include "io_uncomp.h"
#include "unbz2.h"

namespace cudf {
namespace io {
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/comp/debrotli.cu
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ THE SOFTWARE.

*/

#include "brotli_dict.h"
#include "gpuinflate.h"
#include <io/comp/brotli_dict.h>
#include <io/comp/gpuinflate.h>

#include <io/utilities/block_utils.cuh>

Expand Down Expand Up @@ -90,7 +90,7 @@ inline __device__ uint32_t brev8(uint32_t x)
}

#define CONSTANT static const __device__ __constant__
#include "brotli_tables.h"
#include <io/comp/brotli_tables.h>

/* typeof(MODE) == ContextType; returns ContextLut */
__inline__ __device__ int brotli_context_lut(int mode) { return (mode << 9); }
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/comp/gpuinflate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ misrepresented as being the original software.
Mark Adler madler@alumni.caltech.edu
*/

#include "gpuinflate.h"
#include "io_uncomp.h"
#include <io/comp/gpuinflate.h>
#include <io/comp/io_uncomp.h>

#include <io/utilities/block_utils.cuh>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/comp/snap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "gpuinflate.h"
#include <io/comp/gpuinflate.h>

#include <io/utilities/block_utils.cuh>

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/comp/uncomp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
* limitations under the License.
*/

#include "io_uncomp.h"
#include "unbz2.h" // bz2 uncompress
#include <io/comp/io_uncomp.h>
#include <io/comp/unbz2.h> // bz2 uncompress

#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "gpuinflate.h"
#include <io/comp/gpuinflate.h>

#include <io/utilities/block_utils.cuh>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/csv/csv.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@

#pragma once

#include "csv_common.h"
#include <io/csv/csv_common.h>
6 changes: 3 additions & 3 deletions cpp/src/io/csv/csv_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
* limitations under the License.
*/

#include "csv_common.h"
#include "csv_gpu.h"
#include "datetime.cuh"
#include <io/csv/csv_common.h>
#include <io/csv/csv_gpu.h>
#include <io/csv/datetime.cuh>

#include <io/utilities/block_utils.cuh>
#include <io/utilities/parsing_utils.cuh>
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/datetime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include "thrust/reduce.h"
#include <thrust/reduce.h>

#include <cudf/wrappers/durations.hpp>
#include <io/utilities/parsing_utils.cuh>
Expand Down Expand Up @@ -435,4 +435,4 @@ __inline__ __device__ int64_t to_time_delta(char const* begin, char const* end)
}

} // namespace io
} // namespace cudf
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/io/csv/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* @brief cuDF-IO CSV reader class implementation
*/

#include "reader_impl.hpp"
#include <io/csv/reader_impl.hpp>

#include <io/comp/io_uncomp.h>
#include <io/utilities/parsing_utils.cuh>
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#pragma once

#include "csv.h"
#include "csv_gpu.h"
#include <io/csv/csv.h>
#include <io/csv/csv_gpu.h>

#include <cudf/detail/utilities/trie.cuh>
#include <io/utilities/column_buffer.hpp>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* @brief cuDF-IO CSV writer class implementation
*/

#include "writer_impl.hpp"
#include <io/csv/writer_impl.hpp>

#include <strings/utilities.cuh>

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/writer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

#pragma once

#include "csv.h"
#include "csv_gpu.h"
#include <io/csv/csv.h>
#include <io/csv/csv_gpu.h>

#include <cudf/strings/strings_column_view.hpp>
#include <io/utilities/hostdevice_vector.hpp>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@

#pragma once

#include "json_common.h"
#include <io/json/json_common.h>
kaatish marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 2 additions & 0 deletions cpp/src/io/json/json_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#pragma once

#include <cudf/types.hpp>
#include <io/utilities/column_buffer.hpp>
#include <io/utilities/column_type_histogram.hpp>

class SerialTrieNode;
using cudf::io::detail::string_index_pair;
Loading