From 2c2410a213b1435b5dbb2ad8dfa508013abdfd36 Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 24 Mar 2023 23:57:49 -0700 Subject: [PATCH 1/3] fix --- cpp/src/io/utilities/trie.cu | 2 ++ cpp/src/io/utilities/trie.cuh | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index e2ace7258f7..516ec3ff50d 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -36,6 +36,8 @@ namespace detail { rmm::device_uvector create_serialized_trie(const std::vector& keys, rmm::cuda_stream_view stream) { + if (keys.empty()) {return rmm::device_uvector{0, stream};} + static constexpr int alphabet_size = std::numeric_limits::max() + 1; struct TreeTrieNode { using TrieNodePtr = std::unique_ptr; diff --git a/cpp/src/io/utilities/trie.cuh b/cpp/src/io/utilities/trie.cuh index 85834ad2f0e..6614cc08250 100644 --- a/cpp/src/io/utilities/trie.cuh +++ b/cpp/src/io/utilities/trie.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -83,8 +83,8 @@ trie create_serialized_trie(const std::vector& keys, rmm::cuda_stre __host__ __device__ inline bool serialized_trie_contains(device_span trie, device_span key) { - if (trie.data() == nullptr || trie.empty()) return false; - if (key.empty()) return trie.front().is_leaf; + if (trie.empty()) {return false;} + if (key.empty()) {return trie.front().is_leaf;} auto curr_node = trie.begin() + 1; for (auto curr_key = key.begin(); curr_key < key.end(); ++curr_key) { // Don't jump away from root node From 140e64c0a3e0df5d30973f3632809a0080637886 Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 24 Mar 2023 23:57:54 -0700 Subject: [PATCH 2/3] test --- cpp/tests/io/csv_test.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index a0daab767c0..193d1092cab 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -1358,6 +1358,22 @@ TEST_F(CsvReaderTest, nullHandling) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, view.column(0)); } + + // Filter enabled, but no NA values + { + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .keep_default_na(false) + .dtypes({dtype()}) + .header(-1) + .skip_blank_lines(false); + const auto result = cudf::io::read_csv(in_opts); + const auto view = result.tbl->view(); + auto expect = + cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, view.column(0)); + } } TEST_F(CsvReaderTest, FailCases) From 1c0132bc074153a9660b06c681533a900a81af94 Mon Sep 17 00:00:00 2001 From: vuule Date: Fri, 24 Mar 2023 23:59:02 -0700 Subject: [PATCH 3/3] style --- cpp/src/io/utilities/trie.cu | 2 +- cpp/src/io/utilities/trie.cuh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index 516ec3ff50d..d902cfecf40 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -36,7 +36,7 @@ namespace detail { rmm::device_uvector create_serialized_trie(const std::vector& keys, rmm::cuda_stream_view stream) { - if (keys.empty()) {return rmm::device_uvector{0, stream};} + if (keys.empty()) { return rmm::device_uvector{0, stream}; } static constexpr int alphabet_size = std::numeric_limits::max() + 1; struct TreeTrieNode { diff --git a/cpp/src/io/utilities/trie.cuh b/cpp/src/io/utilities/trie.cuh index 6614cc08250..0f87de81653 100644 --- a/cpp/src/io/utilities/trie.cuh +++ b/cpp/src/io/utilities/trie.cuh @@ -83,8 +83,8 @@ trie create_serialized_trie(const std::vector& keys, rmm::cuda_stre __host__ __device__ inline bool serialized_trie_contains(device_span trie, device_span key) { - if (trie.empty()) {return false;} - if (key.empty()) {return trie.front().is_leaf;} + if (trie.empty()) { return false; } + if (key.empty()) { return trie.front().is_leaf; } auto curr_node = trie.begin() + 1; for (auto curr_key = key.begin(); curr_key < key.end(); ++curr_key) { // Don't jump away from root node