Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.04' into find-multip…
Browse files Browse the repository at this point in the history
…le-return-list
  • Loading branch information
bdice committed Jan 28, 2022
2 parents 8fe87de + b7aa47f commit 90432ec
Show file tree
Hide file tree
Showing 65 changed files with 1,372 additions and 1,466 deletions.
29 changes: 4 additions & 25 deletions ci/cpu/prebuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,11 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
set -e

DEFAULT_CUDA_VER="11.5"
DEFAULT_PYTHON_VER="3.8"

#Always upload cudf Python package
#Always upload cudf packages
export UPLOAD_CUDF=1

#Upload libcudf once per CUDA
if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]]; then
export UPLOAD_LIBCUDF=1
else
export UPLOAD_LIBCUDF=0
fi

# upload cudf_kafka for all versions of Python
if [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_CUDF_KAFKA=1
else
export UPLOAD_CUDF_KAFKA=0
fi

#We only want to upload libcudf_kafka once per python/CUDA combo
if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_LIBCUDF_KAFKA=1
else
export UPLOAD_LIBCUDF_KAFKA=0
fi
export UPLOAD_LIBCUDF=1
export UPLOAD_CUDF_KAFKA=1
export UPLOAD_LIBCUDF_KAFKA=1

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
#If project flash is not active, always build both
Expand Down
8 changes: 5 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,11 @@ add_library(
src/strings/copying/concatenate.cu
src/strings/copying/copying.cu
src/strings/copying/shift.cu
src/strings/count_matches.cu
src/strings/extract/extract.cu
src/strings/extract/extract_all.cu
src/strings/filling/fill.cu
src/strings/filter_chars.cu
src/strings/findall.cu
src/strings/find.cu
src/strings/find_multiple.cu
src/strings/padding.cu
src/strings/json/json_path.cu
src/strings/regex/regcomp.cpp
Expand All @@ -441,6 +439,10 @@ add_library(
src/strings/replace/multi_re.cu
src/strings/replace/replace.cu
src/strings/replace/replace_re.cu
src/strings/search/findall.cu
src/strings/search/findall_record.cu
src/strings/search/find.cu
src/strings/search/find_multiple.cu
src/strings/split/partition.cu
src/strings/split/split.cu
src/strings/split/split_record.cu
Expand Down
5 changes: 3 additions & 2 deletions cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>
/**
 * @brief Returns a shared handle to a process-wide pool memory resource.
 *
 * The pool is layered on a function-local static CUDA memory resource and is created once, on
 * first call; every later call returns a copy of the same shared pointer.
 *
 * @return Shared pointer to the pool memory resource.
 */
inline auto make_pool_instance()
{
  static rmm::mr::cuda_memory_resource cuda_mr;
  // The pool is heap-allocated through make_shared so the shared_ptr legitimately owns it.
  // Wrapping the address of a static object in an owning shared_ptr would instead invoke
  // `delete` on storage that was never allocated with `new`.
  static auto pool_mr =
    std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(&cuda_mr);
  return pool_mr;
}
} // namespace

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/contains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ static void BM_contains(benchmark::State& state, contains_type ct)
cudf::strings::count_re(input, "\\d+");
break;
case contains_type::findall: // returns occurrences of matches
cudf::strings::findall_re(input, "\\d+");
cudf::strings::findall(input, "\\d+");
break;
}
}
Expand Down
14 changes: 8 additions & 6 deletions cpp/docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,9 @@ implemented using asynchronous APIs on the default stream (e.g., stream 0).

The recommended pattern for doing this is to make the definition of the external API invoke an
internal API in the `detail` namespace. The internal `detail` API has the same parameters as the
public API, plus a `rmm::cuda_stream_view` parameter at the end defaulted to
public API, plus a `rmm::cuda_stream_view` parameter at the end with no default value. If the
detail API also accepts a memory resource parameter, the stream parameter should ideally be placed
just *before* the memory resource. The public API will call the detail API and provide
`rmm::cuda_stream_default`. The implementation should be wholly contained in the `detail` API
definition and use only asynchronous versions of CUDA APIs with the stream parameter.

Expand All @@ -362,14 +364,14 @@ void external_function(...);

// cpp/include/cudf/detail/header.hpp
namespace detail{
void external_function(..., rmm::cuda_stream_view stream = rmm::cuda_stream_default)
void external_function(..., rmm::cuda_stream_view stream)
} // namespace detail

// cudf/src/implementation.cpp
namespace detail{
// defaulted stream parameter
// Use the stream parameter in the detail implementation.
void external_function(..., rmm::cuda_stream_view stream){
// implementation uses stream w/ async APIs
// Implementation uses the stream with async APIs.
rmm::device_buffer buff(...,stream);
CUDA_TRY(cudaMemcpyAsync(...,stream.value()));
kernel<<<..., stream>>>(...);
Expand All @@ -378,8 +380,8 @@ namespace detail{
} // namespace detail

void external_function(...){
CUDF_FUNC_RANGE(); // Auto generates NVTX range for lifetime of this function
detail::external_function(...);
CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function.
detail::external_function(..., rmm::cuda_stream_default);
}
```
Expand Down
9 changes: 7 additions & 2 deletions cpp/include/cudf/io/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@ class orc_reader_options {
*
* @param val Vector of fully qualified column names.
*/
void set_decimal_cols_as_float(std::vector<std::string> val)
[[deprecated(
  "Decimal to float conversion is deprecated and will be removed in a future release")]] void
set_decimal_cols_as_float(std::vector<std::string> val)
{
  // `val` is taken by value and moved into the option, so the caller's vector is copied at most
  // once.
  _decimal_cols_as_float = std::move(val);
}
Expand Down Expand Up @@ -344,7 +346,10 @@ class orc_reader_options_builder {
* @param val Vector of column names.
* @return this for chaining.
*/
orc_reader_options_builder& decimal_cols_as_float(std::vector<std::string> val)
[[deprecated(
  "Decimal to float conversion is deprecated and will be removed in a future "
  "release")]] orc_reader_options_builder&
decimal_cols_as_float(std::vector<std::string> val)
{
  // Move the by-value argument into the wrapped options, then hand back the builder so calls
  // can be chained.
  options._decimal_cols_as_float = std::move(val);
  return *this;
}
34 changes: 32 additions & 2 deletions cpp/include/cudf/strings/findall.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,7 +52,37 @@ namespace strings {
* @param mr Device memory resource used to allocate the returned table's device memory.
* @return New table of strings columns.
*/
std::unique_ptr<table> findall_re(
std::unique_ptr<table> findall(
strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a lists column of strings for each matching occurrence of the
* regex pattern within each string.
*
* @code{.pseudo}
* Example:
* s = ["bunny", "rabbit", "hare", "dog"]
* r = findall_record(s, "[ab]")
* r is now a lists column like:
* [ ["b"]
* ["a","b","b"]
* ["a"]
* null ]
* @endcode
*
* A null output row results if the pattern is not found in the corresponding row
* input string.
*
* See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
*
* @param strings Strings instance for this operation.
* @param pattern Regex pattern to match within each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New lists column of strings.
*/
std::unique_ptr<column> findall_record(
strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,16 +14,13 @@
* limitations under the License.
*/

#pragma once

#include <strings/count_matches.hpp>
#include <strings/regex/regex.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/string_view.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/transform.h>
Expand All @@ -32,6 +29,7 @@ namespace cudf {
namespace strings {
namespace detail {

namespace {
/**
* @brief Functor counts the total matches to the given regex in each string.
*/
Expand All @@ -50,12 +48,13 @@ struct count_matches_fn {
int32_t end = d_str.length();
while ((begin < end) && (prog.find<stack_size>(idx, d_str, begin, end) > 0)) {
++count;
begin = end;
begin = end + (begin == end);
end = d_str.length();
}
return count;
}
};
} // namespace

/**
* @brief Returns a column of regex match counts for each string in the given column.
Expand All @@ -67,11 +66,10 @@ struct count_matches_fn {
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*/
std::unique_ptr<column> count_matches(
column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> count_matches(column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Create output column
auto counts = make_numeric_column(
Expand Down
50 changes: 50 additions & 0 deletions cpp/src/strings/count_matches.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {

class column_device_view;

namespace strings {
namespace detail {

class reprog_device;

/**
* @brief Returns a column of regex match counts for each string in the given column.
*
* A null entry will result in a zero count for that output row.
*
* @param d_strings Device view of the input strings column.
* @param d_prog Regex instance to evaluate on each string.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*           Defaults to the current device resource when omitted.
* @return New column holding the match count for each input row.
*/
std::unique_ptr<column> count_matches(
column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace strings
} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/src/strings/extract/extract_all.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include <strings/count_matches.cuh>
#include <strings/count_matches.hpp>
#include <strings/regex/regex.cuh>
#include <strings/utilities.hpp>

Expand Down
Loading

0 comments on commit 90432ec

Please sign in to comment.