Skip to content

Commit

Permalink
Merge branch 'branch-22.04' into build-time-chart
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Jan 27, 2022
2 parents 88217b3 + 5dd1c39 commit 6e776d1
Show file tree
Hide file tree
Showing 15 changed files with 347 additions and 66 deletions.
29 changes: 4 additions & 25 deletions ci/cpu/prebuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,11 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
set -e

DEFAULT_CUDA_VER="11.5"
DEFAULT_PYTHON_VER="3.8"

#Always upload cudf Python package
#Always upload cudf packages
export UPLOAD_CUDF=1

#Upload libcudf once per CUDA
if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]]; then
export UPLOAD_LIBCUDF=1
else
export UPLOAD_LIBCUDF=0
fi

# upload cudf_kafka for all versions of Python
if [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_CUDF_KAFKA=1
else
export UPLOAD_CUDF_KAFKA=0
fi

#We only want to upload libcudf_kafka once per python/CUDA combo
if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_LIBCUDF_KAFKA=1
else
export UPLOAD_LIBCUDF_KAFKA=0
fi
export UPLOAD_LIBCUDF=1
export UPLOAD_CUDF_KAFKA=1
export UPLOAD_LIBCUDF_KAFKA=1

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
#If project flash is not active, always build both
Expand Down
8 changes: 5 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,11 @@ add_library(
src/strings/copying/concatenate.cu
src/strings/copying/copying.cu
src/strings/copying/shift.cu
src/strings/count_matches.cu
src/strings/extract/extract.cu
src/strings/extract/extract_all.cu
src/strings/filling/fill.cu
src/strings/filter_chars.cu
src/strings/findall.cu
src/strings/find.cu
src/strings/find_multiple.cu
src/strings/padding.cu
src/strings/json/json_path.cu
src/strings/regex/regcomp.cpp
Expand All @@ -441,6 +439,10 @@ add_library(
src/strings/replace/multi_re.cu
src/strings/replace/replace.cu
src/strings/replace/replace_re.cu
src/strings/search/findall.cu
src/strings/search/findall_record.cu
src/strings/search/find.cu
src/strings/search/find_multiple.cu
src/strings/split/partition.cu
src/strings/split/split.cu
src/strings/split/split_record.cu
Expand Down
5 changes: 3 additions & 2 deletions cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>
inline auto make_pool_instance()
{
static rmm::mr::cuda_memory_resource cuda_mr;
static rmm::mr::pool_memory_resource pool_mr{&cuda_mr};
return std::shared_ptr<rmm::mr::device_memory_resource>(&pool_mr);
static auto pool_mr =
std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(&cuda_mr);
return pool_mr;
}
} // namespace

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/contains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ static void BM_contains(benchmark::State& state, contains_type ct)
cudf::strings::count_re(input, "\\d+");
break;
case contains_type::findall: // returns occurrences of matches
cudf::strings::findall_re(input, "\\d+");
cudf::strings::findall(input, "\\d+");
break;
}
}
Expand Down
34 changes: 32 additions & 2 deletions cpp/include/cudf/strings/findall.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,7 +52,37 @@ namespace strings {
* @param mr Device memory resource used to allocate the returned table's device memory.
* @return New table of strings columns.
*/
std::unique_ptr<table> findall_re(
std::unique_ptr<table> findall(
strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a lists column of strings for each matching occurrence of the
* regex pattern within each string.
*
* @code{.pseudo}
* Example:
* s = ["bunny", "rabbit", "hare", "dog"]
* r = findall_record(s, "[ab]")
* r is now a lists column like:
* [ ["b"]
* ["a","b","b"]
* ["a"]
* null ]
* @endcode
*
* A null output row results if the pattern is not found in the corresponding
* input string.
*
* See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
*
* @param strings Strings instance for this operation.
* @param pattern Regex pattern to match within each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New lists column of strings.
*/
std::unique_ptr<column> findall_record(
strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,16 +14,13 @@
* limitations under the License.
*/

#pragma once

#include <strings/count_matches.hpp>
#include <strings/regex/regex.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/string_view.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/transform.h>
Expand All @@ -32,6 +29,7 @@ namespace cudf {
namespace strings {
namespace detail {

namespace {
/**
* @brief Functor counts the total matches to the given regex in each string.
*/
Expand All @@ -50,12 +48,13 @@ struct count_matches_fn {
int32_t end = d_str.length();
while ((begin < end) && (prog.find<stack_size>(idx, d_str, begin, end) > 0)) {
++count;
begin = end;
begin = end + (begin == end);
end = d_str.length();
}
return count;
}
};
} // namespace

/**
* @brief Returns a column of regex match counts for each string in the given column.
Expand All @@ -67,11 +66,10 @@ struct count_matches_fn {
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*/
std::unique_ptr<column> count_matches(
column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> count_matches(column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Create output column
auto counts = make_numeric_column(
Expand Down
50 changes: 50 additions & 0 deletions cpp/src/strings/count_matches.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {

class column_device_view;

namespace strings {
namespace detail {

class reprog_device;

/**
* @brief Returns a column of regex match counts for each string in the given column.
*
* A null entry will result in a zero count for that output row.
*
* @param d_strings Device view of the input strings column.
* @param d_prog Regex instance to evaluate on each string.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* Defaults to the current device resource.
* @return New column holding the match count for each input string.
*/
std::unique_ptr<column> count_matches(
column_device_view const& d_strings,
reprog_device const& d_prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace strings
} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/src/strings/extract/extract_all.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include <strings/count_matches.cuh>
#include <strings/count_matches.hpp>
#include <strings/regex/regex.cuh>
#include <strings/utilities.hpp>

Expand Down
File renamed without changes.
File renamed without changes.
14 changes: 7 additions & 7 deletions cpp/src/strings/findall.cu → cpp/src/strings/search/findall.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@ struct findall_count_fn : public findall_fn<stack_size> {
} // namespace

//
std::unique_ptr<table> findall_re(
std::unique_ptr<table> findall(
strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
rmm::cuda_stream_view stream = rmm::cuda_stream_default)
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto const strings_count = strings.size();
auto const d_strings = column_device_view::create(strings.parent(), stream);
Expand Down Expand Up @@ -205,12 +205,12 @@ std::unique_ptr<table> findall_re(

// external API

std::unique_ptr<table> findall_re(strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<table> findall(strings_column_view const& strings,
std::string const& pattern,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::findall_re(strings, pattern, mr);
return detail::findall(strings, pattern, rmm::cuda_stream_default, mr);
}

} // namespace strings
Expand Down
Loading

0 comments on commit 6e776d1

Please sign in to comment.