Skip to content

Commit

Permalink
fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Mar 18, 2021
2 parents 0a1db67 + 4723051 commit 88ae901
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 75 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,5 +190,6 @@ ConfigureBench(STRINGS_BENCH
string/find_benchmark.cpp
string/replace_benchmark.cpp
string/split_benchmark.cpp
string/substring_benchmark.cpp
string/translate_benchmark.cpp
string/url_decode_benchmark.cpp)
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/substring_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/substring.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>

#include <thrust/iterator/constant_iterator.h>

/**
 * @brief Fixture type for the substring benchmarks registered in this file.
 *
 * Adds no members of its own; it only gives the BENCHMARK_DEFINE_F / BENCHMARK_REGISTER_F
 * registrations a distinct fixture name derived from cudf::benchmark.
 */
class StringSubstring : public cudf::benchmark {};

/**
 * @brief Selects which cudf::strings::slice_strings overload BM_substring times.
 */
enum substring_type {
  position,        // one start/stop position pair applied to every row
  multi_position,  // per-row start and stop positions supplied as columns
  delimiter,       // a single delimiter string plus a count
  multi_delimiter  // per-row delimiters supplied as a strings column plus a count
};

/**
 * @brief Benchmark body shared by every StringSubstring case.
 *
 * Creates a one-column random strings table sized from the benchmark ranges, prepares the
 * per-row argument columns, and then calls the cudf::strings::slice_strings overload chosen
 * by `rt` once per benchmark iteration.
 *
 * @param state Google Benchmark state; range(0) is used as the row count and range(1) as the
 *              upper distribution parameter for the generated strings.
 * @param rt    Which slice_strings overload to exercise.
 */
static void BM_substring(benchmark::State& state, substring_type rt)
{
  auto const num_rows = static_cast<cudf::size_type>(state.range(0));
  auto const max_len  = static_cast<cudf::size_type>(state.range(1));

  // Random input column. NOTE(review): 0 and max_len are presumably the lower/upper bounds on
  // the generated string lengths -- confirm against data_profile.
  data_profile profile;
  profile.set_distribution_params(cudf::type_id::STRING, distribution_id::NORMAL, 0, max_len);
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, 1, row_count{num_rows}, profile);
  cudf::strings_column_view input(random_table->view().column(0));

  // Per-row arguments for the column-based overloads: every row gets start 1, stop max_len / 2,
  // and a single-space delimiter.
  auto first_pos = thrust::constant_iterator<cudf::size_type>(1);
  auto last_pos  = thrust::constant_iterator<cudf::size_type>(max_len / 2);
  cudf::test::fixed_width_column_wrapper<int32_t> starts(first_pos, first_pos + num_rows);
  cudf::test::fixed_width_column_wrapper<int32_t> stops(last_pos, last_pos + num_rows);
  auto space_itr = thrust::constant_iterator<std::string>(" ");
  cudf::test::strings_column_wrapper delimiters(space_itr, space_itr + num_rows);

  for (auto _ : state) {
    // Scoped timer covering the slice_strings call below; the returned column is discarded.
    cuda_event_timer raii(state, true, 0);
    switch (rt) {
      case position: {
        cudf::strings::slice_strings(input, 1, max_len / 2);
        break;
      }
      case multi_position: {
        cudf::strings::slice_strings(input, starts, stops);
        break;
      }
      case delimiter: {
        cudf::strings::slice_strings(input, std::string{" "}, 1);
        break;
      }
      case multi_delimiter: {
        cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1);
        break;
      }
    }
  }

  // Throughput is reported against the size of the input chars data.
  state.SetBytesProcessed(state.iterations() * input.chars_size());
}

/**
 * @brief Supplies the argument combinations for the StringSubstring benchmarks.
 *
 * Forwards fixed row-count and row-length limits to generate_string_bench_args.
 * NOTE(review): that helper presumably sweeps rows from the minimum to the maximum by the row
 * multiplier, and likewise for row length -- confirm in string_bench_args.hpp.
 *
 * @param b Benchmark registration object passed through to generate_string_bench_args.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  constexpr int rows_lo     = 1 << 12;
  constexpr int rows_hi     = 1 << 24;
  constexpr int rows_factor = 8;
  constexpr int len_lo      = 1 << 5;
  constexpr int len_hi      = 1 << 13;
  constexpr int len_factor  = 4;
  generate_string_bench_args(b, rows_lo, rows_hi, rows_factor, len_lo, len_hi, len_factor);
}

// Defines a StringSubstring fixture benchmark called `name` whose body runs
// BM_substring(st, substring_type::name), then registers it with the argument sets from
// generate_bench_args, manual timing, and millisecond reporting units.
// `name` must therefore be an enumerator of substring_type.
#define STRINGS_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(StringSubstring, name) \
(::benchmark::State & st) { BM_substring(st, substring_type::name); } \
BENCHMARK_REGISTER_F(StringSubstring, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// One registered benchmark per substring_type enumerator.
STRINGS_BENCHMARK_DEFINE(position)
STRINGS_BENCHMARK_DEFINE(multi_position)
STRINGS_BENCHMARK_DEFINE(delimiter)
STRINGS_BENCHMARK_DEFINE(multi_delimiter)
136 changes: 61 additions & 75 deletions cpp/src/strings/substring.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,17 +43,25 @@ namespace {
* using the provided start, stop, and step parameters.
*/
struct substring_fn {
const column_device_view d_column;
numeric_scalar_device_view<size_type> d_start, d_stop, d_step;
const int32_t* d_offsets{};
column_device_view const d_column;
numeric_scalar_device_view<size_type> const d_start;
numeric_scalar_device_view<size_type> const d_stop;
numeric_scalar_device_view<size_type> const d_step;
int32_t* d_offsets{};
char* d_chars{};

__device__ cudf::size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) return 0; // null string
string_view d_str = d_column.template element<string_view>(idx);
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_column.template element<string_view>(idx);
auto const length = d_str.length();
if (length == 0) return 0; // empty string
if (length == 0) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
size_type const step = d_step.is_valid() ? d_step.value() : 1;
auto const begin = [&] { // always inclusive
// when invalid, default depends on step
Expand Down Expand Up @@ -88,7 +96,7 @@ struct substring_fn {
if (d_buffer) d_buffer += from_char_utf8(*itr, d_buffer);
itr += step;
}
return bytes;
if (!d_chars) d_offsets[idx] = bytes;
}
};

Expand All @@ -103,42 +111,26 @@ std::unique_ptr<column> slice_strings(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
size_type strings_count = strings.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings.is_empty()) return make_empty_strings_column(stream, mr);

if (step.is_valid()) CUDF_EXPECTS(step.value(stream) != 0, "Step parameter must not be 0");

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
auto d_start = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(start));
auto d_stop = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
auto d_step = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));

// copy the null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0), substring_fn{d_column, d_start, d_stop, d_step});
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_new_offsets = offsets_column->view().data<int32_t>();

// build chars column
auto bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(
strings_count, strings.null_count(), bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
substring_fn{d_column, d_start, d_stop, d_step, d_new_offsets, d_chars});
auto const d_column = column_device_view::create(strings.parent(), stream);
auto const d_start = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(start));
auto const d_stop = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
auto const d_step = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
auto children = make_strings_children(substring_fn{*d_column, d_start, d_stop, d_step},
strings.size(),
strings.null_count(),
stream,
mr);

return make_strings_column(strings.size(),
std::move(children.first),
std::move(children.second),
strings.null_count(),
std::move(null_mask),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down Expand Up @@ -166,25 +158,33 @@ namespace {
* This both calculates the output size and executes the substring.
*/
struct substring_from_fn {
const column_device_view d_column;
const cudf::detail::input_indexalator starts;
const cudf::detail::input_indexalator stops;
const int32_t* d_offsets{};
column_device_view const d_column;
cudf::detail::input_indexalator const starts;
cudf::detail::input_indexalator const stops;
int32_t* d_offsets{};
char* d_chars{};

__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) return 0; // null string
string_view d_str = d_column.template element<string_view>(idx);
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_column.template element<string_view>(idx);
auto const length = d_str.length();
auto const start = starts[idx];
if (start >= length) return 0; // empty string
if (start >= length) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const stop = stops[idx];
auto const end = (((stop < 0) || (stop > length)) ? length : stop);

string_view d_substr = d_str.substr(start, end - start);
if (d_chars) memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes());
return d_substr.size_bytes();
auto const d_substr = d_str.substr(start, end - start);
if (d_chars)
memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes());
else
d_offsets[idx] = d_substr.size_bytes();
}
};

Expand Down Expand Up @@ -212,32 +212,18 @@ std::unique_ptr<column> compute_substrings_from_fn(column_device_view const& d_c
auto strings_count = d_column.size();

// Copy the null mask
rmm::device_buffer null_mask{0, stream, mr};
if (d_column.nullable())
null_mask = rmm::device_buffer(
d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr);

// Build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), substring_from_fn{d_column, starts, stops});
auto offsets_column = cudf::strings::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_new_offsets = offsets_column->view().data<int32_t>();

// Build chars column
auto bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
cudf::strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<cudf::size_type>(0),
strings_count,
substring_from_fn{d_column, starts, stops, d_new_offsets, d_chars});
rmm::device_buffer null_mask =
!d_column.nullable()
? rmm::device_buffer{0, stream, mr}
: rmm::device_buffer(
d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr);

auto children = make_strings_children(
substring_from_fn{d_column, starts, stops}, strings_count, null_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
null_count,
std::move(null_mask),
stream,
Expand Down
32 changes: 32 additions & 0 deletions cpp/tests/merge/merge_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,4 +729,36 @@ TEST_F(MergeTest, KeysWithNulls)
}
}

// Typed fixture for fixed-point merge tests; instantiated once per type in
// cudf::test::FixedPointTypes by the TYPED_TEST_CASE registration below.
template <typename T>
struct FixedPointTestBothReps : cudf::test::BaseFixture {};

// Shorthand for a fixed-point column wrapper over the representation type Rep.
template <typename Rep>
using fp_wrapper = cudf::test::fixed_point_column_wrapper<Rep>;

TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes);

// Merges two single-column fixed-point tables (both with scale -1) on column 0 in ascending
// order and checks that the result column holds all eight values in ascending order.
TYPED_TEST(FixedPointTestBothReps, FixedPointMerge)
{
  using namespace numeric;
  using RepType     = cudf::device_storage_type_t<TypeParam>;
  using column_list = std::vector<cudf::column_view>;

  // Inputs: each column is already in ascending order.
  auto const lhs_col = fp_wrapper<RepType>{{4, 22, 33, 44, 55}, scale_type{-1}};
  auto const rhs_col = fp_wrapper<RepType>{{5, 7, 10}, scale_type{-1}};
  auto const lhs_tbl = cudf::table_view(column_list{lhs_col});
  auto const rhs_tbl = cudf::table_view(column_list{rhs_col});
  auto const inputs  = std::vector<cudf::table_view>{lhs_tbl, rhs_tbl};

  // Merge on the only column, ascending.
  auto const keys      = std::vector<cudf::size_type>{0};
  auto const key_order = std::vector<cudf::order>{cudf::order::ASCENDING};

  auto const expected_col = fp_wrapper<RepType>{{4, 5, 7, 10, 22, 33, 44, 55}, scale_type{-1}};
  auto const expected_tbl = cudf::table_view(column_list{expected_col});

  auto const merged = cudf::merge(inputs, keys, key_order);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_tbl.column(0), merged->view().column(0));
}

CUDF_TEST_PROGRAM_MAIN()
21 changes: 21 additions & 0 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ public long getDeviceMemorySize() {

private static native ContiguousTable[] contiguousSplit(long inputTable, int[] indices);

  // Native counterpart of partition(ColumnView, int). Receives the native view handles of the
  // table and of the partition map, plus the partition count. The returned long[] is handed to
  // the Table constructor by the Java caller.
  // NOTE(review): outputOffsets is presumably filled in by the native side with one offset per
  // partition -- confirm against the JNI implementation.
  private static native long[] partition(long inputTable, long partitionView,
int numberOfPartitions, int[] outputOffsets);

private static native long[] hashPartition(long inputTable,
int[] columnsToHash,
int hashTypeId,
Expand Down Expand Up @@ -1257,6 +1260,24 @@ public Table repeat(ColumnVector counts, boolean checkCount) {
return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView(), checkCount));
}

/**
* Partition this table using the mapping in partitionMap. partitionMap must be an integer
* column. The number of rows in partitionMap must be the same as this table. Each row
* in the map will indicate which partition the rows in the table belong to.
* @param partitionMap the partitions for each row.
* @param numberOfPartitions number of partitions
* @return {@link PartitionedTable} Table that exposes a limited functionality of the
* {@link Table} class
*/
public PartitionedTable partition(ColumnView partitionMap, int numberOfPartitions) {
int[] partitionOffsets = new int[numberOfPartitions];
return new PartitionedTable(new Table(partition(
getNativeView(),
partitionMap.getNativeView(),
partitionOffsets.length,
partitionOffsets)), partitionOffsets);
}

/**
* Find smallest indices in a sorted table where values should be inserted to maintain order.
* <pre>
Expand Down
Loading

0 comments on commit 88ae901

Please sign in to comment.