Skip to content

Commit

Permalink
Merge branch 'branch-24.08' into feat_15699
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel authored Jun 7, 2024
2 parents 9b646d1 + 9bd16bb commit 0c04204
Show file tree
Hide file tree
Showing 87 changed files with 2,584 additions and 688 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-conda/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.2-conda/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-conda"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.2-pip/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-pip"
],
"hostRequirements": {"gpu": "optional"},
"features": {
Expand Down
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ java/ @rapidsai/cudf-java-codeowners
/.pre-commit-config.yaml @rapidsai/ci-codeowners

#packaging code owners
/.devcontainers/ @rapidsai/packaging-codeowners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
/dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ repos:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.3
rev: v0.4.8
hooks:
- id: ruff
files: python/.*$
Expand Down
6 changes: 0 additions & 6 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,13 @@ pushd docs/cudf
make dirhtml
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/html"
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/cudf/html"
make text
mkdir -p "${RAPIDS_DOCS_DIR}/cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/cudf/txt"
popd

rapids-logger "Build dask-cuDF Sphinx docs"
pushd docs/dask_cudf
make dirhtml
mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/html"
mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html"
make text
mkdir -p "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
mv build/text/* "${RAPIDS_DOCS_DIR}/dask-cudf/txt"
popd

rapids-upload-docs
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/offsets_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ struct input_offsetalator : base_normalator<input_offsetalator, int64_t> {
*/
__device__ inline int64_t operator[](size_type idx) const
{
void const* tp = p_ + (idx * this->width_);
void const* tp = p_ + (static_cast<int64_t>(idx) * this->width_);
return this->width_ == sizeof(int32_t) ? static_cast<int64_t>(*static_cast<int32_t const*>(tp))
: *static_cast<int64_t const*>(tp);
}
Expand All @@ -79,7 +79,7 @@ struct input_offsetalator : base_normalator<input_offsetalator, int64_t> {
cudf_assert((dtype.id() == type_id::INT32 || dtype.id() == type_id::INT64) &&
"Unexpected offsets type");
#endif
p_ += (this->width_ * offset);
p_ += (this->width_ * static_cast<int64_t>(offset));
}

protected:
Expand Down Expand Up @@ -121,7 +121,7 @@ struct output_offsetalator : base_normalator<output_offsetalator, int64_t> {
__device__ inline output_offsetalator const operator[](size_type idx) const
{
output_offsetalator tmp{*this};
tmp.p_ += (idx * this->width_);
tmp.p_ += (static_cast<int64_t>(idx) * this->width_);
return tmp;
}

Expand Down
56 changes: 32 additions & 24 deletions cpp/src/io/utilities/data_casting.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/strings/detail/strings_children.cuh>
Expand Down Expand Up @@ -417,6 +418,7 @@ struct bitfield_block {
* @param null_mask Null mask
* @param null_count_data pointer to store null count
* @param options Settings for controlling string processing behavior
* @param d_sizes Output size of each row
* @param d_offsets Offsets to identify where to store the results for each string
* @param d_chars Character array to store the characters of strings
*/
Expand All @@ -427,7 +429,8 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
bitmask_type* null_mask,
size_type* null_count_data,
cudf::io::parse_options_view const options,
size_type* d_offsets,
size_type* d_sizes,
cudf::detail::input_offsetalator d_offsets,
char* d_chars)
{
constexpr auto BLOCK_SIZE =
Expand Down Expand Up @@ -455,7 +458,7 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
istring = get_next_string()) {
// skip nulls
if (null_mask != nullptr && not bit_is_set(null_mask, istring)) {
if (!d_chars && lane == 0) d_offsets[istring] = 0;
if (!d_chars && lane == 0) { d_sizes[istring] = 0; }
continue; // grid-stride return;
}

Expand All @@ -476,7 +479,7 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
if (lane == 0) {
clear_bit(null_mask, istring);
atomicAdd(null_count_data, 1);
if (!d_chars) d_offsets[istring] = 0;
if (!d_chars) { d_sizes[istring] = 0; }
}
continue; // grid-stride return;
}
Expand All @@ -491,7 +494,7 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
// Copy literal/numeric value
if (not is_string_value) {
if (!d_chars) {
if (lane == 0) { d_offsets[istring] = in_end - in_begin; }
if (lane == 0) { d_sizes[istring] = in_end - in_begin; }
} else {
for (thread_index_type char_index = lane; char_index < (in_end - in_begin);
char_index += BLOCK_SIZE) {
Expand Down Expand Up @@ -621,8 +624,8 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
clear_bit(null_mask, istring);
atomicAdd(null_count_data, 1);
}
last_offset = 0;
d_offsets[istring] = 0;
last_offset = 0;
d_sizes[istring] = 0;
}
if constexpr (!is_warp) { __syncthreads(); }
break; // grid-stride return;
Expand Down Expand Up @@ -729,7 +732,7 @@ CUDF_KERNEL void parse_fn_string_parallel(str_tuple_it str_tuples,
}
}
} // char for-loop
if (!d_chars && lane == 0) { d_offsets[istring] = last_offset; }
if (!d_chars && lane == 0) { d_sizes[istring] = last_offset; }
} // grid-stride for-loop
}

Expand All @@ -739,13 +742,14 @@ struct string_parse {
bitmask_type* null_mask;
size_type* null_count_data;
cudf::io::parse_options_view const options;
size_type* d_offsets{};
size_type* d_sizes{};
cudf::detail::input_offsetalator d_offsets;
char* d_chars{};

__device__ void operator()(size_type idx)
{
if (null_mask != nullptr && not bit_is_set(null_mask, idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const in_begin = str_tuples[idx].first;
Expand All @@ -761,7 +765,7 @@ struct string_parse {
if (is_null_literal && null_mask != nullptr) {
clear_bit(null_mask, idx);
atomicAdd(null_count_data, 1);
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
}
Expand All @@ -773,9 +777,9 @@ struct string_parse {
clear_bit(null_mask, idx);
atomicAdd(null_count_data, 1);
}
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
} else {
if (!d_chars) d_offsets[idx] = str_process_info.bytes;
if (!d_chars) { d_sizes[idx] = str_process_info.bytes; }
}
}
};
Expand Down Expand Up @@ -811,13 +815,12 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
size_type{0},
thrust::maximum<size_type>{});

auto offsets = cudf::make_numeric_column(
data_type{type_to_id<size_type>()}, col_size + 1, cudf::mask_state::UNALLOCATED, stream, mr);
auto d_offsets = offsets->mutable_view().data<size_type>();
auto sizes = rmm::device_uvector<size_type>(col_size, stream);
auto d_sizes = sizes.data();
auto null_count_data = d_null_count.data();

auto single_thread_fn = string_parse<decltype(str_tuples)>{
str_tuples, static_cast<bitmask_type*>(null_mask.data()), null_count_data, options, d_offsets};
str_tuples, static_cast<bitmask_type*>(null_mask.data()), null_count_data, options, d_sizes};
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
col_size,
Expand All @@ -838,7 +841,8 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
static_cast<bitmask_type*>(null_mask.data()),
null_count_data,
options,
d_offsets,
d_sizes,
cudf::detail::input_offsetalator{},
nullptr);
}

Expand All @@ -853,20 +857,22 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
static_cast<bitmask_type*>(null_mask.data()),
null_count_data,
options,
d_offsets,
d_sizes,
cudf::detail::input_offsetalator{},
nullptr);
}
auto const bytes =
cudf::detail::sizes_to_offsets(d_offsets, d_offsets + col_size + 1, d_offsets, stream);
CUDF_EXPECTS(bytes <= std::numeric_limits<size_type>::max(),
"Size of output exceeds the column size limit",
std::overflow_error);

auto [offsets, bytes] =
cudf::strings::detail::make_offsets_child_column(sizes.begin(), sizes.end(), stream, mr);
auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());

// CHARS column
rmm::device_uvector<char> chars(bytes, stream, mr);
auto d_chars = chars.data();

single_thread_fn.d_chars = d_chars;
single_thread_fn.d_chars = d_chars;
single_thread_fn.d_offsets = d_offsets;

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
col_size,
Expand All @@ -882,6 +888,7 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
static_cast<bitmask_type*>(null_mask.data()),
null_count_data,
options,
d_sizes,
d_offsets,
d_chars);
}
Expand All @@ -897,6 +904,7 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
static_cast<bitmask_type*>(null_mask.data()),
null_count_data,
options,
d_sizes,
d_offsets,
d_chars);
}
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/quantiles/quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <thrust/iterator/transform_iterator.h>

#include <memory>
#include <stdexcept>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -78,7 +79,8 @@ std::unique_ptr<table> quantiles(table_view const& input,

CUDF_EXPECTS(interp == interpolation::HIGHER || interp == interpolation::LOWER ||
interp == interpolation::NEAREST,
"multi-column quantiles require a non-arithmetic interpolation strategy.");
"multi-column quantiles require a non-arithmetic interpolation strategy.",
std::invalid_argument);

CUDF_EXPECTS(input.num_rows() > 0, "multi-column quantiles require at least one input row.");

Expand Down
35 changes: 18 additions & 17 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ function(ConfigureTest CMAKE_TEST_NAME)
INSTALL_COMPONENT_SET testing
)

set_tests_properties(
${CMAKE_TEST_NAME}
PROPERTIES
ENVIRONMENT
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
)
if(CUDF_BUILD_STREAMS_TEST_UTIL)
set_tests_properties(
${CMAKE_TEST_NAME}
PROPERTIES
ENVIRONMENT
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
)
endif()
endfunction()

# ##################################################################################################
Expand Down Expand Up @@ -401,14 +403,10 @@ ConfigureTest(SPAN_TEST utilities_tests/span_tests.cu)
ConfigureTest(SPAN_TEST_DEVICE_VECTOR utilities_tests/span_tests.cu)

# Overwrite the environments set by ConfigureTest
set_tests_properties(
SPAN_TEST
PROPERTIES
ENVIRONMENT
"GTEST_FILTER=-${_allowlist_filter};GTEST_CUDF_STREAM_MODE=new_cudf_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_cudf>"
)
set_tests_properties(
SPAN_TEST_DEVICE_VECTOR PROPERTIES ENVIRONMENT "GTEST_FILTER=${_allowlist_filter}"
set_property(
TEST SPAN_TEST SPAN_TEST_DEVICE_VECTOR
APPEND
PROPERTY ENVIRONMENT "GTEST_FILTER=-${_allowlist_filter}"
)

# ##################################################################################################
Expand Down Expand Up @@ -572,6 +570,7 @@ ConfigureTest(
large_strings/concatenate_tests.cpp
large_strings/case_tests.cpp
large_strings/large_strings_fixture.cpp
large_strings/many_strings_tests.cpp
large_strings/merge_tests.cpp
large_strings/parquet_tests.cpp
large_strings/reshape_tests.cpp
Expand Down Expand Up @@ -671,9 +670,11 @@ target_include_directories(JIT_PARSER_TEST PRIVATE "$<BUILD_INTERFACE:${CUDF_SOU

# ##################################################################################################
# * stream testing ---------------------------------------------------------------------------------
ConfigureTest(
STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu
)
if(CUDF_BUILD_STREAMS_TEST_UTIL)
ConfigureTest(
STREAM_IDENTIFICATION_TEST identify_stream_usage/test_default_stream_identification.cu
)
endif()

ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing)
Expand Down
1 change: 0 additions & 1 deletion cpp/tests/io/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2374,7 +2374,6 @@ TEST_F(JsonReaderTest, MapTypes)
EXPECT_EQ(col.type().id(), types[i]) << "column[" << i << "].type";
i++;
}
std::cout << "\n";
};

// json
Expand Down
11 changes: 11 additions & 0 deletions cpp/tests/large_strings/large_strings_fixture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,17 @@ cudf::column_view StringsLargeTest::long_column()
return g_ls_data->get_column(name);
}

/**
 * @brief Returns a view of the cached "long2" strings column, creating it on first use.
 *
 * On the first call the column is built from 30,000,000 copies of the
 * string "12345" and stored in the global large-strings data under the key
 * "long2". Every call then returns the view looked up from that store.
 *
 * @return View of the column registered under "long2"
 */
cudf::column_view StringsLargeTest::very_long_column()
{
  std::string key("long2");
  bool const already_cached = g_ls_data->has_key(key);
  if (!already_cached) {
    // Every row holds the same 5-byte value; the constant iterator avoids
    // materializing a host-side container of 30 million strings.
    auto first  = thrust::constant_iterator<std::string_view>("12345");
    auto last   = first + 30'000'000;
    auto column = cudf::test::strings_column_wrapper(first, last);
    g_ls_data->add_column(key, column.release());
  }
  return g_ls_data->get_column(key);
}

std::unique_ptr<LargeStringsData> StringsLargeTest::get_ls_data()
{
CUDF_EXPECTS(g_ls_data == nullptr, "invalid call to get_ls_data");
Expand Down
Loading

0 comments on commit 0c04204

Please sign in to comment.