Skip to content

Commit

Permalink
Fix string functors to adapt to the new make_strings_children from cudf (#2034)
Browse files Browse the repository at this point in the history

* Fix string functors

Signed-off-by: Nghia Truong <[email protected]>

* Fix style

Signed-off-by: Nghia Truong <[email protected]>

* Include changes from cudf

Signed-off-by: Nghia Truong <[email protected]>

---------

Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia authored May 13, 2024
1 parent 324f89a commit 19d8d48
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 26 deletions.
9 changes: 5 additions & 4 deletions src/main/cpp/src/cast_decimal_to_string.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ namespace {
template <typename DecimalType>
struct decimal_to_non_ansi_string_fn {
column_device_view d_decimals;
size_type* d_offsets{};
char* d_chars{};
cudf::size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Calculates the size of the string required to convert the element, in base-10 format.
Expand Down Expand Up @@ -162,13 +163,13 @@ struct decimal_to_non_ansi_string_fn {
__device__ void operator()(size_type idx)
{
if (d_decimals.is_null(idx)) {
if (d_chars == nullptr) { d_offsets[idx] = 0; }
if (d_chars == nullptr) { d_sizes[idx] = 0; }
return;
}
if (d_chars != nullptr) {
decimal_to_non_ansi_string(idx);
} else {
d_offsets[idx] = compute_output_size(d_decimals.element<DecimalType>(idx));
d_sizes[idx] = compute_output_size(d_decimals.element<DecimalType>(idx));
}
}
};
Expand Down
11 changes: 6 additions & 5 deletions src/main/cpp/src/cast_float_to_string.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,8 +34,9 @@ namespace {
template <typename FloatType>
struct float_to_string_fn {
cudf::column_device_view d_floats;
cudf::size_type* d_offsets;
cudf::size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

__device__ cudf::size_type compute_output_size(cudf::size_type idx) const
{
Expand All @@ -56,13 +57,13 @@ struct float_to_string_fn {
__device__ void operator()(cudf::size_type idx) const
{
if (d_floats.is_null(idx)) {
if (d_chars == nullptr) { d_offsets[idx] = 0; }
if (d_chars == nullptr) { d_sizes[idx] = 0; }
return;
}
if (d_chars != nullptr) {
float_to_string(idx);
} else {
d_offsets[idx] = compute_output_size(idx);
d_sizes[idx] = compute_output_size(idx);
}
}
};
Expand Down Expand Up @@ -124,4 +125,4 @@ std::unique_ptr<cudf::column> float_to_string(cudf::column_view const& floats,
return detail::float_to_string(floats, stream, mr);
}

} // namespace spark_rapids_jni
} // namespace spark_rapids_jni
9 changes: 5 additions & 4 deletions src/main/cpp/src/format_float.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ template <typename FloatType>
struct format_float_fn {
cudf::column_device_view d_floats;
int digits;
cudf::size_type* d_offsets;
cudf::size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

__device__ cudf::size_type compute_output_size(FloatType const value) const
{
Expand All @@ -56,13 +57,13 @@ struct format_float_fn {
__device__ void operator()(cudf::size_type const idx) const
{
if (d_floats.is_null(idx)) {
if (d_chars == nullptr) { d_offsets[idx] = 0; }
if (d_chars == nullptr) { d_sizes[idx] = 0; }
return;
}
if (d_chars != nullptr) {
format_float(idx);
} else {
d_offsets[idx] = compute_output_size(d_floats.element<FloatType>(idx));
d_sizes[idx] = compute_output_size(d_floats.element<FloatType>(idx));
}
}
};
Expand Down Expand Up @@ -128,4 +129,4 @@ std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
return detail::format_float(floats, digits, stream, mr);
}

} // namespace spark_rapids_jni
} // namespace spark_rapids_jni
19 changes: 7 additions & 12 deletions src/main/cpp/src/map_utils.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@

#include "map_utils_debug.cuh"

//
#include <limits>

//
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
Expand All @@ -31,11 +27,11 @@
#include <cudf/strings/string_view.hpp>
#include <cudf/strings/strings_column_view.hpp>

//
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

//
#include <cub/device/device_radix_sort.cuh>
#include <cuda/functional>
#include <thrust/binary_search.h>
#include <thrust/copy.h>
#include <thrust/count.h>
Expand All @@ -51,9 +47,7 @@
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>

//
#include <cub/device/device_radix_sort.cuh>
#include <cuda/functional>
#include <limits>

namespace spark_rapids_jni {

Expand Down Expand Up @@ -520,8 +514,9 @@ struct substring_fn {
cudf::device_span<char const> const d_string;
cudf::device_span<thrust::pair<SymbolOffsetT, SymbolOffsetT> const> const d_ranges;

cudf::size_type* d_offsets{};
char* d_chars{};
cudf::size_type* d_sizes;
char* d_chars;
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(cudf::size_type const idx)
{
Expand All @@ -530,7 +525,7 @@ struct substring_fn {
if (d_chars) {
memcpy(d_chars + d_offsets[idx], d_string.data() + range.first, size);
} else {
d_offsets[idx] = size;
d_sizes[idx] = size;
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/cudf
Submodule cudf updated 86 files
+11 −5 .github/CODEOWNERS
+3 −0 .pre-commit-config.yaml
+1 −1 ci/configure_cpp_static.sh
+3 −0 ci/run_cudf_examples.sh
+6 −6 conda/environments/all_cuda-118_arch-x86_64.yaml
+6 −6 conda/environments/all_cuda-122_arch-x86_64.yaml
+1 −1 conda/recipes/cudf/meta.yaml
+1 −1 conda/recipes/libcudf/conda_build_config.yaml
+81 −105 cpp/benchmarks/join/conditional_join.cu
+16 −67 cpp/benchmarks/join/distinct_join.cu
+27 −134 cpp/benchmarks/join/join.cu
+64 −69 cpp/benchmarks/join/join_common.hpp
+30 −30 cpp/benchmarks/join/left_join.cu
+46 −216 cpp/benchmarks/join/mixed_join.cu
+2 −2 cpp/benchmarks/json/json.cu
+15 −2 cpp/cmake/thirdparty/get_arrow.cmake
+71 −25 cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+1 −0 cpp/examples/build.sh
+25 −0 cpp/examples/parquet_io/CMakeLists.txt
+ cpp/examples/parquet_io/example.parquet
+172 −0 cpp/examples/parquet_io/parquet_io.cpp
+157 −0 cpp/examples/parquet_io/parquet_io.hpp
+4 −4 cpp/include/cudf/io/parquet.hpp
+150 −88 cpp/include/cudf/strings/detail/strings_children.cuh
+0 −186 cpp/include/cudf/strings/detail/strings_children_ex.cuh
+2 −2 cpp/src/io/csv/writer_impl.cu
+28 −29 cpp/src/io/json/read_json.cu
+3 −3 cpp/src/io/json/write_json.cu
+2 −3 cpp/src/strings/capitalize.cu
+3 −5 cpp/src/strings/case.cu
+2 −3 cpp/src/strings/char_types/char_types.cu
+3 −4 cpp/src/strings/combine/concatenate.cu
+2 −2 cpp/src/strings/combine/join.cu
+3 −3 cpp/src/strings/combine/join_list_elements.cu
+3 −3 cpp/src/strings/convert/convert_booleans.cu
+2 −2 cpp/src/strings/convert/convert_datetime.cu
+6 −6 cpp/src/strings/convert/convert_durations.cu
+3 −3 cpp/src/strings/convert/convert_fixed_point.cu
+3 −3 cpp/src/strings/convert/convert_floats.cu
+3 −3 cpp/src/strings/convert/convert_hex.cu
+3 −3 cpp/src/strings/convert/convert_integers.cu
+4 −4 cpp/src/strings/convert/convert_ipv4.cu
+2 −2 cpp/src/strings/convert/convert_lists.cu
+2 −2 cpp/src/strings/convert/convert_urls.cu
+2 −3 cpp/src/strings/filter_chars.cu
+5 −5 cpp/src/strings/padding.cu
+4 −4 cpp/src/strings/repeat_strings.cu
+2 −2 cpp/src/strings/replace/multi.cu
+2 −2 cpp/src/strings/replace/multi_re.cu
+2 −2 cpp/src/strings/replace/replace.cu
+2 −2 cpp/src/strings/replace/replace_slice.cu
+2 −2 cpp/src/strings/slice.cu
+2 −2 cpp/src/strings/translate.cu
+2 −2 cpp/src/text/detokenize.cu
+3 −3 cpp/src/text/generate_ngrams.cu
+3 −3 cpp/src/text/normalize.cu
+3 −3 cpp/src/text/replace.cu
+6 −2 cpp/tests/CMakeLists.txt
+107 −3 cpp/tests/io/json_chunked_reader.cpp
+64 −0 cpp/tests/large_strings/reshape_tests.cpp
+39 −13 dependencies.yaml
+4 −5 java/src/test/java/ai/rapids/cudf/CudaTest.java
+12 −2 python/cudf/cudf/_lib/cpp/io/parquet.pxd
+25 −3 python/cudf/cudf/_lib/parquet.pyx
+23 −0 python/cudf/cudf/core/column/categorical.py
+33 −14 python/cudf/cudf/core/column_accessor.py
+5 −0 python/cudf/cudf/core/dataframe.py
+8 −2 python/cudf/cudf/core/index.py
+4 −5 python/cudf/cudf/io/parquet.py
+ python/cudf/cudf/tests/data/parquet/usec_timestamp.parquet
+19 −0 python/cudf/cudf/tests/test_categorical.py
+14 −0 python/cudf/cudf/tests/test_column_accessor.py
+19 −13 python/cudf/cudf/tests/test_dataframe.py
+5 −10 python/cudf/cudf/tests/test_index.py
+38 −3 python/cudf/cudf/tests/test_parquet.py
+8 −3 python/cudf/cudf/utils/ioutils.py
+3 −3 python/cudf/pyproject.toml
+1 −1 python/cudf_kafka/pyproject.toml
+1 −0 python/cudf_polars/LICENSE
+1 −0 python/cudf_polars/README.md
+1 −0 python/cudf_polars/cudf_polars/VERSION
+13 −0 python/cudf_polars/cudf_polars/__init__.py
+171 −0 python/cudf_polars/pyproject.toml
+1 −1 python/dask_cudf/dask_cudf/backends.py
+1 −3 python/dask_cudf/dask_cudf/io/tests/test_parquet.py
+1 −1 python/dask_cudf/dask_cudf/tests/test_sort.py

0 comments on commit 19d8d48

Please sign in to comment.