Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into sort_values_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Feb 20, 2024
2 parents cb2e1e6 + 8a673cd commit 21a368e
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 82 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -57,7 +57,7 @@ jobs:
if: github.ref_type == 'branch'
needs: python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
arch: "amd64"
branch: ${{ inputs.branch }}
Expand All @@ -69,7 +69,7 @@ jobs:
sha: ${{ inputs.sha }}
wheel-build-cudf:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
build-2_28-wheels: "true"
Expand All @@ -80,7 +80,7 @@ jobs:
wheel-publish-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -90,7 +90,7 @@ jobs:
wheel-build-dask-cudf:
needs: wheel-publish-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
build_type: ${{ inputs.build_type || 'branch' }}
Expand All @@ -101,7 +101,7 @@ jobs:
wheel-publish-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand Down
36 changes: 18 additions & 18 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,16 @@ jobs:
#- pandas-tests-diff
#- pandas-tests-diff-comment
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04
with:
build_type: pull-request
conda-cpp-checks:
Expand All @@ -54,34 +54,34 @@ jobs:
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04
with:
build_type: pull-request
conda-python-cudf-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
with:
build_type: pull-request
test_script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
with:
build_type: pull-request
test_script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand All @@ -91,7 +91,7 @@ jobs:
conda-notebook-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand All @@ -101,7 +101,7 @@ jobs:
docs-build:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand All @@ -111,37 +111,37 @@ jobs:
wheel-build-cudf:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
with:
build_type: pull-request
build-2_28-wheels: "true"
script: "ci/build_wheel_cudf.sh"
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
build_type: pull-request
script: "ci/build_wheel_dask_cudf.sh"
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
devcontainer:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@test-cuda-12.2
uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04
with:
build_command: |
sccache -z;
Expand All @@ -150,7 +150,7 @@ jobs:
unit-tests-cudf-pandas:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
build_type: pull-request
Expand All @@ -159,7 +159,7 @@ jobs:
# run the Pandas unit tests using PR branch
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
build_type: pull-request
Expand All @@ -171,7 +171,7 @@ jobs:
# needs: [pandas-tests-main, pandas-tests-pr]
# secrets: inherit
# # This branch exports a `job_output` output that the downstream job reads.
# uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
# uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
# with:
# node_type: cpu4
# build_type: pull-request
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ jobs:
enable_check_symbols: true
conda-cpp-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-cpp-memcheck-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -45,7 +45,7 @@ jobs:
run_script: "ci/test_cpp_memcheck.sh"
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -55,7 +55,7 @@ jobs:
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -64,7 +64,7 @@ jobs:
test_script: "ci/test_python_other.sh"
conda-java-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -76,7 +76,7 @@ jobs:
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -88,7 +88,7 @@ jobs:
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -97,7 +97,7 @@ jobs:
script: ci/test_wheel_cudf.sh
wheel-tests-dask-cudf:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
build_type: nightly
Expand All @@ -107,7 +107,7 @@ jobs:
script: ci/test_wheel_dask_cudf.sh
unit-tests-cudf-pandas:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -117,7 +117,7 @@ jobs:
pandas-tests:
# run the Pandas unit tests
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@test-cuda-12.2
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
build_type: nightly
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/io/orc/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <cudf/detail/timezone.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/bit.hpp>
Expand Down Expand Up @@ -100,7 +101,9 @@ std::size_t gather_stream_info(std::size_t stripe_index,

for (auto const& stream : stripefooter->streams) {
if (!stream.column_id || *stream.column_id >= orc2gdf.size()) {
dst_offset += stream.length;
// Ignore reading this stream from source.
cudf::logger().warn("Unexpected stream in the input ORC source. The stream will be ignored.");
src_offset += stream.length;
continue;
}

Expand All @@ -125,8 +128,7 @@ std::size_t gather_stream_info(std::size_t stripe_index,
}
}
}
}
if (col != -1) {
} else if (col != -1) {
if (src_offset >= stripeinfo->indexLength || use_index) {
auto& chunk = chunks[stripe_index][col];
auto const index_type = get_stream_index_type(stream.kind);
Expand Down
40 changes: 40 additions & 0 deletions cpp/src/strings/split/split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,46 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,

} // namespace

/**
 * Builds an offsets column describing how many entries of `positions` fall
 * within each row of `input`.
 *
 * Each position is mapped to a row by a vectorized upper-bound search over the
 * first `input.size()` offsets of `input`; the per-row tallies are then turned
 * into offsets by `make_offsets_child_column`.
 *
 * @param input     Strings column whose offsets define the row boundaries
 * @param positions Device span of positions to bucket by row
 *                  (presumably byte positions in the chars data, sorted or not -- TODO confirm with caller)
 * @param stream    CUDA stream on which the work is enqueued (nosync policy is used throughout)
 * @param mr        Memory resource handed to `make_offsets_child_column` for the returned column
 * @return The first element of the result of `make_offsets_child_column`
 */
std::unique_ptr<column> create_offsets_from_positions(strings_column_view const& input,
                                                      device_span<int64_t const> const& positions,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::mr::device_memory_resource* mr)
{
  auto const offsets_begin =
    cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());
  auto const offsets_end = offsets_begin + input.size();

  // Step 1: for every position, find the upper-bound index into the row offsets.
  auto row_bounds = rmm::device_uvector<size_type>(positions.size(), stream);
  thrust::upper_bound(rmm::exec_policy_nosync(stream),
                      offsets_begin,
                      offsets_end,
                      positions.begin(),
                      positions.end(),
                      row_bounds.begin());

  // Step 2: allocate one tally per row, starting at zero so that rows which
  // receive no positions (including null rows) report a count of 0.
  auto per_row_counts = rmm::device_uvector<size_type>(input.size(), stream);
  thrust::uninitialized_fill(
    rmm::exec_policy_nosync(stream), per_row_counts.begin(), per_row_counts.end(), 0);

  // Step 3: each position atomically bumps the tally of the row it belongs to.
  auto* const tallies = per_row_counts.data();
  auto* const bounds  = row_bounds.data();
  thrust::for_each_n(rmm::exec_policy_nosync(stream),
                     thrust::counting_iterator<int64_t>(0),
                     positions.size(),
                     [bounds, tallies] __device__(int64_t pos_idx) {
                       // upper_bound yields one past the owning row, hence the -1
                       // (assumes no position precedes the first offset -- TODO confirm)
                       auto const row = bounds[pos_idx] - 1;
                       cuda::atomic_ref<size_type, cuda::thread_scope_device> tally{tallies[row]};
                       tally.fetch_add(1L, cuda::std::memory_order_relaxed);
                     });

  // Step 4: turn the per-row tallies into an offsets column and hand it back.
  return std::get<0>(cudf::strings::detail::make_offsets_child_column(
    per_row_counts.begin(), per_row_counts.end(), stream, mr));
}

std::unique_ptr<table> split(strings_column_view const& strings_column,
string_scalar const& delimiter,
size_type maxsplit,
Expand Down
Loading

0 comments on commit 21a368e

Please sign in to comment.