Skip to content

Commit

Permalink
Merge branch 'branch-23.04' into perf-split-record
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Feb 8, 2023
2 parents 436cd58 + 0161ba8 commit d7dcb2a
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 101 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -37,7 +37,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -46,7 +46,7 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -55,7 +55,7 @@ jobs:
skip_upload_pkgs: libcudf-example
wheel-build-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -67,7 +67,7 @@ jobs:
wheel-publish-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -77,7 +77,7 @@ jobs:
wheel-build-dask-cudf:
needs: wheel-publish-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -88,7 +88,7 @@ jobs:
wheel-publish-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@branch-23.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand Down
26 changes: 13 additions & 13 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,47 @@ jobs:
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.04
checks:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.04
conda-cpp-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.04
with:
build_type: pull-request
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@branch-23.04
with:
build_type: pull-request
conda-python-cudf-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: pull-request
test_script: "ci/test_python_cudf.sh"
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: pull-request
test_script: "ci/test_python_other.sh"
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: pull-request
node_type: "gpu-latest-1"
Expand All @@ -75,7 +75,7 @@ jobs:
conda-notebook-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: pull-request
node_type: "gpu-latest-1"
Expand All @@ -85,7 +85,7 @@ jobs:
wheel-build-cudf:
needs: checks
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@branch-23.04
with:
build_type: pull-request
package-name: cudf
Expand All @@ -94,7 +94,7 @@ jobs:
wheel-tests-cudf:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
with:
build_type: pull-request
package-name: cudf
Expand All @@ -106,7 +106,7 @@ jobs:
wheel-build-dask-cudf:
needs: wheel-tests-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@branch-23.04
with:
build_type: pull-request
package-name: dask_cudf
Expand All @@ -115,7 +115,7 @@ jobs:
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
with:
build_type: pull-request
package-name: dask_cudf
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ on:
jobs:
conda-cpp-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -34,7 +34,7 @@ jobs:
conda-python-other-tests:
# Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -43,7 +43,7 @@ jobs:
test_script: "ci/test_python_other.sh"
conda-java-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -55,7 +55,7 @@ jobs:
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -67,7 +67,7 @@ jobs:
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand All @@ -78,7 +78,7 @@ jobs:
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
wheel-tests-dask-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.02
uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@branch-23.04
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand Down
50 changes: 42 additions & 8 deletions cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,41 @@ struct page_state_s {
* specified row bounds
*
* @param s The page to be checked
* @param min_row The starting row index
* @param start_row The starting row index
* @param num_rows The number of rows
*
* @return True if the page spans the beginning or the end of the row bounds
*/
inline __device__ bool is_bounds_page(page_state_s* const s, size_t min_row, size_t num_rows)
inline __device__ bool is_bounds_page(page_state_s* const s, size_t start_row, size_t num_rows)
{
size_t const page_begin = s->col.start_row + s->page.chunk_row;
size_t const page_end = page_begin + s->page.num_rows;
size_t const begin = min_row;
size_t const end = min_row + num_rows;
size_t const begin = start_row;
size_t const end = start_row + num_rows;

return ((page_begin <= begin && page_end >= begin) || (page_begin <= end && page_end >= end));
}

/**
 * @brief Tests whether a page lies entirely inside the requested row range
 *
 * The page's first row is computed as `s->col.start_row + s->page.chunk_row` and its
 * end as that value plus `s->page.num_rows`. The requested range runs from `start_row`
 * to `start_row + num_rows`.
 *
 * @param s The page to be checked
 * @param start_row The first row of the requested range
 * @param num_rows The number of rows in the requested range
 *
 * @return True if the page begins at or after `start_row` and ends at or before
 * `start_row + num_rows`
 */
inline __device__ bool is_page_contained(page_state_s* const s, size_t start_row, size_t num_rows)
{
  size_t const page_first = s->col.start_row + s->page.chunk_row;
  size_t const page_stop  = page_first + s->page.num_rows;
  size_t const range_stop = start_row + num_rows;

  // a page that begins before the requested range cannot be contained in it
  if (page_first < start_row) { return false; }

  return page_stop <= range_stop;
}

/**
* @brief Read a 32-bit varint integer
*
Expand Down Expand Up @@ -1728,10 +1749,11 @@ __global__ void __launch_bounds__(block_size)
auto const thread_depth = depth + t;
if (thread_depth < s->page.num_output_nesting_levels) {
// if we are not a bounding page (as checked above) then we are either
// returning 0 rows from the page (completely outside the bounds) or all
// rows in the page (completely within the bounds)
// returning all rows/values from this page, or 0 of them
pp->nesting[thread_depth].batch_size =
s->num_rows == 0 ? 0 : pp->nesting[thread_depth].size;
(s->num_rows == 0 && !is_page_contained(s, min_row, num_rows))
? 0
: pp->nesting[thread_depth].size;
}
depth += blockDim.x;
}
Expand Down Expand Up @@ -1838,7 +1860,19 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0;

// exit early if there is no work to do in this page (e.g., in a skip_rows/num_rows case).
if (s->num_rows == 0 && !(has_repetition && is_bounds_page(s, min_row, num_rows))) { return; }
//
// corner case: in the case of lists, we can have pages that contain "0" rows if the current row
// starts before this page and ends after this page:
//     P0        P1         P2
// |---------|---------|----------|
//       ^------------------^
//   row start           row end
// P1 will contain 0 rows
//
if (s->num_rows == 0 && !(has_repetition && (is_bounds_page(s, min_row, num_rows) ||
is_page_contained(s, min_row, num_rows)))) {
return;
}

if (s->dict_base) {
out_thread0 = (s->dict_bits > 0) ? 64 : 32;
Expand Down
Loading

0 comments on commit d7dcb2a

Please sign in to comment.