Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Mar 7, 2023
2 parents f200a5a + 97d8d12 commit 8ff7170
Show file tree
Hide file tree
Showing 62 changed files with 1,048 additions and 315 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,8 @@ jobs:
build_type: pull-request
package-name: cudf
# Install cupy-cuda11x for arm from a special index url
# Install tokenizers last binary wheel to avoid a Rust compile from the latest sdist
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
test-smoketest: "python ./ci/wheel_smoke_test_cudf.py"
wheel-build-dask-cudf:
needs: wheel-tests-cudf
Expand All @@ -125,7 +124,7 @@ jobs:
build_type: pull-request
package-name: dask_cudf
package-dir: python/dask_cudf
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl"
before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && python -m pip install --no-deps ./local-cudf/cudf*.whl"
uses-setup-env-vars: false
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
Expand All @@ -134,5 +133,5 @@ jobs:
with:
build_type: pull-request
package-name: dask_cudf
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl"
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && python -m pip install --no-deps ./local-cudf-dep/cudf*.whl"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: cudf
test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
test-before-arm64: "python -m pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64"
test-unittest: "python -m pytest -v -n 8 ./python/cudf/cudf/tests"
wheel-tests-dask-cudf:
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/[email protected]
Expand All @@ -97,4 +97,4 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: dask_cudf
test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
test-unittest: "python -m pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
7 changes: 4 additions & 3 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ popd


if [[ ${RAPIDS_BUILD_TYPE} == "branch" ]]; then
aws s3 sync --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html"
aws s3 sync --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html"
aws s3 sync --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt"
rapids-logger "Upload Docs to S3"
aws s3 sync --no-progress --delete cpp/doxygen/html "s3://rapidsai-docs/libcudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_html "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/html"
aws s3 sync --no-progress --delete docs/cudf/_text "s3://rapidsai-docs/cudf/${VERSION_NUMBER}/txt"
fi
25 changes: 11 additions & 14 deletions ci/release/apply_wheel_modifications.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,19 @@ sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/dask_cudf/dask
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/custreamz/custreamz/__init__.py

# setup.py versions
sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf/setup.py
sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/dask_cudf/setup.py
sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/cudf_kafka/setup.py
sed -i "s/version=.*,/version=\"${VERSION}\",/g" python/custreamz/setup.py

# cudf setup.py cuda suffixes
sed -i "s/name=\"cudf\"/name=\"cudf${CUDA_SUFFIX}\"/g" python/cudf/setup.py
sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/setup.py
sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/setup.py
sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/setup.py
# pyproject.toml versions
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/dask_cudf/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/cudf_kafka/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/custreamz/pyproject.toml

# cudf pyproject.toml cuda suffixes
sed -i "s/^name = \"cudf\"/name = \"cudf${CUDA_SUFFIX}\"/g" python/cudf/pyproject.toml
sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/cudf/pyproject.toml
sed -i "s/ptxcompiler/ptxcompiler${CUDA_SUFFIX}/g" python/cudf/pyproject.toml
sed -i "s/cubinlinker/cubinlinker${CUDA_SUFFIX}/g" python/cudf/pyproject.toml

# dask_cudf setup.py cuda suffixes
sed -i "s/name=\"dask-cudf\"/name=\"dask-cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/setup.py
# dask_cudf pyproject.toml cuda suffixes
sed -i "s/^name = \"dask_cudf\"/name = \"dask_cudf${CUDA_SUFFIX}\"/g" python/dask_cudf/pyproject.toml
# Need to provide the == to avoid modifying the URL
sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/setup.py
sed -i "s/\"cudf==/\"cudf${CUDA_SUFFIX}==/g" python/dask_cudf/pyproject.toml
15 changes: 6 additions & 9 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/dask
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/cudf_kafka/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/custreamz/custreamz/__init__.py

# Python setup.py updates
sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf/setup.py
sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/dask_cudf/setup.py
sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/cudf_kafka/setup.py
sed_runner "s/version=.*,/version=\"${NEXT_FULL_TAG}\",/g" python/custreamz/setup.py
# Python pyproject.toml updates
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/dask_cudf/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cudf_kafka/pyproject.toml
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/custreamz/pyproject.toml

# rapids-cmake version
sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
Expand Down Expand Up @@ -89,12 +89,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_
# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")

# Dependency versions in setup.py
sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/setup.py
sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/setup.py

# Dependency versions in pyproject.toml
sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/cudf/pyproject.toml
sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/dask_cudf/pyproject.toml

for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-action-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
Expand Down
12 changes: 6 additions & 6 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,31 @@ trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest dask_cudf"
pushd python/dask_cudf
pushd python/dask_cudf/dask_cudf
pytest \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
--numprocesses=8 \
--dist=loadscope \
--cov-config=.coveragerc \
--cov-config=../.coveragerc \
--cov=dask_cudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
--cov-report=term \
dask_cudf
tests
popd

rapids-logger "pytest custreamz"
pushd python/custreamz
pushd python/custreamz/custreamz
pytest \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-custreamz.xml" \
--numprocesses=8 \
--dist=loadscope \
--cov-config=.coveragerc \
--cov-config=../.coveragerc \
--cov=custreamz \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
--cov-report=term \
custreamz
tests
popd

rapids-logger "Test script exiting with value: $EXITCODE"
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ dependencies:
- doxygen=1.8.20
- fastavro>=0.22.9
- fsspec>=0.6.0
- gcc_linux-64=9.*
- gcc_linux-64=11.*
- hypothesis
- ipython
- libarrow=10
- librdkafka=1.7.0
- librmm=23.04.*
- mimesis>=4.1.0
- moto>=4.0.8
- myst-nb
Expand All @@ -49,7 +50,7 @@ dependencies:
- pandoc<=2.0.0
- pip
- pre-commit
- protobuf=4.21
- protobuf>=4.21.6,<4.22
- ptxcompiler
- pyarrow=10
- pydata-sphinx-theme
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
c_compiler_version:
- 9
- 11

cxx_compiler_version:
- 9
- 11

sysroot_version:
- "2.17"
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ requirements:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- protobuf =4.21
- protobuf >=4.21.6,<4.22
- python
- cython >=0.29,<0.30
- scikit-build >=0.13.1
Expand All @@ -57,7 +57,7 @@ requirements:
- rmm ={{ minor_version }}
- cudatoolkit ={{ cuda_version }}
run:
- protobuf =4.21
- protobuf >=4.21.6,<4.22
- python
- typing_extensions
- pandas >=1.0,<1.6.0dev0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
c_compiler_version:
- 9
- 11

cxx_compiler_version:
- 9
- 11

sysroot_version:
- "2.17"
4 changes: 2 additions & 2 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
c_compiler_version:
- 9
- 11

cxx_compiler_version:
- 9
- 11

cuda_compiler:
- nvcc
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,8 @@ ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengt
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cu)
ConfigureNVBench(FST_NVBENCH io/fst.cu)
ConfigureNVBench(NESTED_JSON_NVBENCH io/json/nested_json.cpp)
ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader_input.cpp)
ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)

# ##################################################################################################
# * io benchmark ---------------------------------------------------------------------
Expand Down
128 changes: 128 additions & 0 deletions cpp/benchmarks/io/json/json_reader_input.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/io/json.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

// Size of the data in the benchmark dataframe (512 MiB); chosen to be low enough to allow
// benchmarks to run on most GPUs, but large enough to allow highest throughput
constexpr size_t data_size = 512 << 20;
// Number of columns passed to cycle_dtypes() when building the random benchmark table
constexpr cudf::size_type num_cols = 64;

/**
 * @brief Shared driver for the JSON reader benchmarks.
 *
 * Writes the table described by `write_opts` through cudf::io::write_json, then measures
 * cudf::io::read_json on the source obtained from `source_sink`, and finally attaches
 * throughput, peak-memory and encoded-size figures to the nvbench state.
 *
 * @param write_opts  Writer options used to produce the JSON input that is read back
 * @param source_sink Source/sink pair; its source info feeds the reader options
 * @param state       nvbench state that runs the measurement and receives the summaries
 */
void json_read_common(cudf::io::json_writer_options const& write_opts,
                      cuio_source_sink_pair& source_sink,
                      nvbench::state& state)
{
  // Produce the JSON input first; this happens outside of the timed region.
  cudf::io::write_json(write_opts);

  cudf::io::json_reader_options read_opts =
    cudf::io::json_reader_options::builder(source_sink.make_source_info());

  // Created before the measurement so that its peak value can be reported afterwards.
  auto peak_mem_tracker = cudf::memory_stats_logger();

  auto const default_stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream.value()));

  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
             [&](nvbench::launch& /*unused*/, auto& timer) {
               // Runs ahead of timer.start(), so it is excluded from the timed section.
               try_drop_l3_cache();

               timer.start();
               cudf::io::read_json(read_opts);
               timer.stop();
             });

  // Mean GPU time of the cold runs, as reported by nvbench.
  auto const mean_gpu_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
  auto const throughput    = static_cast<double>(data_size) / mean_gpu_time;

  state.add_element_count(throughput, "bytes_per_second");
  state.add_buffer_size(
    peak_mem_tracker.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
  state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

/**
 * @brief JSON read benchmark parameterized on the I/O type.
 *
 * Builds a random table whose columns cycle through every listed data type group, configures
 * a JSON writer for it (nulls rendered as "null"), and hands off to json_read_common.
 *
 * @tparam IO   I/O type used to construct the source/sink pair
 * @param state nvbench state forwarded to json_read_common
 */
template <cudf::io::io_type IO>
void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
  // Every type group takes part in this benchmark; only the I/O type varies.
  auto const mixed_types = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
                                              static_cast<int32_t>(data_type::FLOAT),
                                              static_cast<int32_t>(data_type::DECIMAL),
                                              static_cast<int32_t>(data_type::TIMESTAMP),
                                              static_cast<int32_t>(data_type::DURATION),
                                              static_cast<int32_t>(data_type::STRING),
                                              static_cast<int32_t>(data_type::LIST),
                                              static_cast<int32_t>(data_type::STRUCT)});

  auto const input_table = create_random_table(
    cycle_dtypes(mixed_types, num_cols), table_size_bytes{data_size}, data_profile_builder());

  cuio_source_sink_pair io_pair(IO);
  cudf::io::json_writer_options const writer_options =
    cudf::io::json_writer_options::builder(io_pair.make_sink_info(), input_table->view())
      .na_rep("null");

  json_read_common(writer_options, io_pair, state);
}

/**
 * @brief JSON read benchmark parameterized on the data type group and the I/O type.
 *
 * Builds a random table restricted to the type(s) returned by get_type_or_group for
 * `DataType`, configures a JSON writer for it (nulls rendered as "null"), and hands off to
 * json_read_common.
 *
 * @tparam DataType Data type group used to generate the table columns
 * @tparam IO       I/O type used to construct the source/sink pair
 * @param state     nvbench state forwarded to json_read_common
 */
template <data_type DataType, cudf::io::io_type IO>
void BM_json_read_data_type(
  nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
  auto const selected_types = get_type_or_group(static_cast<int32_t>(DataType));

  auto const input_table = create_random_table(
    cycle_dtypes(selected_types, num_cols), table_size_bytes{data_size}, data_profile_builder());

  cuio_source_sink_pair io_pair(IO);
  cudf::io::json_writer_options const writer_options =
    cudf::io::json_writer_options::builder(io_pair.make_sink_info(), input_table->view())
      .na_rep("null");

  json_read_common(writer_options, io_pair, state);
}

// Data type groups swept by the "json_read_data_type" benchmark (its "data_type" axis).
using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::FLOAT,
data_type::DECIMAL,
data_type::TIMESTAMP,
data_type::DURATION,
data_type::STRING,
data_type::LIST,
data_type::STRUCT>;

// I/O types swept by the "json_read_io" benchmark (its "io" axis).
using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;

// NOTE(review): this alias is not referenced by either benchmark registration in this file;
// confirm whether it is still needed.
using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

// Registers BM_json_read_data_type over every entry of d_type_list; the I/O axis is fixed to
// DEVICE_BUFFER, so only the data type varies between runs.
NVBENCH_BENCH_TYPES(BM_json_read_data_type,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
.set_name("json_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);

// Registers BM_json_read_io over every entry of io_list; the table contents are the mix of
// type groups built inside BM_json_read_io, so only the I/O type varies between runs.
NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_read_io")
.set_type_axes_names({"io"})
.set_min_samples(4);
Loading

0 comments on commit 8ff7170

Please sign in to comment.