Skip to content

Commit

Permalink
Move strings_udf code into cuDF (#12669)
Browse files Browse the repository at this point in the history
With the merge of #11452, we have the machinery to build and deploy PTX libraries of shim functions as part of cuDF's build process. With this, there is no reason to keep the `strings_udf` code separate anymore. This PR removes the separate package and all of its related CI plumbing, and supports the strings feature by default, just like GroupBy.

Authors:
  - https://github.com/brandon-b-miller
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: #12669
  • Loading branch information
brandon-b-miller authored Feb 22, 2023
1 parent 904b8c7 commit f90ae52
Show file tree
Hide file tree
Showing 61 changed files with 1,613 additions and 5,149 deletions.
1 change: 0 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
python/cudf/cudf/_version.py export-subst
python/strings_udf/strings_udf/_version.py export-subst
python/cudf_kafka/cudf_kafka/_version.py export-subst
python/custreamz/custreamz/_version.py export-subst
python/dask_cudf/dask_cudf/_version.py export-subst
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ python/cudf_kafka/*/_lib/**/*.cpp
python/cudf_kafka/*/_lib/**/*.h
python/custreamz/*/_lib/**/*.cpp
python/custreamz/*/_lib/**/*.h
python/strings_udf/strings_udf/_lib/*.cpp
python/strings_udf/strings_udf/*.ptx
.Python
env/
develop-eggs/
Expand Down
10 changes: 1 addition & 9 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz strings_udf -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand Down Expand Up @@ -337,14 +337,6 @@ if buildAll || hasArg cudf; then
fi
fi

if buildAll || hasArg strings_udf; then

cd ${REPODIR}/python/strings_udf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
fi

# Build and install the dask_cudf Python package
if buildAll || hasArg dask_cudf; then
Expand Down
7 changes: 1 addition & 6 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

set -euo pipefail

Expand Down Expand Up @@ -38,10 +38,5 @@ rapids-mamba-retry mambabuild \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/custreamz

rapids-mamba-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/strings_udf

rapids-upload-conda-to-s3 python
2 changes: 0 additions & 2 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'
# Python update
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt

# Strings UDF update
sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
Expand Down
36 changes: 0 additions & 36 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,41 +44,5 @@ pytest \
custreamz
popd

set -e
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
strings_udf
set +e

rapids-logger "pytest strings_udf"
pushd python/strings_udf/strings_udf
pytest \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-strings-udf.xml" \
--numprocesses=8 \
--dist=loadscope \
--cov-config=.coveragerc \
--cov=strings_udf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/strings-udf-coverage.xml" \
--cov-report=term \
tests
popd

rapids-logger "pytest cudf with strings_udf"
pushd python/cudf/cudf
pytest \
--cache-clear \
--ignore="benchmarks" \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-strings-udf.xml" \
--numprocesses=8 \
--dist=loadscope \
--cov-config=../.coveragerc \
--cov=cudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-strings-udf-coverage.xml" \
--cov-report=term \
tests/test_udf_masked_ops.py
popd

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
1 change: 1 addition & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ requirements:
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- nvtx >=0.2.1
- packaging
- ptxcompiler >=0.7.0
- cachetools
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0
Expand Down
4 changes: 0 additions & 4 deletions conda/recipes/strings_udf/build.sh

This file was deleted.

14 changes: 0 additions & 14 deletions conda/recipes/strings_udf/conda_build_config.yaml

This file was deleted.

78 changes: 0 additions & 78 deletions conda/recipes/strings_udf/meta.yaml

This file was deleted.

Loading

0 comments on commit f90ae52

Please sign in to comment.