Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cudf_kafka to use skbuild #14292

Merged
merged 33 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
56a677d
First pass for cudf_kafka scikit build
jdye64 Oct 17, 2023
ca69e96
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
jdye64 Oct 17, 2023
1954ff8
Updates to replace CUSPATIAL with CUDF_KAFKA
jdye64 Oct 17, 2023
f4f7fc6
Added _lib file
jdye64 Oct 17, 2023
8660304
Update dependencies.
bdice Oct 17, 2023
b256bdf
Add standard cudf_kafka alias target
vyasr Oct 17, 2023
7eae18f
Fix path
vyasr Oct 17, 2023
922ce92
Fix style
vyasr Oct 17, 2023
2ac5ee5
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Oct 20, 2023
ec8ffa3
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Oct 23, 2023
e259001
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 3, 2023
37dc381
Add LICENSE and README.md.
bdice Nov 3, 2023
af3b838
Add CUDA compilers for building cudf-kafka, revert hacks.
bdice Nov 3, 2023
1c55261
Update conda_build_config.yaml to include cuda compiler keys.
bdice Nov 3, 2023
2dcbe29
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
0ae64e0
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
8cf4f0c
Test some changes.
bdice Nov 8, 2023
918ad25
Rename CUDA_KAFKA to CUDF_KAFKA.
bdice Nov 8, 2023
529338e
Add PyArrow and NumPy include dirs, and fix include paths of kafka.pxd.
bdice Nov 8, 2023
25d47e7
Also set CMAKE_CUDA_ARCHITECTURES for cudf_kafka.
bdice Nov 8, 2023
467eb1c
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
d46a4e2
Try to fix install dir.
bdice Nov 9, 2023
37724a6
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 9, 2023
26a2cdc
Add trailing slash.
bdice Nov 9, 2023
c789362
Only add FIND_CUDF_KAFKA_CPP if requesting to build cudf_kafka.
bdice Nov 9, 2023
f7cf156
No verbose flags in build.
bdice Nov 9, 2023
740e93c
Remove else.
bdice Nov 9, 2023
64a5d61
Remove build_python_cudf_kafka.
bdice Nov 13, 2023
d46f283
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 13, 2023
27841c7
Require libcudf and libcudf_kafka when building cudf_kafka Python pac…
bdice Nov 13, 2023
6d9037e
Style.
bdice Nov 13, 2023
7dd2a5d
Make the find_package call unconditional
vyasr Nov 14, 2023
0989b21
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON"
fi

# Default `-DFIND_CUDF_KAFKA_CPP=ON` into EXTRA_CMAKE_ARGS when either everything or cudf_kafka was
# requested, but leave EXTRA_CMAKE_ARGS untouched if it already mentions that option.
# The braces only spell out how bash already evaluates `a || b && c` (left to right, equal
# precedence); they do not change which branch is taken.
if { buildAll || hasArg cudf_kafka; } \
    && [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_KAFKA_CPP"* ]]; then
    EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_KAFKA_CPP=ON"
fi


# If clean given, run it prior to any other steps
if hasArg clean; then
Expand All @@ -257,7 +262,7 @@ fi
################################################################################
# Configure, build, and install libcudf

if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then
if buildAll || hasArg libcudf || hasArg cudf || hasArg cudf_kafka || hasArg cudfjar; then
vyasr marked this conversation as resolved.
Show resolved Hide resolved
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
Expand Down Expand Up @@ -369,7 +374,7 @@ fi
# build cudf_kafka Python package
if hasArg cudf_kafka; then
cd ${REPODIR}/python/cudf_kafka
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS}" \
vyasr marked this conversation as resolved.
Show resolved Hide resolved
SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \
python -m pip install --no-build-isolation --no-deps .
fi
Expand Down
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'

# Python CMakeLists updates
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt
sed_runner 's/'"cudf_kafka_version .*)"'/'"cudf_kafka_version ${NEXT_FULL_TAG})"'/g' python/cudf_kafka/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
Expand Down
1 change: 0 additions & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ dependencies:
- cmake>=3.26.4
- cramjam
- cuda-cudart-dev
- cuda-gdb
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
Expand Down
13 changes: 0 additions & 13 deletions conda/recipes/cudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
# Need to set CUDA_HOME inside conda environments because the hacked together
# setup.py for cudf-kafka searches that way.
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*}
if [[ ${CUDA_MAJOR} == "12" ]]; then
target_name="x86_64-linux"
if [[ ! $(arch) == "x86_64" ]]; then
target_name="sbsa-linux"
fi
export CUDA_HOME="${PREFIX}/targets/${target_name}/"
fi
./build.sh -v cudf_kafka
6 changes: 6 additions & 0 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ sysroot_version:

cmake_version:
- ">=3.26.4"

cuda_compiler:
- cuda-nvcc

cuda11_compiler:
- nvcc
21 changes: 12 additions & 9 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,31 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- RAPIDS_CUDA_VERSION
ignore_run_exports_from:
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }}
{% endif %}

requirements:
build:
- cmake {{ cmake_version }}
- ninja
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
{% if cuda_major == "12" %}
- cuda-gdb
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }} ={{ cuda_version }}
{% else %}
- {{ compiler('cuda') }}
{% endif %}
- cuda-version ={{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- python
- cython >=3.0.0
- cuda-version ={{ cuda_version }}
- cudf ={{ version }}
- libcudf_kafka ={{ version }}
- scikit-build >=0.13.1
- setuptools
{% if cuda_major == "12" %}
- cuda-cudart-dev
Expand Down
8 changes: 5 additions & 3 deletions cpp/libcudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ include(rapids-export)
include(rapids-find)

project(
CUDA_KAFKA
CUDF_KAFKA
VERSION 23.12.00
LANGUAGES CXX
)
Expand Down Expand Up @@ -64,7 +64,7 @@ add_library(cudf_kafka SHARED src/kafka_consumer.cpp src/kafka_callback.cpp)
# ##################################################################################################
# * include paths ---------------------------------------------------------------------------------
target_include_directories(
cudf_kafka PUBLIC "$<BUILD_INTERFACE:${CUDA_KAFKA_SOURCE_DIR}/include>"
cudf_kafka PUBLIC "$<BUILD_INTERFACE:${CUDF_KAFKA_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>"
)

Expand All @@ -85,6 +85,8 @@ set_target_properties(
CXX_STANDARD_REQUIRED ON
)

add_library(cudf_kafka::cudf_kafka ALIAS cudf_kafka)

# ##################################################################################################
# * cudf_kafka Install ----------------------------------------------------------------------------
rapids_cmake_install_lib_dir(lib_dir)
Expand All @@ -94,7 +96,7 @@ install(
EXPORT cudf_kafka-exports
)

install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION include)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
vyasr marked this conversation as resolved.
Show resolved Hide resolved

rapids_export(
INSTALL cudf_kafka
Expand Down
16 changes: 8 additions & 8 deletions cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -35,21 +35,21 @@ function(find_and_configure_cudf VERSION)
endif()
endfunction()

set(CUDA_KAFKA_MIN_VERSION_cudf
"${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.${CUDA_KAFKA_VERSION_PATCH}"
set(CUDF_KAFKA_MIN_VERSION
"${CUDF_KAFKA_VERSION_MAJOR}.${CUDF_KAFKA_VERSION_MINOR}.${CUDF_KAFKA_VERSION_PATCH}"
)
find_and_configure_cudf(${CUDA_KAFKA_MIN_VERSION_cudf})
find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION})

if(cudf_REQUIRES_CUDA)
rapids_cuda_init_architectures(CUDA_KAFKA)
rapids_cuda_init_architectures(CUDF_KAFKA)

# Since we are building cudf as part of ourselves we need to enable the CUDA language in the
# top-most scope
enable_language(CUDA)

# Since CUDA_KAFKA only enables CUDA optionally we need to manually include the file that
# Since CUDF_KAFKA only enables CUDA optionally we need to manually include the file that
# rapids_cuda_init_architectures relies on `project` calling
if(DEFINED CMAKE_PROJECT_CUDA_KAFKA_INCLUDE)
include("${CMAKE_PROJECT_CUDA_KAFKA_INCLUDE}")
if(DEFINED CMAKE_PROJECT_CUDF_KAFKA_INCLUDE)
include("${CMAKE_PROJECT_CUDF_KAFKA_INCLUDE}")
endif()
endif()
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ function(ConfigureTest test_name)
add_executable(${test_name} ${ARGN})
set_target_properties(
${test_name}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDA_KAFKA_BINARY_DIR}/gtests>"
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_KAFKA_BINARY_DIR}/gtests>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
Expand Down
18 changes: 11 additions & 7 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ files:
- build_all
- build_cpp
- build_wheels
- build_python
- build_python_common
- build_python_cudf
- build_python_cudf_kafka
- cudatoolkit
- develop
- docs
Expand Down Expand Up @@ -71,8 +72,8 @@ files:
table: build-system
includes:
- build_all
- build_python
- build_python_common
- build_python_cudf
- build_wheels
py_run_cudf:
output: pyproject
Expand Down Expand Up @@ -138,8 +139,9 @@ files:
extras:
table: build-system
includes:
- build_wheels
- build_python_common
- build_python_cudf_kafka
- build_wheels
py_run_cudf_kafka:
output: pyproject
pyproject_dir: python/cudf_kafka
Expand Down Expand Up @@ -264,7 +266,7 @@ dependencies:
# Hard pin the patch version used during the build. This must be kept
# in sync with the version pinned in get_arrow.cmake.
- pyarrow==14.0.1.*
build_python:
build_python_cudf:
common:
- output_types: [conda, requirements, pyproject]
packages:
Expand All @@ -276,6 +278,11 @@ dependencies:
- output_types: pyproject
packages:
- protoc-wheel
build_python_cudf_kafka:
common:
- output_types: [conda, requirements, pyproject]
packages:
- scikit-build>=0.13.1
bdice marked this conversation as resolved.
Show resolved Hide resolved
libarrow_run:
common:
- output_types: conda
Expand All @@ -302,9 +309,6 @@ dependencies:
- cuda-nvrtc-dev
- cuda-nvtx-dev
- libcurand-dev
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- cuda-gdb
- matrix:
cuda: "11.8"
packages:
Expand Down
6 changes: 0 additions & 6 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ target_link_libraries(strings_udf cudf_strings_udf)
# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
# /opt/_internal"
find_package(NumPy REQUIRED)
set(targets_using_numpy interop avro csv orc json parquet)
foreach(target IN LISTS targets_using_numpy)
target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
# Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
# target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
endforeach()

set(targets_using_dlpack interop)
foreach(target IN LISTS targets_using_dlpack)
Expand Down
94 changes: 94 additions & 0 deletions python/cudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Minimum CMake release accepted by this project; older versions stop with a fatal error.
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)

# Package version. The release script ci/release/update-version.sh rewrites this line with sed, so
# keep the variable name and the single-line form intact.
set(cudf_kafka_version 23.12.00)

# Shared RAPIDS bootstrap from the repository root (presumably what makes the rapids-* modules
# included below available -- confirm against fetch_rapids.cmake).
include(../../fetch_rapids.cmake)
include(rapids-cuda)
# Called ahead of project() and given the same name that project() receives below.
rapids_cuda_init_architectures(cudf-kafka-python)
vyasr marked this conversation as resolved.
Show resolved Hide resolved

# Declare the Python-package project. The name must stay in sync with the one passed to
# rapids_cuda_init_architectures() above.
#
# TODO: Building Python extension modules via the python_extension_module requires the C language
# to be enabled here. The test project that is built in scikit-build to verify various linking
# options for the python library is hardcoded to build with C, so until that is fixed we need to
# keep C.
project(
  cudf-kafka-python
  VERSION ${cudf_kafka_version}
  LANGUAGES C CXX CUDA
)

# When ON, an installed cudf_kafka C++ package is located with find_package(); when OFF (the
# default), the in-repo C++ sources are added to this build instead.
option(
  FIND_CUDF_KAFKA_CPP
  "Search for existing cudf_kafka C++ installations before defaulting to local files"
  OFF
)

# When ON, the in-tree C++ build is configured for wheel packaging (static CUDA runtime, C++
# targets excluded from the default install, runtime libraries copied next to the package).
option(
  CUDF_KAFKA_BUILD_WHEELS
  "Whether this build is generating a Python wheel."
  OFF
)

# If the user requested it we attempt to find cudf_kafka.
#
# Because the lookup is REQUIRED, configuration stops with an error when FIND_CUDF_KAFKA_CPP is ON
# and no matching package exists; there is no silent fall-through to the in-tree build in that
# case. With the option OFF, cudf_kafka_FOUND is set to OFF explicitly so the in-tree build that
# follows is always taken.
if(FIND_CUDF_KAFKA_CPP)
  find_package(cudf_kafka ${cudf_kafka_version} REQUIRED)
else()
  set(cudf_kafka_FOUND OFF)
endif()

# No pre-built cudf_kafka package is in use: build the C++ library from the in-repo sources as part
# of this project and install it alongside the Python package.
if(NOT cudf_kafka_FOUND)
  # Switch off tests and benchmarks before adding the C++ project (presumably these variables are
  # read by cpp/libcudf_kafka -- confirm there).
  set(BUILD_TESTS OFF)
  set(BUILD_BENCHMARKS OFF)

  # Empty for regular builds; becomes EXCLUDE_FROM_ALL for wheel builds and is forwarded to
  # add_subdirectory() below.
  set(_exclude_from_all "")
  if(CUDF_KAFKA_BUILD_WHEELS)

    # Statically link cudart if building wheels
    set(CUDA_STATIC_RUNTIME ON)
    set(CUDF_KAFKA_USE_CUDF_STATIC ON)
    set(CUDF_KAFKA_EXCLUDE_CUDF_FROM_ALL ON)

    # Always build wheels against the pyarrow libarrow.
    set(USE_LIBARROW_FROM_PYARROW ON)

    # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
    # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
    set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)

    # Don't install the cudf_kafka C++ targets into wheels
    set(_exclude_from_all EXCLUDE_FROM_ALL)
  endif()

  add_subdirectory(../../cpp/libcudf_kafka cudf_kafka-cpp ${_exclude_from_all})

  # Install destination for the shared library, relative to the install prefix. The variable being
  # DEFINED is also what enables the rpath adjustment after the Cython modules are added.
  set(cython_lib_dir cudf_kafka)

  if(CUDF_KAFKA_BUILD_WHEELS)
    include(cmake/Modules/WheelHelpers.cmake)
    get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
    # Ensure all the shared objects we need at runtime are in the wheel
    add_target_libs_to_wheel(
      LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}
    )
  endif()

  # Since there are multiple subpackages of cudf_kafka._lib that require access to libcudf_kafka,
  # we place the library in the cudf_kafka directory as a single source of truth and modify the
  # other rpaths appropriately.
  install(TARGETS cudf_kafka DESTINATION ${cython_lib_dir})
endif()

# Bring in the rapids-cython helpers and initialise them before any Cython module is declared.
include(rapids-cython)
rapids_cython_init()

# The Cython extension modules are declared in the _lib subdirectory.
add_subdirectory(cudf_kafka/_lib)

# cython_lib_dir is only set when libcudf_kafka was built in-tree and installed into the package
# directory; in that case register that directory as an rpath entry for the cudf_kafka target.
if(DEFINED cython_lib_dir)
  rapids_cython_add_rpath_entries(
    TARGET cudf_kafka
    PATHS "${cython_lib_dir}"
  )
endif()
1 change: 1 addition & 0 deletions python/cudf_kafka/LICENSE
1 change: 1 addition & 0 deletions python/cudf_kafka/README.md
Loading
Loading