Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cudf_kafka to use skbuild #14292

Merged
merged 33 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
56a677d
First pass for cudf_kafka scikit build
jdye64 Oct 17, 2023
ca69e96
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
jdye64 Oct 17, 2023
1954ff8
Updates to replace CUSPATIAL with CUDF_KAFKA
jdye64 Oct 17, 2023
f4f7fc6
Added _lib file
jdye64 Oct 17, 2023
8660304
Update dependencies.
bdice Oct 17, 2023
b256bdf
Add standard cudf_kafka alias target
vyasr Oct 17, 2023
7eae18f
Fix path
vyasr Oct 17, 2023
922ce92
Fix style
vyasr Oct 17, 2023
2ac5ee5
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Oct 20, 2023
ec8ffa3
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Oct 23, 2023
e259001
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 3, 2023
37dc381
Add LICENSE and README.md.
bdice Nov 3, 2023
af3b838
Add CUDA compilers for building cudf-kafka, revert hacks.
bdice Nov 3, 2023
1c55261
Update conda_build_config.yaml to include cuda compiler keys.
bdice Nov 3, 2023
2dcbe29
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
0ae64e0
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
8cf4f0c
Test some changes.
bdice Nov 8, 2023
918ad25
Rename CUDA_KAFKA to CUDF_KAFKA.
bdice Nov 8, 2023
529338e
Add PyArrow and NumPy include dirs, and fix include paths of kafka.pxd.
bdice Nov 8, 2023
25d47e7
Also set CMAKE_CUDA_ARCHITECTURES for cudf_kafka.
bdice Nov 8, 2023
467eb1c
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 8, 2023
d46a4e2
Try to fix install dir.
bdice Nov 9, 2023
37724a6
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 9, 2023
26a2cdc
Add trailing slash.
bdice Nov 9, 2023
c789362
Only add FIND_CUDF_KAFKA_CPP if requesting to build cudf_kafka.
bdice Nov 9, 2023
f7cf156
No verbose flags in build.
bdice Nov 9, 2023
740e93c
Remove else.
bdice Nov 9, 2023
64a5d61
Remove build_python_cudf_kafka.
bdice Nov 13, 2023
d46f283
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 13, 2023
27841c7
Require libcudf and libcudf_kafka when building cudf_kafka Python pac…
bdice Nov 13, 2023
6d9037e
Style.
bdice Nov 13, 2023
7dd2a5d
Make the find_package call unconditional
vyasr Nov 14, 2023
0989b21
Merge branch 'branch-23.12' into cudf_kafka_scikit_build
bdice Nov 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ fi
# build cudf_kafka Python package
if hasArg cudf_kafka; then
cd ${REPODIR}/python/cudf_kafka
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \
SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS}" \
SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \
python -m pip install --no-build-isolation --no-deps .
fi
Expand Down
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'

# Python CMakeLists updates
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt
sed_runner 's/'"cudf_kafka_version .*)"'/'"cudf_kafka_version ${NEXT_FULL_TAG})"'/g' python/cudf_kafka/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
Expand Down
1 change: 0 additions & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ dependencies:
- cmake>=3.26.4
- cramjam
- cuda-cudart-dev
- cuda-gdb
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
Expand Down
13 changes: 0 additions & 13 deletions conda/recipes/cudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
# Need to set CUDA_HOME inside conda environments because the hacked together
# setup.py for cudf-kafka searches that way.
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*}
if [[ ${CUDA_MAJOR} == "12" ]]; then
target_name="x86_64-linux"
if [[ ! $(arch) == "x86_64" ]]; then
target_name="sbsa-linux"
fi
export CUDA_HOME="${PREFIX}/targets/${target_name}/"
fi
./build.sh -v cudf_kafka
6 changes: 6 additions & 0 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ sysroot_version:

cmake_version:
- ">=3.26.4"

cuda_compiler:
- cuda-nvcc

cuda11_compiler:
- nvcc
21 changes: 12 additions & 9 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,31 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- RAPIDS_CUDA_VERSION
ignore_run_exports_from:
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }}
{% endif %}

requirements:
build:
- cmake {{ cmake_version }}
- ninja
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
{% if cuda_major == "12" %}
- cuda-gdb
{% if cuda_major == "11" %}
- {{ compiler('cuda11') }} ={{ cuda_version }}
{% else %}
- {{ compiler('cuda') }}
{% endif %}
- cuda-version ={{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- python
- cython >=3.0.3
- cuda-version ={{ cuda_version }}
- cudf ={{ version }}
- libcudf_kafka ={{ version }}
- scikit-build >=0.13.1
- setuptools
{% if cuda_major == "12" %}
- cuda-cudart-dev
Expand Down
8 changes: 5 additions & 3 deletions cpp/libcudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ include(rapids-export)
include(rapids-find)

project(
CUDA_KAFKA
CUDF_KAFKA
VERSION 23.12.00
LANGUAGES CXX
)
Expand Down Expand Up @@ -64,7 +64,7 @@ add_library(cudf_kafka SHARED src/kafka_consumer.cpp src/kafka_callback.cpp)
# ##################################################################################################
# * include paths ---------------------------------------------------------------------------------
target_include_directories(
cudf_kafka PUBLIC "$<BUILD_INTERFACE:${CUDA_KAFKA_SOURCE_DIR}/include>"
cudf_kafka PUBLIC "$<BUILD_INTERFACE:${CUDF_KAFKA_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>"
)

Expand All @@ -85,6 +85,8 @@ set_target_properties(
CXX_STANDARD_REQUIRED ON
)

add_library(cudf_kafka::cudf_kafka ALIAS cudf_kafka)

# ##################################################################################################
# * cudf_kafka Install ----------------------------------------------------------------------------
rapids_cmake_install_lib_dir(lib_dir)
Expand All @@ -94,7 +96,7 @@ install(
EXPORT cudf_kafka-exports
)

install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION include)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
vyasr marked this conversation as resolved.
Show resolved Hide resolved

rapids_export(
INSTALL cudf_kafka
Expand Down
16 changes: 8 additions & 8 deletions cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -35,21 +35,21 @@ function(find_and_configure_cudf VERSION)
endif()
endfunction()

set(CUDA_KAFKA_MIN_VERSION_cudf
"${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.${CUDA_KAFKA_VERSION_PATCH}"
set(CUDF_KAFKA_MIN_VERSION
"${CUDF_KAFKA_VERSION_MAJOR}.${CUDF_KAFKA_VERSION_MINOR}.${CUDF_KAFKA_VERSION_PATCH}"
)
find_and_configure_cudf(${CUDA_KAFKA_MIN_VERSION_cudf})
find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION})

if(cudf_REQUIRES_CUDA)
rapids_cuda_init_architectures(CUDA_KAFKA)
rapids_cuda_init_architectures(CUDF_KAFKA)

# Since we are building cudf as part of ourselves we need to enable the CUDA language in the
# top-most scope
enable_language(CUDA)

# Since CUDA_KAFKA only enables CUDA optionally we need to manually include the file that
# Since CUDF_KAFKA only enables CUDA optionally we need to manually include the file that
# rapids_cuda_init_architectures relies on `project` calling
if(DEFINED CMAKE_PROJECT_CUDA_KAFKA_INCLUDE)
include("${CMAKE_PROJECT_CUDA_KAFKA_INCLUDE}")
if(DEFINED CMAKE_PROJECT_CUDF_KAFKA_INCLUDE)
include("${CMAKE_PROJECT_CUDF_KAFKA_INCLUDE}")
endif()
endif()
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ function(ConfigureTest test_name)
add_executable(${test_name} ${ARGN})
set_target_properties(
${test_name}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDA_KAFKA_BINARY_DIR}/gtests>"
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_KAFKA_BINARY_DIR}/gtests>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
Expand Down
13 changes: 5 additions & 8 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ files:
- build_all
- build_cpp
- build_wheels
- build_python
- build_python_common
- build_python_cudf
- cudatoolkit
- develop
- docs
Expand Down Expand Up @@ -71,8 +71,8 @@ files:
table: build-system
includes:
- build_all
- build_python
- build_python_common
- build_python_cudf
- build_wheels
py_run_cudf:
output: pyproject
Expand Down Expand Up @@ -138,8 +138,8 @@ files:
extras:
table: build-system
includes:
- build_wheels
- build_python_common
- build_wheels
py_run_cudf_kafka:
output: pyproject
pyproject_dir: python/cudf_kafka
Expand Down Expand Up @@ -259,16 +259,16 @@ dependencies:
- cython>=3.0.3
# TODO: Pin to numpy<1.25 until cudf requires pandas 2
- &numpy numpy>=1.21,<1.25
- scikit-build>=0.13.1
- output_types: [conda, requirements, pyproject]
packages:
# Hard pin the patch version used during the build. This must be kept
# in sync with the version pinned in get_arrow.cmake.
- pyarrow==14.0.1.*
build_python:
build_python_cudf:
common:
- output_types: [conda, requirements, pyproject]
packages:
- scikit-build>=0.13.1
- rmm==23.12.*
- output_types: conda
packages:
Expand Down Expand Up @@ -302,9 +302,6 @@ dependencies:
- cuda-nvrtc-dev
- cuda-nvtx-dev
- libcurand-dev
# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates
# cudf_kafka to use scikit-build
- cuda-gdb
- matrix:
cuda: "11.8"
packages:
Expand Down
6 changes: 0 additions & 6 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ target_link_libraries(strings_udf cudf_strings_udf)
# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
# /opt/_internal"
find_package(NumPy REQUIRED)
set(targets_using_numpy interop avro csv orc json parquet)
foreach(target IN LISTS targets_using_numpy)
target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
# Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
# target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
endforeach()

set(targets_using_dlpack interop)
foreach(target IN LISTS targets_using_dlpack)
Expand Down
47 changes: 47 additions & 0 deletions python/cudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Must stay the first command: it fixes the policy defaults used by everything below.
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)

# Version of this Python package. It is reused for project() and for the find_package(cudf_kafka)
# request further down. ci/release/update-version.sh rewrites this line with a sed pattern matching
# `cudf_kafka_version <value>)`, so keep the variable name and the single-line form intact.
set(cudf_kafka_version 23.12.00)

# Included before project(); presumably this is what makes the rapids-cmake modules (such as
# rapids-cython, included later in this file) available -- confirm against fetch_rapids.cmake.
include(../../fetch_rapids.cmake)

project(
cudf-kafka-python
VERSION ${cudf_kafka_version}
LANGUAGES # TODO: Building Python extension modules via the python_extension_module requires the C
# language to be enabled here. The test project that is built in scikit-build to verify
# various linking options for the python library is hardcoded to build with C, so until
# that is fixed we need to keep C.
C CXX
)

# The cudf_kafka C++ package is a hard prerequisite of this Python package and is requested at the
# same version as the package itself. REQUIRED makes find_package() stop configuration with a fatal
# error when the package cannot be found, so a follow-up `if(NOT cudf_kafka_FOUND)` check would be
# unreachable and is intentionally not present.
find_package(cudf_kafka ${cudf_kafka_version} REQUIRED)

# Load the rapids-cython helpers and initialize them before any Cython module is declared in the
# subdirectory below.
include(rapids-cython)
rapids_cython_init()

# The extension modules themselves are declared in cudf_kafka/_lib/CMakeLists.txt.
add_subdirectory(cudf_kafka/_lib)

# cython_lib_dir is not set anywhere in this file, so this branch only runs when the variable is
# supplied from outside (e.g. on the command line or by a parent scope).
# NOTE(review): presumably this adds "${cython_lib_dir}" to the RPATH of the modules associated
# with the cudf_kafka target -- confirm against the rapids-cython documentation.
if(DEFINED cython_lib_dir)
rapids_cython_add_rpath_entries(TARGET cudf_kafka PATHS "${cython_lib_dir}")
endif()
1 change: 1 addition & 0 deletions python/cudf_kafka/LICENSE
1 change: 1 addition & 0 deletions python/cudf_kafka/README.md
62 changes: 62 additions & 0 deletions python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Cython sources to compile and the library target handed to the module helper below as
# LINKED_LIBRARIES. cudf_kafka::cudf_kafka is the namespaced target of the C++ library.
set(cython_sources kafka.pyx)
set(linked_libraries cudf_kafka::cudf_kafka)

# Create the extension module(s) for the sources listed above. The targets this helper creates are
# read back later in this file through RAPIDS_CYTHON_CREATED_TARGETS.
# NOTE(review): ASSOCIATED_TARGETS uses the same name (cudf_kafka) as the TARGET passed to
# rapids_cython_add_rpath_entries in the parent CMakeLists.txt -- presumably that is how the two
# calls are tied together; confirm against the rapids-cython documentation.
rapids_cython_create_modules(
CXX ASSOCIATED_TARGETS cudf_kafka
SOURCE_FILES "${cython_sources}"
LINKED_LIBRARIES "${linked_libraries}"
)

# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
# find_package(Python REQUIRED COMPONENTS Development NumPy)

# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
# inside the manylinux images used to build wheels because manylinux images do not contain
# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
# switching back, but it may not work if that implicitly still requires Python libraries. In that
# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
# libs to be found, but that should be a last resort since it implies a dependency that isn't really
# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
# /opt/_internal"
#
# NOTE(review): the top-level CMakeLists.txt of this package already requires CMake 3.26.4, so the
# "until we upgrade to CMake 3.24" condition above appears to be met -- confirm whether the switch
# back to FindPython's NumPy component can now be attempted.
#
# Provides NumPy_INCLUDE_DIRS, which is added to the Cython targets at the end of this file.
find_package(NumPy REQUIRED)

# Provides Python_EXECUTABLE, which is used below to query pyarrow for its include directory.
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)

# Ask the Python interpreter found above where pyarrow keeps its headers. The directory is printed
# on stdout and captured (without the trailing newline) in PYARROW_INCLUDE_DIR; anything written to
# stderr is kept in PYARROW_ERROR so it can be reported if the lookup fails.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
  ERROR_VARIABLE PYARROW_ERROR
  RESULT_VARIABLE PYARROW_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# RESULT_VARIABLE holds either the integer exit code of the command or a string describing an error
# condition. Compare against 0 by variable name rather than expanding the value into the
# condition: an expanded multi-word error string would make the if() itself malformed and hide the
# intended diagnostic, whereas this form treats every non-zero or non-numeric result as a failure.
if(NOT PYARROW_RESULT EQUAL 0)
  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
endif()

# TODO: cudf's scalar.pyx has to cimport pylibcudf's scalar.pyx, because parts of cudf's Cython
# need direct access to the c_obj underlying the pylibcudf Scalar. That pulls the Arrow headers
# into all of cudf, which in turn drags in the NumPy headers. Both include directories can be
# dropped once all scalar-related Cython code has been removed from cudf.
foreach(cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
  target_include_directories(
    ${cython_target} PRIVATE "${NumPy_INCLUDE_DIRS}" "${PYARROW_INCLUDE_DIR}"
  )
endforeach()
4 changes: 2 additions & 2 deletions python/cudf_kafka/cudf_kafka/_lib/kafka.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ from cudf._lib.cpp.io.datasource cimport datasource
from cudf._lib.io.datasource cimport Datasource


cdef extern from "kafka_callback.hpp" \
cdef extern from "cudf_kafka/kafka_callback.hpp" \
namespace "cudf::io::external::kafka" nogil:
ctypedef object (*python_callable_type)()


cdef extern from "kafka_consumer.hpp" \
cdef extern from "cudf_kafka/kafka_consumer.hpp" \
namespace "cudf::io::external::kafka" nogil:

cpdef cppclass kafka_consumer:
Expand Down
1 change: 1 addition & 0 deletions python/cudf_kafka/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ requires = [
"cython>=3.0.3",
"numpy>=1.21,<1.25",
"pyarrow==14.0.1.*",
"scikit-build>=0.13.1",
"setuptools",
"wheel",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
Expand Down
Loading