From 56a677d3fcd978990e5a4d9266d102ab06234206 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 17 Oct 2023 13:15:50 -0400 Subject: [PATCH 01/23] First pass for cudf_kafka scikit build --- dependencies.yaml | 3 +- python/cudf_kafka/CMakeLists.txt | 90 ++++++++++++++++++ .../cmake/Modules/WheelHelpers.cmake | 71 ++++++++++++++ python/cudf_kafka/pyproject.toml | 2 + python/cudf_kafka/setup.py | 95 +------------------ 5 files changed, 170 insertions(+), 91 deletions(-) create mode 100644 python/cudf_kafka/CMakeLists.txt create mode 100644 python/cudf_kafka/cmake/Modules/WheelHelpers.cmake diff --git a/dependencies.yaml b/dependencies.yaml index c19e8765be3..2cd6d1777ff 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -121,8 +121,9 @@ files: extras: table: build-system includes: - - build_wheels - build_python_common + - build_python + - build_wheels py_run_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt new file mode 100644 index 00000000000..4d88627b1be --- /dev/null +++ b/python/cudf_kafka/CMakeLists.txt @@ -0,0 +1,90 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +set(cudf_kafka_version 23.12.00) + +include(../../fetch_rapids.cmake) +include(rapids-cuda) +rapids_cuda_init_architectures(cudf-kafka-python) + +project( + cudf-kafka-python + VERSION ${cudf_kafka_version} + LANGUAGES # TODO: Building Python extension modules via the python_extension_module requires the C + # language to be enabled here. The test project that is built in scikit-build to verify + # various linking options for the python library is hardcoded to build with C, so until + # that is fixed we need to keep C. + C CXX CUDA) + +option(FIND_CUSPATIAL_CPP "Search for existing cuspatial C++ installations before defaulting to local files" + OFF) + +option(CUSPATIAL_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) + +# If the user requested it we attempt to find cuspatial. +if(FIND_CUSPATIAL_CPP) + find_package(cuspatial ${cuspatial_version}) +else() + set(cuspatial_FOUND OFF) +endif() + +if(NOT cuspatial_FOUND) + set(BUILD_TESTS OFF) + set(BUILD_BENCHMARKS OFF) + set(_exclude_from_all "") + if(CUSPATIAL_BUILD_WHEELS) + + # Statically link cudart if building wheels + set(CUDA_STATIC_RUNTIME ON) + set(CUSPATIAL_USE_CUDF_STATIC ON) + set(CUSPATIAL_EXCLUDE_CUDF_FROM_ALL ON) + + # Always build wheels against the pyarrow libarrow. + set(USE_LIBARROW_FROM_PYARROW ON) + + # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp + # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL + set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) + + # Don't install the cuSpatial C++ targets into wheels + set(_exclude_from_all EXCLUDE_FROM_ALL) + endif() + + add_subdirectory(../../cpp cuspatial-cpp ${_exclude_from_all}) + + set(cython_lib_dir cuspatial) + + if(CUSPATIAL_BUILD_WHEELS) + include(cmake/Modules/WheelHelpers.cmake) + get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) + # Ensure all the shared objects we need at runtime are in the wheel + add_target_libs_to_wheel(LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}) + endif() + + # Since there are multiple subpackages of cuspatial._lib that require access to libcuspatial, we place the + # library in the cuspatial directory as a single source of truth and modify the other rpaths + # appropriately. + install(TARGETS cuspatial DESTINATION ${cython_lib_dir}) +endif() + +include(rapids-cython) +rapids_cython_init() + +add_subdirectory(cuspatial/_lib) + +if(DEFINED cython_lib_dir) + rapids_cython_add_rpath_entries(TARGET cuspatial PATHS "${cython_lib_dir}") +endif() diff --git a/python/cudf_kafka/cmake/Modules/WheelHelpers.cmake b/python/cudf_kafka/cmake/Modules/WheelHelpers.cmake new file mode 100644 index 00000000000..41d720c527a --- /dev/null +++ b/python/cudf_kafka/cmake/Modules/WheelHelpers.cmake @@ -0,0 +1,71 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= +include_guard(GLOBAL) + +# Making libraries available inside wheels by installing the associated targets. +function(add_target_libs_to_wheel) + list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel") + + set(options "") + set(one_value "LIB_DIR") + set(multi_value "TARGETS") + cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN}) + + message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'") + + foreach(target IN LISTS __TARGETS) + + if(NOT TARGET ${target}) + message(VERBOSE "No target named ${target}") + continue() + endif() + + get_target_property(alias_target ${target} ALIASED_TARGET) + if(alias_target) + set(target ${alias_target}) + endif() + + get_target_property(is_imported ${target} IMPORTED) + if(NOT is_imported) + # If the target isn't imported, install it into the the wheel + install(TARGETS ${target} DESTINATION ${__LIB_DIR}) + message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})") + else() + # If the target is imported, make sure it's global + get_target_property(already_global ${target} IMPORTED_GLOBAL) + if(NOT already_global) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() + + # Find the imported target's library so we can copy it into the wheel + set(lib_loc) + foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG) + get_target_property(lib_loc ${target} ${prop}) + if(lib_loc) + message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}") + break() + endif() + message(VERBOSE "${target} has no value for property ${prop}") + endforeach() + + if(NOT lib_loc) + message(FATAL_ERROR "Found no libs to install for target ${target}") + endif() + + # Copy the imported library into the wheel + install(FILES ${lib_loc} DESTINATION ${__LIB_DIR}) + message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})") + endif() + endforeach() +endfunction() diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 78a7a83ac3a..8126d2ccacf 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -5,7 +5,9 @@ requires = [ "cython>=3.0.0", "numpy>=1.21,<1.25", + "protoc-wheel", "pyarrow==12.0.1.*", + "scikit-build>=0.13.1", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index d955d95858a..23483dfb65f 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -1,96 +1,11 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -import os -import shutil -import sysconfig -from distutils.sysconfig import get_python_lib - -import numpy as np -import pyarrow as pa -from Cython.Build import cythonize -from setuptools import find_packages, setup -from setuptools.extension import Extension - -cython_files = ["cudf_kafka/_lib/*.pyx"] - -CUDA_HOME = os.environ.get("CUDA_HOME", False) -if not CUDA_HOME: - path_to_cuda_gdb = shutil.which("cuda-gdb") - if path_to_cuda_gdb is None: - raise OSError( - "Could not locate CUDA. " - "Please set the environment variable " - "CUDA_HOME to the path to the CUDA installation " - "and try again." - ) - CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb)) - -if not os.path.isdir(CUDA_HOME): - raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") - -cuda_include_dir = os.path.join(CUDA_HOME, "include") - -CUDF_ROOT = os.environ.get( - "CUDF_ROOT", - os.path.abspath( - os.path.join( - os.path.dirname(os.path.abspath(__file__)), "../../cpp/build/" - ) - ), -) -CUDF_KAFKA_ROOT = os.environ.get( - "CUDF_KAFKA_ROOT", "../../cpp/libcudf_kafka/build" -) - -try: - nthreads = int(os.environ.get("PARALLEL_LEVEL", "0") or "0") -except Exception: - nthreads = 0 - -extensions = [ - Extension( - "*", - sources=cython_files, - include_dirs=[ - os.path.abspath(os.path.join(CUDF_ROOT, "../include/cudf")), - os.path.abspath(os.path.join(CUDF_ROOT, "../include")), - os.path.abspath( - os.path.join(CUDF_ROOT, "../libcudf_kafka/include/cudf_kafka") - ), - os.path.join(CUDF_ROOT, "include"), - os.path.join(CUDF_ROOT, "_deps/libcudacxx-src/include"), - os.path.join( - os.path.dirname(sysconfig.get_path("include")), - "rapids/libcudacxx", - ), - os.path.dirname(sysconfig.get_path("include")), - np.get_include(), - pa.get_include(), - cuda_include_dir, - ], - library_dirs=( - [ - get_python_lib(), - os.path.join(os.sys.prefix, "lib"), - CUDF_KAFKA_ROOT, - ] - ), - libraries=["cudf", "cudf_kafka"], - language="c++", - extra_compile_args=["-std=c++17", "-DFMT_HEADER_ONLY=1"], - ) -] +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +from setuptools import find_packages +from skbuild import setup packages = find_packages(include=["cudf_kafka*"]) + setup( - # Include the separately-compiled shared library - ext_modules=cythonize( - extensions, - nthreads=nthreads, - compiler_directives=dict( - profile=False, language_level=3, embedsignature=True - ), - ), packages=packages, - package_data={key: ["*.pxd"] for key in packages}, + package_data={key: ["*.pxd", "*.hpp", "*.cuh"] for key in packages}, zip_safe=False, ) From 1954ff861d244037ae6eb482419fac0394e29a45 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 17 Oct 2023 14:20:15 -0400 Subject: [PATCH 02/23] Updates to replace CUSPATIAL with CUDF_KAFKA --- python/cudf_kafka/CMakeLists.txt | 38 +++++++++---------- .../cmake/Modules/WheelHelpers.cmake | 0 2 files changed, 19 insertions(+), 19 deletions(-) rename python/cudf_kafka/{ => cudf_kafka}/cmake/Modules/WheelHelpers.cmake (100%) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 4d88627b1be..2fe7ee1ab40 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -29,28 +29,28 @@ project( # that is fixed we need to keep C. C CXX CUDA) -option(FIND_CUSPATIAL_CPP "Search for existing cuspatial C++ installations before defaulting to local files" +option(FIND_CUDF_KAFKA_CPP "Search for existing cudf_kafka C++ installations before defaulting to local files" OFF) -option(CUSPATIAL_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) +option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) -# If the user requested it we attempt to find cuspatial. -if(FIND_CUSPATIAL_CPP) - find_package(cuspatial ${cuspatial_version}) +# If the user requested it we attempt to find cudf_kafka. +if(FIND_CUDF_KAFKA_CPP) + find_package(cudf_kafka ${cudf_kafka_version}) else() - set(cuspatial_FOUND OFF) + set(cudf_kafka_FOUND OFF) endif() -if(NOT cuspatial_FOUND) +if(NOT cudf_kafka_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) set(_exclude_from_all "") - if(CUSPATIAL_BUILD_WHEELS) + if(CUDF_KAFKA_BUILD_WHEELS) # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) - set(CUSPATIAL_USE_CUDF_STATIC ON) - set(CUSPATIAL_EXCLUDE_CUDF_FROM_ALL ON) + set(CUDF_KAFKA_USE_CUDF_STATIC ON) + set(CUDF_KAFKA_EXCLUDE_CUDF_FROM_ALL ON) # Always build wheels against the pyarrow libarrow. set(USE_LIBARROW_FROM_PYARROW ON) @@ -59,32 +59,32 @@ if(NOT cuspatial_FOUND) # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) - # Don't install the cuSpatial C++ targets into wheels + # Don't install the cudf_kafka C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) endif() - add_subdirectory(../../cpp cuspatial-cpp ${_exclude_from_all}) + add_subdirectory(../../cpp cudf_kafka-cpp ${_exclude_from_all}) - set(cython_lib_dir cuspatial) + set(cython_lib_dir cudf_kafka) - if(CUSPATIAL_BUILD_WHEELS) + if(CUDF_KAFKA_BUILD_WHEELS) include(cmake/Modules/WheelHelpers.cmake) get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) # Ensure all the shared objects we need at runtime are in the wheel add_target_libs_to_wheel(LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}) endif() - # Since there are multiple subpackages of cuspatial._lib that require access to libcuspatial, we place the - # library in the cuspatial directory as a single source of truth and modify the other rpaths + # Since there are multiple subpackages of cudf_kafka._lib that require access to libcudf_kafka, we place the + # library in the cudf_kafka directory as a single source of truth and modify the other rpaths # appropriately. - install(TARGETS cuspatial DESTINATION ${cython_lib_dir}) + install(TARGETS cudf_kafka DESTINATION ${cython_lib_dir}) endif() include(rapids-cython) rapids_cython_init() -add_subdirectory(cuspatial/_lib) +add_subdirectory(cudf_kafka/_lib) if(DEFINED cython_lib_dir) - rapids_cython_add_rpath_entries(TARGET cuspatial PATHS "${cython_lib_dir}") + rapids_cython_add_rpath_entries(TARGET cudf_kafka PATHS "${cython_lib_dir}") endif() diff --git a/python/cudf_kafka/cmake/Modules/WheelHelpers.cmake b/python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake similarity index 100% rename from python/cudf_kafka/cmake/Modules/WheelHelpers.cmake rename to python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake From f4f7fc68a153b7e80ce270a528860b271f747707 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 17 Oct 2023 14:42:39 -0400 Subject: [PATCH 03/23] Added _lib file --- .../cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt new file mode 100644 index 00000000000..abf4fa13453 --- /dev/null +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -0,0 +1,25 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources + kafka.pyx + ) +set(linked_libraries cudf_kafka::cudf_kafka) + +rapids_cython_create_modules( + CXX + ASSOCIATED_TARGETS cudf_kafka + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" +) From 86603046a5f3e1d35f5ad4158d5cb191dc41b7e0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 17 Oct 2023 11:42:10 -0700 Subject: [PATCH 04/23] Update dependencies. --- ci/release/update-version.sh | 1 + conda/recipes/cudf_kafka/meta.yaml | 1 + dependencies.yaml | 14 ++++++++++---- python/cudf_kafka/pyproject.toml | 1 - 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5e735a71994..e7dca33ea69 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -43,6 +43,7 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' # Python CMakeLists updates sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt +sed_runner 's/'"cudf_kafka_version .*)"'/'"cudf_kafka_version ${NEXT_FULL_TAG})"'/g' python/cudf_kafka/CMakeLists.txt # cpp libcudf_kafka update sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index ec0cc402511..b302fdc6a03 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -47,6 +47,7 @@ requirements: - cuda-version ={{ cuda_version }} - cudf ={{ version }} - libcudf_kafka ={{ version }} + - scikit-build >=0.13.1 - setuptools run: - python diff --git a/dependencies.yaml b/dependencies.yaml index 2cd6d1777ff..f8d0d71efb6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -9,8 +9,9 @@ files: - build_all - build_cpp - build_wheels - - build_python - build_python_common + - build_python_cudf + - build_python_cudf_kafka - cudatoolkit - develop - docs @@ -70,8 +71,8 @@ files: table: build-system includes: - build_all - - build_python - build_python_common + - build_python_cudf - build_wheels py_run_cudf: output: pyproject @@ -122,7 +123,7 @@ files: table: build-system includes: - build_python_common - - build_python + - build_python_cudf_kafka - build_wheels py_run_cudf_kafka: output: pyproject @@ -247,7 +248,7 @@ dependencies: - pyarrow==12.0.1.* # TODO: Pin to numpy<1.25 until cudf requires pandas 2 - &numpy numpy>=1.21,<1.25 - build_python: + build_python_cudf: common: - output_types: [conda, requirements, pyproject] packages: @@ -259,6 +260,11 @@ dependencies: - output_types: pyproject packages: - protoc-wheel + build_python_cudf_kafka: + common: + - output_types: [conda, requirements, pyproject] + packages: + - scikit-build>=0.13.1 libarrow_run: common: - output_types: [conda, requirements] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 8126d2ccacf..b05221b237c 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -5,7 +5,6 @@ requires = [ "cython>=3.0.0", "numpy>=1.21,<1.25", - "protoc-wheel", "pyarrow==12.0.1.*", "scikit-build>=0.13.1", "setuptools", From b256bdf030b8ce8eebc07bc469e107c5c324b86b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 17 Oct 2023 13:25:09 -0700 Subject: [PATCH 05/23] Add standard cudf_kafka alias target --- cpp/libcudf_kafka/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 1a15a3ec2cd..c0605c3777a 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -85,6 +85,8 @@ set_target_properties( CXX_STANDARD_REQUIRED ON ) +add_library(cudf_kafka::cudf_kafka ALIAS cudf_kafka) + # ################################################################################################## # * cudf_kafka Install ---------------------------------------------------------------------------- rapids_cmake_install_lib_dir(lib_dir) From 7eae18f3ce3d4325d8c4548cb7e14091c72381f7 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 17 Oct 2023 13:27:11 -0700 Subject: [PATCH 06/23] Fix path --- python/cudf_kafka/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 2fe7ee1ab40..937031df07a 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -63,7 +63,7 @@ if(NOT cudf_kafka_FOUND) set(_exclude_from_all EXCLUDE_FROM_ALL) endif() - add_subdirectory(../../cpp cudf_kafka-cpp ${_exclude_from_all}) + add_subdirectory(../../cpp/libcudf_kafka cudf_kafka-cpp ${_exclude_from_all}) set(cython_lib_dir cudf_kafka) From 922ce92a0926edbb018cbd2ec9f1e980557e3a4e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 17 Oct 2023 13:27:46 -0700 Subject: [PATCH 07/23] Fix style --- python/cudf_kafka/CMakeLists.txt | 18 +++++++++++------- .../cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 7 ++----- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 937031df07a..4088a804647 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -27,10 +27,12 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C CXX CUDA) + C CXX CUDA +) -option(FIND_CUDF_KAFKA_CPP "Search for existing cudf_kafka C++ installations before defaulting to local files" - OFF) +option(FIND_CUDF_KAFKA_CPP + "Search for existing cudf_kafka C++ installations before defaulting to local files" OFF +) option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) @@ -71,12 +73,14 @@ if(NOT cudf_kafka_FOUND) include(cmake/Modules/WheelHelpers.cmake) get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) # Ensure all the shared objects we need at runtime are in the wheel - add_target_libs_to_wheel(LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}) + add_target_libs_to_wheel( + LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs} + ) endif() - # Since there are multiple subpackages of cudf_kafka._lib that require access to libcudf_kafka, we place the - # library in the cudf_kafka directory as a single source of truth and modify the other rpaths - # appropriately. + # Since there are multiple subpackages of cudf_kafka._lib that require access to libcudf_kafka, we + # place the library in the cudf_kafka directory as a single source of truth and modify the other + # rpaths appropriately. install(TARGETS cudf_kafka DESTINATION ${cython_lib_dir}) endif() diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index abf4fa13453..3b5555bed36 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -12,14 +12,11 @@ # the License. # ============================================================================= -set(cython_sources - kafka.pyx - ) +set(cython_sources kafka.pyx) set(linked_libraries cudf_kafka::cudf_kafka) rapids_cython_create_modules( - CXX - ASSOCIATED_TARGETS cudf_kafka + CXX ASSOCIATED_TARGETS cudf_kafka SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) From 37dc3811ca8d1f52b588ae71fb10091099a7daa3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 2 Nov 2023 19:10:37 -0500 Subject: [PATCH 08/23] Add LICENSE and README.md. --- python/cudf_kafka/LICENSE | 1 + python/cudf_kafka/README.md | 1 + 2 files changed, 2 insertions(+) create mode 120000 python/cudf_kafka/LICENSE create mode 120000 python/cudf_kafka/README.md diff --git a/python/cudf_kafka/LICENSE b/python/cudf_kafka/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/cudf_kafka/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/cudf_kafka/README.md b/python/cudf_kafka/README.md new file mode 120000 index 00000000000..fe840054137 --- /dev/null +++ b/python/cudf_kafka/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file From af3b838518ee03c5f5d7c377a26c55e695257dbc Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 2 Nov 2023 19:22:19 -0500 Subject: [PATCH 09/23] Add CUDA compilers for building cudf-kafka, revert hacks. --- .../all_cuda-120_arch-x86_64.yaml | 1 - conda/recipes/cudf_kafka/build.sh | 13 ------------ conda/recipes/cudf_kafka/meta.yaml | 20 ++++++++++--------- dependencies.yaml | 3 --- python/cudf_kafka/setup.py | 4 +++- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 4f39424bbc6..d9e445ae514 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -17,7 +17,6 @@ dependencies: - cmake>=3.26.4 - cramjam - cuda-cudart-dev -- cuda-gdb - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev diff --git a/conda/recipes/cudf_kafka/build.sh b/conda/recipes/cudf_kafka/build.sh index f4bb6e1bc91..9458349d101 100644 --- a/conda/recipes/cudf_kafka/build.sh +++ b/conda/recipes/cudf_kafka/build.sh @@ -1,16 +1,3 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. -# This assumes the script is executed from the root of the repo directory -# Need to set CUDA_HOME inside conda environments because the hacked together -# setup.py for cudf-kafka searches that way. -# TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates -# cudf_kafka to use scikit-build -CUDA_MAJOR=${RAPIDS_CUDA_VERSION%%.*} -if [[ ${CUDA_MAJOR} == "12" ]]; then - target_name="x86_64-linux" - if [[ ! $(arch) == "x86_64" ]]; then - target_name="sbsa-linux" - fi - export CUDA_HOME="${PREFIX}/targets/${target_name}/" -fi ./build.sh -v cudf_kafka diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 88d147216ec..fdb9df65561 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -33,22 +33,24 @@ build: - SCCACHE_S3_KEY_PREFIX=cudf-kafka-linux64 # [linux64] - SCCACHE_S3_USE_SSL - SCCACHE_S3_NO_CREDENTIALS - # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates - # cudf_kafka to use scikit-build - - RAPIDS_CUDA_VERSION + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} requirements: build: - cmake {{ cmake_version }} + - ninja - {{ compiler('c') }} - {{ compiler('cxx') }} - - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} - # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates - # cudf_kafka to use scikit-build - {% if cuda_major == "12" %} - - cuda-gdb + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} {% endif %} + - cuda-version ={{ cuda_version }} + - sysroot_{{ target_platform }} {{ sysroot_version }} host: - python - cython >=3.0.0 diff --git a/dependencies.yaml b/dependencies.yaml index e71cb2cc5af..b0f71bf9a62 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -290,9 +290,6 @@ dependencies: - cuda-nvrtc-dev - cuda-nvtx-dev - libcurand-dev - # TODO: Remove after https://github.com/rapidsai/cudf/pull/14292 updates - # cudf_kafka to use scikit-build - - cuda-gdb - matrix: cuda: "11.8" packages: diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index b9c5120f30d..6a99e9ed968 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -6,6 +6,8 @@ setup( packages=packages, - package_data={key: ["VERSION", "*.pxd", "*.hpp", "*.cuh"] for key in packages}, + package_data={ + key: ["VERSION", "*.pxd", "*.hpp", "*.cuh"] for key in packages + }, zip_safe=False, ) From 1c55261abe663082289219f2bcb34e00786c4eab Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 2 Nov 2023 20:39:09 -0500 Subject: [PATCH 10/23] Update conda_build_config.yaml to include cuda compiler keys. --- conda/recipes/cudf_kafka/conda_build_config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index b63a136ad2d..c98c2701653 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -9,3 +9,9 @@ sysroot_version: cmake_version: - ">=3.26.4" + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc From 8cf4f0cdb0aff2ebb660175678b1132131066e70 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Nov 2023 12:43:17 -0800 Subject: [PATCH 11/23] Test some changes. --- build.sh | 9 +++++++-- python/cudf_kafka/CMakeLists.txt | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index 2ad69712e5d..d0fb187e65e 100755 --- a/build.sh +++ b/build.sh @@ -234,6 +234,11 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON" fi +# Append `-DFIND_CUDF_KAFKA_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. +if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_KAFKA_CPP"* ]]; then + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_KAFKA_CPP=ON" +fi + # If clean given, run it prior to any other steps if hasArg clean; then @@ -369,9 +374,9 @@ fi # build cudf_kafka Python package if hasArg cudf_kafka; then cd ${REPODIR}/python/cudf_kafka - SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR}" \ + SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ - python -m pip install --no-build-isolation --no-deps . + python -m pip install --no-build-isolation --no-deps . -vvv fi # build custreamz Python package diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 4088a804647..657cb55bbc9 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -38,7 +38,8 @@ option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel. # If the user requested it we attempt to find cudf_kafka. if(FIND_CUDF_KAFKA_CPP) - find_package(cudf_kafka ${cudf_kafka_version}) + find_package(cudf ${cudf_version} REQUIRED) + find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) else() set(cudf_kafka_FOUND OFF) endif() From 918ad25400be534e084a4c92f051d6165488b0f8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Nov 2023 13:12:21 -0800 Subject: [PATCH 12/23] Rename CUDA_KAFKA to CUDF_KAFKA. --- cpp/libcudf_kafka/CMakeLists.txt | 4 ++-- .../cmake/thirdparty/get_cudf.cmake | 16 ++++++++-------- cpp/libcudf_kafka/tests/CMakeLists.txt | 2 +- python/cudf_kafka/CMakeLists.txt | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index c0605c3777a..dda4d40ed8c 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -21,7 +21,7 @@ include(rapids-export) include(rapids-find) project( - CUDA_KAFKA + CUDF_KAFKA VERSION 23.12.00 LANGUAGES CXX ) @@ -64,7 +64,7 @@ add_library(cudf_kafka SHARED src/kafka_consumer.cpp src/kafka_callback.cpp) # ################################################################################################## # * include paths --------------------------------------------------------------------------------- target_include_directories( - cudf_kafka PUBLIC "$" + cudf_kafka PUBLIC "$" "$" ) diff --git a/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake b/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake index aa4c5b60e7a..20aa9873f43 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -35,21 +35,21 @@ function(find_and_configure_cudf VERSION) endif() endfunction() -set(CUDA_KAFKA_MIN_VERSION_cudf - "${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.${CUDA_KAFKA_VERSION_PATCH}" +set(CUDF_KAFKA_MIN_VERSION + "${CUDF_KAFKA_VERSION_MAJOR}.${CUDF_KAFKA_VERSION_MINOR}.${CUDF_KAFKA_VERSION_PATCH}" ) -find_and_configure_cudf(${CUDA_KAFKA_MIN_VERSION_cudf}) +find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION}) if(cudf_REQUIRES_CUDA) - rapids_cuda_init_architectures(CUDA_KAFKA) + rapids_cuda_init_architectures(CUDF_KAFKA) # Since we are building cudf as part of ourselves we need to enable the CUDA language in the # top-most scope enable_language(CUDA) - # Since CUDA_KAFKA only enables CUDA optionally we need to manually include the file that + # Since CUDF_KAFKA only enables CUDA optionally we need to manually include the file that # rapids_cuda_init_architectures relies on `project` calling - if(DEFINED CMAKE_PROJECT_CUDA_KAFKA_INCLUDE) - include("${CMAKE_PROJECT_CUDA_KAFKA_INCLUDE}") + if(DEFINED CMAKE_PROJECT_CUDF_KAFKA_INCLUDE) + include("${CMAKE_PROJECT_CUDF_KAFKA_INCLUDE}") endif() endif() diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index 68a5327b455..b819cb6fc3b 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -26,7 +26,7 @@ function(ConfigureTest test_name) add_executable(${test_name} ${ARGN}) set_target_properties( ${test_name} - PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" ) target_link_libraries( diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 657cb55bbc9..3f52d3a3161 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -38,7 +38,6 @@ option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel. # If the user requested it we attempt to find cudf_kafka. if(FIND_CUDF_KAFKA_CPP) - find_package(cudf ${cudf_version} REQUIRED) find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) else() set(cudf_kafka_FOUND OFF) From 529338eebddc9fb6339e4363c14c533c048c166d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Nov 2023 13:58:45 -0800 Subject: [PATCH 13/23] Add PyArrow and NumPy include dirs, and fix include paths of kafka.pxd. --- python/cudf/cudf/_lib/CMakeLists.txt | 6 --- .../cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 40 +++++++++++++++++++ python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 4 +- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 1b543b94589..c041c7f4842 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -81,12 +81,6 @@ target_link_libraries(strings_udf cudf_strings_udf) # necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C # /opt/_internal" find_package(NumPy REQUIRED) -set(targets_using_numpy interop avro csv orc json parquet) -foreach(target IN LISTS targets_using_numpy) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24. - # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") -endforeach() set(targets_using_dlpack interop) foreach(target IN LISTS targets_using_dlpack) diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index 3b5555bed36..3262b7d5ebe 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -20,3 +20,43 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) + +# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was +# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in +# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. +# find_package(Python REQUIRED COMPONENTS Development NumPy) + +# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake +# inside the manylinux images used to build wheels because manylinux images do not contain +# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should +# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try +# switching back, but it may not work if that implicitly still requires Python libraries. In that +# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution +# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static +# libs to be found, but that should be a last resort since it implies a dependency that isn't really +# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C +# /opt/_internal" +find_package(NumPy REQUIRED) + +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) + +execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" + OUTPUT_VARIABLE PYARROW_INCLUDE_DIR + ERROR_VARIABLE PYARROW_ERROR + RESULT_VARIABLE PYARROW_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +if(${PYARROW_RESULT}) + message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") +endif() + +# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. +# These requirements will go away once all scalar-related Cython code is removed from cudf. +foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") +endforeach() diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index ca729c62512..068837d04ee 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -11,12 +11,12 @@ from cudf._lib.cpp.io.datasource cimport datasource from cudf._lib.io.datasource cimport Datasource -cdef extern from "kafka_callback.hpp" \ +cdef extern from "cudf_kafka/kafka_callback.hpp" \ namespace "cudf::io::external::kafka" nogil: ctypedef object (*python_callable_type)() -cdef extern from "kafka_consumer.hpp" \ +cdef extern from "cudf_kafka/kafka_consumer.hpp" \ namespace "cudf::io::external::kafka" nogil: cpdef cppclass kafka_consumer: From 25d47e7f89be910ad0f74cd2e04e3d3aef518f73 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Nov 2023 14:59:42 -0800 Subject: [PATCH 14/23] Also set CMAKE_CUDA_ARCHITECTURES for cudf_kafka. --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index d0fb187e65e..fa84f17db35 100755 --- a/build.sh +++ b/build.sh @@ -262,7 +262,7 @@ fi ################################################################################ # Configure, build, and install libcudf -if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then +if buildAll || hasArg libcudf || hasArg cudf || hasArg cudf_kafka || hasArg cudfjar; then if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}" if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then From d46a4e2e831304837cd5d62a8e061dc1b71252c3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Nov 2023 18:10:40 -0600 Subject: [PATCH 15/23] Try to fix install dir. --- cpp/libcudf_kafka/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index dda4d40ed8c..bc3240111ce 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -96,7 +96,7 @@ install( EXPORT cudf_kafka-exports ) -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION include) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) rapids_export( INSTALL cudf_kafka From 26a2cdc9d93f085b19c738671fba5d44f31c0a8a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Nov 2023 15:42:35 -0600 Subject: [PATCH 16/23] Add trailing slash. --- cpp/libcudf_kafka/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index bc3240111ce..4128afa3935 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -96,7 +96,7 @@ install( EXPORT cudf_kafka-exports ) -install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) rapids_export( INSTALL cudf_kafka From c7893625f6bd895ebc045e9395c415f863b6fb8c Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Nov 2023 15:50:26 -0600 Subject: [PATCH 17/23] Only add FIND_CUDF_KAFKA_CPP if requesting to build cudf_kafka. --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index fa84f17db35..e75049b1ea9 100755 --- a/build.sh +++ b/build.sh @@ -235,7 +235,7 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then fi # Append `-DFIND_CUDF_KAFKA_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. -if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_KAFKA_CPP"* ]]; then +if buildAll || hasArg cudf_kafka && [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_KAFKA_CPP"* ]]; then EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_KAFKA_CPP=ON" fi From f7cf156920e6ee911ef93f90fcd30ea574268b48 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Nov 2023 15:50:36 -0600 Subject: [PATCH 18/23] No verbose flags in build. --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index e75049b1ea9..dfa42c38099 100755 --- a/build.sh +++ b/build.sh @@ -376,7 +376,7 @@ if hasArg cudf_kafka; then cd ${REPODIR}/python/cudf_kafka SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ - python -m pip install --no-build-isolation --no-deps . -vvv + python -m pip install --no-build-isolation --no-deps . fi # build custreamz Python package From 740e93ca17251b0328f6e9986ed457b86ad54119 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Nov 2023 16:27:04 -0600 Subject: [PATCH 19/23] Remove else. --- python/cudf_kafka/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 3f52d3a3161..8127ba87b40 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -39,8 +39,6 @@ option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel. # If the user requested it we attempt to find cudf_kafka. if(FIND_CUDF_KAFKA_CPP) find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) -else() - set(cudf_kafka_FOUND OFF) endif() if(NOT cudf_kafka_FOUND) From 64a5d618baf808286bb481dc9c2efe95728f9680 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 13 Nov 2023 15:43:43 -0600 Subject: [PATCH 20/23] Remove build_python_cudf_kafka. --- dependencies.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 73ab84364c5..30d2eed9c28 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -11,7 +11,6 @@ files: - build_wheels - build_python_common - build_python_cudf - - build_python_cudf_kafka - cudatoolkit - develop - docs @@ -140,7 +139,6 @@ files: table: build-system includes: - build_python_common - - build_python_cudf_kafka - build_wheels py_run_cudf_kafka: output: pyproject @@ -261,6 +259,7 @@ dependencies: - cython>=3.0.0 # TODO: Pin to numpy<1.25 until cudf requires pandas 2 - &numpy numpy>=1.21,<1.25 + - scikit-build>=0.13.1 - output_types: [conda, requirements, pyproject] packages: # Hard pin the patch version used during the build. This must be kept @@ -270,7 +269,6 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - scikit-build>=0.13.1 - rmm==23.12.* - output_types: conda packages: @@ -278,11 +276,6 @@ dependencies: - output_types: pyproject packages: - protoc-wheel - build_python_cudf_kafka: - common: - - output_types: [conda, requirements, pyproject] - packages: - - scikit-build>=0.13.1 libarrow_run: common: - output_types: conda From 27841c738849812dba4da6eceb1768885f8dd67e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 13 Nov 2023 16:13:26 -0600 Subject: [PATCH 21/23] Require libcudf and libcudf_kafka when building cudf_kafka Python package (which disallows wheel builds, but we aren't shipping wheels anyway). --- build.sh | 9 +-- python/cudf_kafka/CMakeLists.txt | 50 +------------ .../cmake/Modules/WheelHelpers.cmake | 71 ------------------- 3 files changed, 4 insertions(+), 126 deletions(-) delete mode 100644 python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake diff --git a/build.sh b/build.sh index dfa42c38099..e5beb51dedf 100755 --- a/build.sh +++ b/build.sh @@ -234,11 +234,6 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON" fi -# Append `-DFIND_CUDF_KAFKA_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. -if buildAll || hasArg cudf_kafka && [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_KAFKA_CPP"* ]]; then - EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_KAFKA_CPP=ON" -fi - # If clean given, run it prior to any other steps if hasArg clean; then @@ -262,7 +257,7 @@ fi ################################################################################ # Configure, build, and install libcudf -if buildAll || hasArg libcudf || hasArg cudf || hasArg cudf_kafka || hasArg cudfjar; then +if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}" if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then @@ -374,7 +369,7 @@ fi # build cudf_kafka Python package if hasArg cudf_kafka; then cd ${REPODIR}/python/cudf_kafka - SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} ${EXTRA_CMAKE_ARGS}" \ + SKBUILD_CONFIGURE_OPTIONS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS}" \ SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL:-1}" \ python -m pip install --no-build-isolation --no-deps . fi diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 8127ba87b40..5b3397fa64d 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -17,8 +17,6 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) set(cudf_kafka_version 23.12.00) include(../../fetch_rapids.cmake) -include(rapids-cuda) -rapids_cuda_init_architectures(cudf-kafka-python) project( cudf-kafka-python @@ -27,59 +25,15 @@ project( # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. - C CXX CUDA + C CXX ) -option(FIND_CUDF_KAFKA_CPP - "Search for existing cudf_kafka C++ installations before defaulting to local files" OFF -) - -option(CUDF_KAFKA_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) - -# If the user requested it we attempt to find cudf_kafka. if(FIND_CUDF_KAFKA_CPP) find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) endif() if(NOT cudf_kafka_FOUND) - set(BUILD_TESTS OFF) - set(BUILD_BENCHMARKS OFF) - set(_exclude_from_all "") - if(CUDF_KAFKA_BUILD_WHEELS) - - # Statically link cudart if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(CUDF_KAFKA_USE_CUDF_STATIC ON) - set(CUDF_KAFKA_EXCLUDE_CUDF_FROM_ALL ON) - - # Always build wheels against the pyarrow libarrow. - set(USE_LIBARROW_FROM_PYARROW ON) - - # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp - # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL - set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) - - # Don't install the cudf_kafka C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp/libcudf_kafka cudf_kafka-cpp ${_exclude_from_all}) - - set(cython_lib_dir cudf_kafka) - - if(CUDF_KAFKA_BUILD_WHEELS) - include(cmake/Modules/WheelHelpers.cmake) - get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) - # Ensure all the shared objects we need at runtime are in the wheel - add_target_libs_to_wheel( - LIB_DIR ${cython_lib_dir} TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs} - ) - endif() - - # Since there are multiple subpackages of cudf_kafka._lib that require access to libcudf_kafka, we - # place the library in the cudf_kafka directory as a single source of truth and modify the other - # rpaths appropriately. - install(TARGETS cudf_kafka DESTINATION ${cython_lib_dir}) + message(FATAL_ERROR "cudf_kafka package not found. cudf_kafka C++ is required to build this Python package.") endif() include(rapids-cython) diff --git a/python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake b/python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake deleted file mode 100644 index 41d720c527a..00000000000 --- a/python/cudf_kafka/cudf_kafka/cmake/Modules/WheelHelpers.cmake +++ /dev/null @@ -1,71 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= -include_guard(GLOBAL) - -# Making libraries available inside wheels by installing the associated targets. -function(add_target_libs_to_wheel) - list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel") - - set(options "") - set(one_value "LIB_DIR") - set(multi_value "TARGETS") - cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN}) - - message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'") - - foreach(target IN LISTS __TARGETS) - - if(NOT TARGET ${target}) - message(VERBOSE "No target named ${target}") - continue() - endif() - - get_target_property(alias_target ${target} ALIASED_TARGET) - if(alias_target) - set(target ${alias_target}) - endif() - - get_target_property(is_imported ${target} IMPORTED) - if(NOT is_imported) - # If the target isn't imported, install it into the the wheel - install(TARGETS ${target} DESTINATION ${__LIB_DIR}) - message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})") - else() - # If the target is imported, make sure it's global - get_target_property(already_global ${target} IMPORTED_GLOBAL) - if(NOT already_global) - set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) - endif() - - # Find the imported target's library so we can copy it into the wheel - set(lib_loc) - foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG) - get_target_property(lib_loc ${target} ${prop}) - if(lib_loc) - message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}") - break() - endif() - message(VERBOSE "${target} has no value for property ${prop}") - endforeach() - - if(NOT lib_loc) - message(FATAL_ERROR "Found no libs to install for target ${target}") - endif() - - # Copy the imported library into the wheel - install(FILES ${lib_loc} DESTINATION ${__LIB_DIR}) - message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})") - endif() - endforeach() -endfunction() From 6d9037e232e038da208f8000bac88dc9b4f5c735 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 13 Nov 2023 16:18:52 -0600 Subject: [PATCH 22/23] Style. --- python/cudf_kafka/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index 5b3397fa64d..bed9c639eb0 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -33,7 +33,10 @@ if(FIND_CUDF_KAFKA_CPP) endif() if(NOT cudf_kafka_FOUND) - message(FATAL_ERROR "cudf_kafka package not found. cudf_kafka C++ is required to build this Python package.") + message( + FATAL_ERROR + "cudf_kafka package not found. cudf_kafka C++ is required to build this Python package." + ) endif() include(rapids-cython) From 7dd2a5dae09f063d1377c63d993febf14f4b6b63 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Nov 2023 18:05:12 -0800 Subject: [PATCH 23/23] Make the find_package call unconditional --- python/cudf_kafka/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index bed9c639eb0..d55c3fdc076 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -28,9 +28,7 @@ project( C CXX ) -if(FIND_CUDF_KAFKA_CPP) - find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) -endif() +find_package(cudf_kafka ${cudf_kafka_version} REQUIRED) if(NOT cudf_kafka_FOUND) message(