From fc0f1816ed7dbd0cabb91906d380e59dee1c4528 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Mon, 11 Dec 2023 09:56:32 -0800
Subject: [PATCH] Simplify Python CMake (#14565)

This PR makes a number of changes to streamline the CMake code in the Python
build. There are additional improvements that may be possible, but I'd prefer
to wait on those until after the scikit-build-core migration because I suspect
that there may be subtle differences (particularly around the install rules).
The major improvements here are:

- nvcomp installation no longer needs as much special casing thanks to
  https://github.com/rapidsai/rapids-cmake/pull/496 adding missing targets.
- get_arrow.cmake now determines how to find arrow by checking for 1) the
  presence of Python, 2) the presence of pyarrow, and 3) the presence of
  libarrow.so.* inside the pyarrow directory. This approach works for pip
  packages, conda packages, and pure C++ builds alike, and removes the need
  for the builder to manually specify where to look. The assumption baked in
  is that the developer has installed the desired pyarrow package when
  building.
- The `CUDF_BUILD_WHEELS` variable is now removed. All Python builds that
  don't find the C++ library now go down the same path. This is particularly
  relevant for doing something like a local `pip install`, and it is the
  desired behavior because if you're building the Python package in that kind
  of environment you always want the same behavior. The one case that differs
  is when the Python build finds the C++ build (e.g. in a conda environment
  where libcudf is a separate package).
- The logic for linking to pyarrow headers is now centralized in a single
  function, since it needs to be used by every Cython target.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - https://github.com/brandon-b-miller
  - Robert Maynard (https://github.com/robertmaynard)
  - Ray Douglass (https://github.com/raydouglass)

---
 ci/build_wheel_cudf.sh                        |  2 +-
 cpp/CMakeLists.txt                            |  5 +-
 cpp/cmake/thirdparty/get_arrow.cmake          | 43 +++++++----
 python/cudf/CMakeLists.txt                    | 73 ++++++++-----------
 .../cmake/Modules/LinkPyarrowHeaders.cmake    | 55 ++++++++++++++
 python/cudf/cmake/Modules/WheelHelpers.cmake  | 44 ++++-------
 python/cudf/cudf/_lib/CMakeLists.txt          | 45 +-----------
 python/cudf/cudf/_lib/nvtext/CMakeLists.txt   |  9 +--
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   | 22 +-----
 python/cudf/cudf/_lib/strings/CMakeLists.txt  |  9 +--
 .../cudf/_lib/strings/convert/CMakeLists.txt  |  9 +--
 .../cudf/_lib/strings/split/CMakeLists.txt    |  9 +--
 .../cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 42 +----------
 13 files changed, 141 insertions(+), 226 deletions(-)
 create mode 100644 python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake

diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
index 456a3a289d1..e79b9a35aa2 100755
--- a/ci/build_wheel_cudf.sh
+++ b/ci/build_wheel_cudf.sh
@@ -5,7 +5,7 @@ set -euo pipefail
 
 package_dir="python/cudf"
 
-export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
+export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"
 
 ./ci/build_wheel.sh cudf ${package_dir}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 234ad1ebf03..dbd67a1e6ac 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
 option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
+mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
 if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
@@ -90,9 +92,6 @@ option(
 )
 mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
 
-option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
-mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
-
 message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
 message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
 message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 05aa5730b4d..d971a32db70 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -34,20 +34,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
   # version number soname, just `${MAJOR_VERSION}00`
   set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00")
 
-  find_package(Python REQUIRED)
-  execute_process(
+  string(
+    APPEND
+    initial_code_block
+    [=[
+find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+execute_process(
   COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
   OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
   OUTPUT_STRIP_TRAILING_WHITESPACE
+  COMMAND_ERROR_IS_FATAL ANY
+)
+list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
+]=]
+  )
+  string(
+    APPEND
+    final_code_block
+    [=[
+list(POP_BACK CMAKE_PREFIX_PATH)
+]=]
   )
-  list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
 
   rapids_find_generate_module(
     Arrow
     NO_CONFIG
     VERSION "${PYARROW_VERSION}"
     LIBRARY_NAMES "${PYARROW_LIB}"
     BUILD_EXPORT_SET cudf-exports
     INSTALL_EXPORT_SET cudf-exports
-    HEADER_NAMES arrow/python/arrow_to_pandas.h
+    HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block
+    FINAL_CODE_BLOCK final_code_block
   )
 
   find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
@@ -62,20 +77,20 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
   # benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
   # ignoring these limitations since we don't anticipate using this feature except for building
   # wheels.
-  EXECUTE_PROCESS(
+  execute_process(
    COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
     OUTPUT_VARIABLE GLIBC_EXECUTABLE
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )
-  EXECUTE_PROCESS(
+  execute_process(
     COMMAND ${GLIBC_EXECUTABLE}
     OUTPUT_VARIABLE GLIBC_OUTPUT
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )
-  STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
-  STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
-  STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
-  LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
+  string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
+  string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
+  string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
";" GLIBC_VERSION_LIST ${GLIBC_VERSION}) + list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR) if(GLIBC_VERSION_MINOR LESS 28) target_compile_options( Arrow::Arrow INTERFACE "$<$:-D_GLIBCXX_USE_CXX11_ABI=0>" @@ -85,16 +100,14 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) rapids_export_package(BUILD Arrow cudf-exports) rapids_export_package(INSTALL Arrow cudf-exports) - - list(POP_BACK CMAKE_PREFIX_PATH) endfunction() # This function finds arrow and sets any additional necessary environment variables. function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON - ENABLE_PARQUET + ENABLE_PARQUET PYARROW_LIBARROW ) - if(USE_LIBARROW_FROM_PYARROW) + if(PYARROW_LIBARROW) # Generate a FindArrow.cmake to find pyarrow's libarrow.so find_libarrow_in_python_wheel(${VERSION}) set(ARROW_FOUND @@ -434,5 +447,5 @@ endif() find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} - ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} + ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} ) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index c0e16bedb1b..8a9ea00d640 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -33,28 +33,29 @@ project( option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" OFF ) -option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) -option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) +option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) mark_as_advanced(USE_LIBARROW_FROM_PYARROW) -# Always build wheels against the pyarrow libarrow. -if(CUDF_BUILD_WHEELS) - set(USE_LIBARROW_FROM_PYARROW ON) -endif() +# Find Python early so that later commands can use it +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) + include(rapids-cpm) + include(rapids-export) + include(rapids-find) + rapids_cpm_init() + if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO: - # These options should probably all become optional since in practice they aren't meaningful - # except in the case where we actually compile Arrow. + # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow + # libraries. These variables have no effect because we are always searching for arrow via + # pyarrow, but they must be set as they are required arguments to the function in + # get_arrow.cmake. 
     set(CUDF_USE_ARROW_STATIC OFF)
     set(CUDF_ENABLE_ARROW_S3 OFF)
     set(CUDF_ENABLE_ARROW_ORC OFF)
     set(CUDF_ENABLE_ARROW_PYTHON OFF)
     set(CUDF_ENABLE_ARROW_PARQUET OFF)
-    include(rapids-find)
-    include(rapids-export)
     include(../../cpp/cmake/thirdparty/get_arrow.cmake)
   endif()
 
@@ -62,8 +63,6 @@ if(FIND_CUDF_CPP)
   # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
   # for the interop.pyx
-  include(rapids-cpm)
-  rapids_cpm_init()
   include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
 else()
   set(cudf_FOUND OFF)
@@ -74,42 +73,32 @@ include(rapids-cython)
 
 if(NOT cudf_FOUND)
   set(BUILD_TESTS OFF)
   set(BUILD_BENCHMARKS OFF)
+  set(CUDF_BUILD_TESTUTIL OFF)
+  set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+  set(CUDA_STATIC_RUNTIME ON)
 
-  set(_exclude_from_all "")
-  if(CUDF_BUILD_WHEELS)
-    # We don't build C++ tests when building wheels, so we can also omit the test util and shrink
-    # the wheel by avoiding embedding GTest.
-    set(CUDF_BUILD_TESTUTIL OFF)
-    set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+  add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)
 
-    # Statically link cudart if building wheels
-    set(CUDA_STATIC_RUNTIME ON)
-
-    # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
-    # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
-    set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)
-
-    # Don't install the cuDF C++ targets into wheels
-    set(_exclude_from_all EXCLUDE_FROM_ALL)
-  endif()
-
-  add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all})
-
-  if(CUDF_BUILD_WHEELS)
-    include(cmake/Modules/WheelHelpers.cmake)
-    get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
-    # Ensure all the shared objects we need at runtime are in the wheel
-    add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs})
-  endif()
-  # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
-  # library in the cudf directory as a single source of truth and modify the other rpaths
-  # appropriately.
+  # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous
+  # components like headers from libcudacxx, but we do need the libraries. However, we want to
+  # control where they are installed to. Since there are multiple subpackages of cudf._lib that
+  # require access to libcudf, we place the library and all its dependent artifacts in the cudf
+  # directory as a single source of truth and modify the other rpaths appropriately.
   set(cython_lib_dir cudf)
-  install(TARGETS cudf DESTINATION ${cython_lib_dir})
+  include(cmake/Modules/WheelHelpers.cmake)
+  # TODO: This install is currently overzealous. We should only install the libraries that are
+  # downloaded by CPM during the build, not libraries that were found on the system. However, in
+  # practice this would only be a problem if libcudf was not found but some of the dependencies
+  # were, and we have no real use cases where that happens.
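+  # Install libcudf itself together with the arrow and nvcomp runtime libraries it links against.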
+  install_aliased_imported_targets(
+    TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp
+    DESTINATION ${cython_lib_dir}
+  )
 endif()
 
 rapids_cython_init()
 
+include(cmake/Modules/LinkPyarrowHeaders.cmake)
 add_subdirectory(cudf/_lib)
 add_subdirectory(udf_cpp)
 
diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
new file mode 100644
index 00000000000..8dc47c470ba
--- /dev/null
+++ b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
@@ -0,0 +1,55 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+include_guard(GLOBAL)
+
+# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
+# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
+# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
+# find_package(Python REQUIRED COMPONENTS Development NumPy)
+
+# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
+# inside the manylinux images used to build wheels because manylinux images do not contain
+# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
+# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
+# switching back, but it may not work if that implicitly still requires Python libraries. In that
+# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
+# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
+# libs to be found, but that should be a last resort since it implies a dependency that isn't really
+# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
+# /opt/_internal"
+find_package(NumPy REQUIRED)
+
+execute_process(
+  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
+  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
+  ERROR_VARIABLE PYARROW_ERROR
+  RESULT_VARIABLE PYARROW_RESULT
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+if(${PYARROW_RESULT})
+  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
+endif()
+
+# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of
+# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
+# requirement for arrow headers infects all of cudf. These requirements will go away once all
+# scalar-related Cython code is removed from cudf.
+function(link_to_pyarrow_headers targets)
+  foreach(target IN LISTS targets)
+    # PyArrow headers require numpy headers.
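+    # Using PRIVATE keeps these include paths from propagating to consumers of the targets.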
+    target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
+    target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
+  endforeach()
+endfunction()
diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake
index c0351e8bbcc..278d6751c15 100644
--- a/python/cudf/cmake/Modules/WheelHelpers.cmake
+++ b/python/cudf/cmake/Modules/WheelHelpers.cmake
@@ -14,15 +14,15 @@
 include_guard(GLOBAL)
 
 # Making libraries available inside wheels by installing the associated targets.
-function(add_target_libs_to_wheel)
-  list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel")
+function(install_aliased_imported_targets)
+  list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets")
 
   set(options "")
-  set(one_value "LIB_DIR")
+  set(one_value "DESTINATION")
   set(multi_value "TARGETS")
   cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN})
 
-  message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'")
+  message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__DESTINATION}'")
 
   foreach(target IN LISTS __TARGETS)
 
@@ -39,33 +39,21 @@ function(add_target_libs_to_wheel)
     get_target_property(is_imported ${target} IMPORTED)
     if(NOT is_imported)
       # If the target isn't imported, install it into the wheel
-      install(TARGETS ${target} DESTINATION ${__LIB_DIR})
-      message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})")
+      install(TARGETS ${target} DESTINATION ${__DESTINATION})
+      message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})")
     else()
       # If the target is imported, make sure it's global
-      get_target_property(already_global ${target} IMPORTED_GLOBAL)
-      if(NOT already_global)
-        set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
+      get_target_property(type ${target} TYPE)
+      if(${type} STREQUAL "UNKNOWN_LIBRARY")
+        install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})
+        message(VERBOSE "install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})")
+      else()
+        install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION})
+        message(
+          VERBOSE
+          "install(IMPORTED_RUNTIME_ARTIFACTS $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})"
+        )
       endif()
-
-      # Find the imported target's library so we can copy it into the wheel
-      set(lib_loc)
-      foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG)
-        get_target_property(lib_loc ${target} ${prop})
-        if(lib_loc)
-          message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}")
-          break()
-        endif()
-        message(VERBOSE "${target} has no value for property ${prop}")
-      endforeach()
-
-      if(NOT lib_loc)
-        message(FATAL_ERROR "Found no libs to install for target ${target}")
-      endif()
-
-      # Copy the imported library into the wheel
-      install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})
-      message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})")
     endif()
   endforeach()
 endfunction()
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index c041c7f4842..6d4c2a33316 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -65,50 +65,7 @@ rapids_cython_create_modules(
 
 target_link_libraries(strings_udf cudf_strings_udf)
 
-# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
-# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
-# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
-# find_package(Python REQUIRED COMPONENTS Development NumPy)
-
-# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
-# inside the manylinux images used to build wheels because manylinux images do not contain
-# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
-# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
-# switching back, but it may not work if that implicitly still requires Python libraries. In that
-# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
-# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
-# libs to be found, but that should be a last resort since it implies a dependency that isn't really
-# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
-# /opt/_internal"
-find_package(NumPy REQUIRED)
-
-set(targets_using_dlpack interop)
-foreach(target IN LISTS targets_using_dlpack)
-  target_include_directories(${target} PRIVATE "${DLPACK_INCLUDE_DIR}")
-endforeach()
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  ERROR_VARIABLE PYARROW_ERROR
-  RESULT_VARIABLE PYARROW_RESULT
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-if(${PYARROW_RESULT})
-  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
-endif()
-
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
 
 add_subdirectory(cpp)
 add_subdirectory(io)
diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
index d7cbdeb5bda..55301789812 100644
--- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
@@ -22,11 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index 5185b2d4bb5..870a00f99a9 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -21,24 +21,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
 )
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
-
-# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for
-# more info
-find_package(NumPy REQUIRED)
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
-  # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt
index fc11f047ab4..081b84db79c 100644
--- a/python/cudf/cudf/_lib/strings/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt
@@ -40,14 +40,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
 
 add_subdirectory(convert)
 add_subdirectory(split)
diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
index f55bb1fb780..ebd7a793bf4 100644
--- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
@@ -22,11 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
index 2f2063482af..105e73788fe 100644
--- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
@@ -20,11 +20,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
index 3262b7d5ebe..4f3b9220a4f 100644
--- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
+++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
@@ -20,43 +20,5 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
 )
-
-# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
-# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
-# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
-# find_package(Python REQUIRED COMPONENTS Development NumPy)
-
-# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
-# inside the manylinux images used to build wheels because manylinux images do not contain
-# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
-# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
-# switching back, but it may not work if that implicitly still requires Python libraries. In that
-# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
-# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
-# libs to be found, but that should be a last resort since it implies a dependency that isn't really
-# necessary.
-# The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
-# /opt/_internal"
-find_package(NumPy REQUIRED)
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  ERROR_VARIABLE PYARROW_ERROR
-  RESULT_VARIABLE PYARROW_RESULT
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-if(${PYARROW_RESULT})
-  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
-endif()
-
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+include(../../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake)
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")