diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
index 456a3a289d1..e79b9a35aa2 100755
--- a/ci/build_wheel_cudf.sh
+++ b/ci/build_wheel_cudf.sh
@@ -5,7 +5,7 @@ set -euo pipefail

 package_dir="python/cudf"

-export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
+export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"

 ./ci/build_wheel.sh cudf ${package_dir}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 234ad1ebf03..dbd67a1e6ac 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
 option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
+mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

 set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
 if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
@@ -90,9 +92,6 @@ option(
 )
 mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)

-option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
-mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
-
 message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
 message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
 message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 05aa5730b4d..d971a32db70 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -34,20 +34,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
   # version number soname, just `${MAJOR_VERSION}00`
   set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00")

-  find_package(Python REQUIRED)
-  execute_process(
+  string(
+    APPEND
+    initial_code_block
+    [=[
+find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+execute_process(
   COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
   OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
   OUTPUT_STRIP_TRAILING_WHITESPACE
+  COMMAND_ERROR_IS_FATAL ANY
+)
+list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
+]=]
+  )
+  string(
+    APPEND
+    final_code_block
+    [=[
+list(POP_BACK CMAKE_PREFIX_PATH)
+]=]
   )
-  list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")

   rapids_find_generate_module(
     Arrow
     NO_CONFIG
     VERSION "${PYARROW_VERSION}"
     LIBRARY_NAMES "${PYARROW_LIB}"
     BUILD_EXPORT_SET cudf-exports
     INSTALL_EXPORT_SET cudf-exports
-    HEADER_NAMES arrow/python/arrow_to_pandas.h
+    HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block
+    FINAL_CODE_BLOCK final_code_block
   )

   find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
@@ -62,20 +77,20 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
   # benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
   # ignoring these limitations since we don't anticipate using this feature except for building
   # wheels.
-  EXECUTE_PROCESS(
+  execute_process(
     COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
     OUTPUT_VARIABLE GLIBC_EXECUTABLE
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )
-  EXECUTE_PROCESS(
+  execute_process(
     COMMAND ${GLIBC_EXECUTABLE}
     OUTPUT_VARIABLE GLIBC_OUTPUT
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )
-  STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
-  STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
-  STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
-  LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
+  string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
+  string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
+  string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
+  list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
   if(GLIBC_VERSION_MINOR LESS 28)
     target_compile_options(
       Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
@@ -85,16 +100,14 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)

   rapids_export_package(BUILD Arrow cudf-exports)
   rapids_export_package(INSTALL Arrow cudf-exports)
-
-  list(POP_BACK CMAKE_PREFIX_PATH)
 endfunction()

 # This function finds arrow and sets any additional necessary environment variables.
 function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON
-         ENABLE_PARQUET
+         ENABLE_PARQUET PYARROW_LIBARROW
 )
-  if(USE_LIBARROW_FROM_PYARROW)
+  if(PYARROW_LIBARROW)
     # Generate a FindArrow.cmake to find pyarrow's libarrow.so
     find_libarrow_in_python_wheel(${VERSION})
     set(ARROW_FOUND
@@ -434,5 +447,5 @@ endif()

 find_and_configure_arrow(
   ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
-  ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET}
+  ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW}
 )
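Reviewer note: moving the pyarrow lookup into INITIAL_CODE_BLOCK/FINAL_CODE_BLOCK means the generated FindArrow.cmake performs the lookup itself every time find_package(Arrow) runs, rather than get_arrow.cmake mutating CMAKE_PREFIX_PATH once in its own scope (hence the removal of the in-place list(APPEND ...)/list(POP_BACK ...) pair above). The [=[ ... ]=] bracket arguments keep the ${...} references unexpanded so they are evaluated inside the generated module, not at the point of definition. As a rough sketch of what the emitted module would contain (illustrative only, not the verbatim rapids-cmake output; the Arrow_LIBRARY/Arrow_INCLUDE_DIR names are assumed):

    # FindArrow.cmake (generated) -- illustrative sketch
    # initial_code_block: re-run the pyarrow lookup on every find_package(Arrow)
    find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
    execute_process(
      COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
      OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
      OUTPUT_STRIP_TRAILING_WHITESPACE
      COMMAND_ERROR_IS_FATAL ANY
    )
    list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
    # generated search, driven by the LIBRARY_NAMES/HEADER_NAMES arguments above
    find_library(Arrow_LIBRARY NAMES "libarrow.so.${PYARROW_SO_VER}00")
    find_path(Arrow_INCLUDE_DIR NAMES arrow/python/arrow_to_pandas.h)
    # final_code_block: undo the prefix-path mutation so it cannot leak
    list(POP_BACK CMAKE_PREFIX_PATH)

Because the lookup now lives inside the find module itself, downstream consumers of the exported cudf package repeat it in their own configure runs, which the old in-scope list manipulation could not provide.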
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index c0e16bedb1b..8a9ea00d640 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -33,28 +33,29 @@ project(
 option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
        OFF
 )
-option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF)
-option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
+option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
 mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

-# Always build wheels against the pyarrow libarrow.
-if(CUDF_BUILD_WHEELS)
-  set(USE_LIBARROW_FROM_PYARROW ON)
-endif()
+# Find Python early so that later commands can use it
+find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)

 # If the user requested it we attempt to find CUDF.
 if(FIND_CUDF_CPP)
+  include(rapids-cpm)
+  include(rapids-export)
+  include(rapids-find)
+  rapids_cpm_init()
+
   if(USE_LIBARROW_FROM_PYARROW)
-    # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO:
-    # These options should probably all become optional since in practice they aren't meaningful
-    # except in the case where we actually compile Arrow.
+    # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow
+    # libraries. These variables have no effect because we are always searching for arrow via
+    # pyarrow, but they must be set as they are required arguments to the function in
+    # get_arrow.cmake.
     set(CUDF_USE_ARROW_STATIC OFF)
     set(CUDF_ENABLE_ARROW_S3 OFF)
     set(CUDF_ENABLE_ARROW_ORC OFF)
     set(CUDF_ENABLE_ARROW_PYTHON OFF)
     set(CUDF_ENABLE_ARROW_PARQUET OFF)
-    include(rapids-find)
-    include(rapids-export)
     include(../../cpp/cmake/thirdparty/get_arrow.cmake)
   endif()

@@ -62,8 +63,6 @@ if(FIND_CUDF_CPP)
   # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
   # for the interop.pyx
-  include(rapids-cpm)
-  rapids_cpm_init()
   include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
 else()
   set(cudf_FOUND OFF)
@@ -74,42 +73,32 @@ include(rapids-cython)

 if(NOT cudf_FOUND)
   set(BUILD_TESTS OFF)
   set(BUILD_BENCHMARKS OFF)
+  set(CUDF_BUILD_TESTUTIL OFF)
+  set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+  set(CUDA_STATIC_RUNTIME ON)

-  set(_exclude_from_all "")
-  if(CUDF_BUILD_WHEELS)
-    # We don't build C++ tests when building wheels, so we can also omit the test util and shrink
-    # the wheel by avoiding embedding GTest.
-    set(CUDF_BUILD_TESTUTIL OFF)
-    set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+  add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)

-    # Statically link cudart if building wheels
-    set(CUDA_STATIC_RUNTIME ON)
-
-    # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
-    # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
-    set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)
-
-    # Don't install the cuDF C++ targets into wheels
-    set(_exclude_from_all EXCLUDE_FROM_ALL)
-  endif()
-
-  add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all})
-
-  if(CUDF_BUILD_WHEELS)
-    include(cmake/Modules/WheelHelpers.cmake)
-    get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
-    # Ensure all the shared objects we need at runtime are in the wheel
-    add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs})
-  endif()
-  # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
-  # library in the cudf directory as a single source of truth and modify the other rpaths
-  # appropriately.
+  # libcudf targets are excluded from the default build above via EXCLUDE_FROM_ALL to remove
+  # extraneous components like headers from libcudacxx, but we do need the libraries. However, we
+  # want to control where they are installed. Since there are multiple subpackages of cudf._lib
+  # that require access to libcudf, we place the library and all its dependent artifacts in the
+  # cudf directory as a single source of truth and modify the other rpaths appropriately.
   set(cython_lib_dir cudf)
-  install(TARGETS cudf DESTINATION ${cython_lib_dir})
+  include(cmake/Modules/WheelHelpers.cmake)
+  # TODO: This install is currently overzealous. We should only install the libraries that are
+  # downloaded by CPM during the build, not libraries that were found on the system. However, in
+  # practice right now this would only be a problem if libcudf was not found but some of the
+  # dependencies were, and we have no real use cases where that happens.
+  install_aliased_imported_targets(
+    TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp
+    DESTINATION ${cython_lib_dir}
+  )
 endif()

 rapids_cython_init()

+include(cmake/Modules/LinkPyarrowHeaders.cmake)
 add_subdirectory(cudf/_lib)
 add_subdirectory(udf_cpp)
diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
new file mode 100644
index 00000000000..8dc47c470ba
--- /dev/null
+++ b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
@@ -0,0 +1,55 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+include_guard(GLOBAL)
+
+# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
+# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
+# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
+# find_package(Python REQUIRED COMPONENTS Development NumPy)
+
+# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
+# inside the manylinux images used to build wheels because manylinux images do not contain
+# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
+# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
+# switching back, but it may not work if that implicitly still requires Python libraries. In that
+# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
+# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
+# libs to be found, but that should be a last resort since it implies a dependency that isn't really
+# necessary. The relevant command is "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz
+# -C /opt/_internal"
+find_package(NumPy REQUIRED)
+
+execute_process(
+  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
+  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
+  ERROR_VARIABLE PYARROW_ERROR
+  RESULT_VARIABLE PYARROW_RESULT
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+if(${PYARROW_RESULT})
+  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
+endif()
+
+# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of
+# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
+# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
+# These requirements will go away once all scalar-related Cython code is removed from cudf.
+function(link_to_pyarrow_headers targets)
+  foreach(target IN LISTS targets)
+    # PyArrow headers require numpy headers.
+    target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
+    target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
+  endforeach()
+endfunction()
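Reviewer note: link_to_pyarrow_headers collapses the find_package(NumPy)/execute_process/foreach boilerplate that was previously duplicated across the Cython CMakeLists.txt files touched below into a single call. A typical call site, abridged from the nvtext hunk later in this patch (the module is included once at the top level, so subdirectories only need the call):

    rapids_cython_create_modules(
      SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_
      ASSOCIATED_TARGETS cudf
    )
    link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")

rapids_cython_create_modules populates RAPIDS_CYTHON_CREATED_TARGETS, so the helper attaches the pyarrow and NumPy include directories to every module created in that directory.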
diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake
index c0351e8bbcc..278d6751c15 100644
--- a/python/cudf/cmake/Modules/WheelHelpers.cmake
+++ b/python/cudf/cmake/Modules/WheelHelpers.cmake
@@ -14,15 +14,15 @@ include_guard(GLOBAL)

 # Making libraries available inside wheels by installing the associated targets.
-function(add_target_libs_to_wheel)
-  list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel")
+function(install_aliased_imported_targets)
+  list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets")

   set(options "")
-  set(one_value "LIB_DIR")
+  set(one_value "DESTINATION")
   set(multi_value "TARGETS")
   cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN})

-  message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'")
+  message(VERBOSE "Installing targets '${__TARGETS}' into '${__DESTINATION}'")

   foreach(target IN LISTS __TARGETS)
@@ -39,33 +39,21 @@ function(add_target_libs_to_wheel)
     get_target_property(is_imported ${target} IMPORTED)
     if(NOT is_imported)
       # If the target isn't imported, install it into the wheel
-      install(TARGETS ${target} DESTINATION ${__LIB_DIR})
-      message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})")
+      install(TARGETS ${target} DESTINATION ${__DESTINATION})
+      message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})")
     else()
       # If the target is imported, make sure it's global
-      get_target_property(already_global ${target} IMPORTED_GLOBAL)
-      if(NOT already_global)
-        set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
+      get_target_property(type ${target} TYPE)
+      if(${type} STREQUAL "UNKNOWN_LIBRARY")
+        install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})
+        message(VERBOSE "install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})")
+      else()
+        install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION})
+        message(
+          VERBOSE
+          "install(IMPORTED_RUNTIME_ARTIFACTS $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})"
+        )
       endif()
-
-      # Find the imported target's library so we can copy it into the wheel
-      set(lib_loc)
-      foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG)
-        get_target_property(lib_loc ${target} ${prop})
-        if(lib_loc)
-          message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}")
-          break()
-        endif()
-        message(VERBOSE "${target} has no value for property ${prop}")
-      endforeach()
-
-      if(NOT lib_loc)
-        message(FATAL_ERROR "Found no libs to install for target ${target}")
-      endif()
-
-      # Copy the imported library into the wheel
-      install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})
-      message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})")
     endif()
   endforeach()
 endfunction()
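Reviewer note: the rewritten helper leans on install(IMPORTED_RUNTIME_ARTIFACTS), available since CMake 3.21, instead of manually probing the IMPORTED_LOCATION* properties. That command does not accept imported targets of UNKNOWN_LIBRARY type (such as a library located through a generated find module), hence the $<TARGET_FILE:...> fallback. A minimal sketch of the two paths, with illustrative target names and paths:

    # An UNKNOWN imported library, as produced by a plain find_library() result:
    add_library(dep_unknown UNKNOWN IMPORTED)
    set_target_properties(dep_unknown PROPERTIES IMPORTED_LOCATION "/path/to/libdep.so")
    # install(IMPORTED_RUNTIME_ARTIFACTS dep_unknown ...) would be rejected here,
    # so the file behind the target is installed directly:
    install(FILES $<TARGET_FILE:dep_unknown> DESTINATION cudf)

    # A SHARED imported library, as produced by most config packages:
    add_library(dep_shared SHARED IMPORTED)
    set_target_properties(dep_shared PROPERTIES IMPORTED_LOCATION "/path/to/libdep2.so")
    install(IMPORTED_RUNTIME_ARTIFACTS dep_shared DESTINATION cudf)

Either path lands the shared object next to the Cython modules in the wheel, as in the install_aliased_imported_targets call added to python/cudf/CMakeLists.txt above.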
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index c041c7f4842..6d4c2a33316 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -65,50 +65,7 @@ rapids_cython_create_modules(

 target_link_libraries(strings_udf cudf_strings_udf)

-# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
-# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
-# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
-# find_package(Python REQUIRED COMPONENTS Development NumPy)
-
-# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
-# inside the manylinux images used to build wheels because manylinux images do not contain
-# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
-# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
-# switching back, but it may not work if that implicitly still requires Python libraries. In that
-# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
-# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
-# libs to be found, but that should be a last resort since it implies a dependency that isn't really
-# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
-# /opt/_internal"
-find_package(NumPy REQUIRED)
-
-set(targets_using_dlpack interop)
-foreach(target IN LISTS targets_using_dlpack)
-  target_include_directories(${target} PRIVATE "${DLPACK_INCLUDE_DIR}")
-endforeach()
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  ERROR_VARIABLE PYARROW_ERROR
-  RESULT_VARIABLE PYARROW_RESULT
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-if(${PYARROW_RESULT})
-  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
-endif()
-
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")

 add_subdirectory(cpp)
 add_subdirectory(io)
diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
index d7cbdeb5bda..55301789812 100644
--- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
@@ -22,11 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_
   ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index 5185b2d4bb5..870a00f99a9 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -21,24 +21,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_
   ASSOCIATED_TARGETS cudf
 )
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
-
-# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for
-# more info
-find_package(NumPy REQUIRED)
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
-  # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt
index fc11f047ab4..081b84db79c 100644
--- a/python/cudf/cudf/_lib/strings/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt
@@ -40,14 +40,7 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
   ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")

 add_subdirectory(convert)
 add_subdirectory(split)
diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
index f55bb1fb780..ebd7a793bf4 100644
--- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
@@ -22,11 +22,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
   ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
index 2f2063482af..105e73788fe 100644
--- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
@@ -20,11 +20,4 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
   ASSOCIATED_TARGETS cudf
 )
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That requirement will go away once all
-# scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")
diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
index 3262b7d5ebe..4f3b9220a4f 100644
--- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
+++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt
@@ -20,43 +20,5 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
 )
-
-# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
-# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
-# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
-# find_package(Python REQUIRED COMPONENTS Development NumPy)
-
-# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
-# inside the manylinux images used to build wheels because manylinux images do not contain
-# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
-# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
-# switching back, but it may not work if that implicitly still requires Python libraries. In that
-# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
-# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
-# libs to be found, but that should be a last resort since it implies a dependency that isn't really
-# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
-# /opt/_internal"
-find_package(NumPy REQUIRED)
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  ERROR_VARIABLE PYARROW_ERROR
-  RESULT_VARIABLE PYARROW_RESULT
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-if(${PYARROW_RESULT})
-  message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
-endif()
-
-# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts
-# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
-# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers.
-# These requirements will go away once all scalar-related Cython code is removed from cudf.
-foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
+include(../../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake)
+link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}")