From 580795ea5efb93d80851a9f84387371ea58e5b01 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sun, 3 Dec 2023 17:13:40 -0800 Subject: [PATCH 01/35] Rely on rapids-cmake to make nvcomp targets global --- python/cudf/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index c0e16bedb1b..76835fb41bd 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -16,6 +16,8 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) set(cudf_version 24.02.00) +set(rapids-cmake-repo "vyasr/rapids-cmake") +set(rapids-cmake-branch "feat/nvcomp_global_targets") include(../../fetch_rapids.cmake) include(rapids-cuda) rapids_cuda_init_architectures(cudf-python) @@ -87,7 +89,7 @@ if(NOT cudf_FOUND) # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL - set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) + #set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) # Don't install the cuDF C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) From 4f782bfbf280c56325f1c7d99eaae25ed5b04f12 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 00:57:12 -0800 Subject: [PATCH 02/35] Handle installation the same for non-wheel FIND_CUDF_CPP builds as wheel builds --- python/cudf/CMakeLists.txt | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 76835fb41bd..530a51f93a5 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -97,17 +97,13 @@ if(NOT cudf_FOUND) add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all}) - if(CUDF_BUILD_WHEELS) - include(cmake/Modules/WheelHelpers.cmake) - get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) - # Ensure all the shared objects we need at runtime are in the wheel - add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}) - endif() # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the # library in the cudf directory as a single source of truth and modify the other rpaths # appropriately. + # Ensure all the shared objects we need at runtime are in the wheel set(cython_lib_dir cudf) - install(TARGETS cudf DESTINATION ${cython_lib_dir}) + include(cmake/Modules/WheelHelpers.cmake) + add_target_libs_to_wheel(LIB_DIR cudf TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp) endif() rapids_cython_init() From a6c0957be404902a388e6cf061bd535815c71342 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 08:09:24 -0800 Subject: [PATCH 03/35] Simplify installation helper function --- python/cudf/CMakeLists.txt | 2 +- python/cudf/cmake/Modules/WheelHelpers.cmake | 39 ++++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 530a51f93a5..14ae92f3125 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -103,7 +103,7 @@ if(NOT cudf_FOUND) # Ensure all the shared objects we need at runtime are in the wheel set(cython_lib_dir cudf) include(cmake/Modules/WheelHelpers.cmake) - add_target_libs_to_wheel(LIB_DIR cudf TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp) + install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake index c0351e8bbcc..140cdecac06 100644 --- a/python/cudf/cmake/Modules/WheelHelpers.cmake +++ b/python/cudf/cmake/Modules/WheelHelpers.cmake @@ -15,14 +15,14 @@ include_guard(GLOBAL) # Making libraries available inside wheels by installing the associated targets. function(add_target_libs_to_wheel) - list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel") + list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets") set(options "") - set(one_value "LIB_DIR") + set(one_value "DESTINATION") set(multi_value "TARGETS") cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN}) - message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'") + message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__DESTINATION}'") foreach(target IN LISTS __TARGETS) @@ -39,33 +39,18 @@ function(add_target_libs_to_wheel) get_target_property(is_imported ${target} IMPORTED) if(NOT is_imported) # If the target isn't imported, install it into the wheel - install(TARGETS ${target} DESTINATION ${__LIB_DIR}) - message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})") + install(TARGETS ${target} DESTINATION ${__DESTINATION}) + message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})") else() # If the target is imported, make sure it's global - get_target_property(already_global ${target} IMPORTED_GLOBAL) - if(NOT already_global) - set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + get_target_property(type ${target} TYPE) + if(${type} STREQUAL "UNKNOWN_LIBRARY") + install(FILES $ DESTINATION ${__DESTINATION}) + message(VERBOSE "install(FILES $ DESTINATION ${__DESTINATION})") + else() + install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION}) + message(VERBOSE "install(IMPORTED_RUNTIME_ARTIFACTS $ DESTINATION ${__DESTINATION})") endif() - - # Find the imported target's library so we can copy it into the wheel - set(lib_loc) - foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG) - get_target_property(lib_loc ${target} ${prop}) - if(lib_loc) - message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}") - break() - endif() - message(VERBOSE "${target} has no value for property ${prop}") - endforeach() - - if(NOT lib_loc) - message(FATAL_ERROR "Found no libs to install for target ${target}") - endif() - - # Copy the imported library into the wheel - install(FILES ${lib_loc} DESTINATION ${__LIB_DIR}) - message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})") endif() endforeach() endfunction() From 24cfcffbefe59d34a057e909ebabe31d9819da8e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 08:14:35 -0800 Subject: [PATCH 04/35] Cleanup and comments --- python/cudf/CMakeLists.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 14ae92f3125..6cf636d2dca 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -87,20 +87,19 @@ if(NOT cudf_FOUND) # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) - # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp - # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL - #set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) - # Don't install the cuDF C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) endif() add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all}) - # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the - # library in the cudf directory as a single source of truth and modify the other rpaths + # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to + # remove extraneous components like headers from libcudacxx, but we do need + # the libraries. However, we want to control where they are installed to. + # Since there are multiple subpackages of cudf._lib that require access to + # libcudf, we place the library and all its dependent artifacts in the cudf + # directory as a single source of truth and modify the other rpaths # appropriately. - # Ensure all the shared objects we need at runtime are in the wheel set(cython_lib_dir cudf) include(cmake/Modules/WheelHelpers.cmake) install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) From 0a4736882cb1ad3b31eab46eb4bdd39a1f436335 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 08:15:28 -0800 Subject: [PATCH 05/35] Also skip test util build when building for Python outside wheels --- python/cudf/CMakeLists.txt | 7 ++----- python/cudf/cmake/Modules/WheelHelpers.cmake | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 6cf636d2dca..be2932eddd4 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -76,14 +76,11 @@ include(rapids-cython) if(NOT cudf_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) + set(CUDF_BUILD_TESTUTIL OFF) + set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) set(_exclude_from_all "") if(CUDF_BUILD_WHEELS) - # We don't build C++ tests when building wheels, so we can also omit the test util and shrink - # the wheel by avoiding embedding GTest. - set(CUDF_BUILD_TESTUTIL OFF) - set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) - # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake index 140cdecac06..652c29646dd 100644 --- a/python/cudf/cmake/Modules/WheelHelpers.cmake +++ b/python/cudf/cmake/Modules/WheelHelpers.cmake @@ -14,7 +14,7 @@ include_guard(GLOBAL) # Making libraries available inside wheels by installing the associated targets. -function(add_target_libs_to_wheel) +function(install_aliased_imported_targets) list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets") set(options "") From 63701828bc89494c5f7c48f87cc33f57682bc255 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 08:57:20 -0800 Subject: [PATCH 06/35] Fully unify wheel builds with non-wheel Python builds --- ci/build_wheel_cudf.sh | 2 +- python/cudf/CMakeLists.txt | 48 ++++++++++++++------------------------ 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index 456a3a289d1..4caaa2ce346 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -5,7 +5,7 @@ set -euo pipefail package_dir="python/cudf" -export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF" ./ci/build_wheel.sh cudf ${package_dir} diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index be2932eddd4..9a8410acd7e 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -35,30 +35,26 @@ project( option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" OFF ) -option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) -option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) -# Always build wheels against the pyarrow libarrow. -if(CUDF_BUILD_WHEELS) - set(USE_LIBARROW_FROM_PYARROW ON) -endif() +# When building the Python package, always defer to pyarrow to tell us where to +# find libarrow. This works whether libarrow is installed by pyarrow itself +# (i.e. as part of a wheel) or as part of a separate libarrow package (i.e. in +# conda) because pyarrow always knows where it is. +set(USE_LIBARROW_FROM_PYARROW ON) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) - if(USE_LIBARROW_FROM_PYARROW) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO: - # These options should probably all become optional since in practice they aren't meaningful - # except in the case where we actually compile Arrow. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(rapids-find) - include(rapids-export) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) - endif() + # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO: + # These options should probably all become optional since in practice they aren't meaningful + # except in the case where we actually compile Arrow. + set(CUDF_USE_ARROW_STATIC OFF) + set(CUDF_ENABLE_ARROW_S3 OFF) + set(CUDF_ENABLE_ARROW_ORC OFF) + set(CUDF_ENABLE_ARROW_PYTHON OFF) + set(CUDF_ENABLE_ARROW_PARQUET OFF) + include(rapids-find) + include(rapids-export) + include(../../cpp/cmake/thirdparty/get_arrow.cmake) find_package(cudf ${cudf_version} REQUIRED) @@ -78,17 +74,9 @@ if(NOT cudf_FOUND) set(BUILD_BENCHMARKS OFF) set(CUDF_BUILD_TESTUTIL OFF) set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) + set(CUDA_STATIC_RUNTIME ON) - set(_exclude_from_all "") - if(CUDF_BUILD_WHEELS) - # Statically link cudart if building wheels - set(CUDA_STATIC_RUNTIME ON) - - # Don't install the cuDF C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all}) + add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to # remove extraneous components like headers from libcudacxx, but we do need From d907ca2dc0ee2c5607d0bf81528f882ceb668bbe Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 10:19:10 -0800 Subject: [PATCH 07/35] Remove unnecessary variable settings --- python/cudf/CMakeLists.txt | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 9a8410acd7e..e3e78be5c89 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -39,27 +39,23 @@ option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulti # When building the Python package, always defer to pyarrow to tell us where to # find libarrow. This works whether libarrow is installed by pyarrow itself # (i.e. as part of a wheel) or as part of a separate libarrow package (i.e. in -# conda) because pyarrow always knows where it is. +# conda) because pyarrow always knows where it is. libcudf must be built +# against an ABI-compatible arrow library for the package to load at runtime, +# so searching this way is always correct. set(USE_LIBARROW_FROM_PYARROW ON) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO: - # These options should probably all become optional since in practice they aren't meaningful - # except in the case where we actually compile Arrow. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) + # We need to find arrow before libcudf since libcudf requires it but doesn't + # bundle arrow libraries. include(rapids-find) include(rapids-export) include(../../cpp/cmake/thirdparty/get_arrow.cmake) find_package(cudf ${cudf_version} REQUIRED) - # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack - # for the interop.pyx + # an installed version of libcudf doesn't provide the dlpack headers so we + # need to download dlpack for the interop.pyx include(rapids-cpm) rapids_cpm_init() include(../../cpp/cmake/thirdparty/get_dlpack.cmake) From 57edaf4bc0b4f41aa10daad1ec6850430a16307c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 10:24:06 -0800 Subject: [PATCH 08/35] Test allowing C++ lib to install itself --- python/cudf/CMakeLists.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e3e78be5c89..1eaa55460bf 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -72,7 +72,7 @@ if(NOT cudf_FOUND) set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) set(CUDA_STATIC_RUNTIME ON) - add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) + add_subdirectory(../../cpp cudf-cpp) # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to # remove extraneous components like headers from libcudacxx, but we do need @@ -81,9 +81,13 @@ if(NOT cudf_FOUND) # libcudf, we place the library and all its dependent artifacts in the cudf # directory as a single source of truth and modify the other rpaths # appropriately. - set(cython_lib_dir cudf) - include(cmake/Modules/WheelHelpers.cmake) - install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) + #set(cython_lib_dir cudf) + #include(cmake/Modules/WheelHelpers.cmake) + # TODO: Currently this installation is overzealous. We really should only + # install libraries that were downloaded as part of the build (or arrow from + # pyarrow, a special case), not libraries that were found (e.g. in the conda + # environment). +# install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() From c30a571c0b0e4acc03a22e2984e9824095cc874b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 11:18:48 -0800 Subject: [PATCH 09/35] Revert to base rapids-cmake repo --- python/cudf/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 1eaa55460bf..3b2f3eb7547 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -16,8 +16,6 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) set(cudf_version 24.02.00) -set(rapids-cmake-repo "vyasr/rapids-cmake") -set(rapids-cmake-branch "feat/nvcomp_global_targets") include(../../fetch_rapids.cmake) include(rapids-cuda) rapids_cuda_init_architectures(cudf-python) From 007fac5106b6ca52104fa4a848866b6f7bb3ba71 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 11:26:59 -0800 Subject: [PATCH 10/35] Revert "Test allowing C++ lib to install itself" This reverts commit 2ace699e1ddf4463fa071cc245c6cd97946aed58. --- python/cudf/CMakeLists.txt | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 3b2f3eb7547..29c69cbd323 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -70,7 +70,7 @@ if(NOT cudf_FOUND) set(CUDF_BUILD_STREAMS_TEST_UTIL OFF) set(CUDA_STATIC_RUNTIME ON) - add_subdirectory(../../cpp cudf-cpp) + add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to # remove extraneous components like headers from libcudacxx, but we do need @@ -79,13 +79,9 @@ if(NOT cudf_FOUND) # libcudf, we place the library and all its dependent artifacts in the cudf # directory as a single source of truth and modify the other rpaths # appropriately. - #set(cython_lib_dir cudf) - #include(cmake/Modules/WheelHelpers.cmake) - # TODO: Currently this installation is overzealous. We really should only - # install libraries that were downloaded as part of the build (or arrow from - # pyarrow, a special case), not libraries that were found (e.g. in the conda - # environment). -# install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) + set(cython_lib_dir cudf) + include(cmake/Modules/WheelHelpers.cmake) + install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() From 4bf4e127c64a154ca9dc20e3ff360e32bb71a3b8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 11:50:57 -0800 Subject: [PATCH 11/35] Add comment --- python/cudf/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 29c69cbd323..855cafa0749 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -81,6 +81,11 @@ if(NOT cudf_FOUND) # appropriately. set(cython_lib_dir cudf) include(cmake/Modules/WheelHelpers.cmake) + # TODO: This install is currently overzealous. We should only install the + # libraries that are downloaded by CPM during the build, not libraries that + # were found on the system. However, in practice right this would only be a + # problem is if libcudf was not found but some of the dependencies were, and + # we have no real use cases where that happens. install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) endif() From fc06ea84ac38f93e7a07e61149785ce5bda07e9b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 11:58:12 -0800 Subject: [PATCH 12/35] Remove now unnecessary CMake args spec --- ci/build_wheel_cudf.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index 4caaa2ce346..6025a13d8e5 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -5,8 +5,6 @@ set -euo pipefail package_dir="python/cudf" -export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF" - ./ci/build_wheel.sh cudf ${package_dir} python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* From 4bfe83996afee1b3677dbd9e10724a428c234515 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 12:03:01 -0800 Subject: [PATCH 13/35] Fix style --- python/cudf/CMakeLists.txt | 45 ++++++++++---------- python/cudf/cmake/Modules/WheelHelpers.cmake | 5 ++- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 855cafa0749..e4164841a96 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -34,26 +34,25 @@ option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulti OFF ) -# When building the Python package, always defer to pyarrow to tell us where to -# find libarrow. This works whether libarrow is installed by pyarrow itself -# (i.e. as part of a wheel) or as part of a separate libarrow package (i.e. in -# conda) because pyarrow always knows where it is. libcudf must be built -# against an ABI-compatible arrow library for the package to load at runtime, -# so searching this way is always correct. +# When building the Python package, always defer to pyarrow to tell us where to find libarrow. This +# works whether libarrow is installed by pyarrow itself (i.e. as part of a wheel) or as part of a +# separate libarrow package (i.e. in conda) because pyarrow always knows where it is. libcudf must +# be built against an ABI-compatible arrow library for the package to load at runtime, so searching +# this way is always correct. set(USE_LIBARROW_FROM_PYARROW ON) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) - # We need to find arrow before libcudf since libcudf requires it but doesn't - # bundle arrow libraries. + # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow + # libraries. include(rapids-find) include(rapids-export) include(../../cpp/cmake/thirdparty/get_arrow.cmake) find_package(cudf ${cudf_version} REQUIRED) - # an installed version of libcudf doesn't provide the dlpack headers so we - # need to download dlpack for the interop.pyx + # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack + # for the interop.pyx include(rapids-cpm) rapids_cpm_init() include(../../cpp/cmake/thirdparty/get_dlpack.cmake) @@ -72,21 +71,21 @@ if(NOT cudf_FOUND) add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL) - # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to - # remove extraneous components like headers from libcudacxx, but we do need - # the libraries. However, we want to control where they are installed to. - # Since there are multiple subpackages of cudf._lib that require access to - # libcudf, we place the library and all its dependent artifacts in the cudf - # directory as a single source of truth and modify the other rpaths - # appropriately. + # libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous + # components like headers from libcudacxx, but we do need the libraries. However, we want to + # control where they are installed to. Since there are multiple subpackages of cudf._lib that + # require access to libcudf, we place the library and all its dependent artifacts in the cudf + # directory as a single source of truth and modify the other rpaths appropriately. set(cython_lib_dir cudf) include(cmake/Modules/WheelHelpers.cmake) - # TODO: This install is currently overzealous. We should only install the - # libraries that are downloaded by CPM during the build, not libraries that - # were found on the system. However, in practice right this would only be a - # problem is if libcudf was not found but some of the dependencies were, and - # we have no real use cases where that happens. - install_aliased_imported_targets(TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION ${cython_lib_dir}) + # TODO: This install is currently overzealous. We should only install the libraries that are + # downloaded by CPM during the build, not libraries that were found on the system. However, in + # practice right this would only be a problem is if libcudf was not found but some of the + # dependencies were, and we have no real use cases where that happens. + install_aliased_imported_targets( + TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp + DESTINATION ${cython_lib_dir} + ) endif() rapids_cython_init() diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake index 652c29646dd..278d6751c15 100644 --- a/python/cudf/cmake/Modules/WheelHelpers.cmake +++ b/python/cudf/cmake/Modules/WheelHelpers.cmake @@ -49,7 +49,10 @@ function(install_aliased_imported_targets) message(VERBOSE "install(FILES $ DESTINATION ${__DESTINATION})") else() install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION}) - message(VERBOSE "install(IMPORTED_RUNTIME_ARTIFACTS $ DESTINATION ${__DESTINATION})") + message( + VERBOSE + "install(IMPORTED_RUNTIME_ARTIFACTS $ DESTINATION ${__DESTINATION})" + ) endif() endif() endforeach() From 0b8749e0c1991c7905e0bef40e358ffeaa6f7aec Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 13:39:05 -0800 Subject: [PATCH 14/35] Centralize logic for pyarrow/numpy headers --- python/cudf/CMakeLists.txt | 1 + python/cudf/cudf/_lib/CMakeLists.txt | 45 +------------------ python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 9 +--- .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt | 22 +-------- python/cudf/cudf/_lib/strings/CMakeLists.txt | 9 +--- .../cudf/_lib/strings/convert/CMakeLists.txt | 9 +--- .../cudf/_lib/strings/split/CMakeLists.txt | 9 +--- .../cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 42 +---------------- 8 files changed, 9 insertions(+), 137 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e4164841a96..002f8c91c2c 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -90,6 +90,7 @@ endif() rapids_cython_init() +include(cmake/Modules/LinkPyarrowHeaders.cmake) add_subdirectory(cudf/_lib) add_subdirectory(udf_cpp) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index c041c7f4842..6d4c2a33316 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -65,50 +65,7 @@ rapids_cython_create_modules( target_link_libraries(strings_udf cudf_strings_udf) -# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was -# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in -# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. -# find_package(Python REQUIRED COMPONENTS Development NumPy) - -# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake -# inside the manylinux images used to build wheels because manylinux images do not contain -# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should -# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try -# switching back, but it may not work if that implicitly still requires Python libraries. In that -# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution -# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static -# libs to be found, but that should be a last resort since it implies a dependency that isn't really -# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C -# /opt/_internal" -find_package(NumPy REQUIRED) - -set(targets_using_dlpack interop) -foreach(target IN LISTS targets_using_dlpack) - target_include_directories(${target} PRIVATE "${DLPACK_INCLUDE_DIR}") -endforeach() - -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" - OUTPUT_VARIABLE PYARROW_INCLUDE_DIR - ERROR_VARIABLE PYARROW_ERROR - RESULT_VARIABLE PYARROW_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE -) - -if(${PYARROW_RESULT}) - message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") -endif() - -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. -# These requirements will go away once all scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") add_subdirectory(cpp) add_subdirectory(io) diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index d7cbdeb5bda..55301789812 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -22,11 +22,4 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ASSOCIATED_TARGETS cudf ) -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. -# These requirements will go away once all scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index 5185b2d4bb5..870a00f99a9 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -21,24 +21,4 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) - -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" - OUTPUT_VARIABLE PYARROW_INCLUDE_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE -) - -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() - -# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for -# more info -find_package(NumPy REQUIRED) -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24. - # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index fc11f047ab4..081b84db79c 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -40,14 +40,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That requirement will go away once all -# scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") add_subdirectory(convert) add_subdirectory(split) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index f55bb1fb780..ebd7a793bf4 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -22,11 +22,4 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That requirement will go away once all -# scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 2f2063482af..105e73788fe 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -20,11 +20,4 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf ) -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That requirement will go away once all -# scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index 3262b7d5ebe..a59e1ac7291 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -20,43 +20,5 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) - -# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was -# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in -# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. -# find_package(Python REQUIRED COMPONENTS Development NumPy) - -# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake -# inside the manylinux images used to build wheels because manylinux images do not contain -# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should -# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try -# switching back, but it may not work if that implicitly still requires Python libraries. In that -# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution -# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static -# libs to be found, but that should be a last resort since it implies a dependency that isn't really -# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C -# /opt/_internal" -find_package(NumPy REQUIRED) - -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" - OUTPUT_VARIABLE PYARROW_INCLUDE_DIR - ERROR_VARIABLE PYARROW_ERROR - RESULT_VARIABLE PYARROW_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE -) - -if(${PYARROW_RESULT}) - message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") -endif() - -# TODO: Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. That in turn requires including numpy headers. -# These requirements will go away once all scalar-related Cython code is removed from cudf. -foreach(target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") -endforeach() +include(cmake/Modules/LinkPyarrowHeaders.cmake +link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") From bab05c62b624e95ccd187951091acf2177744ac1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 13:53:02 -0800 Subject: [PATCH 15/35] Add missed files --- .../cmake/Modules/LinkPyarrowHeaders.cmake | 57 +++++++++++++++++++ .../cudf_kafka/cmake/LinkPyarrowHeaders.cmake | 1 + 2 files changed, 58 insertions(+) create mode 100644 python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake create mode 120000 python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake new file mode 100644 index 00000000000..d0c2b5df6f2 --- /dev/null +++ b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake @@ -0,0 +1,57 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= +include_guard(GLOBAL) + +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) + +# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was +# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in +# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. +# find_package(Python REQUIRED COMPONENTS Development NumPy) + +# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake +# inside the manylinux images used to build wheels because manylinux images do not contain +# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should +# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try +# switching back, but it may not work if that implicitly still requires Python libraries. In that +# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution +# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static +# libs to be found, but that should be a last resort since it implies a dependency that isn't really +# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C +# /opt/_internal" +find_package(NumPy REQUIRED) + +execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" + OUTPUT_VARIABLE PYARROW_INCLUDE_DIR + ERROR_VARIABLE PYARROW_ERROR + RESULT_VARIABLE PYARROW_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +if(${PYARROW_RESULT}) + message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") +endif() + +# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts +# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. These requirements will +# go away once all scalar-related Cython code is removed from cudf. +function(link_to_pyarrow_headers targets) + foreach(target IN LISTS targets) + # PyArrow headers require numpy headers. + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") + endforeach() +endfunction() diff --git a/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake b/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake new file mode 120000 index 00000000000..04b942e12f0 --- /dev/null +++ b/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake @@ -0,0 +1 @@ +../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake \ No newline at end of file From 1849e7df0f35f447f688ecca9007a5ef65efb171 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 14:12:25 -0800 Subject: [PATCH 16/35] Just have cudf-kafka pull from cudf --- python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake | 1 - python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 120000 python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake diff --git a/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake b/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake deleted file mode 120000 index 04b942e12f0..00000000000 --- a/python/cudf_kafka/cmake/LinkPyarrowHeaders.cmake +++ /dev/null @@ -1 +0,0 @@ -../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake \ No newline at end of file diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index a59e1ac7291..4f3b9220a4f 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -20,5 +20,5 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) -include(cmake/Modules/LinkPyarrowHeaders.cmake +include(../../../cudf/cmake/Modules/LinkPyarrowHeaders.cmake) link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") From fab060250e2b376421fc0b2fe6a09d1713c60439 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 14:16:31 -0800 Subject: [PATCH 17/35] One more style fix --- python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake index d0c2b5df6f2..1b839ba8bc6 100644 --- a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake +++ b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake @@ -44,10 +44,10 @@ if(${PYARROW_RESULT}) message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") endif() -# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts -# of cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. These requirements will -# go away once all scalar-related Cython code is removed from cudf. +# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of +# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the +# requirement for arrow headers infects all of cudf. These requirements will go away once all +# scalar-related Cython code is removed from cudf. function(link_to_pyarrow_headers targets) foreach(target IN LISTS targets) # PyArrow headers require numpy headers. From a0e6029ac23d494309530e5166668c340a5f5e6a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 15:17:33 -0800 Subject: [PATCH 18/35] Put back variables needed for get_arrow.cmake --- python/cudf/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 002f8c91c2c..03bb7ac399d 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -47,6 +47,11 @@ if(FIND_CUDF_CPP) # libraries. include(rapids-find) include(rapids-export) + set(CUDF_USE_ARROW_STATIC OFF) + set(CUDF_ENABLE_ARROW_S3 OFF) + set(CUDF_ENABLE_ARROW_ORC OFF) + set(CUDF_ENABLE_ARROW_PYTHON OFF) + set(CUDF_ENABLE_ARROW_PARQUET OFF) include(../../cpp/cmake/thirdparty/get_arrow.cmake) find_package(cudf ${cudf_version} REQUIRED) From 7c0690dd09632ae22f153786e49931532d15646a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 15:19:52 -0800 Subject: [PATCH 19/35] Add comment --- python/cudf/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 03bb7ac399d..c7195d628f9 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -47,6 +47,9 @@ if(FIND_CUDF_CPP) # libraries. include(rapids-find) include(rapids-export) + # These variables have no effect because we are always searching for arrow + # via pyarrow, but they must be set as they are required arguments to the + # function in get_arrow.cmake. set(CUDF_USE_ARROW_STATIC OFF) set(CUDF_ENABLE_ARROW_S3 OFF) set(CUDF_ENABLE_ARROW_ORC OFF) From 38e039c2c775bf93fc2721ffa75c9a5d31572c2a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Dec 2023 15:23:52 -0800 Subject: [PATCH 20/35] Style --- python/cudf/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index c7195d628f9..433786ae7d2 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -47,9 +47,8 @@ if(FIND_CUDF_CPP) # libraries. include(rapids-find) include(rapids-export) - # These variables have no effect because we are always searching for arrow - # via pyarrow, but they must be set as they are required arguments to the - # function in get_arrow.cmake. + # These variables have no effect because we are always searching for arrow via pyarrow, but they + # must be set as they are required arguments to the function in get_arrow.cmake. set(CUDF_USE_ARROW_STATIC OFF) set(CUDF_ENABLE_ARROW_S3 OFF) set(CUDF_ENABLE_ARROW_ORC OFF) From 1dc7517cc0f158e77f8c16b9361b91bc23d4a482 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 05:41:26 +0000 Subject: [PATCH 21/35] Check for the presence of libarrow.so.* inside pyarrow to determine whether to search there or not --- cpp/cmake/thirdparty/get_arrow.cmake | 1 - python/cudf/CMakeLists.txt | 25 ++++++++++++++----- .../cmake/Modules/LinkPyarrowHeaders.cmake | 2 -- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 05aa5730b4d..03393a41e7a 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -34,7 +34,6 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) # version number soname, just `${MAJOR_VERSION}00` set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") - find_package(Python REQUIRED) execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 433786ae7d2..2fd02e6cb3a 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -34,12 +34,25 @@ option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulti OFF ) -# When building the Python package, always defer to pyarrow to tell us where to find libarrow. This -# works whether libarrow is installed by pyarrow itself (i.e. as part of a wheel) or as part of a -# separate libarrow package (i.e. in conda) because pyarrow always knows where it is. libcudf must -# be built against an ABI-compatible arrow library for the package to load at runtime, so searching -# this way is always correct. -set(USE_LIBARROW_FROM_PYARROW ON) +# If the pyarrow package contains libarrow we want to link against it directly +# instead of searching for a separate libarrow. Even if libcudf was built +# against a different libarrow, that library would have to be ABI-compatible +# with the one in pyarrow for the packages to work together, and for wheels we +# must use the library in the pyarrow wheel. +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) + +execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" + OUTPUT_VARIABLE _pyarrow_lib_dir + OUTPUT_STRIP_TRAILING_WHITESPACE +) +file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}.*") +list(LENGTH _pyarrow_libs _pyarrow_libs_len) +if(_pyarrow_libs_len GREATER 0) + set(USE_LIBARROW_FROM_PYARROW ON) +else() + set(USE_LIBARROW_FROM_PYARROW OFF) +endif() # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) diff --git a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake index 1b839ba8bc6..8dc47c470ba 100644 --- a/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake +++ b/python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake @@ -13,8 +13,6 @@ # ============================================================================= include_guard(GLOBAL) -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) - # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. From 1db2665c2a8f7bf3b327725efef722f1b3f66b52 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 05:42:17 +0000 Subject: [PATCH 22/35] Allow for the linker name to be found instead of the soname --- python/cudf/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 2fd02e6cb3a..ff73fa42250 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -46,7 +46,7 @@ execute_process( OUTPUT_VARIABLE _pyarrow_lib_dir OUTPUT_STRIP_TRAILING_WHITESPACE ) -file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}.*") +file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}*") list(LENGTH _pyarrow_libs _pyarrow_libs_len) if(_pyarrow_libs_len GREATER 0) set(USE_LIBARROW_FROM_PYARROW ON) From c1d759fbc3c7aa328ce26a27105d2893d5107aa1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 07:17:14 +0000 Subject: [PATCH 23/35] Style --- python/cudf/CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index ff73fa42250..655f68f15d1 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -34,11 +34,10 @@ option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulti OFF ) -# If the pyarrow package contains libarrow we want to link against it directly -# instead of searching for a separate libarrow. Even if libcudf was built -# against a different libarrow, that library would have to be ABI-compatible -# with the one in pyarrow for the packages to work together, and for wheels we -# must use the library in the pyarrow wheel. +# If the pyarrow package contains libarrow we want to link against it directly instead of searching +# for a separate libarrow. Even if libcudf was built against a different libarrow, that library +# would have to be ABI-compatible with the one in pyarrow for the packages to work together, and for +# wheels we must use the library in the pyarrow wheel. find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) execute_process( From d29aeccb61cecf125297dd2c6d4d503ac82f5842 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 07:17:28 +0000 Subject: [PATCH 24/35] Init cpm earlier --- python/cudf/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 655f68f15d1..8bc94ab3a68 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -59,6 +59,8 @@ if(FIND_CUDF_CPP) # libraries. include(rapids-find) include(rapids-export) + include(rapids-cpm) + rapids_cpm_init() # These variables have no effect because we are always searching for arrow via pyarrow, but they # must be set as they are required arguments to the function in get_arrow.cmake. set(CUDF_USE_ARROW_STATIC OFF) @@ -72,8 +74,6 @@ if(FIND_CUDF_CPP) # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack # for the interop.pyx - include(rapids-cpm) - rapids_cpm_init() include(../../cpp/cmake/thirdparty/get_dlpack.cmake) else() set(cudf_FOUND OFF) From ff42ca3e449e7a3ad4d8ba9151a2573c977ec3c9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 18:15:55 +0000 Subject: [PATCH 25/35] Move pyarrow detection to get_arrow.cmake so that it also affects C++ builds --- cpp/CMakeLists.txt | 3 --- cpp/cmake/thirdparty/get_arrow.cmake | 36 ++++++++++++++++++++++++---- python/cudf/CMakeLists.txt | 18 +------------- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 234ad1ebf03..a4a900b9911 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -90,9 +90,6 @@ option( ) mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) -option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) - message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 03393a41e7a..3475dab2623 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -32,7 +32,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) # `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"), # the soname is not generated by concatenating the major, minor, and patch versions into a single # version number soname, just `${MAJOR_VERSION}00` - set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") + set(PYARROW_LIB "libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_SO_VER}00") execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" @@ -90,10 +90,10 @@ endfunction() # This function finds arrow and sets any additional necessary environment variables. function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON - ENABLE_PARQUET + ENABLE_PARQUET PYARROW_LIBARROW ) - if(USE_LIBARROW_FROM_PYARROW) + if(PYARROW_LIBARROW) # Generate a FindArrow.cmake to find pyarrow's libarrow.so find_libarrow_in_python_wheel(${VERSION}) set(ARROW_FOUND @@ -431,7 +431,35 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() +# If the pyarrow package contains libarrow we want to link against it directly instead of searching +# for a separate libarrow. Even if libcudf was built against a different libarrow, that library +# would have to be ABI-compatible with the one in pyarrow for the packages to work together, and for +# wheels we must use the library in the pyarrow wheel, so it's best to simply be consistent. +find_package(Python 3.9 COMPONENTS Interpreter) + +if (${Python_FOUND}) + execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import importlib; print(importlib.util.find_spec('pyarrow') is not None)" + OUTPUT_VARIABLE _pyarrow_installed + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if (${_pyarrow_installed} STREQUAL "True") + execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" + OUTPUT_VARIABLE _pyarrow_lib_dir + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}*") + list(LENGTH _pyarrow_libs _pyarrow_libs_len) + if(_pyarrow_libs_len GREATER 0) + set(USE_LIBARROW_FROM_PYARROW ON) + else() + set(USE_LIBARROW_FROM_PYARROW OFF) + endif() + endif() +endif() + find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} - ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} + ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} ) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 8bc94ab3a68..a76a38e2728 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -34,25 +34,9 @@ option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulti OFF ) -# If the pyarrow package contains libarrow we want to link against it directly instead of searching -# for a separate libarrow. Even if libcudf was built against a different libarrow, that library -# would have to be ABI-compatible with the one in pyarrow for the packages to work together, and for -# wheels we must use the library in the pyarrow wheel. +# Find Python early so that later commands can use it find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE _pyarrow_lib_dir - OUTPUT_STRIP_TRAILING_WHITESPACE -) -file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}*") -list(LENGTH _pyarrow_libs _pyarrow_libs_len) -if(_pyarrow_libs_len GREATER 0) - set(USE_LIBARROW_FROM_PYARROW ON) -else() - set(USE_LIBARROW_FROM_PYARROW OFF) -endif() - # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow From cc20ac821e31792309e0f05a1d1892bd995e1b8a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 18:28:35 +0000 Subject: [PATCH 26/35] Style --- cpp/cmake/thirdparty/get_arrow.cmake | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 3475dab2623..0956389eb53 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -437,13 +437,14 @@ endif() # wheels we must use the library in the pyarrow wheel, so it's best to simply be consistent. find_package(Python 3.9 COMPONENTS Interpreter) -if (${Python_FOUND}) +if(${Python_FOUND}) execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import importlib; print(importlib.util.find_spec('pyarrow') is not None)" + COMMAND "${Python_EXECUTABLE}" -c + "import importlib; print(importlib.util.find_spec('pyarrow') is not None)" OUTPUT_VARIABLE _pyarrow_installed OUTPUT_STRIP_TRAILING_WHITESPACE ) - if (${_pyarrow_installed} STREQUAL "True") + if(${_pyarrow_installed} STREQUAL "True") execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" OUTPUT_VARIABLE _pyarrow_lib_dir From 7c7ea76cfc81586ec9a1bb42972c277455ad89a5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 18:29:47 +0000 Subject: [PATCH 27/35] Ensure import works on all Python versions --- cpp/cmake/thirdparty/get_arrow.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 0956389eb53..744932460e5 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -440,7 +440,7 @@ find_package(Python 3.9 COMPONENTS Interpreter) if(${Python_FOUND}) execute_process( COMMAND "${Python_EXECUTABLE}" -c - "import importlib; print(importlib.util.find_spec('pyarrow') is not None)" + "import importlib.util; print(importlib.util.find_spec('pyarrow') is not None)" OUTPUT_VARIABLE _pyarrow_installed OUTPUT_STRIP_TRAILING_WHITESPACE ) From d68cf41349594aa96f8fd9c993f0aa7d658ed6fb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 18:46:30 +0000 Subject: [PATCH 28/35] Ensure variable is always set --- cpp/cmake/thirdparty/get_arrow.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 744932460e5..6d889f9f181 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -437,6 +437,7 @@ endif() # wheels we must use the library in the pyarrow wheel, so it's best to simply be consistent. find_package(Python 3.9 COMPONENTS Interpreter) +set(USE_LIBARROW_FROM_PYARROW OFF) if(${Python_FOUND}) execute_process( COMMAND "${Python_EXECUTABLE}" -c @@ -454,8 +455,6 @@ if(${Python_FOUND}) list(LENGTH _pyarrow_libs _pyarrow_libs_len) if(_pyarrow_libs_len GREATER 0) set(USE_LIBARROW_FROM_PYARROW ON) - else() - set(USE_LIBARROW_FROM_PYARROW OFF) endif() endif() endif() From d26798a93e87db61cfb83d8747e8de5f0726f7f0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 20:33:42 +0000 Subject: [PATCH 29/35] Stop using CMAKE_SHARED_LIBRARY_SUFFIX --- cpp/cmake/thirdparty/get_arrow.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 6d889f9f181..c0646e44f4b 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -32,7 +32,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) # `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"), # the soname is not generated by concatenating the major, minor, and patch versions into a single # version number soname, just `${MAJOR_VERSION}00` - set(PYARROW_LIB "libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_SO_VER}00") + set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" @@ -451,7 +451,7 @@ if(${Python_FOUND}) OUTPUT_VARIABLE _pyarrow_lib_dir OUTPUT_STRIP_TRAILING_WHITESPACE ) - file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.${CMAKE_SHARED_LIBRARY_SUFFIX}*") + file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.so*") list(LENGTH _pyarrow_libs _pyarrow_libs_len) if(_pyarrow_libs_len GREATER 0) set(USE_LIBARROW_FROM_PYARROW ON) From 15e7166458427f1f642240e43a1156b31bc99974 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 23:03:39 +0000 Subject: [PATCH 30/35] Make sure libcudf exports its pyarrow dependency --- cpp/cmake/thirdparty/get_arrow.cmake | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index c0646e44f4b..0530e5dfd85 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -40,6 +40,21 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) OUTPUT_STRIP_TRAILING_WHITESPACE ) list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") + + string( + APPEND + initial_code_block + [=[ +find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) +execute_process( + COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" + OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY +) +list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") +]=] + ) rapids_find_generate_module( Arrow NO_CONFIG VERSION "${PYARROW_VERSION}" @@ -47,6 +62,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports HEADER_NAMES arrow/python/arrow_to_pandas.h + INITIAL_CODE_BLOCK initial_code_block ) find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) From 355b0b26e592775ca925b21e60c15cad7f09d09a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 5 Dec 2023 23:13:14 +0000 Subject: [PATCH 31/35] Fix style --- cpp/cmake/thirdparty/get_arrow.cmake | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 0530e5dfd85..d09f3f46d81 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -42,9 +42,9 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") string( - APPEND - initial_code_block - [=[ + APPEND + initial_code_block + [=[ find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" @@ -61,8 +61,7 @@ list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") LIBRARY_NAMES "${PYARROW_LIB}" BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports - HEADER_NAMES arrow/python/arrow_to_pandas.h - INITIAL_CODE_BLOCK initial_code_block + HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block ) find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) From 7224c9b1fb95fcd3a1232213212aa68cfff57792 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 8 Dec 2023 20:19:21 +0000 Subject: [PATCH 32/35] Address most reviews --- cpp/cmake/thirdparty/get_arrow.cmake | 31 +++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index d09f3f46d81..f3a6de52707 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -34,12 +34,9 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) # version number soname, just `${MAJOR_VERSION}00` set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") - execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") + if(NOT DEFINED Python_EXECUTABLE) + message(FATAL_ERROR "You must run FindPython before calling find_libarrow_in_python_wheel") + endif() string( APPEND @@ -53,6 +50,13 @@ execute_process( COMMAND_ERROR_IS_FATAL ANY ) list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") +]=] + ) + string( + APPEND + final_code_block + [=[ +list(POP_BACK CMAKE_PREFIX_PATH) ]=] ) rapids_find_generate_module( @@ -62,6 +66,7 @@ list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block + FINAL_CODE_BLOCK final_code_block ) find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) @@ -76,20 +81,20 @@ list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") # benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently # ignoring these limitations since we don't anticipate using this feature except for building # wheels. - EXECUTE_PROCESS( + execute_process( COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6 OUTPUT_VARIABLE GLIBC_EXECUTABLE OUTPUT_STRIP_TRAILING_WHITESPACE ) - EXECUTE_PROCESS( + execute_process( COMMAND ${GLIBC_EXECUTABLE} OUTPUT_VARIABLE GLIBC_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ) - STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT}) - STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION}) - STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION}) - LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR) + string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT}) + string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION}) + string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION}) + list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR) if(GLIBC_VERSION_MINOR LESS 28) target_compile_options( Arrow::Arrow INTERFACE "$<$:-D_GLIBCXX_USE_CXX11_ABI=0>" @@ -99,8 +104,6 @@ list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") rapids_export_package(BUILD Arrow cudf-exports) rapids_export_package(INSTALL Arrow cudf-exports) - - list(POP_BACK CMAKE_PREFIX_PATH) endfunction() # This function finds arrow and sets any additional necessary environment variables. From e8a90becc91b8a380c168fa4ada5cacd781bc584 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 8 Dec 2023 20:30:38 +0000 Subject: [PATCH 33/35] Go back to making USE_LIBARROW_FROM_PYARROW explicitly required --- ci/build_wheel_cudf.sh | 2 ++ cpp/CMakeLists.txt | 2 ++ cpp/cmake/thirdparty/get_arrow.cmake | 32 ---------------------------- python/cudf/CMakeLists.txt | 2 ++ 4 files changed, 6 insertions(+), 32 deletions(-) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index 6025a13d8e5..e79b9a35aa2 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -5,6 +5,8 @@ set -euo pipefail package_dir="python/cudf" +export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON" + ./ci/build_wheel.sh cudf ${package_dir} python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a4a900b9911..dbd67a1e6ac 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) +option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) +mark_as_advanced(USE_LIBARROW_FROM_PYARROW) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index f3a6de52707..d971a32db70 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -34,10 +34,6 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) # version number soname, just `${MAJOR_VERSION}00` set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") - if(NOT DEFINED Python_EXECUTABLE) - message(FATAL_ERROR "You must run FindPython before calling find_libarrow_in_python_wheel") - endif() - string( APPEND initial_code_block @@ -449,34 +445,6 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() -# If the pyarrow package contains libarrow we want to link against it directly instead of searching -# for a separate libarrow. Even if libcudf was built against a different libarrow, that library -# would have to be ABI-compatible with the one in pyarrow for the packages to work together, and for -# wheels we must use the library in the pyarrow wheel, so it's best to simply be consistent. -find_package(Python 3.9 COMPONENTS Interpreter) - -set(USE_LIBARROW_FROM_PYARROW OFF) -if(${Python_FOUND}) - execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "import importlib.util; print(importlib.util.find_spec('pyarrow') is not None)" - OUTPUT_VARIABLE _pyarrow_installed - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - if(${_pyarrow_installed} STREQUAL "True") - execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE _pyarrow_lib_dir - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - file(GLOB _pyarrow_libs "${_pyarrow_lib_dir}/libarrow.so*") - list(LENGTH _pyarrow_libs _pyarrow_libs_len) - if(_pyarrow_libs_len GREATER 0) - set(USE_LIBARROW_FROM_PYARROW ON) - endif() - endif() -endif() - find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index a76a38e2728..9b3c39ee268 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -33,6 +33,8 @@ project( option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" OFF ) +option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) +mark_as_advanced(USE_LIBARROW_FROM_PYARROW) # Find Python early so that later commands can use it find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) From c5af8fce84c24e689ffa2dd0a60a14506e745672 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 8 Dec 2023 21:11:51 +0000 Subject: [PATCH 34/35] Only include the pre-libcudf find in python if we're forcing libarrow from pyarrow --- python/cudf/CMakeLists.txt | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 9b3c39ee268..4318339ed4e 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -41,20 +41,22 @@ find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) - # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. - include(rapids-find) - include(rapids-export) - include(rapids-cpm) - rapids_cpm_init() - # These variables have no effect because we are always searching for arrow via pyarrow, but they - # must be set as they are required arguments to the function in get_arrow.cmake. - set(CUDF_USE_ARROW_STATIC OFF) - set(CUDF_ENABLE_ARROW_S3 OFF) - set(CUDF_ENABLE_ARROW_ORC OFF) - set(CUDF_ENABLE_ARROW_PYTHON OFF) - set(CUDF_ENABLE_ARROW_PARQUET OFF) - include(../../cpp/cmake/thirdparty/get_arrow.cmake) + if(USE_LIBARROW_FROM_PYARROW) + # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow + # libraries. + include(rapids-find) + include(rapids-export) + include(rapids-cpm) + rapids_cpm_init() + # These variables have no effect because we are always searching for arrow via pyarrow, but they + # must be set as they are required arguments to the function in get_arrow.cmake. + set(CUDF_USE_ARROW_STATIC OFF) + set(CUDF_ENABLE_ARROW_S3 OFF) + set(CUDF_ENABLE_ARROW_ORC OFF) + set(CUDF_ENABLE_ARROW_PYTHON OFF) + set(CUDF_ENABLE_ARROW_PARQUET OFF) + include(../../cpp/cmake/thirdparty/get_arrow.cmake) + endif() find_package(cudf ${cudf_version} REQUIRED) From 2fd5b5b7a0a52f5c795711c3229b220daf1638cb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 8 Dec 2023 21:33:34 +0000 Subject: [PATCH 35/35] Set up rapids-cmake functions unconditionally since dlpack needs them --- python/cudf/CMakeLists.txt | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 4318339ed4e..8a9ea00d640 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -41,15 +41,16 @@ find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) + include(rapids-cpm) + include(rapids-export) + include(rapids-find) + rapids_cpm_init() + if(USE_LIBARROW_FROM_PYARROW) # We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow - # libraries. - include(rapids-find) - include(rapids-export) - include(rapids-cpm) - rapids_cpm_init() - # These variables have no effect because we are always searching for arrow via pyarrow, but they - # must be set as they are required arguments to the function in get_arrow.cmake. + # libraries. These variables have no effect because we are always searching for arrow via + # pyarrow, but they must be set as they are required arguments to the function in + # get_arrow.cmake. set(CUDF_USE_ARROW_STATIC OFF) set(CUDF_ENABLE_ARROW_S3 OFF) set(CUDF_ENABLE_ARROW_ORC OFF)