Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify Python CMake #14565

Merged
merged 36 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
580795e
Rely on rapids-cmake to make nvcomp targets global
vyasr Dec 4, 2023
4f782bf
Handle installation the same for non-wheel FIND_CUDF_CPP builds as wh…
vyasr Dec 4, 2023
a6c0957
Simplify installation helper function
vyasr Dec 4, 2023
24cfcff
Cleanup and comments
vyasr Dec 4, 2023
0a47368
Also skip test util build when building for Python outside wheels
vyasr Dec 4, 2023
6370182
Fully unify wheel builds with non-wheel Python builds
vyasr Dec 4, 2023
d907ca2
Remove unnecessary variable settings
vyasr Dec 4, 2023
57edaf4
Test allowing C++ lib to install itself
vyasr Dec 4, 2023
c30a571
Revert to base rapids-cmake repo
vyasr Dec 4, 2023
007fac5
Revert "Test allowing C++ lib to install itself"
vyasr Dec 4, 2023
4bf4e12
Add comment
vyasr Dec 4, 2023
fc06ea8
Remove now unnecessary CMake args spec
vyasr Dec 4, 2023
4bfe839
Fix style
vyasr Dec 4, 2023
0b8749e
Centralize logic for pyarrow/numpy headers
vyasr Dec 4, 2023
bab05c6
Add missed files
vyasr Dec 4, 2023
1849e7d
Just have cudf-kafka pull from cudf
vyasr Dec 4, 2023
fab0602
One more style fix
vyasr Dec 4, 2023
a0e6029
Put back variables needed for get_arrow.cmake
vyasr Dec 4, 2023
7c0690d
Add comment
vyasr Dec 4, 2023
38e039c
Style
vyasr Dec 4, 2023
1dc7517
Check for the presence of libarrow.so.* inside pyarrow to determine w…
vyasr Dec 5, 2023
1db2665
Allow for the linker name to be found instead of the soname
vyasr Dec 5, 2023
c1d759f
Style
vyasr Dec 5, 2023
d29aecc
Init cpm earlier
vyasr Dec 5, 2023
ff42ca3
Move pyarrow detection to get_arrow.cmake so that it also affects C++…
vyasr Dec 5, 2023
cc20ac8
Style
vyasr Dec 5, 2023
7c7ea76
Ensure import works on all Python versions
vyasr Dec 5, 2023
d68cf41
Ensure variable is always set
vyasr Dec 5, 2023
d26798a
Stop using CMAKE_SHARED_LIBRARY_SUFFIX
vyasr Dec 5, 2023
15e7166
Make sure libcudf exports its pyarrow dependency
vyasr Dec 5, 2023
355b0b2
Fix style
vyasr Dec 5, 2023
79b90ff
Merge remote-tracking branch 'upstream/branch-24.02' into fix/simplif…
vyasr Dec 8, 2023
7224c9b
Address most reviews
vyasr Dec 8, 2023
e8a90be
Go back to making USE_LIBARROW_FROM_PYARROW explicitly required
vyasr Dec 8, 2023
c5af8fc
Only include the pre-libcudf find in python if we're forcing libarrow…
vyasr Dec 8, 2023
2fd5b5b
Set up rapids-cmake functions unconditionally since dlpack needs them
vyasr Dec 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf"

export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"

./ci/build_wheel.sh cudf ${package_dir}

Expand Down
5 changes: 2 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
Expand All @@ -90,9 +92,6 @@ option(
)
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)

option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
Expand Down
43 changes: 28 additions & 15 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
# version number soname, just `${MAJOR_VERSION}00`
set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00")

find_package(Python REQUIRED)
vyasr marked this conversation as resolved.
Show resolved Hide resolved
execute_process(
string(
APPEND
initial_code_block
[=[
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
COMMAND_ERROR_IS_FATAL ANY
)
list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
]=]
)
string(
APPEND
final_code_block
[=[
list(POP_BACK CMAKE_PREFIX_PATH)
]=]
)
list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
rapids_find_generate_module(
Arrow NO_CONFIG
VERSION "${PYARROW_VERSION}"
LIBRARY_NAMES "${PYARROW_LIB}"
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
HEADER_NAMES arrow/python/arrow_to_pandas.h
HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block
FINAL_CODE_BLOCK final_code_block
)

find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
vyasr marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -62,20 +77,20 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
# benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
# ignoring these limitations since we don't anticipate using this feature except for building
# wheels.
EXECUTE_PROCESS(
execute_process(
COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
OUTPUT_VARIABLE GLIBC_EXECUTABLE
OUTPUT_STRIP_TRAILING_WHITESPACE
)
EXECUTE_PROCESS(
execute_process(
COMMAND ${GLIBC_EXECUTABLE}
OUTPUT_VARIABLE GLIBC_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
if(GLIBC_VERSION_MINOR LESS 28)
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
Expand All @@ -85,16 +100,14 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)

rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)

list(POP_BACK CMAKE_PREFIX_PATH)
endfunction()

# This function finds arrow and sets any additional necessary environment variables.
function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON
ENABLE_PARQUET
ENABLE_PARQUET PYARROW_LIBARROW
)

if(USE_LIBARROW_FROM_PYARROW)
if(PYARROW_LIBARROW)
# Generate a FindArrow.cmake to find pyarrow's libarrow.so
find_libarrow_in_python_wheel(${VERSION})
set(ARROW_FOUND
Expand Down Expand Up @@ -434,5 +447,5 @@ endif()

find_and_configure_arrow(
${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET}
${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW}
)
73 changes: 31 additions & 42 deletions python/cudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,37 +33,36 @@ project(
option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
OFF
)
option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF)
option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

# Always build wheels against the pyarrow libarrow.
if(CUDF_BUILD_WHEELS)
set(USE_LIBARROW_FROM_PYARROW ON)
endif()
# Find Python early so that later commands can use it
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)

# If the user requested it we attempt to find CUDF.
if(FIND_CUDF_CPP)
include(rapids-cpm)
include(rapids-export)
include(rapids-find)
rapids_cpm_init()

if(USE_LIBARROW_FROM_PYARROW)
# We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO:
# These options should probably all become optional since in practice they aren't meaningful
# except in the case where we actually compile Arrow.
# We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow
# libraries. These variables have no effect because we are always searching for arrow via
# pyarrow, but they must be set as they are required arguments to the function in
# get_arrow.cmake.
set(CUDF_USE_ARROW_STATIC OFF)
set(CUDF_ENABLE_ARROW_S3 OFF)
set(CUDF_ENABLE_ARROW_ORC OFF)
set(CUDF_ENABLE_ARROW_PYTHON OFF)
set(CUDF_ENABLE_ARROW_PARQUET OFF)
include(rapids-find)
include(rapids-export)
include(../../cpp/cmake/thirdparty/get_arrow.cmake)
endif()

find_package(cudf ${cudf_version} REQUIRED)

# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
# for the interop.pyx
include(rapids-cpm)
rapids_cpm_init()
include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
else()
set(cudf_FOUND OFF)
Expand All @@ -74,42 +73,32 @@ include(rapids-cython)
if(NOT cudf_FOUND)
set(BUILD_TESTS OFF)
set(BUILD_BENCHMARKS OFF)
set(CUDF_BUILD_TESTUTIL OFF)
set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
set(CUDA_STATIC_RUNTIME ON)

set(_exclude_from_all "")
if(CUDF_BUILD_WHEELS)
# We don't build C++ tests when building wheels, so we can also omit the test util and shrink
# the wheel by avoiding embedding GTest.
set(CUDF_BUILD_TESTUTIL OFF)
set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)

# Statically link cudart if building wheels
set(CUDA_STATIC_RUNTIME ON)

# Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
# https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)

# Don't install the cuDF C++ targets into wheels
set(_exclude_from_all EXCLUDE_FROM_ALL)
endif()

add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all})

if(CUDF_BUILD_WHEELS)
include(cmake/Modules/WheelHelpers.cmake)
get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
# Ensure all the shared objects we need at runtime are in the wheel
add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs})
endif()
# Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
# library in the cudf directory as a single source of truth and modify the other rpaths
# appropriately.
# libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous
# components like headers from libcudacxx, but we do need the libraries. However, we want to
# control where they are installed to. Since there are multiple subpackages of cudf._lib that
# require access to libcudf, we place the library and all its dependent artifacts in the cudf
# directory as a single source of truth and modify the other rpaths appropriately.
set(cython_lib_dir cudf)
install(TARGETS cudf DESTINATION ${cython_lib_dir})
include(cmake/Modules/WheelHelpers.cmake)
# TODO: This install is currently overzealous. We should only install the libraries that are
# downloaded by CPM during the build, not libraries that were found on the system. However, in
# practice right this would only be a problem is if libcudf was not found but some of the
# dependencies were, and we have no real use cases where that happens.
Comment on lines +89 to +92
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want to try and fix this right now. I would like to manage the scikit-build-core transition first, then revisit the best way for us to handle install rules. Some anecdotal experiments suggest that the configure/build paths might be a bit different in scikit-build-core, so I don't want to do redundant work.

install_aliased_imported_targets(
TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp
DESTINATION ${cython_lib_dir}
)
endif()

rapids_cython_init()

include(cmake/Modules/LinkPyarrowHeaders.cmake)
add_subdirectory(cudf/_lib)
add_subdirectory(udf_cpp)

Expand Down
55 changes: 55 additions & 0 deletions python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================
include_guard(GLOBAL)

# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
# find_package(Python REQUIRED COMPONENTS Development NumPy)

# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
# inside the manylinux images used to build wheels because manylinux images do not contain
# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
# switching back, but it may not work if that implicitly still requires Python libraries. In that
# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
# libs to be found, but that should be a last resort since it implies a dependency that isn't really
# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
# /opt/_internal"
find_package(NumPy REQUIRED)

execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
ERROR_VARIABLE PYARROW_ERROR
RESULT_VARIABLE PYARROW_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE
)

if(${PYARROW_RESULT})
message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
endif()

# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of
# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
# requirement for arrow headers infects all of cudf. These requirements will go away once all
# scalar-related Cython code is removed from cudf.
function(link_to_pyarrow_headers targets)
foreach(target IN LISTS targets)
# PyArrow headers require numpy headers.
target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
endforeach()
endfunction()
44 changes: 16 additions & 28 deletions python/cudf/cmake/Modules/WheelHelpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
include_guard(GLOBAL)

# Making libraries available inside wheels by installing the associated targets.
function(add_target_libs_to_wheel)
list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel")
function(install_aliased_imported_targets)
list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets")

set(options "")
set(one_value "LIB_DIR")
set(one_value "DESTINATION")
set(multi_value "TARGETS")
cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN})

message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'")
message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__DESTINATION}'")

foreach(target IN LISTS __TARGETS)

Expand All @@ -39,33 +39,21 @@ function(add_target_libs_to_wheel)
get_target_property(is_imported ${target} IMPORTED)
if(NOT is_imported)
# If the target isn't imported, install it into the wheel
install(TARGETS ${target} DESTINATION ${__LIB_DIR})
message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})")
install(TARGETS ${target} DESTINATION ${__DESTINATION})
message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})")
else()
# If the target is imported, make sure it's global
get_target_property(already_global ${target} IMPORTED_GLOBAL)
if(NOT already_global)
set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
get_target_property(type ${target} TYPE)
if(${type} STREQUAL "UNKNOWN_LIBRARY")
install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})
message(VERBOSE "install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})")
else()
install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION})
message(
VERBOSE
"install(IMPORTED_RUNTIME_ARTIFACTS $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})"
)
endif()

# Find the imported target's library so we can copy it into the wheel
set(lib_loc)
foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG)
get_target_property(lib_loc ${target} ${prop})
if(lib_loc)
message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}")
break()
endif()
message(VERBOSE "${target} has no value for property ${prop}")
endforeach()

if(NOT lib_loc)
message(FATAL_ERROR "Found no libs to install for target ${target}")
endif()

# Copy the imported library into the wheel
install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})
message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})")
endif()
endforeach()
endfunction()
Loading
Loading