Skip to content

Commit

Permalink
Simplify Python CMake (#14565)
Browse files Browse the repository at this point in the history
This PR makes a number of changes to streamline the CMake code in the Python build. There are additional potential improvements that may be possible but I'd prefer to wait on until after the scikit-build-core migration because I suspect that there may be subtle differences (particularly around the install rules). The major improvements here are:
- nvcomp installation no longer needs as much special casing thanks to rapidsai/rapids-cmake#496 adding missing targets
- get_arrow.cmake now determines how to find arrow by checking for 1) the presence of Python, 2) the presence of pyarrow, and 3) the presence of libarrow.so.* inside the pyarrow directory. This approach works for both pip and conda packages as well as pure C++ builds and removes the need for manual determination of where to look by the builder. The assumption baked in is that the developer has installed the desired pyarrow package when building.
- The `CUDF_BUILD_WHEELS` variable is now removed. All Python builds that don't find the C++ library all go down the same path now. This is particularly relevant for doing something like a local `pip install`. This is the desired behavior because if you're building the Python package in that kind of environment you always want the same behaviors. The different case is when the Python build finds the C++ build (e.g. in a conda environment where libcudf is a separate package).
- The logic for linking to pyarrow headers is now centralized in a single function since it needs to be used by every Cython target.

Authors:
   - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
   - https://github.com/brandon-b-miller
   - Robert Maynard (https://github.com/robertmaynard)
   - Ray Douglass (https://github.com/raydouglass)
  • Loading branch information
vyasr authored Dec 11, 2023
1 parent 3c32e5d commit fc0f181
Show file tree
Hide file tree
Showing 13 changed files with 141 additions and 226 deletions.
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf"

export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"

./ci/build_wheel.sh cudf ${package_dir}

Expand Down
5 changes: 2 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
Expand All @@ -90,9 +92,6 @@ option(
)
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)

option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
Expand Down
43 changes: 28 additions & 15 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,35 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
# version number soname, just `${MAJOR_VERSION}00`
set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00")

find_package(Python REQUIRED)
execute_process(
string(
APPEND
initial_code_block
[=[
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
COMMAND_ERROR_IS_FATAL ANY
)
list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
]=]
)
string(
APPEND
final_code_block
[=[
list(POP_BACK CMAKE_PREFIX_PATH)
]=]
)
list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
rapids_find_generate_module(
Arrow NO_CONFIG
VERSION "${PYARROW_VERSION}"
LIBRARY_NAMES "${PYARROW_LIB}"
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
HEADER_NAMES arrow/python/arrow_to_pandas.h
HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block
FINAL_CODE_BLOCK final_code_block
)

find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
Expand All @@ -62,20 +77,20 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
# benchmarks will not work without updating GBench (and possibly NVBench) builds. We are currently
# ignoring these limitations since we don't anticipate using this feature except for building
# wheels.
EXECUTE_PROCESS(
execute_process(
COMMAND ${CMAKE_C_COMPILER} -print-file-name=libc.so.6
OUTPUT_VARIABLE GLIBC_EXECUTABLE
OUTPUT_STRIP_TRAILING_WHITESPACE
)
EXECUTE_PROCESS(
execute_process(
COMMAND ${GLIBC_EXECUTABLE}
OUTPUT_VARIABLE GLIBC_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
STRING(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
STRING(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
STRING(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
LIST(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
string(REGEX MATCH "stable release version ([0-9]+\\.[0-9]+)" GLIBC_VERSION ${GLIBC_OUTPUT})
string(REPLACE "stable release version " "" GLIBC_VERSION ${GLIBC_VERSION})
string(REPLACE "." ";" GLIBC_VERSION_LIST ${GLIBC_VERSION})
list(GET GLIBC_VERSION_LIST 1 GLIBC_VERSION_MINOR)
if(GLIBC_VERSION_MINOR LESS 28)
target_compile_options(
Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
Expand All @@ -85,16 +100,14 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)

rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)

list(POP_BACK CMAKE_PREFIX_PATH)
endfunction()

# This function finds arrow and sets any additional necessary environment variables.
function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON
ENABLE_PARQUET
ENABLE_PARQUET PYARROW_LIBARROW
)

if(USE_LIBARROW_FROM_PYARROW)
if(PYARROW_LIBARROW)
# Generate a FindArrow.cmake to find pyarrow's libarrow.so
find_libarrow_in_python_wheel(${VERSION})
set(ARROW_FOUND
Expand Down Expand Up @@ -434,5 +447,5 @@ endif()

find_and_configure_arrow(
${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET}
${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW}
)
73 changes: 31 additions & 42 deletions python/cudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,37 +33,36 @@ project(
option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
OFF
)
option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF)
option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

# Always build wheels against the pyarrow libarrow.
if(CUDF_BUILD_WHEELS)
set(USE_LIBARROW_FROM_PYARROW ON)
endif()
# Find Python early so that later commands can use it
find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)

# If the user requested it we attempt to find CUDF.
if(FIND_CUDF_CPP)
include(rapids-cpm)
include(rapids-export)
include(rapids-find)
rapids_cpm_init()

if(USE_LIBARROW_FROM_PYARROW)
# We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO:
# These options should probably all become optional since in practice they aren't meaningful
# except in the case where we actually compile Arrow.
# We need to find arrow before libcudf since libcudf requires it but doesn't bundle arrow
# libraries. These variables have no effect because we are always searching for arrow via
# pyarrow, but they must be set as they are required arguments to the function in
# get_arrow.cmake.
set(CUDF_USE_ARROW_STATIC OFF)
set(CUDF_ENABLE_ARROW_S3 OFF)
set(CUDF_ENABLE_ARROW_ORC OFF)
set(CUDF_ENABLE_ARROW_PYTHON OFF)
set(CUDF_ENABLE_ARROW_PARQUET OFF)
include(rapids-find)
include(rapids-export)
include(../../cpp/cmake/thirdparty/get_arrow.cmake)
endif()

find_package(cudf ${cudf_version} REQUIRED)

# an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
# for the interop.pyx
include(rapids-cpm)
rapids_cpm_init()
include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
else()
set(cudf_FOUND OFF)
Expand All @@ -74,42 +73,32 @@ include(rapids-cython)
if(NOT cudf_FOUND)
set(BUILD_TESTS OFF)
set(BUILD_BENCHMARKS OFF)
set(CUDF_BUILD_TESTUTIL OFF)
set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
set(CUDA_STATIC_RUNTIME ON)

set(_exclude_from_all "")
if(CUDF_BUILD_WHEELS)
# We don't build C++ tests when building wheels, so we can also omit the test util and shrink
# the wheel by avoiding embedding GTest.
set(CUDF_BUILD_TESTUTIL OFF)
set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
add_subdirectory(../../cpp cudf-cpp EXCLUDE_FROM_ALL)

# Statically link cudart if building wheels
set(CUDA_STATIC_RUNTIME ON)

# Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
# https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)

# Don't install the cuDF C++ targets into wheels
set(_exclude_from_all EXCLUDE_FROM_ALL)
endif()

add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all})

if(CUDF_BUILD_WHEELS)
include(cmake/Modules/WheelHelpers.cmake)
get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
# Ensure all the shared objects we need at runtime are in the wheel
add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs})
endif()
# Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
# library in the cudf directory as a single source of truth and modify the other rpaths
# appropriately.
# libcudf targets are excluded by default above via EXCLUDE_FROM_ALL to remove extraneous
# components like headers from libcudacxx, but we do need the libraries. However, we want to
# control where they are installed to. Since there are multiple subpackages of cudf._lib that
# require access to libcudf, we place the library and all its dependent artifacts in the cudf
# directory as a single source of truth and modify the other rpaths appropriately.
set(cython_lib_dir cudf)
install(TARGETS cudf DESTINATION ${cython_lib_dir})
include(cmake/Modules/WheelHelpers.cmake)
# TODO: This install is currently overzealous. We should only install the libraries that are
# downloaded by CPM during the build, not libraries that were found on the system. However, in
# practice right this would only be a problem is if libcudf was not found but some of the
# dependencies were, and we have no real use cases where that happens.
install_aliased_imported_targets(
TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp
DESTINATION ${cython_lib_dir}
)
endif()

rapids_cython_init()

include(cmake/Modules/LinkPyarrowHeaders.cmake)
add_subdirectory(cudf/_lib)
add_subdirectory(udf_cpp)

Expand Down
55 changes: 55 additions & 0 deletions python/cudf/cmake/Modules/LinkPyarrowHeaders.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================
include_guard(GLOBAL)

# TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
# fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
# CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
# find_package(Python REQUIRED COMPONENTS Development NumPy)

# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
# inside the manylinux images used to build wheels because manylinux images do not contain
# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
# switching back, but it may not work if that implicitly still requires Python libraries. In that
# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution
# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
# libs to be found, but that should be a last resort since it implies a dependency that isn't really
# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
# /opt/_internal"
find_package(NumPy REQUIRED)

execute_process(
COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
ERROR_VARIABLE PYARROW_ERROR
RESULT_VARIABLE PYARROW_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE
)

if(${PYARROW_RESULT})
message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}")
endif()

# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of
# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the
# requirement for arrow headers infects all of cudf. These requirements will go away once all
# scalar-related Cython code is removed from cudf.
function(link_to_pyarrow_headers targets)
foreach(target IN LISTS targets)
# PyArrow headers require numpy headers.
target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
endforeach()
endfunction()
44 changes: 16 additions & 28 deletions python/cudf/cmake/Modules/WheelHelpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
include_guard(GLOBAL)

# Making libraries available inside wheels by installing the associated targets.
function(add_target_libs_to_wheel)
list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel")
function(install_aliased_imported_targets)
list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets")

set(options "")
set(one_value "LIB_DIR")
set(one_value "DESTINATION")
set(multi_value "TARGETS")
cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN})

message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'")
message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__DESTINATION}'")

foreach(target IN LISTS __TARGETS)

Expand All @@ -39,33 +39,21 @@ function(add_target_libs_to_wheel)
get_target_property(is_imported ${target} IMPORTED)
if(NOT is_imported)
# If the target isn't imported, install it into the wheel
install(TARGETS ${target} DESTINATION ${__LIB_DIR})
message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})")
install(TARGETS ${target} DESTINATION ${__DESTINATION})
message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})")
else()
# If the target is imported, make sure it's global
get_target_property(already_global ${target} IMPORTED_GLOBAL)
if(NOT already_global)
set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
get_target_property(type ${target} TYPE)
if(${type} STREQUAL "UNKNOWN_LIBRARY")
install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})
message(VERBOSE "install(FILES $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})")
else()
install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION})
message(
VERBOSE
"install(IMPORTED_RUNTIME_ARTIFACTS $<TARGET_FILE:${target}> DESTINATION ${__DESTINATION})"
)
endif()

# Find the imported target's library so we can copy it into the wheel
set(lib_loc)
foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG)
get_target_property(lib_loc ${target} ${prop})
if(lib_loc)
message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}")
break()
endif()
message(VERBOSE "${target} has no value for property ${prop}")
endforeach()

if(NOT lib_loc)
message(FATAL_ERROR "Found no libs to install for target ${target}")
endif()

# Copy the imported library into the wheel
install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})
message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})")
endif()
endforeach()
endfunction()
Loading

0 comments on commit fc0f181

Please sign in to comment.