diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9d35e3f97f..5d1d536704 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -26,5 +26,5 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" diff --git a/README.md b/README.md index e0e26ac560..50e4328fbd 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ KvikIO (pronounced "kuh-VICK-eye-oh", see [here](https://ordnet.dk/ddo_en/dict?q bindings to [cuFile](https://docs.nvidia.com/gpudirect-storage/api-reference-guide/index.html), which enables [GPUDirect Storage (GDS)](https://developer.nvidia.com/blog/gpudirect-storage/). KvikIO also works efficiently when GDS isn't available and can read/write both host and device data seamlessly. -The C++ library is header-only making it easy to include in [existing projects](https://github.com/rapidsai/kvikio/blob/HEAD/cpp/examples/downstream/). ### Features diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 0000000000..b1ede832da --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_name=$1 +package_dir=$2 + +source rapids-configure-sccache +source rapids-date-string + +rapids-generate-version > ./VERSION + +cd "${package_dir}" + +sccache --zero-stats + +rapids-logger "Building '${package_name}' wheel" +python -m pip wheel \ + -w dist \ + -v \ + --no-deps \ + --disable-pip-version-check \ + . 
+ +sccache --show-adv-stats diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh index b11cdf6677..0367842a8c 100755 --- a/ci/build_wheel_cpp.sh +++ b/ci/build_wheel_cpp.sh @@ -6,19 +6,34 @@ set -euo pipefail package_name="libkvikio" package_dir="python/libkvikio" -source rapids-configure-sccache -source rapids-date-string +rapids-logger "Generating build requirements" -rapids-generate-version > ./VERSION +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \ +| tee /tmp/requirements-build.txt -cd "${package_dir}" +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt -sccache --zero-stats +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 -python -m pip install wheel -python -m pip wheel . 
-w dist -v --no-deps --disable-pip-version-check - -sccache --show-adv-stats +export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" +./ci/build_wheel.sh "${package_name}" "${package_dir}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp dist + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair \ + --exclude libnvcomp.so.4 \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh index 44cb76586c..0c709f2fe6 100755 --- a/ci/build_wheel_python.sh +++ b/ci/build_wheel_python.sh @@ -6,35 +6,23 @@ set -euo pipefail package_name="kvikio" package_dir="python/kvikio" -source rapids-configure-sccache -source rapids-date-string - RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -rapids-generate-version > ./VERSION - -CPP_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libkvikio_dist) - -cd "${package_dir}" - -# ensure 'kvikio' wheel builds always use the 'libkvikio' just built in the same CI run +# Ensure 'kvikio' wheel builds always use the 'libkvikio' just built in the same CI run # -# using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints # are used when creating the isolated build environment -echo "libkvikio-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${CPP_WHEELHOUSE}/libkvikio_*.whl)" > ./constraints.txt - -sccache --zero-stats - -PIP_CONSTRAINT="${PWD}/constraints.txt" \ -SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" \ - python -m pip wheel . 
-w dist -v --no-deps --disable-pip-version-check +RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libkvikio_dist +echo "libkvikio-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libkvikio_dist/libkvikio_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -sccache --show-adv-stats +export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" +./ci/build_wheel.sh "${package_name}" "${package_dir}" -mkdir -p final_dist python -m auditwheel repair \ + --exclude libkvikio.so \ --exclude libnvcomp.so.4 \ - -w final_dist \ - dist/* + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index a3f014ca3f..46ae5cbcf2 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -4,10 +4,14 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -WHEELHOUSE="${PWD}/dist/" -RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python "${WHEELHOUSE}" -python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" +# Download and install the libkvikio and kvikio wheels built in the previous step +RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + +python -m pip install -v \ + "$(echo ./dist/libkvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" # If running CUDA 11.8 on arm64, we skip tests marked "cufile" since # cuFile didn't support arm until 12.4 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 786ccb9266..772a97e560 100644 
--- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -35,14 +35,23 @@ rapids_cmake_write_version_file(include/kvikio/version_config.hpp) # Set a default build type if none was specified rapids_cmake_build_type(Release) -# build options -option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON) +# ################################################################################################## +# * build options ---------------------------------------------------------------------------------- + +option(BUILD_SHARED_LIBS "Build KvikIO shared library" ON) option(KvikIO_BUILD_EXAMPLES "Configure CMake to build examples" ON) option(KvikIO_BUILD_TESTS "Configure CMake to build tests" ON) +option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON) +option(KvikIO_CUDA_SUPPORT "Configure CMake to build with CUDA support" ON) +option(KvikIO_EXPORT_NVCOMP "Export NVCOMP as a dependency" ON) +# ################################################################################################## +# * conda environment ------------------------------------------------------------------------------ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) -# find packages we depend on +# ################################################################################################## +# * dependencies ----------------------------------------------------------------------------------- + rapids_cpm_init() rapids_find_package( @@ -55,99 +64,95 @@ if(KvikIO_REMOTE_SUPPORT) include(cmake/thirdparty/get_libcurl.cmake) endif() +if(KvikIO_CUDA_SUPPORT) + rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET kvikio-exports + INSTALL_EXPORT_SET kvikio-exports + ) + include(cmake/thirdparty/get_nvtx.cmake) +endif() + rapids_find_package( - CUDAToolkit + cuFile BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports ) -if(CUDAToolkit_FOUND) - rapids_find_package( - cuFile - BUILD_EXPORT_SET kvikio-exports - 
INSTALL_EXPORT_SET kvikio-exports +if(NOT cuFile_FOUND) + message( + WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" ) - if(NOT cuFile_FOUND) - message( - WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" - ) +else() + # Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND) + file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) + string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) + if(cuFileBatchIOSetUp_location EQUAL "-1") + set(cuFile_BATCH_API_FOUND FALSE) else() - file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) - string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) - if(cuFileBatchIOSetUp_location EQUAL "-1") - set(cuFile_BATCH_API_FOUND FALSE) - else() - set(cuFile_BATCH_API_FOUND TRUE) - endif() - message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") - string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) - if(cuFileReadAsync_location EQUAL "-1") - set(cuFile_STREAM_API_FOUND FALSE) - else() - set(cuFile_STREAM_API_FOUND TRUE) - endif() - message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") + set(cuFile_BATCH_API_FOUND TRUE) endif() - - include(cmake/thirdparty/get_nvtx.cmake) + message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") + string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) + if(cuFileReadAsync_location EQUAL "-1") + set(cuFile_STREAM_API_FOUND FALSE) + else() + set(cuFile_STREAM_API_FOUND TRUE) + endif() + message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") endif() include(cmake/thirdparty/get_thread_pool.cmake) -# library targets -add_library(kvikio INTERFACE) -add_library(kvikio::kvikio ALIAS kvikio) +# ################################################################################################## +# * library targets 
-------------------------------------------------------------------------------- -# We enable CUDA and cuFile both here and in the FINAL_CODE_BLOCK export block. While the code block -# below (in FINAL_CODE_BLOCK) sets this information when KvikIO is imported from a -# kvikio-config.cmake file, this code block is intended to be used by projects that include KvikIO's -# source directory in their own CMake build. -# -# Normally we would just set the below without using $, and without the -# final_code_string, but in this case we want to conditionally set these things at import time, not -# export time, since KvikIO is a header-only library that can adapt to different build environments. - -# Enable CUDA in KvikIO -if(CUDAToolkit_FOUND) - if(CUDA_STATIC_RUNTIME) - target_link_libraries(kvikio INTERFACE $) - else() - target_link_libraries(kvikio INTERFACE $) - endif() - target_compile_definitions(kvikio INTERFACE $) -else() - message(WARNING "Building KvikIO without CUDA") -endif() +file(GLOB SOURCES "src/*.cpp") +add_library(kvikio ${SOURCES}) -# Enable supported cuFile features in KvikIO -if(cuFile_FOUND) - target_link_libraries(kvikio INTERFACE $) - target_compile_definitions(kvikio INTERFACE $) - if(cuFile_BATCH_API_FOUND) - target_compile_definitions( - kvikio INTERFACE $ - ) - endif() - if(cuFile_STREAM_API_FOUND) - target_compile_definitions( - kvikio INTERFACE $ - ) - endif() -endif() +# To avoid symbol conflicts when statically linking to libcurl.a (see get_libcurl.cmake) and its +# dependency OpenSSL, we exclude them when building libkvikio.so. This way, libkvikio.so will not +# expose any OpenSSL symbols that could conflict with downstream users like CPython that also links +# to (another version of) OpenSSL. 
+target_link_options(kvikio PRIVATE "LINKER:--exclude-libs,ALL") + +add_library(kvikio::kvikio ALIAS kvikio) target_include_directories( - kvikio INTERFACE "$" - "$" + kvikio + PUBLIC "$" "${CUDAToolkit_INCLUDE_DIRS}" + "${cuFile_INCLUDE_DIRS}" + INTERFACE "$" ) + +# Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. target_link_libraries( - kvikio INTERFACE Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} - $ + kvikio + PUBLIC Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} $ + PRIVATE $ ) -if(TARGET CURL::libcurl) - target_link_libraries(kvikio INTERFACE $) - target_compile_definitions(kvikio INTERFACE $) -endif() -target_compile_features(kvikio INTERFACE cxx_std_17) + +target_compile_definitions( + kvikio + PUBLIC $<$:KVIKIO_LIBCURL_FOUND> + $<$:KVIKIO_CUDA_FOUND> + $<$:KVIKIO_CUFILE_FOUND> + $<$:KVIKIO_CUFILE_BATCH_API_FOUND> + $<$:KVIKIO_CUFILE_STREAM_API_FOUND> +) + +set_target_properties( + kvikio + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) + +# ################################################################################################## +# * add examples ----------------------------------------------------------------------------------- # optionally build examples if(KvikIO_BUILD_EXAMPLES) @@ -168,10 +173,21 @@ if(CUDAToolkit_FOUND add_subdirectory(tests) endif() +# ################################################################################################## +# * install targets -------------------------------------------------------------------------------- + +rapids_cmake_install_lib_dir(lib_dir) include(CPack) +include(GNUInstallDirs) + +set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME kvikio) + +install( + TARGETS kvikio + DESTINATION ${lib_dir} + EXPORT kvikio-exports +) -# install export targets -install(TARGETS kvikio EXPORT kvikio-exports) install(DIRECTORY 
include/kvikio/ DESTINATION include/kvikio) install(FILES ${KvikIO_BINARY_DIR}/include/kvikio/version_config.hpp DESTINATION include/kvikio) @@ -185,68 +201,7 @@ rapids_export_find_package_file( set(doc_string [=[ -Provide targets for KvikIO: C++ bindings for cuFile. -]=] -) - -set(final_code_string - [=[ -get_property(already_set_kvikio DIRECTORY PROPERTY kvikio_already_set_defines SET) -if(NOT already_set_kvikio) - set_property(DIRECTORY PROPERTY kvikio_already_set_defines "ON") - - find_package(CUDAToolkit QUIET) - if(CUDAToolkit_FOUND) - if(CUDA_STATIC_RUNTIME) - target_link_libraries(kvikio::kvikio INTERFACE CUDA::cudart_static) - else() - target_link_libraries(kvikio::kvikio INTERFACE CUDA::cudart) - endif() - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUDA_FOUND) - else() - message(WARNING "Building KvikIO without CUDA") - endif() - - # Find cuFile and determine which features are supported - find_package(cuFile QUIET) - if(NOT cuFile_FOUND) - message(WARNING "KvikIO: cuFile not found") - else() - file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) - string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) - if(cuFileBatchIOSetUp_location EQUAL "-1") - set(cuFile_BATCH_API_FOUND FALSE) - else() - set(cuFile_BATCH_API_FOUND TRUE) - endif() - message(STATUS "KvikIO: Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") - string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) - if(cuFileReadAsync_location EQUAL "-1") - set(cuFile_STREAM_API_FOUND FALSE) - else() - set(cuFile_STREAM_API_FOUND TRUE) - endif() - message(STATUS "KvikIO: Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") - endif() - - # Enable supported cuFile features in KvikIO - if(cuFile_FOUND) - target_link_libraries(kvikio::kvikio INTERFACE cufile::cuFile_interface) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUFILE_FOUND) - if(cuFile_BATCH_API_FOUND) - target_compile_definitions(kvikio::kvikio INTERFACE 
KVIKIO_CUFILE_BATCH_API_FOUND) - endif() - if(cuFile_STREAM_API_FOUND) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUFILE_STREAM_API_FOUND) - endif() - endif() - - if(TARGET CURL::libcurl) - target_link_libraries(kvikio::kvikio INTERFACE CURL::libcurl) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_LIBCURL_FOUND) - endif() - -endif() +Provide targets for KvikIO. ]=] ) @@ -256,7 +211,6 @@ rapids_export( GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string - FINAL_CODE_BLOCK final_code_string ) rapids_export( @@ -265,5 +219,4 @@ rapids_export( GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string - FINAL_CODE_BLOCK final_code_string ) diff --git a/cpp/cmake/thirdparty/get_libcurl.cmake b/cpp/cmake/thirdparty/get_libcurl.cmake index 5694494b9a..6b137bbde2 100644 --- a/cpp/cmake/thirdparty/get_libcurl.cmake +++ b/cpp/cmake/thirdparty/get_libcurl.cmake @@ -31,6 +31,7 @@ function(find_and_configure_libcurl) GIT_TAG curl-7_87_0 OPTIONS "BUILD_CURL_EXE OFF" "BUILD_SHARED_LIBS OFF" "BUILD_TESTING OFF" "CURL_USE_LIBPSL OFF" "CURL_DISABLE_LDAP ON" "CMAKE_POSITION_INDEPENDENT_CODE ON" + EXCLUDE_FROM_ALL YES # Don't install libcurl.a (only needed when building libkvikio.so) ) if(DEFINED CACHE_HAS_BUILD_TESTING) set(BUILD_TESTING diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md index 21a33b1d45..497fb3e13e 100644 --- a/cpp/doxygen/main_page.md +++ b/cpp/doxygen/main_page.md @@ -5,7 +5,7 @@ bindings to [cuFile](https://docs.nvidia.com/gpudirect-storage/api-reference-gui which enables [GPUDirect Storage (GDS)](https://developer.nvidia.com/blog/gpudirect-storage/). KvikIO also works efficiently when GDS isn't available and can read/write both host and device data seamlessly. -KvikIO C++ is a header-only library that is part of the [RAPIDS](https://rapids.ai/) suite of open-source software libraries for GPU-accelerated data science. 
+KvikIO C++ is part of the [RAPIDS](https://rapids.ai/) suite of open-source software libraries for GPU-accelerated data science. --- **Notice** this is the documentation for the C++ library. For the Python documentation, see under [kvikio](https://docs.rapids.ai/api/kvikio/nightly/). @@ -23,9 +23,7 @@ KvikIO C++ is a header-only library that is part of the [RAPIDS](https://rapids. ## Installation -KvikIO is a header-only library and as such doesn't need installation. -However, for convenience we release Conda packages that makes it easy -to include KvikIO in your CMake projects. +For convenience we release Conda packages that make it easy to include KvikIO in your CMake projects. ### Conda/Mamba diff --git a/cpp/examples/downstream/CMakeLists.txt b/cpp/examples/downstream/CMakeLists.txt index a80d0ba44f..5dddd30441 100644 --- a/cpp/examples/downstream/CMakeLists.txt +++ b/cpp/examples/downstream/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -28,6 +28,4 @@ include(cmake/get_kvikio.cmake) add_executable(downstream_example downstream_example.cpp) -# Notice, even though KvikIO is a header-only library, we link to it here. Linking to -# `kvikio::kvikio` makes CMake include the headers of KvikIO when building. 
target_link_libraries(downstream_example PRIVATE kvikio::kvikio) diff --git a/cpp/include/kvikio/remote_handle.hpp b/cpp/include/kvikio/remote_handle.hpp index 5bb18f6396..8ac2798f31 100644 --- a/cpp/include/kvikio/remote_handle.hpp +++ b/cpp/include/kvikio/remote_handle.hpp @@ -30,7 +30,6 @@ #include #include #include -#include #include namespace kvikio { @@ -128,78 +127,10 @@ class BounceBufferH2D { } }; -/** - * @brief Context used by the "CURLOPT_WRITEFUNCTION" callbacks. - */ -struct CallbackContext { - char* buf; // Output buffer to read into. - std::size_t size; // Total number of bytes to read. - std::ptrdiff_t offset; // Offset into `buf` to start reading. - bool overflow_error; // Flag to indicate overflow. - CallbackContext(void* buf, std::size_t size) - : buf{static_cast(buf)}, size{size}, offset{0}, overflow_error{0} - { - } - BounceBufferH2D* bounce_buffer{nullptr}; // Only used by callback_device_memory -}; - -/** - * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output host buffer. - * - * See . - * - * @param data Data downloaded by libcurl that is ready for consumption. - * @param size Size of each element in `nmemb`; size is always 1. - * @param nmemb Size of the data in `nmemb`. - * @param context A pointer to an instance of `CallbackContext`. - */ -inline std::size_t callback_host_memory(char* data, - std::size_t size, - std::size_t nmemb, - void* context) -{ - auto ctx = reinterpret_cast(context); - std::size_t const nbytes = size * nmemb; - if (ctx->size < ctx->offset + nbytes) { - ctx->overflow_error = true; - return CURL_WRITEFUNC_ERROR; - } - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes); - std::memcpy(ctx->buf + ctx->offset, data, nbytes); - ctx->offset += nbytes; - return nbytes; -} - -/** - * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output device buffer. - * - * See . - * - * @param data Data downloaded by libcurl that is ready for consumption. 
- * @param size Size of each element in `nmemb`; size is always 1. - * @param nmemb Size of the data in `nmemb`. - * @param context A pointer to an instance of `CallbackContext`. - */ -inline std::size_t callback_device_memory(char* data, - std::size_t size, - std::size_t nmemb, - void* context) -{ - auto ctx = reinterpret_cast(context); - std::size_t const nbytes = size * nmemb; - if (ctx->size < ctx->offset + nbytes) { - ctx->overflow_error = true; - return CURL_WRITEFUNC_ERROR; - } - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes); - - ctx->bounce_buffer->write(data, nbytes); - ctx->offset += nbytes; - return nbytes; -} - } // namespace detail +class CurlHandle; // Prototype + /** * @brief Abstract base class for remote endpoints. * @@ -243,7 +174,7 @@ class HttpEndpoint : public RemoteEndpoint { * @param url The full http url to the remote file. */ HttpEndpoint(std::string url) : _url{std::move(url)} {} - void setopt(CurlHandle& curl) override { curl.setopt(CURLOPT_URL, _url.c_str()); } + void setopt(CurlHandle& curl) override; std::string str() const override { return _url; } ~HttpEndpoint() override = default; }; @@ -424,12 +355,7 @@ class S3Endpoint : public RemoteEndpoint { { } - void setopt(CurlHandle& curl) override - { - curl.setopt(CURLOPT_URL, _url.c_str()); - curl.setopt(CURLOPT_AWS_SIGV4, _aws_sigv4.c_str()); - curl.setopt(CURLOPT_USERPWD, _aws_userpwd.c_str()); - } + void setopt(CurlHandle& curl) override; std::string str() const override { return _url; } ~S3Endpoint() override = default; }; @@ -461,23 +387,7 @@ class RemoteHandle { * * @param endpoint Remote endpoint used for subsequently IO. 
*/ - RemoteHandle(std::unique_ptr endpoint) - { - auto curl = create_curl_handle(); - - endpoint->setopt(curl); - curl.setopt(CURLOPT_NOBODY, 1L); - curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); - curl.perform(); - curl_off_t cl; - curl.getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl); - if (cl < 0) { - throw std::runtime_error("cannot get size of " + endpoint->str() + - ", content-length not provided by the server"); - } - _nbytes = cl; - _endpoint = std::move(endpoint); - } + RemoteHandle(std::unique_ptr endpoint); // A remote handle is moveable but not copyable. RemoteHandle(RemoteHandle&& o) = default; @@ -513,53 +423,7 @@ class RemoteHandle { * @param file_offset File offset in bytes. * @return Number of bytes read, which is always `size`. */ - std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0) - { - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size); - - if (file_offset + size > _nbytes) { - std::stringstream ss; - ss << "cannot read " << file_offset << "+" << size << " bytes into a " << _nbytes - << " bytes file (" << _endpoint->str() << ")"; - throw std::invalid_argument(ss.str()); - } - bool const is_host_mem = is_host_memory(buf); - auto curl = create_curl_handle(); - _endpoint->setopt(curl); - - std::string const byte_range = - std::to_string(file_offset) + "-" + std::to_string(file_offset + size - 1); - curl.setopt(CURLOPT_RANGE, byte_range.c_str()); - - if (is_host_mem) { - curl.setopt(CURLOPT_WRITEFUNCTION, detail::callback_host_memory); - } else { - curl.setopt(CURLOPT_WRITEFUNCTION, detail::callback_device_memory); - } - detail::CallbackContext ctx{buf, size}; - curl.setopt(CURLOPT_WRITEDATA, &ctx); - - try { - if (is_host_mem) { - curl.perform(); - } else { - PushAndPopContext c(get_context_from_pointer(buf)); - // We use a bounce buffer to avoid many small memory copies to device. Libcurl has a - // maximum chunk size of 16kb (`CURL_MAX_WRITE_SIZE`) but chunks are often much smaller. 
- detail::BounceBufferH2D bounce_buffer(detail::StreamsByThread::get(), buf); - ctx.bounce_buffer = &bounce_buffer; - curl.perform(); - } - } catch (std::runtime_error const& e) { - if (ctx.overflow_error) { - std::stringstream ss; - ss << "maybe the server doesn't support file ranges? [" << e.what() << "]"; - throw std::overflow_error(ss.str()); - } - throw; - } - return size; - } + std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0); /** * @brief Read from remote source into buffer (host or device memory) in parallel. @@ -576,17 +440,7 @@ class RemoteHandle { std::future pread(void* buf, std::size_t size, std::size_t file_offset = 0, - std::size_t task_size = defaults::task_size()) - { - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size); - auto task = [this](void* devPtr_base, - std::size_t size, - std::size_t file_offset, - std::size_t devPtr_offset) -> std::size_t { - return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); - }; - return parallel_io(task, buf, size, file_offset, task_size, 0); - } + std::size_t task_size = defaults::task_size()); }; } // namespace kvikio diff --git a/cpp/include/kvikio/shim/utils.hpp b/cpp/include/kvikio/shim/utils.hpp index f805ed8ee3..7a3c439899 100644 --- a/cpp/include/kvikio/shim/utils.hpp +++ b/cpp/include/kvikio/shim/utils.hpp @@ -24,11 +24,10 @@ namespace kvikio { // Macros used for defining symbol visibility. -// Since KvikIO is header-only, we rely on the linker to disambiguate inline functions -// and static methods that have (or return) static references. To do this, the relevant -// function/method must have `__attribute__((visibility("default")))`. If not, then if -// KvikIO is used in two different DSOs, the function will appear twice, and there will -// be two static objects. +// Since KvikIO declares global default values in headers, we rely on the linker to disambiguate +// inline and static methods that have (or return) static references. 
To do this, the relevant +// function/method must have `__attribute__((visibility("default")))`. If not, then if KvikIO is +// used in two different DSOs, the function will appear twice, and there will be two static objects. // See and . #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) && !defined(__MINGW64__) #define KVIKIO_EXPORT __attribute__((visibility("default"))) diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp new file mode 100644 index 0000000000..527811e143 --- /dev/null +++ b/cpp/src/remote_handle.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace kvikio { + +void HttpEndpoint::setopt(CurlHandle& curl) { curl.setopt(CURLOPT_URL, _url.c_str()); } + +void S3Endpoint::setopt(CurlHandle& curl) +{ + curl.setopt(CURLOPT_URL, _url.c_str()); + curl.setopt(CURLOPT_AWS_SIGV4, _aws_sigv4.c_str()); + curl.setopt(CURLOPT_USERPWD, _aws_userpwd.c_str()); +} + +RemoteHandle::RemoteHandle(std::unique_ptr endpoint) +{ + auto curl = create_curl_handle(); + + endpoint->setopt(curl); + curl.setopt(CURLOPT_NOBODY, 1L); + curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); + curl.perform(); + curl_off_t cl; + curl.getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl); + if (cl < 0) { + throw std::runtime_error("cannot get size of " + endpoint->str() + + ", content-length not provided by the server"); + } + _nbytes = cl; + _endpoint = std::move(endpoint); +} + +namespace { + +/** + * @brief Context used by the "CURLOPT_WRITEFUNCTION" callbacks. + */ +struct CallbackContext { + char* buf; // Output buffer to read into. + std::size_t size; // Total number of bytes to read. + std::ptrdiff_t offset; // Offset into `buf` to start reading. + bool overflow_error; // Flag to indicate overflow. + CallbackContext(void* buf, std::size_t size) + : buf{static_cast(buf)}, size{size}, offset{0}, overflow_error{0} + { + } + detail::BounceBufferH2D* bounce_buffer{nullptr}; // Only used by callback_device_memory +}; + +/** + * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output host buffer. + * + * See . + * + * @param data Data downloaded by libcurl that is ready for consumption. + * @param size Size of each element in `nmemb`; size is always 1. + * @param nmemb Size of the data in `nmemb`. + * @param context A pointer to an instance of `CallbackContext`. 
+ */ +inline std::size_t callback_host_memory(char* data, + std::size_t size, + std::size_t nmemb, + void* context) +{ + auto ctx = reinterpret_cast(context); + std::size_t const nbytes = size * nmemb; + if (ctx->size < ctx->offset + nbytes) { + ctx->overflow_error = true; + return CURL_WRITEFUNC_ERROR; + } + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes); + std::memcpy(ctx->buf + ctx->offset, data, nbytes); + ctx->offset += nbytes; + return nbytes; +} + +/** + * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output device buffer. + * + * See https://curl.se/libcurl/c/CURLOPT_WRITEFUNCTION.html for the callback contract. + * + * @param data Data downloaded by libcurl that is ready for consumption. + * @param size Size of each element in `data`; size is always 1. + * @param nmemb Number of elements in `data`, i.e. the chunk size in bytes since `size` is 1. + * @param context A pointer to an instance of `CallbackContext`. + */ +inline std::size_t callback_device_memory(char* data, + std::size_t size, + std::size_t nmemb, + void* context) +{ + auto ctx = reinterpret_cast(context); + std::size_t const nbytes = size * nmemb; + if (ctx->size < ctx->offset + nbytes) { + ctx->overflow_error = true; + return CURL_WRITEFUNC_ERROR; + } + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes); + + ctx->bounce_buffer->write(data, nbytes); + ctx->offset += nbytes; + return nbytes; +} +} // namespace + +std::size_t RemoteHandle::read(void* buf, std::size_t size, std::size_t file_offset) +{ + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size); + + if (file_offset + size > _nbytes) { + std::stringstream ss; + ss << "cannot read " << file_offset << "+" << size << " bytes into a " << _nbytes + << " bytes file (" << _endpoint->str() << ")"; + throw std::invalid_argument(ss.str()); + } + bool const is_host_mem = is_host_memory(buf); + auto curl = create_curl_handle(); + _endpoint->setopt(curl); + + std::string const byte_range = + std::to_string(file_offset) + "-" + std::to_string(file_offset + size - 1); // NOTE(review): size == 0 yields an invalid range (end < start, or wrap-around at offset 0) - confirm callers never pass 0. + curl.setopt(CURLOPT_RANGE, 
byte_range.c_str()); + + if (is_host_mem) { + curl.setopt(CURLOPT_WRITEFUNCTION, callback_host_memory); + } else { + curl.setopt(CURLOPT_WRITEFUNCTION, callback_device_memory); + } + CallbackContext ctx{buf, size}; + curl.setopt(CURLOPT_WRITEDATA, &ctx); + + try { + if (is_host_mem) { + curl.perform(); + } else { + PushAndPopContext c(get_context_from_pointer(buf)); + // We use a bounce buffer to avoid many small memory copies to device. Libcurl has a + // maximum chunk size of 16kb (`CURL_MAX_WRITE_SIZE`) but chunks are often much smaller. + detail::BounceBufferH2D bounce_buffer(detail::StreamsByThread::get(), buf); + ctx.bounce_buffer = &bounce_buffer; + curl.perform(); + } + } catch (std::runtime_error const& e) { + if (ctx.overflow_error) { + std::stringstream ss; + ss << "maybe the server doesn't support file ranges? [" << e.what() << "]"; + throw std::overflow_error(ss.str()); + } + throw; + } + return size; +} + +std::future RemoteHandle::pread(void* buf, + std::size_t size, + std::size_t file_offset, + std::size_t task_size) +{ + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size); + auto task = [this](void* devPtr_base, + std::size_t size, + std::size_t file_offset, + std::size_t devPtr_offset) -> std::size_t { + return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); + }; + return parallel_io(task, buf, size, file_offset, task_size, 0); +} + +} // namespace kvikio diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 429bd8b722..e9024795f5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -12,9 +12,6 @@ # the License. 
# ============================================================================= -# ################################################################################################## -# enable testing ----------------------------------------------------------------------------------- -# ################################################################################################## enable_testing() include(rapids-test) @@ -32,7 +29,8 @@ set_target_properties( CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON ) -target_link_libraries(cpp_tests PRIVATE kvikio::kvikio GTest::gmock GTest::gtest) +target_link_libraries(cpp_tests PRIVATE kvikio::kvikio GTest::gmock GTest::gtest CUDA::cudart) + rapids_test_add( NAME cpp_tests COMMAND cpp_tests diff --git a/dependencies.yaml b/dependencies.yaml index ae99fb5d83..b80ed69337 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -64,6 +64,7 @@ files: includes: - depends_on_cupy - depends_on_nvcomp + - depends_on_libkvikio - run py_rapids_build_libkvikio: output: pyproject @@ -282,6 +283,31 @@ dependencies: - matrix: packages: - nvidia-nvcomp==4.1.0.6 + depends_on_libkvikio: + common: + - output_types: conda + packages: + - &libkvikio_unsuffixed libkvikio==24.12.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libkvikio-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu12==24.12.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu11==24.12.*,>=0.0.0a0 + - {matrix: null, packages: [*libkvikio_unsuffixed]} docs: common: - output_types: [conda, requirements] diff --git a/python/kvikio/cmake/thirdparty/get_nvcomp.cmake b/python/kvikio/cmake/thirdparty/get_nvcomp.cmake index 9361624c07..a2c6326e76 100644 --- a/python/kvikio/cmake/thirdparty/get_nvcomp.cmake +++ b/python/kvikio/cmake/thirdparty/get_nvcomp.cmake @@ -18,7 +18,11 @@ set(KVIKIO_USE_PROPRIETARY_BINARY ON) function(find_and_configure_nvcomp) include(${rapids-cmake-dir}/cpm/nvcomp.cmake) - rapids_cpm_nvcomp(USE_PROPRIETARY_BINARY ${KVIKIO_USE_PROPRIETARY_BINARY}) + set(export_args) + if(KvikIO_EXPORT_NVCOMP) + set(export_args BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports) + endif() + rapids_cpm_nvcomp(${export_args} USE_PROPRIETARY_BINARY ${KVIKIO_USE_PROPRIETARY_BINARY}) # Per-thread default stream if(TARGET nvcomp AND PER_THREAD_DEFAULT_STREAM) diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py index a2bfffaf48..f4db6d1d05 100644 --- a/python/kvikio/kvikio/__init__.py +++ b/python/kvikio/kvikio/__init__.py @@ -1,6 +1,17 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. +# If libkvikio was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. 
+try: + import libkvikio +except ModuleNotFoundError: + pass +else: + libkvikio.load_library() + del libkvikio + + from kvikio._version import __git_commit__, __version__ from kvikio.cufile import CuFile from kvikio.remote_file import RemoteFile, is_remote_file_available diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index cb9491e75e..5921e4b762 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -20,6 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cupy-cuda11x>=12.0.0", + "libkvikio==24.12.*,>=0.0.0a0", "numcodecs !=0.12.0", "numpy>=1.23,<3.0a0", "nvidia-nvcomp==4.1.0.6", diff --git a/python/libkvikio/CMakeLists.txt b/python/libkvikio/CMakeLists.txt index 278e09f462..270e8ff5be 100644 --- a/python/libkvikio/CMakeLists.txt +++ b/python/libkvikio/CMakeLists.txt @@ -38,6 +38,20 @@ unset(kvikio_FOUND) set(KvikIO_BUILD_EXAMPLES OFF) set(KvikIO_BUILD_TESTS OFF) +if(USE_NVCOMP_RUNTIME_WHEEL) + set(KvikIO_EXPORT_NVCOMP OFF) +endif() set(CUDA_STATIC_RUNTIME ON) add_subdirectory(../../cpp kvikio-cpp) + +if(USE_NVCOMP_RUNTIME_WHEEL) + set(rpaths "$ORIGIN/../../nvidia/nvcomp") + foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_property( + TARGET ${tgt} + PROPERTY INSTALL_RPATH ${rpaths} + APPEND + ) + endforeach() +endif() diff --git a/python/libkvikio/libkvikio/__init__.py b/python/libkvikio/libkvikio/__init__.py index 995cd1027d..a221295d4c 100644 --- a/python/libkvikio/libkvikio/__init__.py +++ b/python/libkvikio/libkvikio/__init__.py @@ -13,5 +13,6 @@ # limitations under the License. 
from libkvikio._version import __git_commit__, __version__ +from libkvikio.load import load_library -__all__ = ["__git_commit__", "__version__"] +__all__ = ["__git_commit__", "__version__", "load_library"] diff --git a/python/libkvikio/libkvikio/load.py b/python/libkvikio/libkvikio/load.py new file mode 100644 index 0000000000..a6b0898e18 --- /dev/null +++ b/python/libkvikio/libkvikio/load.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # Dynamically load libkvikio.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + libkvikio_lib = None + try: + libkvikio_lib = ctypes.CDLL("libkvikio.so", ctypes.RTLD_GLOBAL) + except OSError: + # If neither the `lib` nor the `lib64` directory next to this file contains the library, we assume we are in an + # environment where the C++ library is already installed somewhere else and the + # CMake build of the libkvikio Python package was a no-op. + # + # Note that this approach won't work for real editable installs of the libkvikio + # package. scikit-build-core has limited support for importlib.resources so + # there isn't a clean way to support that case yet. 
+ for lib_dir in ("lib", "lib64"): + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), lib_dir, "libkvikio.so") + ): + libkvikio_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) + break + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libkvikio was loaded from. It is None if no library could be loaded. + return libkvikio_lib