Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmake option: CUDF_KVIKIO_REMOTE_IO #17291

Merged
merged 7 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ option(
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
option(CUDF_STATIC_LINTERS "Enable static linters during compilation" OFF)

# CUDF_KVIKIO_REMOTE_IO: boolean switch (default ON) for remote IO through KvikIO.
# Its value is forwarded to the KvikIO build as KvikIO_REMOTE_SUPPORT and, when true,
# the cudf target is compiled with the CUDF_KVIKIO_REMOTE_IO definition.
option(
CUDF_KVIKIO_REMOTE_IO
"Enable remote IO (e.g. AWS S3) support through KvikIO. If disabled, cudf-python will still be able to do remote IO through fsspec."
ON
)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
Expand All @@ -109,6 +115,9 @@ message(
"CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}"
)
message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
# Report the configured value of CUDF_KVIKIO_REMOTE_IO (emitted at the VERBOSE log level)
message(VERBOSE
"CUDF: Build with remote IO (e.g. AWS S3) support through KvikIO: ${CUDF_KVIKIO_REMOTE_IO}"
)

# Set a default build type if none was specified
rapids_cmake_build_type("Release")
Expand Down Expand Up @@ -890,6 +899,9 @@ target_compile_definitions(cudf PRIVATE "RMM_LOGGING_LEVEL=LIBCUDF_LOGGING_LEVEL
# Define spdlog level
target_compile_definitions(cudf PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${LIBCUDF_LOGGING_LEVEL}")

# Enable remote IO through KvikIO: the $<BOOL:...> generator expression adds the
# CUDF_KVIKIO_REMOTE_IO definition only when the CMake option of the same name is true.
# PRIVATE keeps the definition on cudf's own sources; it is not propagated to consumers.
target_compile_definitions(cudf PRIVATE $<$<BOOL:${CUDF_KVIKIO_REMOTE_IO}>:CUDF_KVIKIO_REMOTE_IO>)

# Compile stringified JIT sources first
add_dependencies(cudf jitify_preprocess_run)

Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_kvikio.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function(find_and_configure_kvikio VERSION)
GIT_REPOSITORY https://github.com/rapidsai/kvikio.git
GIT_TAG branch-${VERSION}
GIT_SHALLOW TRUE SOURCE_SUBDIR cpp
OPTIONS "KvikIO_BUILD_EXAMPLES OFF"
OPTIONS "KvikIO_BUILD_EXAMPLES OFF" "KvikIO_REMOTE_SUPPORT ${CUDF_KVIKIO_REMOTE_IO}"
)

include("${rapids-cmake-dir}/export/find_package_root.cmake")
Expand Down
19 changes: 16 additions & 3 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <cudf/utilities/span.hpp>

#include <kvikio/file_handle.hpp>
#include <kvikio/remote_handle.hpp>

#include <rmm/device_buffer.hpp>

Expand All @@ -37,6 +36,10 @@
#include <regex>
#include <vector>

#ifdef CUDF_KVIKIO_REMOTE_IO
#include <kvikio/remote_handle.hpp>
#endif

namespace cudf {
namespace io {
namespace {
Expand Down Expand Up @@ -391,6 +394,7 @@ class user_datasource_wrapper : public datasource {
datasource* const source; ///< A non-owning pointer to the user-implemented datasource
};

#ifdef CUDF_KVIKIO_REMOTE_IO
madsbk marked this conversation as resolved.
Show resolved Hide resolved
/**
* @brief Remote file source backed by KvikIO, which handles S3 filepaths seamlessly.
*/
Expand Down Expand Up @@ -463,14 +467,23 @@ class remote_file_source : public datasource {
static bool is_supported_remote_url(std::string const& url)
{
  // Returns true when `url` starts with the "s3://" scheme; the match is
  // case-insensitive (icase) and anchored to the beginning of the string (^).
  //
  // The regex is declared exactly once, as a function-local static, so it is
  // compiled on the first call and reused afterwards instead of being rebuilt
  // on every call. It is const because it is only ever read.
  static std::regex const pattern{R"(^s3://)", std::regex_constants::icase};
  return std::regex_search(url, pattern);
}

private:
kvikio::RemoteHandle _kvikio_file;
};

#else
/**
 * @brief Stand-in for the KvikIO-backed remote source, compiled when
 * `CUDF_KVIKIO_REMOTE_IO` is not defined.
 *
 * In this configuration `is_supported_remote_url()` always returns false.
 */
class remote_file_source : public file_source {
 public:
  /// No URL is reported as a supported remote URL in this build configuration.
  static constexpr bool is_supported_remote_url(std::string const& /*url*/) { return false; }

  /// Passes `filepath` straight through to the `file_source` constructor.
  explicit remote_file_source(char const* filepath) : file_source(filepath)
  {
  }
};
#endif
} // namespace

std::unique_ptr<datasource> datasource::create(std::string const& filepath,
Expand Down
Loading