diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 966728d7647..7a556d2c0f6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -310,6 +310,7 @@ add_library(
   src/io/statistics/parquet_column_statistics.cu
   src/io/text/multibyte_split.cu
   src/io/utilities/column_buffer.cpp
+  src/io/utilities/config_utils.cpp
   src/io/utilities/data_sink.cpp
   src/io/utilities/datasource.cpp
   src/io/utilities/file_io_utilities.cpp
diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp
new file mode 100644
index 00000000000..2c1dc1cc0aa
--- /dev/null
+++ b/cpp/src/io/utilities/config_utils.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config_utils.hpp"
+
+#include
+
+#include
+#include
+
+namespace cudf::io::detail {
+
+std::string getenv_or(std::string const& env_var_name, std::string_view default_val)
+{
+  auto const env_val = std::getenv(env_var_name.c_str());  // nullptr when the variable is not set
+  return std::string{(env_val == nullptr) ? default_val : env_val};
+}
+
+namespace cufile_integration {
+
+namespace {
+/**
+ * @brief Defines which cuFile usage to enable.
+ */
+enum class usage_policy : uint8_t { OFF, GDS, ALWAYS };
+
+/**
+ * @brief Get the usage policy from LIBCUDF_CUFILE_POLICY ("GDS" when the variable is not set).
+ */
+usage_policy get_env_policy()
+{
+  static auto const env_val = getenv_or("LIBCUDF_CUFILE_POLICY", "GDS");  // initialized once
+  if (env_val == "OFF") return usage_policy::OFF;
+  if (env_val == "GDS") return usage_policy::GDS;
+  if (env_val == "ALWAYS") return usage_policy::ALWAYS;
+  CUDF_FAIL("Invalid LIBCUDF_CUFILE_POLICY value: " + env_val);  // none of the values above
+}
+}  // namespace
+
+bool is_always_enabled() { return get_env_policy() == usage_policy::ALWAYS; }
+
+bool is_gds_enabled() { return is_always_enabled() or get_env_policy() == usage_policy::GDS; }
+
+}  // namespace cufile_integration
+
+namespace nvcomp_integration {
+
+namespace {
+/**
+ * @brief Defines which nvCOMP usage to enable.
+ */
+enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS };
+
+/**
+ * @brief Get the usage policy from LIBCUDF_NVCOMP_POLICY ("STABLE" when the variable is not set).
+ */
+usage_policy get_env_policy()
+{
+  static auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");  // initialized once
+  if (env_val == "OFF") return usage_policy::OFF;
+  if (env_val == "STABLE") return usage_policy::STABLE;
+  if (env_val == "ALWAYS") return usage_policy::ALWAYS;
+  CUDF_FAIL("Invalid LIBCUDF_NVCOMP_POLICY value: " + env_val);  // none of the values above
+}
+}  // namespace
+
+bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; }
+
+bool is_stable_enabled() { return is_all_enabled() or get_env_policy() == usage_policy::STABLE; }
+
+}  // namespace nvcomp_integration
+
+}  // namespace cudf::io::detail
diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/config_utils.hpp
index a1d8e747e44..baa45fef08a 100644
--- a/cpp/src/io/utilities/config_utils.hpp
+++ b/cpp/src/io/utilities/config_utils.hpp
@@ -15,7 +15,6 @@
  */
 #pragma once
 
-#include
 #include
 
 namespace cudf::io::detail {
@@ -24,44 +23,34 @@ namespace cudf::io::detail {
  * @brief Returns the value of the environment variable, or a default value if the variable is not
  * present.
*/
-inline std::string getenv_or(std::string const& env_var_name, std::string_view default_val)
-{
-  auto const env_val = std::getenv(env_var_name.c_str());
-  return std::string{(env_val == nullptr) ? default_val : env_val};
-}
+std::string getenv_or(std::string const& env_var_name, std::string_view default_val);
 
-namespace nvcomp_integration {
+namespace cufile_integration {
 
-namespace {
 /**
- * @brief Defines which nvCOMP usage to enable.
+ * @brief Returns true if cuFile and its compatibility mode are enabled (policy is "ALWAYS").
  */
-enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS };
+bool is_always_enabled();
 
 /**
- * @brief Get the current usage policy.
+ * @brief Returns true if cuFile use is enabled, i.e. the policy is either "GDS" or "ALWAYS".
  */
-inline usage_policy get_env_policy()
-{
-  static auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");
-  if (env_val == "OFF") return usage_policy::OFF;
-  if (env_val == "ALWAYS") return usage_policy::ALWAYS;
-  return usage_policy::STABLE;
-}
-}  // namespace
+bool is_gds_enabled();
+
+}  // namespace cufile_integration
+
+namespace nvcomp_integration {
 
 /**
  * @brief Returns true if all nvCOMP uses are enabled.
  */
-inline bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; }
+bool is_all_enabled();
 
 /**
  * @brief Returns true if stable nvCOMP use is enabled.
  */
-inline bool is_stable_enabled()
-{
-  return is_all_enabled() or get_env_policy() == usage_policy::STABLE;
-}
+bool is_stable_enabled();
 
 }  // namespace nvcomp_integration
+
 }  // namespace cudf::io::detail
diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index 7afffaede9e..3de6f35cb0d 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -14,15 +14,16 @@
  * limitations under the License.
*/
 
+#include "file_io_utilities.hpp"
+
 #include
+#include
+#include
 #include
 
 #include
 #include
 #include
-#include
 
-#include "file_io_utilities.hpp"
-
 namespace cudf {
 namespace io {
 namespace {
@@ -239,7 +240,7 @@ std::unique_ptr datasource::create(const std::string& filepath,
                                               size_t size)
 {
 #ifdef CUFILE_FOUND
-  if (detail::cufile_config::instance()->is_required()) {
+  if (detail::cufile_integration::is_always_enabled()) {
     // avoid mmap as GDS is expected to be used for most reads
     return std::make_unique(filepath.c_str());
   }
diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp
index 387452e171a..7a48b7d7301 100644
--- a/cpp/src/io/utilities/file_io_utilities.cpp
+++ b/cpp/src/io/utilities/file_io_utilities.cpp
@@ -51,45 +51,14 @@ file_wrapper::~file_wrapper() { close(fd); }
 
 #ifdef CUFILE_FOUND
 
-cufile_config::cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)}
-{
-  if (is_enabled()) {
-    // Modify the config file based on the policy
-    auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json");
-    std::ifstream user_config_file(config_file_path);
-    // Modified config file is stored in a temporary directory
-    auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json";
-    std::ofstream cudf_config_file(cudf_config_path);
-
-    std::string line;
-    while (std::getline(user_config_file, line)) {
-      std::string const tag = "\"allow_compat_mode\"";
-      if (line.find(tag) != std::string::npos) {
-        // TODO: only replace the true/false value
-        // Enable compatiblity mode when cuDF does not fall back to host path
-        cudf_config_file << tag << ": " << (is_required() ? "true" : "false") << ",\n";
-      } else {
-        cudf_config_file << line << '\n';
-      }
-
-      // Point libcufile to the modified config file
-      CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0,
-                   "Failed to set the cuFile config file environment variable.");
-    }
-  }
-}
-cufile_config const* cufile_config::instance()
-{
-  static cufile_config _instance;
-  return &_instance;
-}
-
 /**
  * @brief Class that dynamically loads the cuFile library and manages the cuFile driver.
  */
 class cufile_shim {
  private:
   cufile_shim();
+  void modify_cufile_json() const;
+  void load_cufile_lib();
 
   void* cf_lib = nullptr;
   decltype(cuFileDriverOpen)* driver_open = nullptr;
@@ -116,25 +85,60 @@ class cufile_shim {
   decltype(cuFileWrite)* write = nullptr;
 };
 
+/** @brief Writes a policy-adjusted copy of the cuFile JSON config and points libcufile to it. */
+void cufile_shim::modify_cufile_json() const
+{
+  std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON";
+  // static: keeps the temp dir for the process lifetime, like the removed cufile_config member
+  static temp_directory tmp_config_dir{"cudf_cufile_config"};
+  auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json");
+  std::ifstream user_config_file(config_file_path);
+  if (not user_config_file.is_open()) { return; }  // no user config to adjust; leave env as is
+  auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json";
+  std::ofstream cudf_config_file(cudf_config_path);
+  std::string line;
+  while (std::getline(user_config_file, line)) {
+    std::string const tag = "\"allow_compat_mode\"";
+    if (line.find(tag) != std::string::npos) {
+      // TODO: only replace the true/false value instead of replacing the whole line
+      // Enable compatibility mode when cuDF does not fall back to host path
+      cudf_config_file << tag << ": "
+                       << (cufile_integration::is_always_enabled() ? "true" : "false") << ",\n";
+    } else {
+      cudf_config_file << line << '\n';
+    }
+  }
+  // Point libcufile to the modified copy. NOTE(review): overwrite=0 keeps a user-set path
+  CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0,
+               "Failed to set the cuFile config file environment variable.");
+}
+
+/** @brief Loads libcufile and resolves the cuFile entry points used by this shim. */
+void cufile_shim::load_cufile_lib()
+{
+  cf_lib = dlopen("libcufile.so", RTLD_NOW);
+  CUDF_EXPECTS(cf_lib != nullptr, "could not load the cuFile library (libcufile.so)");
+  driver_open = reinterpret_cast<decltype(driver_open)>(dlsym(cf_lib, "cuFileDriverOpen"));
+  CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol");
+  driver_close = reinterpret_cast<decltype(driver_close)>(dlsym(cf_lib, "cuFileDriverClose"));
+  CUDF_EXPECTS(driver_close != nullptr, "could not find cuFile cuFileDriverClose symbol");
+  handle_register =
+    reinterpret_cast<decltype(handle_register)>(dlsym(cf_lib, "cuFileHandleRegister"));
+  CUDF_EXPECTS(handle_register != nullptr, "could not find cuFile cuFileHandleRegister symbol");
+  handle_deregister =
+    reinterpret_cast<decltype(handle_deregister)>(dlsym(cf_lib, "cuFileHandleDeregister"));
+  CUDF_EXPECTS(handle_deregister != nullptr, "could not find cuFile cuFileHandleDeregister symbol");
+  read = reinterpret_cast<decltype(read)>(dlsym(cf_lib, "cuFileRead"));
+  CUDF_EXPECTS(read != nullptr, "could not find cuFile cuFileRead symbol");
+  write = reinterpret_cast<decltype(write)>(dlsym(cf_lib, "cuFileWrite"));
+  CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol");
+}
+
 cufile_shim::cufile_shim()
 {
   try {
-    cf_lib = dlopen("libcufile.so", RTLD_NOW);
-    driver_open = reinterpret_cast(dlsym(cf_lib, "cuFileDriverOpen"));
-    CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol");
-    driver_close = reinterpret_cast(dlsym(cf_lib, "cuFileDriverClose"));
-    CUDF_EXPECTS(driver_close != nullptr, "could not find cuFile cuFileDriverClose symbol");
-    handle_register =
-      reinterpret_cast(dlsym(cf_lib, "cuFileHandleRegister"));
-    CUDF_EXPECTS(handle_register != nullptr, "could not find cuFile cuFileHandleRegister symbol");
-    handle_deregister =
-      reinterpret_cast(dlsym(cf_lib, "cuFileHandleDeregister"));
-
CUDF_EXPECTS(handle_deregister != nullptr, - "could not find cuFile cuFileHandleDeregister symbol"); - read = reinterpret_cast(dlsym(cf_lib, "cuFileRead")); - CUDF_EXPECTS(read != nullptr, "could not find cuFile cuFileRead symbol"); - write = reinterpret_cast(dlsym(cf_lib, "cuFileWrite")); - CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol"); + modify_cufile_json(); + load_cufile_lib(); CUDF_EXPECTS(driver_open().err == CU_FILE_SUCCESS, "Failed to initialize cuFile driver"); } catch (cudf::logic_error const& err) { @@ -285,11 +289,11 @@ std::future cufile_output_impl::write_async(void const* data, size_t offse std::unique_ptr make_cufile_input(std::string const& filepath) { #ifdef CUFILE_FOUND - if (cufile_config::instance()->is_enabled()) { + if (cufile_integration::is_gds_enabled()) { try { return std::make_unique(filepath); } catch (...) { - if (cufile_config::instance()->is_required()) throw; + if (cufile_integration::is_always_enabled()) throw; } } #endif @@ -299,11 +303,11 @@ std::unique_ptr make_cufile_input(std::string const& filepath std::unique_ptr make_cufile_output(std::string const& filepath) { #ifdef CUFILE_FOUND - if (cufile_config::instance()->is_enabled()) { + if (cufile_integration::is_gds_enabled()) { try { return std::make_unique(filepath); } catch (...) { - if (cufile_config::instance()->is_required()) throw; + if (cufile_integration::is_always_enabled()) throw; } } #endif diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 0119f9b7abd..ede0eb6f925 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -162,32 +162,6 @@ class cufile_output : public cufile_io_base { class cufile_shim; -/** - * @brief Class that manages cuFile configuration. 
- */ -class cufile_config { - std::string const default_policy = "OFF"; - std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON"; - - std::string const policy = default_policy; - temp_directory tmp_config_dir{"cudf_cufile_config"}; - - cufile_config(); - - public: - /** - * @brief Returns true when cuFile use is enabled. - */ - bool is_enabled() const { return policy == "ALWAYS" or policy == "GDS"; } - - /** - * @brief Returns true when cuDF should not fall back to host IO. - */ - bool is_required() const { return policy == "ALWAYS"; } - - static cufile_config const* instance(); -}; - /** * @brief Class that provides RAII for cuFile file registration. */ diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst index 29cbc2024fc..20f3ec87ccb 100644 --- a/docs/cudf/source/basics/io-gds-integration.rst +++ b/docs/cudf/source/basics/io-gds-integration.rst @@ -5,17 +5,18 @@ Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. The SDK is available for download `here `_. +GDS is also included in CUDA Toolkit 11.4 and higher. -Use of GPUDirect Storage in cuDF is disabled by default, and can be enabled through environment variable ``LIBCUDF_CUFILE_POLICY``. +Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. This variable also controls the GDS compatibility mode. -There are three special values for the environment variable: +There are three valid values for the environment variable: - "GDS": Enable GDS use; GDS compatibility mode is *off*. - "ALWAYS": Enable GDS use; GDS compatibility mode is *on*. -- "OFF": Compretely disable GDS use. 
+- "OFF": Completely disable GDS use.
 
-Any other value (or no value set) will keep the GDS disabled for use in cuDF and IO will be done using cuDF's CPU bounce buffers.
+If no value is set, behavior will be the same as the "GDS" option; any other value is rejected as invalid.
 
 This environment variable also affects how cuDF treats GDS errors.
 When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers.
@@ -30,5 +31,3 @@ Operations that support the use of GPUDirect Storage:
 - `to_csv`
 - `to_parquet`
 - `to_orc`
-
-NOTE: current GDS integration is not fully optimized and enabling GDS will not lead to performance improvements in all cases.
diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst
index af89ab5285f..521833e2afd 100644
--- a/docs/cudf/source/basics/io-nvcomp-integration.rst
+++ b/docs/cudf/source/basics/io-nvcomp-integration.rst
@@ -3,15 +3,16 @@ nvCOMP Integration
 
 Some types of compression/decompression can be performed using either `nvCOMP library `_ or the internal implementation.
-Which implementation is used by default depends on the data format and the compression type. Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``.
+Which implementation is used by default depends on the data format and the compression type.
+Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``.
 
-There are three special values for the environment variable:
+There are three valid values for the environment variable:
 
 - "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use.
 - "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations.
 - "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead.
 
-Any other value (or no value set) will result in the same behavior as the "STABLE" option.
+If no value is set, behavior will be the same as the "STABLE" option; any other value is rejected as invalid.
 
 .. table:: Current policy for nvCOMP use for different types