Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cuFile direct device reads/writes by default in cuIO #9722

Merged
merged 15 commits into from
Nov 19, 2021
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ add_library(
src/io/statistics/parquet_column_statistics.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
Expand Down
86 changes: 86 additions & 0 deletions cpp/src/io/utilities/config_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "config_utils.hpp"

#include <cudf/utilities/error.hpp>

#include <cstdlib>
#include <string>

namespace cudf::io::detail {

/**
 * @brief Looks up an environment variable, substituting a fallback when it is not set.
 *
 * @param env_var_name Name of the environment variable to query
 * @param default_val Value returned when the variable is absent from the environment
 * @return The variable's value (possibly empty if it is set to an empty string), or a copy of
 * `default_val` when `std::getenv` reports that the variable is not set
 */
std::string getenv_or(std::string const& env_var_name, std::string_view default_val)
{
  char const* const raw_value = std::getenv(env_var_name.c_str());
  if (raw_value != nullptr) { return std::string{raw_value}; }
  return std::string{default_val};
}

namespace cufile_integration {

namespace {
/**
 * @brief Selects how cuIO is allowed to use cuFile.
 *
 * The enumerators mirror the accepted values of the `LIBCUDF_CUFILE_POLICY` environment
 * variable: "OFF", "GDS" and "ALWAYS".
 */
enum class usage_policy : uint8_t { OFF, GDS, ALWAYS };

/**
 * @brief Translates the `LIBCUDF_CUFILE_POLICY` environment variable into a `usage_policy`.
 *
 * The variable is read once (the string is kept in a function-local static) and "GDS" is
 * assumed when it is not set. A value other than "OFF", "GDS" or "ALWAYS" is reported through
 * `CUDF_FAIL` on every call.
 */
usage_policy get_env_policy()
{
  static auto const policy_name = getenv_or("LIBCUDF_CUFILE_POLICY", "GDS");
  if (policy_name == "ALWAYS") { return usage_policy::ALWAYS; }
  if (policy_name == "GDS") { return usage_policy::GDS; }
  if (policy_name == "OFF") { return usage_policy::OFF; }
  CUDF_FAIL("Invalid LIBCUDF_CUFILE_POLICY value: " + policy_name);
}
}  // namespace

/**
 * @brief Returns true when the policy is "ALWAYS".
 */
bool is_always_enabled() { return usage_policy::ALWAYS == get_env_policy(); }

/**
 * @brief Returns true when the policy is "GDS" or "ALWAYS".
 */
bool is_gds_enabled()
{
  auto const policy = get_env_policy();
  return policy == usage_policy::ALWAYS or policy == usage_policy::GDS;
}

}  // namespace cufile_integration

namespace nvcomp_integration {

namespace {
/**
 * @brief Selects how much of nvCOMP cuIO is allowed to use.
 *
 * The enumerators mirror the accepted values of the `LIBCUDF_NVCOMP_POLICY` environment
 * variable: "OFF", "STABLE" and "ALWAYS".
 */
enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS };

/**
 * @brief Translates the `LIBCUDF_NVCOMP_POLICY` environment variable into a `usage_policy`.
 *
 * The variable is read once (the string is kept in a function-local static) and "STABLE" is
 * assumed when it is not set. A value other than "OFF", "STABLE" or "ALWAYS" is reported
 * through `CUDF_FAIL` on every call.
 */
usage_policy get_env_policy()
{
  static auto const policy_name = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");
  if (policy_name == "ALWAYS") { return usage_policy::ALWAYS; }
  if (policy_name == "STABLE") { return usage_policy::STABLE; }
  if (policy_name == "OFF") { return usage_policy::OFF; }
  CUDF_FAIL("Invalid LIBCUDF_NVCOMP_POLICY value: " + policy_name);
}
}  // namespace

/**
 * @brief Returns true when the policy is "ALWAYS".
 */
bool is_all_enabled() { return usage_policy::ALWAYS == get_env_policy(); }

/**
 * @brief Returns true when the policy is "STABLE" or "ALWAYS".
 */
bool is_stable_enabled()
{
  auto const policy = get_env_policy();
  return policy == usage_policy::ALWAYS or policy == usage_policy::STABLE;
}

}  // namespace nvcomp_integration

} // namespace cudf::io::detail
37 changes: 13 additions & 24 deletions cpp/src/io/utilities/config_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/
#pragma once

#include <cstdlib>
#include <string>

namespace cudf::io::detail {
Expand All @@ -24,44 +23,34 @@ namespace cudf::io::detail {
* @brief Returns the value of the environment variable, or a default value if the variable is not
* present.
*/
inline std::string getenv_or(std::string const& env_var_name, std::string_view default_val)
{
auto const env_val = std::getenv(env_var_name.c_str());
return std::string{(env_val == nullptr) ? default_val : env_val};
}
std::string getenv_or(std::string const& env_var_name, std::string_view default_val);

namespace nvcomp_integration {
namespace cufile_integration {

namespace {
/**
* @brief Defines which nvCOMP usage to enable.
* @brief Returns true if cuFile and its compatibility mode are enabled.
*/
enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS };
bool is_always_enabled();

/**
* @brief Get the current usage policy.
* @brief Returns true if cuFile use is enabled, either direct IO only (GDS policy) or with compatibility mode (ALWAYS policy).
*/
inline usage_policy get_env_policy()
{
static auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");
if (env_val == "OFF") return usage_policy::OFF;
if (env_val == "ALWAYS") return usage_policy::ALWAYS;
return usage_policy::STABLE;
}
} // namespace
bool is_gds_enabled();

} // namespace cufile_integration

namespace nvcomp_integration {

/**
* @brief Returns true if all nvCOMP uses are enabled.
*/
inline bool is_all_enabled() { return get_env_policy() == usage_policy::ALWAYS; }
bool is_all_enabled();

/**
* @brief Returns true if stable nvCOMP use is enabled.
*/
inline bool is_stable_enabled()
{
return is_all_enabled() or get_env_policy() == usage_policy::STABLE;
}
bool is_stable_enabled();

} // namespace nvcomp_integration

} // namespace cudf::io::detail
9 changes: 5 additions & 4 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
* limitations under the License.
*/

#include "file_io_utilities.hpp"

#include <cudf/io/datasource.hpp>
#include <cudf/utilities/error.hpp>
#include <io/utilities/config_utils.hpp>

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <cudf/utilities/error.hpp>
#include "file_io_utilities.hpp"

namespace cudf {
namespace io {
namespace {
Expand Down Expand Up @@ -239,7 +240,7 @@ std::unique_ptr<datasource> datasource::create(const std::string& filepath,
size_t size)
{
#ifdef CUFILE_FOUND
if (detail::cufile_config::instance()->is_required()) {
if (detail::cufile_integration::is_always_enabled()) {
// avoid mmap as GDS is expected to be used for most reads
return std::make_unique<direct_read_source>(filepath.c_str());
}
Expand Down
110 changes: 57 additions & 53 deletions cpp/src/io/utilities/file_io_utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,45 +51,14 @@ file_wrapper::~file_wrapper() { close(fd); }

#ifdef CUFILE_FOUND

/**
 * @brief Reads the cuFile policy from `LIBCUDF_CUFILE_POLICY` and, when cuFile use is enabled,
 * prepares a modified copy of the cuFile JSON config.
 *
 * The user's config (path taken from `CUFILE_ENV_PATH_JSON`, or `/etc/cufile.json` when that is
 * not set) is copied line by line into `tmp_config_dir`. Any line containing
 * `"allow_compat_mode"` is replaced so that compatibility mode is on only when `is_required()`
 * is true. `CUFILE_ENV_PATH_JSON` is then pointed at the copy.
 *
 * The environment variable is set once, after the copy, and only if at least one line was read
 * from the user's config; an empty or unreadable config leaves the environment untouched.
 * `setenv` is called with overwrite disabled, so a value already present in the environment is
 * kept as is.
 */
cufile_config::cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)}
{
  if (not is_enabled()) { return; }

  // Modify the config file based on the policy
  auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json");
  std::ifstream user_config_file(config_file_path);
  // Modified config file is stored in a temporary directory
  auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json";
  std::ofstream cudf_config_file(cudf_config_path);

  std::string const tag = "\"allow_compat_mode\"";
  bool copied_any_line  = false;
  std::string line;
  while (std::getline(user_config_file, line)) {
    if (line.find(tag) != std::string::npos) {
      // TODO: only replace the true/false value
      // Enable compatibility mode when cuDF does not fall back to host path
      cudf_config_file << tag << ": " << (is_required() ? "true" : "false") << ",\n";
    } else {
      cudf_config_file << line << '\n';
    }
    copied_any_line = true;
  }

  if (copied_any_line) {
    // Point libcufile to the modified config file
    CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0,
                 "Failed to set the cuFile config file environment variable.");
  }
}
/**
 * @brief Returns a pointer to the single shared configuration object.
 *
 * The object is a function-local static, so it is constructed on the first call.
 */
cufile_config const* cufile_config::instance()
{
  static cufile_config shared_config;
  cufile_config const* const handle = &shared_config;
  return handle;
}

/**
* @brief Class that dynamically loads the cuFile library and manages the cuFile driver.
*/
class cufile_shim {
private:
cufile_shim();
void modify_cufile_json() const;
void load_cufile_lib();

void* cf_lib = nullptr;
decltype(cuFileDriverOpen)* driver_open = nullptr;
Expand All @@ -116,25 +85,60 @@ class cufile_shim {
decltype(cuFileWrite)* write = nullptr;
};

void cufile_shim::modify_cufile_json() const
{
std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON";
temp_directory tmp_config_dir{"cudf_cufile_config"};

// Modify the config file based on the policy
auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json");
std::ifstream user_config_file(config_file_path);
// Modified config file is stored in a temporary directory
auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json";
std::ofstream cudf_config_file(cudf_config_path);

std::string line;
while (std::getline(user_config_file, line)) {
std::string const tag = "\"allow_compat_mode\"";
if (line.find(tag) != std::string::npos) {
// TODO: only replace the true/false value instead of replacing the whole line
// Enable compatibility mode when cuDF does not fall back to host path
cudf_config_file << tag << ": "
<< (cufile_integration::is_always_enabled() ? "true" : "false") << ",\n";
} else {
cudf_config_file << line << '\n';
}

// Point libcufile to the modified config file
CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0,
"Failed to set the cuFile config file environment variable.");
}
}

/**
 * @brief Opens `libcufile.so` and resolves the cuFile entry points used by this shim.
 *
 * Stores the library handle in `cf_lib` and the addresses of `cuFileDriverOpen`,
 * `cuFileDriverClose`, `cuFileHandleRegister`, `cuFileHandleDeregister`, `cuFileRead` and
 * `cuFileWrite` in the corresponding members. A failed `dlopen` or any unresolved symbol is
 * reported through `CUDF_EXPECTS`.
 */
void cufile_shim::load_cufile_lib()
{
  cf_lib = dlopen("libcufile.so", RTLD_NOW);
  // Check the handle before any lookup so a missing library is reported as such, rather than
  // as a missing symbol from a dlsym call on a null handle.
  CUDF_EXPECTS(cf_lib != nullptr, "Failed to load cuFile library");

  driver_open = reinterpret_cast<decltype(driver_open)>(dlsym(cf_lib, "cuFileDriverOpen"));
  CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol");
  driver_close = reinterpret_cast<decltype(driver_close)>(dlsym(cf_lib, "cuFileDriverClose"));
  CUDF_EXPECTS(driver_close != nullptr, "could not find cuFile cuFileDriverClose symbol");
  handle_register =
    reinterpret_cast<decltype(handle_register)>(dlsym(cf_lib, "cuFileHandleRegister"));
  CUDF_EXPECTS(handle_register != nullptr, "could not find cuFile cuFileHandleRegister symbol");
  handle_deregister =
    reinterpret_cast<decltype(handle_deregister)>(dlsym(cf_lib, "cuFileHandleDeregister"));
  CUDF_EXPECTS(handle_deregister != nullptr, "could not find cuFile cuFileHandleDeregister symbol");
  read = reinterpret_cast<decltype(read)>(dlsym(cf_lib, "cuFileRead"));
  CUDF_EXPECTS(read != nullptr, "could not find cuFile cuFileRead symbol");
  write = reinterpret_cast<decltype(write)>(dlsym(cf_lib, "cuFileWrite"));
  CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol");
}

cufile_shim::cufile_shim()
{
try {
cf_lib = dlopen("libcufile.so", RTLD_NOW);
driver_open = reinterpret_cast<decltype(driver_open)>(dlsym(cf_lib, "cuFileDriverOpen"));
CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol");
driver_close = reinterpret_cast<decltype(driver_close)>(dlsym(cf_lib, "cuFileDriverClose"));
CUDF_EXPECTS(driver_close != nullptr, "could not find cuFile cuFileDriverClose symbol");
handle_register =
reinterpret_cast<decltype(handle_register)>(dlsym(cf_lib, "cuFileHandleRegister"));
CUDF_EXPECTS(handle_register != nullptr, "could not find cuFile cuFileHandleRegister symbol");
handle_deregister =
reinterpret_cast<decltype(handle_deregister)>(dlsym(cf_lib, "cuFileHandleDeregister"));
CUDF_EXPECTS(handle_deregister != nullptr,
"could not find cuFile cuFileHandleDeregister symbol");
read = reinterpret_cast<decltype(read)>(dlsym(cf_lib, "cuFileRead"));
CUDF_EXPECTS(read != nullptr, "could not find cuFile cuFileRead symbol");
write = reinterpret_cast<decltype(write)>(dlsym(cf_lib, "cuFileWrite"));
CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol");
modify_cufile_json();
load_cufile_lib();

CUDF_EXPECTS(driver_open().err == CU_FILE_SUCCESS, "Failed to initialize cuFile driver");
} catch (cudf::logic_error const& err) {
Expand Down Expand Up @@ -285,11 +289,11 @@ std::future<void> cufile_output_impl::write_async(void const* data, size_t offse
std::unique_ptr<cufile_input_impl> make_cufile_input(std::string const& filepath)
{
#ifdef CUFILE_FOUND
if (cufile_config::instance()->is_enabled()) {
if (cufile_integration::is_gds_enabled()) {
try {
return std::make_unique<cufile_input_impl>(filepath);
} catch (...) {
if (cufile_config::instance()->is_required()) throw;
if (cufile_integration::is_always_enabled()) throw;
}
}
#endif
Expand All @@ -299,11 +303,11 @@ std::unique_ptr<cufile_input_impl> make_cufile_input(std::string const& filepath
std::unique_ptr<cufile_output_impl> make_cufile_output(std::string const& filepath)
{
#ifdef CUFILE_FOUND
if (cufile_config::instance()->is_enabled()) {
if (cufile_integration::is_gds_enabled()) {
try {
return std::make_unique<cufile_output_impl>(filepath);
} catch (...) {
if (cufile_config::instance()->is_required()) throw;
if (cufile_integration::is_always_enabled()) throw;
}
}
#endif
Expand Down
26 changes: 0 additions & 26 deletions cpp/src/io/utilities/file_io_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,32 +162,6 @@ class cufile_output : public cufile_io_base {

class cufile_shim;

/**
* @brief Class that manages cuFile configuration.
*
* The policy is read from the `LIBCUDF_CUFILE_POLICY` environment variable by the constructor.
* The constructor is private; the object is obtained through `instance()`.
*/
class cufile_config {
// Policy used when `LIBCUDF_CUFILE_POLICY` is not set.
std::string const default_policy = "OFF";
// Name of the environment variable holding the path of the cuFile JSON config file.
std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON";

// Active policy string; the constructor initializes it from the environment. Declared after
// `default_policy` because its default member initializer reads that member.
std::string const policy = default_policy;
// Directory in which the constructor writes the modified `cufile.json`.
temp_directory tmp_config_dir{"cudf_cufile_config"};

// Private: instances are created only by `instance()`.
cufile_config();

public:
/**
* @brief Returns true when cuFile use is enabled.
*
* This is the case when the policy is "ALWAYS" or "GDS".
*/
bool is_enabled() const { return policy == "ALWAYS" or policy == "GDS"; }

/**
* @brief Returns true when cuDF should not fall back to host IO.
*
* This is the case when the policy is "ALWAYS".
*/
bool is_required() const { return policy == "ALWAYS"; }

/**
* @brief Returns a pointer to the shared configuration object.
*/
static cufile_config const* instance();
};

/**
* @brief Class that provides RAII for cuFile file registration.
*/
Expand Down
Loading