Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add log messages about cuIO's nvCOMP and cuFile use #13132

Merged
merged 18 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 77 additions & 4 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ auto batched_decompress_async(compression_type compression, Args&&... args)
}
}

std::string compression_type_name(compression_type compression)
{
switch (compression) {
case compression_type::SNAPPY: return "Snappy";
case compression_type::ZSTD: return "Zstandard";
case compression_type::DEFLATE: return "Deflate";
}
return "compression_type(" + std::to_string(static_cast<int>(compression)) + ")";
}

size_t batched_decompress_temp_size(compression_type compression,
size_t num_chunks,
size_t max_uncomp_chunk_size,
Expand Down Expand Up @@ -439,8 +449,23 @@ feature_status_parameters::feature_status_parameters()
cudaDeviceGetAttribute(&compute_capability_major, cudaDevAttrComputeCapabilityMajor, device));
}

std::optional<std::string> is_compression_disabled(compression_type compression,
feature_status_parameters params)
// Represents all parameters required to determine status of a compression/decompression feature
using feature_status_inputs = std::pair<compression_type, feature_status_parameters>;
struct hash_feature_status_inputs {
size_t operator()(feature_status_inputs const& fsi) const
{
// Outside of unit tests, the same `feature_status_parameters` value will always be passed
// within a run; for simplicity, only use `compression_type` for the hash
return std::hash<compression_type>{}(fsi.first);
}
};

// Hash map type that stores feature status for different combinations of input parameters
using feature_status_memo_map =
vuule marked this conversation as resolved.
Show resolved Hide resolved
std::unordered_map<feature_status_inputs, std::optional<std::string>, hash_feature_status_inputs>;

std::optional<std::string> is_compression_disabled_impl(compression_type compression,
feature_status_parameters params)
{
switch (compression) {
case compression_type::DEFLATE: {
Expand Down Expand Up @@ -477,6 +502,30 @@ std::optional<std::string> is_compression_disabled(compression_type compression,
return "Unsupported compression type";
}

std::optional<std::string> is_compression_disabled(compression_type compression,
feature_status_parameters params)
{
static feature_status_memo_map comp_status_reason;
if (auto mem_res_it = comp_status_reason.find(feature_status_inputs{compression, params});
mem_res_it != comp_status_reason.end()) {
return mem_res_it->second;
}

// The rest of the function will execute only once per run, the memoized result will be returned
// in all subsequent calls with the same compression type
auto const reason = is_compression_disabled_impl(compression, params);
comp_status_reason[{compression, params}] = reason;
if (reason.has_value()) {
CUDF_LOG_INFO("nvCOMP is disabled for {} compression; reason: {}",
compression_type_name(compression),
reason.value());
} else {
CUDF_LOG_INFO("nvCOMP is enabled for {} compression", compression_type_name(compression));
}

return reason;
}

std::optional<std::string> is_zstd_decomp_disabled(feature_status_parameters const& params)
{
if (not NVCOMP_HAS_ZSTD_DECOMP(
Expand All @@ -503,8 +552,8 @@ std::optional<std::string> is_zstd_decomp_disabled(feature_status_parameters con
return std::nullopt;
}

std::optional<std::string> is_decompression_disabled(compression_type compression,
feature_status_parameters params)
std::optional<std::string> is_decompression_disabled_impl(compression_type compression,
feature_status_parameters params)
{
switch (compression) {
case compression_type::DEFLATE: {
Expand All @@ -531,6 +580,30 @@ std::optional<std::string> is_decompression_disabled(compression_type compressio
return "Unsupported compression type";
}

std::optional<std::string> is_decompression_disabled(compression_type compression,
feature_status_parameters params)
{
static feature_status_memo_map decomp_status_reason;
if (auto mem_res_it = decomp_status_reason.find(feature_status_inputs{compression, params});
mem_res_it != decomp_status_reason.end()) {
return mem_res_it->second;
}

// The rest of the function will execute only once per run, the memoized result will be returned
// in all subsequent calls with the same compression type
auto const reason = is_decompression_disabled_impl(compression, params);
decomp_status_reason[{compression, params}] = reason;
if (reason.has_value()) {
CUDF_LOG_INFO("nvCOMP is disabled for {} decompression; reason: {}",
compression_type_name(compression),
reason.value());
} else {
CUDF_LOG_INFO("nvCOMP is enabled for {} decompression", compression_type_name(compression));
}

return reason;
}

size_t compress_input_alignment_bits(compression_type compression)
{
switch (compression) {
Expand Down
15 changes: 14 additions & 1 deletion cpp/src/io/comp/nvcomp_adapter.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -55,6 +55,19 @@ struct feature_status_parameters {
}
};

/**
* @brief Equality operator overload. Required to use `feature_status_parameters` as a map key.
*/
inline bool operator==(feature_status_parameters const& lhs, feature_status_parameters const& rhs)
{
return lhs.lib_major_version == rhs.lib_major_version and
lhs.lib_minor_version == rhs.lib_minor_version and
lhs.lib_patch_version == rhs.lib_patch_version and
lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and
lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and
lhs.compute_capability_major == rhs.compute_capability_major;
}
Comment on lines +63 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that this cover all the members of feature_status_parameters. So the compiler should generate this operator == for you.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Humnn, it seems that the default operator==() is available from C++ 20 :|

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it didn't work without the overload unfortunately.


/**
* @brief If a compression type is disabled through nvCOMP, returns the reason as a string.
*
Expand Down
26 changes: 22 additions & 4 deletions cpp/src/io/utilities/file_io_utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,18 @@ std::unique_ptr<cufile_input_impl> make_cufile_input(std::string const& filepath
{
if (cufile_integration::is_gds_enabled()) {
try {
return std::make_unique<cufile_input_impl>(filepath);
auto const cufile_in = std::make_unique<cufile_input_impl>(filepath);
CUDF_LOG_INFO("File successfully opened for reading with GDS.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this log? It succeeds, and nothing special here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal of the messages in this PR is to provide some transparency into what method is used for file IO. We have direct use of GDS (this branch), no GDS (host IO + memcpy), and kvikIO. I included messages that make this selection apparent. FWIW, this is not the default branch, so very few users/devs will see this message.
I guess we can derive that lack of messages means that GDS is used, but I would prefer to log this case as INFO (off by default). If we find that this is too verbose, I'll remove some of these.

} catch (...) {
if (cufile_integration::is_always_enabled()) throw;
if (cufile_integration::is_always_enabled()) {
CUDF_LOG_ERROR(
"Failed to open file for reading with GDS. Enable bounce buffer fallback to read this "
"file.");
throw;
}
CUDF_LOG_INFO(
"Failed to open file for reading with GDS. Data will be read from the file using a bounce "
"buffer (possible performance impact).");
}
}
return nullptr;
Expand All @@ -285,9 +294,18 @@ std::unique_ptr<cufile_output_impl> make_cufile_output(std::string const& filepa
{
if (cufile_integration::is_gds_enabled()) {
try {
return std::make_unique<cufile_output_impl>(filepath);
auto const cufile_out = std::make_unique<cufile_output_impl>(filepath);
CUDF_LOG_INFO("File successfully opened for writing with GDS.");
} catch (...) {
if (cufile_integration::is_always_enabled()) throw;
if (cufile_integration::is_always_enabled()) {
CUDF_LOG_ERROR(
"Failed to open file for writing with GDS. Enable bounce buffer fallback to write to "
"this file.");
throw;
}
CUDF_LOG_INFO(
"Failed to open file for writing with GDS. Data will be written to the file using a bounce "
"buffer (possible performance impact).");
}
}
return nullptr;
Expand Down