From bd1ef2959e64c9f44a709cd9aaaad5dc72611c23 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Apr 2023 22:49:34 -0700 Subject: [PATCH] Log cuIO warnings using the libcudf logger (#13043) Log warning messages in the following scenarios: - cuIO reader benchmark is run without dropping the L3 cache. - An nvCOMP call fails and we fall back to older API. - Mangling of duplicated column names is disabled and some columns are dropped. - Default compression fails in ORC writer and we fall back to uncompressed output. - Parquet writer is writing a decimal column with precision < 10 as int64. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/13043 --- cpp/benchmarks/io/cuio_common.cpp | 17 ++++++++++++++++- cpp/src/io/comp/nvcomp_adapter.cpp | 3 +++ cpp/src/io/csv/reader_impl.cu | 4 +++- cpp/src/io/orc/stripe_enc.cu | 1 + cpp/src/io/parquet/writer_impl.cu | 3 +++ 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 2ed6550a75e..6b8af91b842 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -173,10 +174,24 @@ std::string exec_cmd(std::string_view cmd) return error_out; } +void log_l3_warning_once() +{ + static bool is_logged = false; + if (not is_logged) { + CUDF_LOG_WARN( + "Running benchmarks without dropping the L3 cache; results may not reflect file IO " + "throughput"); + is_logged = true; + } +} + void try_drop_l3_cache() { static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr; - if (not is_drop_cache_enabled) { return; } + if (not is_drop_cache_enabled) { + log_l3_warning_once(); + return; + } std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"}; 
CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(), diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index fd070363108..363ae6af1ad 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -269,6 +269,9 @@ size_t batched_compress_temp_size(compression_type compression, compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); } catch (...) { // Ignore errors in the expanded version; fall back to the old API in case of failure + CUDF_LOG_WARN( + "CompressGetTempSizeEx call failed, falling back to CompressGetTempSize; this may increase " + "the memory usage"); } #endif diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 9c1ff67d97c..2da5b8f09db 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -712,7 +712,9 @@ table_with_metadata read_csv(cudf::io::datasource* source, if (!reader_opts.is_enabled_mangle_dupe_cols()) { for (auto& col_name : column_names) { if (++col_names_counts[col_name] > 1) { - // All duplicate columns will be ignored; First appearance is parsed + CUDF_LOG_WARN("Multiple columns with name {}; only the first appearance is parsed", + col_name); + const auto idx = &col_name - column_names.data(); column_flags[idx] = column_parse::disabled; } diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b891b3ac567..b52075e4c28 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1334,6 +1334,7 @@ void CompressOrcDataStreams(uint8_t* compressed_data, [] __device__(compression_result & stat) { stat.status = compression_status::FAILURE; }); // Since SNAPPY is the default compression (may not be explicitly requested), fall back to // writing without compression + CUDF_LOG_WARN("ORC writer: compression failed, writing uncompressed data"); } } else if (compression == ZLIB) { if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE); diff 
--git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index e6e14908f36..ef75e93a983 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -488,6 +488,9 @@ struct leaf_schema_fn { if (col_meta.is_decimal_precision_set()) { CUDF_EXPECTS(col_meta.get_decimal_precision() >= col_schema.decimal_scale, "Precision must be equal to or greater than scale!"); + if (col_schema.type == Type::INT64 and col_meta.get_decimal_precision() < 10) { + CUDF_LOG_WARN("Parquet writer: writing a decimal column with precision < 10 as int64"); + } col_schema.decimal_precision = col_meta.get_decimal_precision(); } }