From 9a7cba21006121e5cbfb59f4fd6b3aea9f5d4ab4 Mon Sep 17 00:00:00 2001 From: vuule Date: Thu, 30 Mar 2023 13:55:16 -0700 Subject: [PATCH 1/3] cuIO warnings --- cpp/benchmarks/io/cuio_common.cpp | 17 ++++++++++++++++- cpp/src/io/comp/nvcomp_adapter.cpp | 3 +++ cpp/src/io/csv/reader_impl.cu | 4 +++- cpp/src/io/orc/stripe_enc.cu | 1 + cpp/src/io/parquet/writer_impl.cu | 3 +++ 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 2ed6550a75e..6b8af91b842 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -173,10 +174,24 @@ std::string exec_cmd(std::string_view cmd) return error_out; } +void log_l3_warning_once() +{ + static bool is_logged = false; + if (not is_logged) { + CUDF_LOG_WARN( + "Running benchmarks without dropping the L3 cache; results may not reflect file IO " + "throughput"); + is_logged = true; + } +} + void try_drop_l3_cache() { static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr; - if (not is_drop_cache_enabled) { return; } + if (not is_drop_cache_enabled) { + log_l3_warning_once(); + return; + } std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"}; CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(), diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index fd070363108..9234101c431 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -269,6 +269,9 @@ size_t batched_compress_temp_size(compression_type compression, compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); } catch (...) { // Ignore errors in the expanded version; fall back to the old API in case of failure + CUDF_LOG_WARN( + "Error in CompressGetTempSizeEx call, falling back to CompressGetTempSize; this may increase " + "the memory usage"); } #endif diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 9c1ff67d97c..2da5b8f09db 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -712,7 +712,9 @@ table_with_metadata read_csv(cudf::io::datasource* source, if (!reader_opts.is_enabled_mangle_dupe_cols()) { for (auto& col_name : column_names) { if (++col_names_counts[col_name] > 1) { - // All duplicate columns will be ignored; First appearance is parsed + CUDF_LOG_WARN("Multiple columns with name {}; only the first appearance is parsed", + col_name); + const auto idx = &col_name - column_names.data(); column_flags[idx] = column_parse::disabled; } diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 427167e2d0f..47650beaa66 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1347,6 +1347,7 @@ void CompressOrcDataStreams(uint8_t* compressed_data, [] __device__(compression_result & stat) { stat.status = compression_status::FAILURE; }); // Since SNAPPY is the default compression (may not be explicitly requested), fall back to // writing without compression + CUDF_LOG_WARN("ORC writer: error during compression, writing uncompressed data"); } } else if (compression == ZLIB) { if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index e6e14908f36..ef75e93a983 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -488,6 +488,9 @@ struct leaf_schema_fn { if (col_meta.is_decimal_precision_set()) { CUDF_EXPECTS(col_meta.get_decimal_precision() >= col_schema.decimal_scale, "Precision must be equal to or greater than scale!"); + if (col_schema.type == Type::INT64 and col_meta.get_decimal_precision() < 10) { + CUDF_LOG_WARN("Parquet writer: writing a decimal column with precision < 10 as int64"); + } col_schema.decimal_precision = col_meta.get_decimal_precision(); } } From 3b67af758e268a75863efce6428deaeb62dac175 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Apr 2023 18:40:15 -0700 Subject: [PATCH 2/3] avoid word "error" in warning messages Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- cpp/src/io/orc/stripe_enc.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 47650beaa66..e80a13332d9 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1347,7 +1347,7 @@ void CompressOrcDataStreams(uint8_t* compressed_data, [] __device__(compression_result & stat) { stat.status = compression_status::FAILURE; }); // Since SNAPPY is the default compression (may not be explicitly requested), fall back to // writing without compression - CUDF_LOG_WARN("ORC writer: error during compression, writing uncompressed data"); + CUDF_LOG_WARN("ORC writer: compression failed, writing uncompressed data"); } } else if (compression == ZLIB) { if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE); From 15168511a302c6b34260c455fed47811d0ca1964 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 3 Apr 2023 18:40:41 -0700 Subject: [PATCH 3/3] same Co-authored-by: Mark Harris <783069+harrism@users.noreply.github.com> --- cpp/src/io/comp/nvcomp_adapter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 9234101c431..363ae6af1ad 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -270,7 +270,7 @@ size_t batched_compress_temp_size(compression_type compression, } catch (...) { // Ignore errors in the expanded version; fall back to the old API in case of failure CUDF_LOG_WARN( - "Error in CompressGetTempSizeEx call, falling back to CompressGetTempSize; this may increase " + "CompressGetTempSizeEx call failed, falling back to CompressGetTempSize; this may increase " "the memory usage"); } #endif