From 6924637e64735b1c2cb8f8803a8d8cf494080fc7 Mon Sep 17 00:00:00 2001 From: stdpain <34912776+stdpain@users.noreply.github.com> Date: Wed, 26 May 2021 10:02:39 +0800 Subject: [PATCH] [BUG] fix compression bug while compaction (#5893) Because LZ4 can compress at most about 2^31 bytes in one call (its API takes `int` lengths), larger inputs can cause memory problems; pages that large are now stored uncompressed. --- be/src/olap/fs/file_block_manager.cpp | 2 +- be/src/olap/rowset/segment_v2/page_io.cpp | 30 ++++++++++++----------- be/src/util/block_compression.cpp | 12 ++++++++- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/be/src/olap/fs/file_block_manager.cpp b/be/src/olap/fs/file_block_manager.cpp index 616734244e2a93..2f49136a014dc0 100644 --- a/be/src/olap/fs/file_block_manager.cpp +++ b/be/src/olap/fs/file_block_manager.cpp @@ -169,7 +169,7 @@ Status FileWritableBlock::appendv(const Slice* data, size_t data_cnt) { // Calculate the amount of data written size_t bytes_written = accumulate(data, data + data_cnt, static_cast<size_t>(0), - [&](int sum, const Slice& curr) { return sum + curr.size; }); + [](size_t sum, const Slice& curr) { return sum + curr.size; }); _bytes_appended += bytes_written; return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp b/be/src/olap/rowset/segment_v2/page_io.cpp index 16037daa944120..8f455b1b186100 100644 --- a/be/src/olap/rowset/segment_v2/page_io.cpp +++ b/be/src/olap/rowset/segment_v2/page_io.cpp @@ -41,20 +41,22 @@ Status PageIO::compress_page_body(const BlockCompressionCodec* codec, double min size_t uncompressed_size = Slice::compute_total_size(body); if (codec != nullptr && uncompressed_size > 0) { size_t max_compressed_size = codec->max_compressed_len(uncompressed_size); - faststring buf; - buf.resize(max_compressed_size); - Slice compressed_slice(buf); - RETURN_IF_ERROR(codec->compress(body, &compressed_slice)); - buf.resize(compressed_slice.get_size()); - - double space_saving = 1.0 - static_cast<double>(buf.size()) / uncompressed_size; - // return compressed body only when it saves more
than min_space_saving - if (space_saving > 0 && space_saving >= min_space_saving) { - // shrink the buf to fit the len size to avoid taking - // up the memory of the size MAX_COMPRESSED_SIZE - buf.shrink_to_fit(); - *compressed_body = buf.build(); - return Status::OK(); + if (max_compressed_size) { + faststring buf; + buf.resize(max_compressed_size); + Slice compressed_slice(buf); + RETURN_IF_ERROR(codec->compress(body, &compressed_slice)); + buf.resize(compressed_slice.get_size()); + + double space_saving = 1.0 - static_cast<double>(buf.size()) / uncompressed_size; + // return compressed body only when it saves more than min_space_saving + if (space_saving > 0 && space_saving >= min_space_saving) { + // shrink the buf to fit the len size to avoid taking + // up the memory of the size MAX_COMPRESSED_SIZE + buf.shrink_to_fit(); + *compressed_body = buf.build(); + return Status::OK(); + } } } // otherwise, do not compress diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index 109848d630b47b..290ffec66c3fc2 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -26,6 +26,8 @@ #include "gutil/strings/substitute.h" #include "util/faststring.h" +#include <limits> + namespace doris { using strings::Substitute; @@ -71,7 +73,12 @@ class Lz4BlockCompression : public BlockCompressionCodec { return Status::OK(); } - size_t max_compressed_len(size_t len) const override { return LZ4_compressBound(len); } + size_t max_compressed_len(size_t len) const override { + if (len > std::numeric_limits<int32_t>::max()) { + return 0; + } + return LZ4_compressBound(len); + } }; // Used for LZ4 frame format, decompress speed is two times faster than LZ4. @@ -120,6 +127,9 @@ class Lz4fBlockCompression : public BlockCompressionCodec { } size_t max_compressed_len(size_t len) const override { + if (len > std::numeric_limits<int32_t>::max()) { + return 0; + } return std::max(LZ4F_compressBound(len, &_s_preferences), LZ4F_compressFrameBound(len, &_s_preferences)); }