diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index baf625965d8..ea8486c89a3 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -104,7 +104,7 @@ check_then_add_sources_compile_flag ( src/Columns/ColumnVector.cpp src/DataTypes/DataTypeString.cpp src/Interpreters/Join.cpp - src/IO/Compression/CompressionCodecDelta.cpp + src/IO/Compression/CompressionCodecDeltaPFor.cpp src/Storages/DeltaMerge/BitmapFilter/BitmapFilter.cpp src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp ) diff --git a/dbms/src/IO/Compression/CompressionCodecDelta.cpp b/dbms/src/IO/Compression/CompressionCodecDeltaPFor.cpp similarity index 89% rename from dbms/src/IO/Compression/CompressionCodecDelta.cpp rename to dbms/src/IO/Compression/CompressionCodecDeltaPFor.cpp index a7ccd8bf331..388c0799419 100644 --- a/dbms/src/IO/Compression/CompressionCodecDelta.cpp +++ b/dbms/src/IO/Compression/CompressionCodecDeltaPFor.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,20 +33,20 @@ extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; } // namespace ErrorCodes -CompressionCodecDelta::CompressionCodecDelta(UInt8 delta_bytes_size_) - : delta_bytes_size(delta_bytes_size_) +CompressionCodecDeltaPFor::CompressionCodecDeltaPFor(UInt8 bytes_size_) + : bytes_size(bytes_size_) {} -UInt8 CompressionCodecDelta::getMethodByte() const +UInt8 CompressionCodecDeltaPFor::getMethodByte() const { - return static_cast(CompressionMethodByte::Delta); + return static_cast(CompressionMethodByte::DeltaPFor); } -UInt32 CompressionCodecDelta::getMaxCompressedDataSize(UInt32 uncompressed_size) const +UInt32 CompressionCodecDeltaPFor::getMaxCompressedDataSize(UInt32 uncompressed_size) const { - // 1 byte for delta_bytes_size, x bytes for frame of reference, 1 byte for width. - return 1 + delta_bytes_size + sizeof(UInt8) - + BitpackingPrimitives::getRequiredSize(uncompressed_size / delta_bytes_size, delta_bytes_size * 8); + // 1 byte for bytes_size, x bytes for frame of reference, 1 byte for width. + return 1 + bytes_size + sizeof(UInt8) + + BitpackingPrimitives::getRequiredSize(uncompressed_size / bytes_size, bytes_size * 8); } namespace @@ -85,7 +85,12 @@ UInt32 compressData(const char * source, UInt32 source_size, char * dest) if (width == 0) return sizeof(ST) + sizeof(UInt8); auto required_size = BitpackingPrimitives::getRequiredSize(count, width); - BitpackingPrimitives::packBuffer(reinterpret_cast(dest), deltas.data(), count, width); + // after applying frame of reference, all deltas are bigger than 0. + BitpackingPrimitives::packBuffer( + reinterpret_cast(dest), + reinterpret_cast(deltas.data()), + count, + width); return sizeof(ST) + sizeof(UInt8) + required_size; } @@ -226,7 +231,7 @@ void PForDecode(const char * source, UInt32 source_size, unsigned char * dest, U source += sizeof(UInt8); const auto required_size = source_size - sizeof(ST) - sizeof(UInt8); RUNTIME_CHECK(BitpackingPrimitives::getRequiredSize(count, width) == required_size); - BitpackingPrimitives::unPackBuffer(dest, reinterpret_cast(source), count, width); + BitpackingPrimitives::unPackBuffer(dest, reinterpret_cast(source), count, width); ApplyFrameOfReference(reinterpret_cast(dest), frame_of_reference, count); } @@ -268,17 +273,13 @@ void decompressData(const char * source, UInt32 source_size, char * dest } // namespace -UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_size, char * dest) const +UInt32 CompressionCodecDeltaPFor::doCompressData(const char * source, UInt32 source_size, char * dest) const { - if unlikely (source_size % delta_bytes_size != 0) - throw Exception( - ErrorCodes::CANNOT_DECOMPRESS, - "source size {} is not aligned to {}", - source_size, - delta_bytes_size); - dest[0] = delta_bytes_size; + if unlikely (source_size % bytes_size != 0) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "source size {} is not aligned to {}", source_size, bytes_size); + dest[0] = bytes_size; size_t start_pos = 1; - switch (delta_bytes_size) + switch (bytes_size) { case 1: return 1 + compressData(source, source_size, &dest[start_pos]); @@ -293,7 +294,7 @@ UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_ } } -void CompressionCodecDelta::doDecompressData( +void CompressionCodecDeltaPFor::doDecompressData( const char * source, UInt32 source_size, char * dest, @@ -333,7 +334,11 @@ void CompressionCodecDelta::doDecompressData( } } -void CompressionCodecDelta::ordinaryDecompress(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) +void CompressionCodecDeltaPFor::ordinaryDecompress( + const char * source, + UInt32 source_size, + char * dest, + UInt32 dest_size) { if unlikely (source_size < 2) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress delta-encoded data. File has wrong header"); diff --git a/dbms/src/IO/Compression/CompressionCodecDelta.h b/dbms/src/IO/Compression/CompressionCodecDeltaPFor.h similarity index 89% rename from dbms/src/IO/Compression/CompressionCodecDelta.h rename to dbms/src/IO/Compression/CompressionCodecDeltaPFor.h index a3f0c12df3c..67bac7f74e0 100644 --- a/dbms/src/IO/Compression/CompressionCodecDelta.h +++ b/dbms/src/IO/Compression/CompressionCodecDeltaPFor.h @@ -19,10 +19,10 @@ namespace DB { -class CompressionCodecDelta : public ICompressionCodec +class CompressionCodecDeltaPFor : public ICompressionCodec { public: - explicit CompressionCodecDelta(UInt8 delta_bytes_size_); + explicit CompressionCodecDeltaPFor(UInt8 bytes_size_); UInt8 getMethodByte() const override; @@ -42,7 +42,7 @@ class CompressionCodecDelta : public ICompressionCodec bool isGenericCompression() const override { return false; } private: - const UInt8 delta_bytes_size; + const UInt8 bytes_size; }; } // namespace DB diff --git a/dbms/src/IO/Compression/CompressionFactory.h b/dbms/src/IO/Compression/CompressionFactory.h index 63b00d02c66..63c1ccc879d 100644 --- a/dbms/src/IO/Compression/CompressionFactory.h +++ b/dbms/src/IO/Compression/CompressionFactory.h @@ -15,7 +15,7 @@ #pragma once #include -#include +#include #include #include #include @@ -57,8 +57,8 @@ class CompressionFactory } switch (setting.method_byte) { - case CompressionMethodByte::Delta: - return std::make_unique(setting.type_bytes_size); + case CompressionMethodByte::DeltaPFor: + return std::make_unique(setting.type_bytes_size); case CompressionMethodByte::RLE: return std::make_unique(setting.type_bytes_size); case CompressionMethodByte::NONE: diff --git a/dbms/src/IO/Compression/CompressionInfo.h b/dbms/src/IO/Compression/CompressionInfo.h index 52ad2db002c..ff258fcffbf 100644 --- a/dbms/src/IO/Compression/CompressionInfo.h +++ b/dbms/src/IO/Compression/CompressionInfo.h @@ -57,7 +57,7 @@ enum class CompressionMethodByte : UInt8 QPL = 0x88, ZSTD = 0x90, Multiple = 0x91, - Delta = 0x92, + DeltaPFor = 0x92, RLE = 0x93, // COL_END is not a compreesion method, but a flag of column end used in compact file. COL_END = 0x66, diff --git a/dbms/src/IO/Compression/CompressionSettings.h b/dbms/src/IO/Compression/CompressionSettings.h index aadae1d8861..d8bda880bfc 100644 --- a/dbms/src/IO/Compression/CompressionSettings.h +++ b/dbms/src/IO/Compression/CompressionSettings.h @@ -38,7 +38,7 @@ const std::unordered_map method_map = {CompressionMethodByte::ZSTD, CompressionMethod::ZSTD}, {CompressionMethodByte::QPL, CompressionMethod::QPL}, {CompressionMethodByte::NONE, CompressionMethod::NONE}, - {CompressionMethodByte::Delta, CompressionMethod::NONE}, + {CompressionMethodByte::DeltaPFor, CompressionMethod::NONE}, {CompressionMethodByte::RLE, CompressionMethod::NONE}, }; diff --git a/dbms/src/IO/Compression/tests/bench_codec_delta.cpp b/dbms/src/IO/Compression/tests/bench_codec_delta.cpp index 4eecf302737..28774392555 100644 --- a/dbms/src/IO/Compression/tests/bench_codec_delta.cpp +++ b/dbms/src/IO/Compression/tests/bench_codec_delta.cpp @@ -13,7 +13,7 @@ // limitations under the License. #include -#include +#include #include #include @@ -27,7 +27,7 @@ static void codecDeltaOrdinaryBM(benchmark::State & state) std::vector v(DEFAULT_MERGE_BLOCK_SIZE); for (auto & i : v) i = random(); - CompressionCodecDelta codec(sizeof(T)); + CompressionCodecDeltaPFor codec(sizeof(T)); char dest[sizeof(T) * DEFAULT_MERGE_BLOCK_SIZE + 1]; for (auto _ : state) { @@ -52,7 +52,7 @@ static void codecDeltaSpecializedUInt64BM(benchmark::State & state) std::vector v(DEFAULT_MERGE_BLOCK_SIZE); for (auto & i : v) i = random(); - CompressionCodecDelta codec(sizeof(UInt64)); + CompressionCodecDeltaPFor codec(sizeof(UInt64)); char dest[sizeof(UInt64) * DEFAULT_MERGE_BLOCK_SIZE + 1]; for (auto _ : state) { @@ -67,7 +67,7 @@ static void codecDeltaSpecializedUInt32BM(benchmark::State & state) std::vector v(DEFAULT_MERGE_BLOCK_SIZE); for (auto & i : v) i = random(); - CompressionCodecDelta codec(sizeof(UInt32)); + CompressionCodecDeltaPFor codec(sizeof(UInt32)); char dest[sizeof(UInt32) * DEFAULT_MERGE_BLOCK_SIZE + 1]; for (auto _ : state) { diff --git a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp index 484f1d4915a..9cfb3e23efd 100644 --- a/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp +++ b/dbms/src/IO/Compression/tests/gtest_codec_compression.cpp @@ -523,7 +523,7 @@ std::vector generatePyramidOfSequences( #define G(generator) generator, #generator const auto IntegerCodecsToTest = ::testing::Values( - CompressionMethodByte::Delta, + CompressionMethodByte::DeltaPFor, CompressionMethodByte::RLE #if USE_QPL ,