Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix up superblock mode #2100

Merged
merged 1 commit into from
May 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/common/huf.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);

typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
Expand Down
25 changes: 25 additions & 0 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,31 @@ typedef struct {
U32 longLengthPos;
} seqStore_t;

typedef struct {
U32 litLength;
U32 matchLength;
} ZSTD_sequenceLength;

/**
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
Copy link
Contributor

@Cyan4973 Cyan4973 May 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that this function, ZSTD_getSequenceLength(), is only used within zstd_compress_superblock.c.

Therefore, why is it implemented in common/zstd_internal.h ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a generic helper function related to sequences. By putting it near the sequences, hopefully the next person who needs to read a litlen/matchlen will use this function, and see the reasoning behind it. A refactor of existing helper code, like the sequence collector, should switch it to using this function.

{
ZSTD_sequenceLength seqLen;
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->matchLength + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthID == 1) {
seqLen.litLength += 0xFFFF;
}
if (seqStore->longLengthID == 2) {
seqLen.matchLength += 0xFFFF;
}
}
return seqLen;
}

/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/huf_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
return nbBits >> 3;
}

static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
Expand Down
52 changes: 31 additions & 21 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1928,21 +1928,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}

static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
switch (cctxParams->literalCompressionMode) {
case ZSTD_lcm_huffman:
return 0;
case ZSTD_lcm_uncompressed:
return 1;
default:
assert(0 /* impossible: pre-validated */);
/* fall-through */
case ZSTD_lcm_auto:
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
}
}

/* ZSTD_useTargetCBlockSize():
* Returns if target compressed block size param is being used.
* If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
Expand Down Expand Up @@ -2387,6 +2372,18 @@ static int ZSTD_isRLE(const BYTE *ip, size_t length) {
return 1;
}

/* Returns true if the given block may be RLE.
* This is just a heuristic based on the compressibility.
* It may return both false positives and false negatives.
*/
static int ZSTD_maybeRLE(seqStore_t const* seqStore)
{
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);

return nbSeqs < 4 && nbLits < 10;
}

static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
{
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
Expand Down Expand Up @@ -2463,6 +2460,16 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
{
DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
if (bss == ZSTDbss_compress) {
if (/* We don't want to emit our first block as a RLE even if it qualifies because
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

* This is only an issue for zstd <= v1.4.3
*/
!zc->isFirstBlock &&
ZSTD_maybeRLE(&zc->seqStore) &&
ZSTD_isRLE((BYTE const*)src, srcSize))
{
return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
}
/* Attempt superblock compression.
*
* Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
Expand All @@ -2481,12 +2488,15 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
* * cSize >= blockBound(srcSize): We have expanded the block too much so
* emit an uncompressed block.
*/
size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, lastBlock);
if (cSize != ERROR(dstSize_tooSmall)) {
FORWARD_IF_ERROR(cSize);
if (cSize != 0 && cSize < srcSize + ZSTD_blockHeaderSize) {
ZSTD_confirmRepcodesAndEntropyTables(zc);
return cSize;
{
size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
if (cSize != ERROR(dstSize_tooSmall)) {
size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
FORWARD_IF_ERROR(cSize);
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
ZSTD_confirmRepcodesAndEntropyTables(zc);
return cSize;
}
}
}
}
Expand Down
50 changes: 50 additions & 0 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
repcodes_t newReps;
if (offset >= ZSTD_REP_NUM) { /* full offset */
newReps.rep[2] = rep[1];
newReps.rep[1] = rep[0];
newReps.rep[0] = offset - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offset + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
newReps.rep[1] = rep[0];
newReps.rep[0] = currentOffset;
} else { /* repCode == 0 */
memcpy(&newReps, rep, sizeof(newReps));
}
}
return newReps;
}

/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
Expand All @@ -351,6 +376,16 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
return ZSTD_blockHeaderSize + srcSize;
}

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
BYTE* const op = (BYTE*)dst;
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
MEM_writeLE24(op, cBlockHeader);
op[3] = src;
return 4;
}


/* ZSTD_minGain() :
* minimum compression required
Expand All @@ -364,6 +399,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
switch (cctxParams->literalCompressionMode) {
case ZSTD_lcm_huffman:
return 0;
case ZSTD_lcm_uncompressed:
return 1;
default:
assert(0 /* impossible: pre-validated */);
/* fall-through */
case ZSTD_lcm_auto:
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
}
}

/*! ZSTD_safecopyLiterals() :
* memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
Expand Down
8 changes: 6 additions & 2 deletions lib/compress/zstd_compress_literals.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
}

memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize;
}

Expand All @@ -62,6 +63,7 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
}

ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
return flSize+1;
}

Expand All @@ -80,8 +82,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;

DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
disableLiteralCompression);
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
disableLiteralCompression, (U32)srcSize);

/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
Expand Down Expand Up @@ -110,6 +112,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table");
hType = set_repeat;
}
}
Expand Down Expand Up @@ -150,5 +153,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
return lhSize+cLitSize;
}
Loading