Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding --long support for --patch-from #1959

Merged
merged 34 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
73edb1b
adding long support for patch-from
Apr 7, 2020
77aa272
adding refPrefix to dictionary_decompress
Apr 7, 2020
81549bc
adding refPrefix to dictionary_loader
Apr 7, 2020
69cbfc1
conversion nit
Apr 7, 2020
4c9fc6d
triggering log mode on chainLog < fileLog and removing old threshold
Apr 7, 2020
f9335a6
adding refPrefix to dictionary_round_trip
Apr 7, 2020
1f339cc
adding docs
Apr 7, 2020
510e08b
adding enableldm + forceWindow test for dict
Apr 8, 2020
9df3e9e
separate patch-from logic into FIO_adjustParamsForPatchFromMode
Apr 8, 2020
07a204a
moving memLimit adjustment to outside ifdefs (need for decomp)
Apr 8, 2020
c949f2e
removing refPrefix gate on dictionary_round_trip
Apr 8, 2020
fe332df
rebase on top of dev refPrefix change
Apr 8, 2020
7eaafaa
making sure refPrefx + ldm is < 1% of srcSize
Apr 8, 2020
ec5c4c6
combining notes for patch-from
Apr 8, 2020
abda0ae
moving memlimit logic inside fileio.c
Apr 8, 2020
f6d66bd
adding display for optimal parser and long mode trigger
Apr 8, 2020
d67bf90
conversion nit
Apr 8, 2020
36d50ec
fuzzer found heap-overflow fix
Apr 9, 2020
80cd488
another conversion nit
Apr 9, 2020
d054841
moving FIO_adjustMemLimitForPatchFromMode outside ifndef
Apr 9, 2020
025b44a
making params immutable
Apr 9, 2020
6e9baf8
moving memLimit update before createDictBuffer call
Apr 9, 2020
ffd1a8a
making maxSrcSize unsigned long long
Apr 9, 2020
e60e7a6
making dictSize and maxSrcSize params unsigned long long
Apr 9, 2020
16c826d
error on files larger than 4gb
Apr 9, 2020
0320dbf
extend refPrefix test to include round trip
Apr 9, 2020
14e075d
conversion to size_t
Apr 9, 2020
c6e9d7f
making sure ldm is at least 10x better
Apr 15, 2020
2177a0d
removing break
Apr 15, 2020
aa17451
including zstd_compress_internal and removing redundant macros
Apr 16, 2020
92a9775
exposing ZSTD_cycleLog()
Apr 16, 2020
fc4844a
using cycleLog instead of chainLog
Apr 16, 2020
7b113a1
add some more docs about user optimizations
Apr 17, 2020
a18409c
formatting
Apr 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -945,9 +945,11 @@ size_t ZSTD_CCtx_refPrefix_advanced(
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
ZSTD_clearAllDicts(cctx);
cctx->prefixDict.dict = prefix;
cctx->prefixDict.dictSize = prefixSize;
cctx->prefixDict.dictContentType = dictContentType;
if (prefix != NULL && prefixSize > 0) {
cctx->prefixDict.dict = prefix;
cctx->prefixDict.dictSize = prefixSize;
cctx->prefixDict.dictContentType = dictContentType;
}
return 0;
}

Expand Down Expand Up @@ -2782,6 +2784,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
* @return : 0, or an error code
*/
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ldmState_t* ls,
ZSTD_cwksp* ws,
ZSTD_CCtx_params const* params,
const void* src, size_t srcSize,
Expand All @@ -2793,6 +2796,11 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ZSTD_window_update(&ms->window, src, srcSize);
ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);

if (params->ldmParams.enableLdm && ls != NULL) {
ZSTD_window_update(&ls->window, src, srcSize);
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
bimbashrestha marked this conversation as resolved.
Show resolved Hide resolved
}

/* Assert that the ms params match the params we're being given */
ZSTD_assertEqualCParams(params->cParams, ms->cParams);

Expand All @@ -2805,6 +2813,11 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,

ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);

if (params->ldmParams.enableLdm && ls != NULL && srcSize >= params->ldmParams.minMatchLength) {
ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
break;
bimbashrestha marked this conversation as resolved.
Show resolved Hide resolved
}

switch(params->cParams.strategy)
{
case ZSTD_fast:
Expand Down Expand Up @@ -2983,7 +2996,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
ms, ws, params, dictPtr, dictContentSize, dtlm));
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm));
return dictID;
}
}
Expand All @@ -2993,6 +3006,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
ldmState_t* ls,
ZSTD_cwksp* ws,
const ZSTD_CCtx_params* params,
const void* dict, size_t dictSize,
Expand All @@ -3010,13 +3024,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,

/* dict restricted modes */
if (dictContentType == ZSTD_dct_rawContent)
return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);

if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
return ZSTD_loadDictionaryContent(
ms, ws, params, dict, dictSize, dtlm);
ms, ls, ws, params, dict, dictSize, dtlm);
}
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
assert(0); /* impossible */
Expand Down Expand Up @@ -3059,12 +3073,12 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
{ size_t const dictID = cdict ?
ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->workspace, &cctx->appliedParams, cdict->dictContent,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, dictContentType, dtlm,
cctx->entropyWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->workspace, &cctx->appliedParams, dict, dictSize,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
dictContentType, dtlm, cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= UINT_MAX);
Expand Down Expand Up @@ -3342,7 +3356,7 @@ static size_t ZSTD_initCDict_internal(
params.fParams.contentSizeFlag = 1;
params.cParams = cParams;
{ size_t const dictID = ZSTD_compress_insertDictionary(
&cdict->cBlockState, &cdict->matchState, &cdict->workspace,
&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
&params, cdict->dictContent, cdict->dictContentSize,
dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
Expand Down
1 change: 1 addition & 0 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ typedef struct {
typedef struct {
ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
U32 loadedDictEnd;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
* Depends on ldmParams.minMatchLength */
Expand Down
14 changes: 12 additions & 2 deletions lib/compress/zstd_ldm.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,17 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
return rollingHash;
}

/** ZSTD_ldm_fillHashTable() :
 *  Feeds the bytes in [ip, iend) to ZSTD_ldm_fillLdmHashTable() so that
 *  `state` can reference them later (the callers use this for dictionary /
 *  prefix content).
 *
 *  @param state   ldm state to update; positions are taken relative to
 *                 state->window.base.
 *  @param ip      first byte of the content to insert.
 *  @param iend    one past the last byte of the content to insert.
 *  @param params  ldm parameters (minMatchLength, hashLog, bucketSizeLog).
 *
 *  Content shorter than params->minMatchLength is ignored : it does not hold
 *  a single complete hashing window. */
void ZSTD_ldm_fillHashTable(
            ldmState_t* state, const BYTE* ip,
            const BYTE* iend, ldmParams_t const* params)
{
    /* The starting hash is computed over minMatchLength bytes from ip, and
     * the fill limit is iend - minMatchLength. Without this guard, a short
     * input would be hashed past iend and the limit would sit before ip. */
    if ((size_t)(iend - ip) >= params->minMatchLength) {
        U64 const startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
        ZSTD_ldm_fillLdmHashTable(
            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
            params->hashLog - params->bucketSizeLog,
            *params);
    }
}


/** ZSTD_ldm_limitTableUpdate() :
*
Expand Down Expand Up @@ -459,7 +470,7 @@ size_t ZSTD_ldm_generateSequences(
* * Try invalidation after the sequence generation and test the
* offset against maxDist directly.
*/
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
Expand Down Expand Up @@ -567,7 +578,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
if (sequence.offset == 0)
break;

assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);

/* Fill tables for block compressor */
Expand Down
4 changes: 4 additions & 0 deletions lib/compress/zstd_ldm.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ extern "C" {

#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT

/**
 * ZSTD_ldm_fillHashTable():
 *
 * Inserts the content in [ip, iend) into the ldm hash table of `state`,
 * using the settings in `params`. Used to load dictionary / prefix content
 * into the long distance matcher before compression starts.
 *
 * Hashing works on windows of params->minMatchLength bytes, so a range
 * shorter than that contains no complete window to insert.
 *
 * NOTE(review): presumably state->window must already cover [ip, iend)
 * (callers invoke ZSTD_window_update() first) - confirm.
 */
void ZSTD_ldm_fillHashTable(
ldmState_t* state, const BYTE* ip,
const BYTE* iend, ldmParams_t const* params);

/**
* ZSTD_ldm_generateSequences():
*
Expand Down
15 changes: 12 additions & 3 deletions lib/compress/zstdmt_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,9 @@ typedef struct {
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
} serialState_t;

static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
static int ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params,
size_t jobSize, const void* dict, size_t const dictSize)
{
/* Adjust parameters */
if (params.ldmParams.enableLdm) {
Expand Down Expand Up @@ -507,6 +509,13 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
memset(serialState->ldmState.hashTable, 0, hashSize);
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
}

/* Update window state and fill hash table with dict */
if (params.ldmParams.enableLdm && dict) {
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, &params.ldmParams);
}
bimbashrestha marked this conversation as resolved.
Show resolved Hide resolved

serialState->params = params;
serialState->params.jobSize = (U32)jobSize;
return 0;
Expand Down Expand Up @@ -1267,7 +1276,7 @@ static size_t ZSTDMT_compress_advanced_internal(

assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0))
return ERROR(memory_allocation);

FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
Expand Down Expand Up @@ -1500,7 +1509,7 @@ size_t ZSTDMT_initCStream_internal(
mtctx->allJobsCompleted = 0;
mtctx->consumed = 0;
mtctx->produced = 0;
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, dict, dictSize))
return ERROR(memory_allocation);
return 0;
}
Expand Down
Loading