"Short cache" optimization for level 1-4 DMS (+5-30% compression speed) (#3152)
* first attempt at fast DMS short cache
* significant wins for some scenarios
* fix all clang regressions
* nits
* fix 1.5% gcc11 regression on hot 110Kdict scenario
* fix CI
* nit
* Add tags to doublefast hash table
* use tags in doublefast DMS
* Fix CI
* Clean up some hardcoded logic / constants
* Switch forCCtx to an enum
* nit
* add short cache to ip+1 long search
* Move tag size into hashLog
* Minor nits
* Truncate dictionaries greater than 16MB in short cache mode
* Helper function for tag comparison
* Cap short cache hashLog at 24 to prevent overflow
* size_t dictTagsMatch -> int dictTagsMatch
* nit
* Clean up and comment dictionary truncation
* Move ZSTD_tableFillPurpose_e next to ZSTD_dictTableLoadMethod_e
* Comment and expand helper functions
* Asserts and documentation
* nit
Commit f6ef14329f (parent eb842a2260)
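
The packed-entry trick at the heart of this change is easiest to see in isolation. The sketch below is a minimal standalone illustration of the probe flow this commit documents in zstd_compress_internal.h; TAG_BITS, TAG_MASK, and pack_entry are local stand-ins, not library identifiers.

    /* Minimal standalone illustration of the short-cache probe, not library
     * code. TAG_BITS, TAG_MASK and pack_entry are local stand-ins. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAG_BITS 8
    #define TAG_MASK ((1u << TAG_BITS) - 1)

    /* Pack a 24-bit table index and an 8-bit tag into one 32-bit entry,
     * mirroring what ZSTD_writeTaggedIndex does in the patch below. */
    static uint32_t pack_entry(uint32_t index, uint32_t tag)
    {
        assert(index >> (32 - TAG_BITS) == 0);  /* index must fit in 24 bits */
        return (index << TAG_BITS) | (tag & TAG_MASK);
    }

    int main(void)
    {
        uint32_t const entry = pack_entry(12345, 0xAB);
        uint32_t const probeTag = 0xCD;  /* independent 8-bit hash of current ip */

        if ((entry & TAG_MASK) != probeTag) {
            /* Tags disagree: skip loading *(base + index), the likely cache miss. */
            printf("tag mismatch, match candidate rejected cheaply\n");
        } else {
            /* Tags agree: recover the index and do the real comparison. */
            printf("candidate index = %u\n", entry >> TAG_BITS);
        }
        return 0;
    }

On a tag mismatch the probe moves on without ever touching base + index, which is exactly the L2/L3 miss the commit title's 5-30% speedup comes from avoiding.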
lib/compress/zstd_compress.c

@@ -275,6 +275,12 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
     return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
 }
 
+/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
+ * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
+static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
+}
+
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -1367,6 +1373,13 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
         cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
 
+    if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
+        U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
+        if (cPar.hashLog > maxShortCacheHashLog) {
+            cPar.hashLog = maxShortCacheHashLog;
+        }
+    }
+
     return cPar;
 }
@@ -2096,6 +2109,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
     return 0;
 }
 
+static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
+                                        ZSTD_compressionParameters const* cParams) {
+    if (ZSTD_CDictIndicesAreTagged(cParams)){
+        /* Remove tags from the CDict table if they are present.
+         * See docs on "short cache" in zstd_compress_internal.h for context. */
+        size_t i;
+        for (i = 0; i < tableSize; i++) {
+            U32 const taggedIndex = src[i];
+            U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
+            dst[i] = index;
+        }
+    } else {
+        ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
+    }
+}
+
 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                                             const ZSTD_CDict* cdict,
                                             ZSTD_CCtx_params params,
@@ -2131,14 +2160,15 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                                 : 0;
     size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
 
-    ZSTD_memcpy(cctx->blockState.matchState.hashTable,
-           cdict->matchState.hashTable,
-           hSize * sizeof(U32));
+    ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
+                            cdict->matchState.hashTable,
+                            hSize, cdict_cParams);
+
     /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
     if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
-        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
-               cdict->matchState.chainTable,
-               chainSize * sizeof(U32));
+        ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
+                                cdict->matchState.chainTable,
+                                chainSize, cdict_cParams);
     }
     /* copy tag table */
     if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {

@@ -4205,7 +4235,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                          ZSTD_cwksp* ws,
                                          ZSTD_CCtx_params const* params,
                                          const void* src, size_t srcSize,
-                                         ZSTD_dictTableLoadMethod_e dtlm)
+                                         ZSTD_dictTableLoadMethod_e dtlm,
+                                         ZSTD_tableFillPurpose_e tfp)
 {
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
@@ -4214,22 +4245,37 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
     /* Assert that the ms params match the params we're being given */
     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
 
-    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+    {   /* Ensure large dictionaries can't cause index overflow */
+
         /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
          * Dictionaries right at the edge will immediately trigger overflow
          * correction, but I don't want to insert extra constraints here.
          */
-        U32 const maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
-        /* We must have cleared our windows when our source is this large. */
-        assert(ZSTD_window_isEmpty(ms->window));
-        if (loadLdmDict)
-            assert(ZSTD_window_isEmpty(ls->window));
+        U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
+
+        int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams);
+        if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
+            /* Some dictionary matchfinders in zstd use "short cache",
+             * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
+             * CDict hashtable entry as a tag rather than as part of an index.
+             * When short cache is used, we need to truncate the dictionary
+             * so that its indices don't overlap with the tag. */
+            U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
+            maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
+            assert(!loadLdmDict);
+        }
+
         /* If the dictionary is too large, only load the suffix of the dictionary. */
         if (srcSize > maxDictSize) {
             ip = iend - maxDictSize;
             src = ip;
             srcSize = maxDictSize;
-        }
-    }
+    }   }
+
+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+        /* We must have cleared our windows when our source is this large. */
+        assert(ZSTD_window_isEmpty(ms->window));
+        if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
+    }
 
     DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
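
For concreteness: with ZSTD_SHORT_CACHE_TAG_BITS = 8, a tagged table entry keeps only 24 bits of index, which is where the "truncate dictionaries greater than 16MB" item in the commit message comes from. A standalone check of the bound, assuming ZSTD_WINDOW_START_INDEX keeps its usual value of 2:

    /* Illustrative only; assumes ZSTD_WINDOW_START_INDEX has its usual value 2. */
    #include <stdio.h>

    int main(void)
    {
        unsigned const tagBits = 8;           /* ZSTD_SHORT_CACHE_TAG_BITS */
        unsigned const windowStartIndex = 2;  /* assumed ZSTD_WINDOW_START_INDEX */
        unsigned const shortCacheMaxDictSize = (1u << (32 - tagBits)) - windowStartIndex;

        /* Prints 16777214: dictionary indices must fit in 24 bits, so short
         * cache mode caps dictionaries just under 16 MiB. */
        printf("%u\n", shortCacheMaxDictSize);
        return 0;
    }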
@@ -4252,10 +4298,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
     switch(params->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, dtlm);
+        ZSTD_fillHashTable(ms, iend, dtlm, tfp);
         break;
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
         break;
 
     case ZSTD_greedy:
@@ -4421,6 +4467,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                       ZSTD_CCtx_params const* params,
                                       const void* dict, size_t dictSize,
                                       ZSTD_dictTableLoadMethod_e dtlm,
+                                      ZSTD_tableFillPurpose_e tfp,
                                       void* workspace)
 {
     const BYTE* dictPtr = (const BYTE*)dict;

@@ -4439,7 +4486,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
     {
         size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
         FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
-            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
     }
     return dictID;
 }
@@ -4455,6 +4502,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                                const void* dict, size_t dictSize,
                                ZSTD_dictContentType_e dictContentType,
                                ZSTD_dictTableLoadMethod_e dtlm,
+                               ZSTD_tableFillPurpose_e tfp,
                                void* workspace)
 {
     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);

@@ -4467,13 +4515,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
 
     /* dict restricted modes */
     if (dictContentType == ZSTD_dct_rawContent)
-        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
 
     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
         if (dictContentType == ZSTD_dct_auto) {
             DEBUGLOG(4, "raw content dictionary detected");
             return ZSTD_loadDictionaryContent(
-                ms, ls, ws, params, dict, dictSize, dtlm);
+                ms, ls, ws, params, dict, dictSize, dtlm, tfp);
         }
         RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
         assert(0);   /* impossible */
@@ -4481,7 +4529,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
 
     /* dict as full zstd dictionary */
     return ZSTD_loadZstdDictionary(
-        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+        bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
 }
 
 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
@@ -4524,11 +4572,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                 cdict->dictContentSize, cdict->dictContentType, dtlm,
-                cctx->entropyWorkspace)
+                ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
           : ZSTD_compress_insertDictionary(
                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
-                dictContentType, dtlm, cctx->entropyWorkspace);
+                dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
     FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
     assert(dictID <= UINT_MAX);
     cctx->dictID = (U32)dictID;
@@ -4832,7 +4880,7 @@ static size_t ZSTD_initCDict_internal(
     {   size_t const dictID = ZSTD_compress_insertDictionary(
             &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
             &params, cdict->dictContent, cdict->dictContentSize,
-            dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
         assert(dictID <= (size_t)(U32)-1);
         cdict->dictID = (U32)dictID;
lib/compress/zstd_compress_internal.h

@@ -434,6 +434,7 @@ struct ZSTD_CCtx_s {
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
 
 typedef enum {
     ZSTD_noDict = 0,
@@ -745,32 +746,36 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
 *  Hashes
 ***************************************/
 static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
+static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
 
 static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
 
 static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
 
 static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
 
 static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
 
 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
 
 MEM_STATIC FORCE_INLINE_ATTR
 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 {
+    /* Although some of these hashes do support hBits up to 64, some do not.
+     * To be on the safe side, always avoid hBits > 32. */
+    assert(hBits <= 32);
+
     switch(mls)
     {
     default:
@@ -1264,6 +1269,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
 
 #endif
 
+/* Short Cache */
+
+/* Normally, zstd matchfinders follow this flow:
+ *     1. Compute hash at ip
+ *     2. Load index from hashTable[hash]
+ *     3. Check if *ip == *(base + index)
+ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
+ *
+ * Short cache is an optimization which allows us to avoid step 3 most of the time
+ * when the data doesn't actually match. With short cache, the flow becomes:
+ *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
+ *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
+ *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
+ *
+ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
+ * dictMatchState matchfinders.
+ */
+#define ZSTD_SHORT_CACHE_TAG_BITS 8
+#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
+
+/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
+ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
+MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
+    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
+    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
+    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
+    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
+}
+
+/* Helper function for short cache matchfinders.
+ * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
+MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
+    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
+    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
+    return tag1 == tag2;
+}
+
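
The two helpers above compose around one convention: CDict tables are hashed at (hashLog + ZSTD_SHORT_CACHE_TAG_BITS) bits, so the top hashLog bits of the result pick the slot while the low 8 bits ride along as the tag. A standalone round-trip sketch of that contract follows; it uses local re-implementations and a made-up 10-bit hashLog, not the library code.

    /* Standalone round trip of the tagged-index contract, not library code. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define TAG_BITS 8
    #define TAG_MASK ((1u << TAG_BITS) - 1)

    static void writeTaggedIndex(uint32_t* table, size_t hashAndTag, uint32_t index)
    {
        size_t const hash = hashAndTag >> TAG_BITS;             /* top bits: slot */
        uint32_t const tag = (uint32_t)(hashAndTag & TAG_MASK); /* low bits: tag  */
        assert(index >> (32 - TAG_BITS) == 0);
        table[hash] = (index << TAG_BITS) | tag;
    }

    static int comparePackedTags(size_t packedTag1, size_t packedTag2)
    {
        return (packedTag1 & TAG_MASK) == (packedTag2 & TAG_MASK);
    }

    int main(void)
    {
        static uint32_t table[1 << 10];  /* pretend hashLog == 10 */
        /* Fake 18-bit hash-and-tag value: 10 slot bits plus 8 tag bits. */
        size_t const hashAndTag = ((size_t)0x2A7 << TAG_BITS) | 0x5C;

        writeTaggedIndex(table, hashAndTag, 42);

        /* A later probe at the same position: tags agree, so the stored index
         * is worth dereferencing; a disagreeing tag is rejected before that. */
        {   uint32_t const entry = table[hashAndTag >> TAG_BITS];
            assert(comparePackedTags(entry, hashAndTag));
            assert((entry >> TAG_BITS) == 42);
        }
        return 0;
    }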
 #if defined (__cplusplus)
 }
lib/compress/zstd_double_fast.c

@@ -11,8 +11,43 @@
 #include "zstd_compress_internal.h"
 #include "zstd_double_fast.h"
 
+static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashLarge = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    U32  const mls = cParams->minMatch;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
 
-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+    /* Always insert every fastHashFillStep position into the hash tables.
+     * Insert the other positions into the large hash table if their entry
+     * is empty.
+     */
+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        U32 i;
+        for (i = 0; i < fastHashFillStep; ++i) {
+            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
+            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
+            if (i == 0) {
+                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
+            }
+            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
+                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
+            }
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
+    }   }
+}
+
+static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -43,7 +78,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
             /* Only load extra positions for ZSTD_dtlm_full */
             if (dtlm == ZSTD_dtlm_fast)
                 break;
     }   }
 }
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm,
+                        ZSTD_tableFillPurpose_e tfp)
+{
+    if (tfp == ZSTD_tfp_forCDict) {
+        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
+    } else {
+        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
+    }
+}
 
@@ -289,8 +336,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     const BYTE* const dictStart = dictBase + dictStartIndex;
     const BYTE* const dictEnd = dms->window.nextSrc;
     const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
-    const U32 dictHBitsL = dictCParams->hashLog;
-    const U32 dictHBitsS = dictCParams->chainLog;
+    const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
 
     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@@ -312,8 +359,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         U32 offset;
         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
-        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
+        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
+        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
+        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
         U32 const curr = (U32)(ip-base);
         U32 const matchIndexL = hashLong[h2];
         U32 matchIndexS = hashSmall[h];
@@ -343,9 +394,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else {
+        } else if (dictTagsMatchL) {
             /* check dictMatchState long match */
-            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
             assert(dictMatchL < dictEnd);
 
@@ -361,9 +412,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             if (MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
             }
-        } else {
+        } else if (dictTagsMatchS) {
             /* check dictMatchState short match */
-            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
             match = dictBase + dictMatchIndexS;
             matchIndexS = dictMatchIndexS + dictIndexDelta;
 
@@ -378,10 +429,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         continue;
 
 _search_next_long:
 
         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
             U32 const matchIndexL3 = hashLong[hl3];
+            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
+            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
             const BYTE* matchL3 = base + matchIndexL3;
             hashLong[hl3] = curr + 1;
 
@@ -394,9 +446,9 @@ _search_next_long:
                 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else {
+        } else if (dictTagsMatchL3) {
             /* check dict long +1 match */
-            U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+            U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
             const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
             assert(dictMatchL3 < dictEnd);
             if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
lib/compress/zstd_double_fast.h

@@ -19,7 +19,8 @@ extern "C" {
 #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
 
 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm,
+                              ZSTD_tableFillPurpose_e tfp);
 size_t ZSTD_compressBlock_doubleFast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
lib/compress/zstd_fast.c

@@ -11,8 +11,42 @@
 #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"
 
+static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
 
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+    /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
+     * Feel free to remove this assert if there's a good reason! */
+    assert(dtlm == ZSTD_dtlm_full);
+
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        {   size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
+            ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr);   }
+
+        if (dtlm == ZSTD_dtlm_fast) continue;
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* not yet filled */
+                    ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
+                }   }   }   }
+}
+
+static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -25,6 +59,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
     const U32 fastHashFillStep = 3;
 
+    /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
+     * Feel free to remove this assert if there's a good reason! */
+    assert(dtlm == ZSTD_dtlm_fast);
+
     /* Always insert every fastHashFillStep position into the hash table.
     * Insert the other positions if their hash entry is empty.
     */
@@ -42,6 +80,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                 }   }   }   }
 }
 
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm,
+                        ZSTD_tableFillPurpose_e tfp)
+{
+    if (tfp == ZSTD_tfp_forCDict) {
+        ZSTD_fillHashTableForCDict(ms, end, dtlm);
+    } else {
+        ZSTD_fillHashTableForCCtx(ms, end, dtlm);
+    }
+}
+
 
 /**
  * If you squint hard enough (and ignore repcodes), the search operation at any
@@ -435,7 +485,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     const BYTE* const dictEnd = dms->window.nextSrc;
     const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
     const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
-    const U32 dictHLog = dictCParams->hashLog;
+    const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
 
     /* if a dictionary is still attached, it necessarily means that
      * it is within window size. So we just check it. */
@@ -463,8 +513,11 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     while (ip1 <= ilimit) {   /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
         size_t mLength;
         size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
-        const size_t dictHash0 = ZSTD_hashPtr(ip0, dictHLog, mls);
-        U32 dictMatchIndex = dictHashTable[dictHash0];
+
+        size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
+        U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
+        int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
+
         U32 matchIndex = hashTable[hash0];
         U32 curr = (U32)(ip0 - base);
         size_t step = stepSize;
@@ -479,7 +532,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                      dictBase + (repIndex - dictIndexDelta) :
                      base + repIndex;
         const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
-        const size_t dictHash1 = ZSTD_hashPtr(ip1, dictHLog, mls);
+        size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
         hashTable[hash0] = curr;   /* update hash table */
 
         if (((U32) ((prefixStartIndex - 1) - repIndex) >=
@@ -490,26 +543,33 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             ip0++;
             ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
             break;
-        } else if (matchIndex <= prefixStartIndex) {
-            /* We only look for a dict match if the normal matchIndex is invalid */
+        }
+
+        if (dictTagsMatch) {
+            /* Found a possible dict match */
+            const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
             const BYTE* dictMatch = dictBase + dictMatchIndex;
             if (dictMatchIndex > dictStartIndex &&
                 MEM_read32(dictMatch) == MEM_read32(ip0)) {
-                /* found a dict match */
-                U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
-                mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
-                while (((ip0 > anchor) & (dictMatch > dictStart))
-                     && (ip0[-1] == dictMatch[-1])) {
-                    ip0--;
-                    dictMatch--;
-                    mLength++;
-                } /* catch up */
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
-                break;
+                /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
+                if (matchIndex <= prefixStartIndex) {
+                    U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
+                    mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
+                    while (((ip0 > anchor) & (dictMatch > dictStart))
+                         && (ip0[-1] == dictMatch[-1])) {
+                        ip0--;
+                        dictMatch--;
+                        mLength++;
+                    } /* catch up */
+                    offset_2 = offset_1;
+                    offset_1 = offset;
+                    ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
+                    break;
+                }
             }
-        } else if (MEM_read32(match) == MEM_read32(ip0)) {
+        }
+
+        if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
             /* found a regular match */
             U32 const offset = (U32) (ip0 - match);
             mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
@@ -526,7 +586,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         }
 
         /* Prepare for next iteration */
-        dictMatchIndex = dictHashTable[dictHash1];
+        dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
+        dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
         matchIndex = hashTable[hash1];
 
         if (ip1 >= nextStep) {
lib/compress/zstd_fast.h

@@ -19,7 +19,8 @@ extern "C" {
 #include "zstd_compress_internal.h"
 
 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm,
+                        ZSTD_tableFillPurpose_e tfp);
 size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
lib/compress/zstd_ldm.c

@@ -242,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
     switch(ms->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
         break;
 
     case ZSTD_greedy: