A couple of performance improvements for ldm
This commit is contained in:
parent
7e6729055a
commit
1e65711ca5
@ -1723,11 +1723,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||||||
/* ldm bucketOffsets table */
|
/* ldm bucketOffsets table */
|
||||||
if (params.ldmParams.enableLdm) {
|
if (params.ldmParams.enableLdm) {
|
||||||
/* TODO: avoid memset? */
|
/* TODO: avoid memset? */
|
||||||
size_t const ldmBucketSize =
|
size_t const numBuckets =
|
||||||
((size_t)1) << (params.ldmParams.hashLog -
|
((size_t)1) << (params.ldmParams.hashLog -
|
||||||
params.ldmParams.bucketSizeLog);
|
params.ldmParams.bucketSizeLog);
|
||||||
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
|
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
|
||||||
ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
|
ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sequences storage */
|
/* sequences storage */
|
||||||
|
@ -17,7 +17,6 @@
|
|||||||
#define LDM_BUCKET_SIZE_LOG 3
|
#define LDM_BUCKET_SIZE_LOG 3
|
||||||
#define LDM_MIN_MATCH_LENGTH 64
|
#define LDM_MIN_MATCH_LENGTH 64
|
||||||
#define LDM_HASH_RLOG 7
|
#define LDM_HASH_RLOG 7
|
||||||
#define LDM_HASH_CHAR_OFFSET 10
|
|
||||||
|
|
||||||
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
||||||
ZSTD_compressionParameters const* cParams)
|
ZSTD_compressionParameters const* cParams)
|
||||||
@ -57,11 +56,11 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
|
|||||||
/** ZSTD_ldm_getSmallHash() :
|
/** ZSTD_ldm_getSmallHash() :
|
||||||
* numBits should be <= 32
|
* numBits should be <= 32
|
||||||
* If numBits==0, returns 0.
|
* If numBits==0, returns 0.
|
||||||
* @return : the most significant numBits of value. */
|
* @return : the most significant numBits of hash. */
|
||||||
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
|
static U32 ZSTD_ldm_getSmallHash(U64 hash, U32 numBits)
|
||||||
{
|
{
|
||||||
assert(numBits <= 32);
|
assert(numBits <= 32);
|
||||||
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
|
return numBits == 0 ? 0 : (U32)(hash >> (64 - numBits));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_ldm_getChecksum() :
|
/** ZSTD_ldm_getChecksum() :
|
||||||
@ -70,22 +69,19 @@ static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
|
|||||||
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
|
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
|
||||||
{
|
{
|
||||||
assert(numBitsToDiscard <= 32);
|
assert(numBitsToDiscard <= 32);
|
||||||
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
|
return (hash >> (64 - (32 + numBitsToDiscard))) & 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_ldm_getTag() ;
|
/** ZSTD_ldm_getTagMask() :
|
||||||
* Given the hash, returns the most significant numTagBits bits
|
* Returns the mask against which the rolling hash must be
|
||||||
* after (32 + hbits) bits.
|
* checked. */
|
||||||
*
|
static U64 ZSTD_ldm_getTagMask(U32 hbits, U32 hashRateLog)
|
||||||
* If there are not enough bits remaining, return the last
|
|
||||||
* numTagBits bits. */
|
|
||||||
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
|
|
||||||
{
|
{
|
||||||
assert(numTagBits < 32 && hbits <= 32);
|
assert(numTagBits < 32 && hbits <= 32);
|
||||||
if (32 - hbits < numTagBits) {
|
if (32 - hbits < hashRateLog) {
|
||||||
return hash & (((U32)1 << numTagBits) - 1);
|
return (((U64)1 << hashRateLog) - 1);
|
||||||
} else {
|
} else {
|
||||||
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
|
return (((U64)1 << hashRateLog) - 1) << (32 - hbits - hashRateLog);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,10 +99,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
|
|||||||
size_t const hash, const ldmEntry_t entry,
|
size_t const hash, const ldmEntry_t entry,
|
||||||
ldmParams_t const ldmParams)
|
ldmParams_t const ldmParams)
|
||||||
{
|
{
|
||||||
BYTE* const bucketOffsets = ldmState->bucketOffsets;
|
BYTE* const pOffset = ldmState->bucketOffsets + hash;
|
||||||
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
|
unsigned const offset = *pOffset;
|
||||||
bucketOffsets[hash]++;
|
|
||||||
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
|
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
|
||||||
|
*pOffset = (offset + 1) & (((U32)1 << ldmParams.bucketSizeLog) - 1);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_ldm_makeEntryAndInsertByTag() :
|
/** ZSTD_ldm_makeEntryAndInsertByTag() :
|
||||||
@ -125,9 +123,8 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
|||||||
U32 const offset,
|
U32 const offset,
|
||||||
ldmParams_t const ldmParams)
|
ldmParams_t const ldmParams)
|
||||||
{
|
{
|
||||||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
|
U64 const tagMask = ZSTD_ldm_getTagMask(hBits, ldmParams.hashRateLog);
|
||||||
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
|
if ((rollingHash & tagMask) == tagMask) {
|
||||||
if (tag == tagMask) {
|
|
||||||
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
||||||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
||||||
ldmEntry_t entry;
|
ldmEntry_t entry;
|
||||||
@ -276,9 +273,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||||||
U32 const minMatchLength = params->minMatchLength;
|
U32 const minMatchLength = params->minMatchLength;
|
||||||
U64 const hashPower = ldmState->hashPower;
|
U64 const hashPower = ldmState->hashPower;
|
||||||
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
||||||
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
|
U32 const entsPerBucket = 1U << params->bucketSizeLog;
|
||||||
U32 const hashRateLog = params->hashRateLog;
|
U64 const tagMask = ZSTD_ldm_getTagMask(hBits, params->hashRateLog);
|
||||||
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
|
|
||||||
/* Prefix and extDict parameters */
|
/* Prefix and extDict parameters */
|
||||||
U32 const dictLimit = ldmState->window.dictLimit;
|
U32 const dictLimit = ldmState->window.dictLimit;
|
||||||
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
|
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
|
||||||
@ -299,10 +295,12 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||||||
U64 rollingHash = 0;
|
U64 rollingHash = 0;
|
||||||
|
|
||||||
while (ip <= ilimit) {
|
while (ip <= ilimit) {
|
||||||
|
U32 hash, checksum;
|
||||||
size_t mLength;
|
size_t mLength;
|
||||||
U32 const curr = (U32)(ip - base);
|
U32 const curr = (U32)(ip - base);
|
||||||
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
||||||
ldmEntry_t* bestEntry = NULL;
|
ldmEntry_t const* bestEntry = NULL;
|
||||||
|
|
||||||
if (ip != istart) {
|
if (ip != istart) {
|
||||||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
|
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
|
||||||
lastHashed[minMatchLength],
|
lastHashed[minMatchLength],
|
||||||
@ -313,22 +311,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||||||
lastHashed = ip;
|
lastHashed = ip;
|
||||||
|
|
||||||
/* Do not insert and do not look for a match */
|
/* Do not insert and do not look for a match */
|
||||||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
|
if ((rollingHash & tagMask) != tagMask) {
|
||||||
ip++;
|
ip++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
||||||
|
checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
||||||
|
|
||||||
/* Get the best entry and compute the match lengths */
|
/* Get the best entry and compute the match lengths */
|
||||||
{
|
{
|
||||||
ldmEntry_t* const bucket =
|
ldmEntry_t* const bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
|
||||||
ZSTD_ldm_getBucket(ldmState,
|
ldmEntry_t const* cur;
|
||||||
ZSTD_ldm_getSmallHash(rollingHash, hBits),
|
|
||||||
*params);
|
|
||||||
ldmEntry_t* cur;
|
|
||||||
size_t bestMatchLength = 0;
|
size_t bestMatchLength = 0;
|
||||||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
|
||||||
|
|
||||||
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
|
for (cur = bucket; cur < bucket + entsPerBucket; ++cur) {
|
||||||
size_t curForwardMatchLength, curBackwardMatchLength,
|
size_t curForwardMatchLength, curBackwardMatchLength,
|
||||||
curTotalMatchLength;
|
curTotalMatchLength;
|
||||||
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
|
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
|
||||||
@ -379,9 +376,11 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||||||
|
|
||||||
/* No match found -- continue searching */
|
/* No match found -- continue searching */
|
||||||
if (bestEntry == NULL) {
|
if (bestEntry == NULL) {
|
||||||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
|
ldmEntry_t entry;
|
||||||
hBits, curr,
|
|
||||||
*params);
|
entry.offset = curr;
|
||||||
|
entry.checksum = checksum;
|
||||||
|
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
|
||||||
ip++;
|
ip++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -73,7 +73,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||||||
*
|
*
|
||||||
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
|
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
|
||||||
* Avoids emitting matches less than `minMatch` bytes.
|
* Avoids emitting matches less than `minMatch` bytes.
|
||||||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
|
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
|
||||||
*/
|
*/
|
||||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
||||||
U32 const minMatch);
|
U32 const minMatch);
|
||||||
|
@ -486,10 +486,10 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
|||||||
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
|
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
|
||||||
unsigned const bucketLog =
|
unsigned const bucketLog =
|
||||||
params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
|
params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
|
||||||
size_t const bucketSize = (size_t)1 << bucketLog;
|
|
||||||
unsigned const prevBucketLog =
|
unsigned const prevBucketLog =
|
||||||
serialState->params.ldmParams.hashLog -
|
serialState->params.ldmParams.hashLog -
|
||||||
serialState->params.ldmParams.bucketSizeLog;
|
serialState->params.ldmParams.bucketSizeLog;
|
||||||
|
size_t const numBuckets = (size_t)1 << bucketLog;
|
||||||
/* Size the seq pool tables */
|
/* Size the seq pool tables */
|
||||||
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
||||||
/* Reset the window */
|
/* Reset the window */
|
||||||
@ -501,13 +501,13 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
|
|||||||
}
|
}
|
||||||
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
|
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
|
||||||
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
|
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
|
||||||
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
|
serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
|
||||||
}
|
}
|
||||||
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
|
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
|
||||||
return 1;
|
return 1;
|
||||||
/* Zero the tables */
|
/* Zero the tables */
|
||||||
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
|
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
|
||||||
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
|
||||||
|
|
||||||
/* Update window state and fill hash table with dict */
|
/* Update window state and fill hash table with dict */
|
||||||
serialState->ldmState.loadedDictEnd = 0;
|
serialState->ldmState.loadedDictEnd = 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user