Switch to using ZSTD_count instead of function pointer
parent
19258f51c1
commit
1fa223859f
|
@ -36,14 +36,38 @@ LDM_hashEntry *HASH_getEntryFromHash(
|
|||
return getBucket(table, hash);
|
||||
}
|
||||
|
||||
/**
 * Checks whether the candidate at pMatch is an acceptable match for pIn.
 *
 * A candidate is accepted when the distance (pIn - pMatch) does not exceed
 * maxWindowSize and the first minMatchLength bytes at pIn and pMatch are
 * identical.
 *
 * No end-of-buffer pointer is passed in, so the caller must guarantee that
 * minMatchLength bytes are readable at both pIn and pMatch.
 *
 * @param pIn            current input position
 * @param pMatch         candidate match position
 * @param minMatchLength number of bytes that must be equal
 * @param maxWindowSize  largest accepted distance between pIn and pMatch
 * @return 1 if the candidate is valid, 0 otherwise
 */
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
                        U32 minMatchLength, U32 maxWindowSize) {
  U32 lengthLeft = minMatchLength;
  const BYTE *curIn = pIn;
  const BYTE *curMatch = pMatch;

  // NOTE(review): a signed pointer difference is compared with an unsigned
  // U32 here; confirm callers never pass pMatch > pIn.
  if (pIn - pMatch > maxWindowSize) {
    return 0;
  }

  // Compare four bytes at a time while at least four bytes remain.
  for (; lengthLeft >= 4; lengthLeft -= 4) {
    if (MEM_read32(curIn) != MEM_read32(curMatch)) {
      return 0;
    }
    curIn += 4;
    curMatch += 4;
  }

  // Compare the 1-3 trailing bytes left over when minMatchLength is not a
  // multiple of four, so a shorter match is never reported as valid.
  for (; lengthLeft > 0; lengthLeft--) {
    if (*curIn != *curMatch) {
      return 0;
    }
    curIn++;
    curMatch++;
  }
  return 1;
}
|
||||
|
||||
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const hash_t hash,
|
||||
const U32 checksum,
|
||||
const BYTE *pIn,
|
||||
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize) {
|
||||
LDM_hashEntry *entry = getBucket(table, hash);
|
||||
(void)checksum;
|
||||
if ((*isValid)(pIn, entry->offset + table->offsetBase)) {
|
||||
(void)pEnd;
|
||||
if (isValidMatch(pIn, entry->offset + table->offsetBase,
|
||||
minMatchLength, maxWindowSize)) {
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
|
|
|
@ -9,11 +9,14 @@
|
|||
|
||||
// Number of elements per hash bucket, expressed as a power of two:
// HASH_BUCKET_SIZE == 1 << HASH_BUCKET_SIZE_LOG.
// HASH_BUCKET_SIZE_LOG must be defined exactly once; defining it again with
// a different value is a macro redefinition.
// NOTE(review): an earlier comment claimed HASH_BUCKET_SIZE_LOG is defined in
// ldm.h, but it is defined right here -- confirm ldm.h does not define it too.
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))

// TODO: rename. Number of hash buckets.
// NOTE(review): the name and formula suggest this is the log2 of the bucket
// count rather than the count itself -- confirm against its users.
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-(HASH_BUCKET_SIZE_LOG))

// Uncomment to validate candidates with ZSTD_count instead of isValidMatch.
//#define TMP_ZSTDTOGGLE
|
||||
|
||||
struct LDM_hashTable {
|
||||
U32 size; // Number of buckets
|
||||
U32 maxEntries; // Rename...
|
||||
|
@ -39,20 +42,162 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
|
|||
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
|
||||
}
|
||||
|
||||
#ifdef TMP_ZSTDTOGGLE
|
||||
static unsigned ZSTD_NbCommonBytes (register size_t val)
|
||||
{
|
||||
if (MEM_isLittleEndian()) {
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
_BitScanForward64( &r, (U64)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
|
||||
0, 3, 1, 3, 1, 4, 2, 7,
|
||||
0, 2, 3, 6, 1, 5, 3, 5,
|
||||
1, 3, 4, 4, 2, 5, 6, 7,
|
||||
7, 0, 1, 2, 3, 3, 4, 6,
|
||||
2, 6, 5, 5, 3, 4, 5, 6,
|
||||
7, 1, 2, 4, 6, 4, 4, 5,
|
||||
7, 2, 6, 5, 7, 6, 7, 7 };
|
||||
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r=0;
|
||||
_BitScanForward( &r, (U32)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_ctz((U32)val) >> 3);
|
||||
# else
|
||||
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
|
||||
3, 2, 2, 1, 3, 2, 0, 1,
|
||||
3, 3, 1, 2, 2, 2, 2, 0,
|
||||
3, 1, 2, 0, 1, 0, 1, 1 };
|
||||
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
||||
# endif
|
||||
}
|
||||
} else { /* Big Endian CPU */
|
||||
if (MEM_64bits()) {
|
||||
# if defined(_MSC_VER) && defined(_WIN64)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse64( &r, val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clzll(val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
||||
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
||||
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if defined(_MSC_VER)
|
||||
unsigned long r = 0;
|
||||
_BitScanReverse( &r, (unsigned long)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
||||
return (__builtin_clz((U32)val) >> 3);
|
||||
# else
|
||||
unsigned r;
|
||||
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
||||
r += (!val);
|
||||
return r;
|
||||
# endif
|
||||
} }
|
||||
}
|
||||
|
||||
// From lib/compress/zstd_compress.c
|
||||
/**
 * Counts how many consecutive bytes starting at pIn are equal to the bytes
 * starting at pMatch.
 *
 * The comparison first proceeds one size_t word at a time, then finishes with
 * 4-byte (64-bit targets only), 2-byte and 1-byte steps. Every read from pIn
 * stays below pInLimit.
 *
 * @param pIn      start of the input run
 * @param pMatch   start of the candidate run
 * @param pInLimit one past the last readable byte of the input
 * @return number of matching bytes counted from pIn
 */
static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch,
                         const BYTE *const pInLimit) {
  const BYTE *const pStart = pIn;
  /* Last position from which a whole size_t can still be read. */
  const BYTE *const wordLoopLimit = pInLimit - (sizeof(size_t)-1);

  while (pIn < wordLoopLimit) {
    size_t const mismatch = MEM_readST(pMatch) ^ MEM_readST(pIn);
    if (mismatch) {
      /* The words differ: add the bytes that still agree and stop. */
      pIn += ZSTD_NbCommonBytes(mismatch);
      return (size_t)(pIn - pStart);
    }
    pIn += sizeof(size_t);
    pMatch += sizeof(size_t);
  }

  /* Fewer than sizeof(size_t) bytes remain: finish with narrower reads. */
  if (MEM_64bits() && (pIn < (pInLimit - 3)) &&
      (MEM_read32(pMatch) == MEM_read32(pIn))) {
    pIn += 4;
    pMatch += 4;
  }
  if ((pIn < (pInLimit - 1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) {
    pIn += 2;
    pMatch += 2;
  }
  if ((pIn < pInLimit) && (*pMatch == *pIn)) {
    pIn++;
  }
  return (size_t)(pIn - pStart);
}
|
||||
|
||||
#else
|
||||
|
||||
/**
 * Checks whether the candidate at pMatch is an acceptable match for pIn.
 *
 * A candidate is accepted when the distance (pIn - pMatch) does not exceed
 * maxWindowSize and the first minMatchLength bytes at pIn and pMatch are
 * identical.
 *
 * No end-of-buffer pointer is passed in, so the caller must guarantee that
 * minMatchLength bytes are readable at both pIn and pMatch.
 *
 * @param pIn            current input position
 * @param pMatch         candidate match position
 * @param minMatchLength number of bytes that must be equal
 * @param maxWindowSize  largest accepted distance between pIn and pMatch
 * @return 1 if the candidate is valid, 0 otherwise
 */
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
                        U32 minMatchLength, U32 maxWindowSize) {
  U32 lengthLeft = minMatchLength;
  const BYTE *curIn = pIn;
  const BYTE *curMatch = pMatch;

  // NOTE(review): a signed pointer difference is compared with an unsigned
  // U32 here; confirm callers never pass pMatch > pIn.
  if (pIn - pMatch > maxWindowSize) {
    return 0;
  }

  // Compare four bytes at a time while at least four bytes remain.
  for (; lengthLeft >= 4; lengthLeft -= 4) {
    if (MEM_read32(curIn) != MEM_read32(curMatch)) {
      return 0;
    }
    curIn += 4;
    curMatch += 4;
  }

  // Compare the 1-3 trailing bytes left over when minMatchLength is not a
  // multiple of four, so a shorter match is never reported as valid.
  for (; lengthLeft > 0; lengthLeft--) {
    if (*curIn != *curMatch) {
      return 0;
    }
    curIn++;
    curMatch++;
  }
  return 1;
}
|
||||
|
||||
#endif // TMP_ZSTDTOGGLE
|
||||
|
||||
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const hash_t hash,
|
||||
const U32 checksum,
|
||||
const BYTE *pIn,
|
||||
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize) {
|
||||
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||
LDM_hashEntry *cur = bucket;
|
||||
// TODO: in order of recency?
|
||||
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||
// Check checksum for faster check.
|
||||
const BYTE *pMatch = cur->offset + table->offsetBase;
|
||||
#ifdef TMP_ZSTDTOGGLE
|
||||
if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
|
||||
U32 matchLength = ZSTD_count(pIn, pMatch, pEnd);
|
||||
if (matchLength >= minMatchLength) {
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void)pEnd;
|
||||
(void)minMatchLength;
|
||||
(void)maxWindowSize;
|
||||
|
||||
if (cur->checksum == checksum &&
|
||||
(*isValid)(pIn, cur->offset + table->offsetBase)) {
|
||||
isValidMatch(pIn, pMatch, minMatchLength, maxWindowSize)) {
|
||||
return cur;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -91,6 +91,7 @@ struct LDM_CCtx {
|
|||
hash_t lagHash;
|
||||
U32 lagSum;
|
||||
|
||||
U64 numHashInserts;
|
||||
// DEBUG
|
||||
const BYTE *DEBUG_setNextHash;
|
||||
};
|
||||
|
@ -164,7 +165,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
|||
}
|
||||
printf("\n");
|
||||
printf("=====================\n");
|
||||
|
||||
}
|
||||
|
||||
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
||||
|
@ -376,7 +376,7 @@ void LDM_outputConfiguration(void) {
|
|||
printf("Min match, hash length: %d, %d\n",
|
||||
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
||||
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
||||
printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY);
|
||||
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||
printf("LDM_LAG %d\n", LDM_LAG);
|
||||
printf("=====================\n");
|
||||
}
|
||||
|
@ -456,8 +456,10 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
|||
#ifdef HASH_CHECK
|
||||
entry = HASH_getEntryFromHash(cctx->hashTable, h, sum);
|
||||
#else
|
||||
entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
|
||||
&LDM_isValidMatch);
|
||||
entry = HASH_getValidEntry(cctx->hashTable, h, sum,
|
||||
cctx->ip, cctx->iend,
|
||||
LDM_MIN_MATCH_LENGTH,
|
||||
LDM_WINDOW_SIZE);
|
||||
#endif
|
||||
|
||||
if (entry != NULL) {
|
||||
|
@ -534,9 +536,10 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
LDM_CCtx cctx;
|
||||
const BYTE *match = NULL;
|
||||
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
|
||||
printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||
// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||
|
||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||
LDM_outputConfiguration();
|
||||
|
||||
/* Hash the first position and put it into the hash table. */
|
||||
LDM_putHashOfCurrentPosition(&cctx);
|
||||
|
@ -553,11 +556,10 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
* and encode the final literals.
|
||||
*/
|
||||
while (LDM_findBestMatch(&cctx, &match) == 0) {
|
||||
U32 backwardsMatchLen = 0;
|
||||
#ifdef COMPUTE_STATS
|
||||
cctx.stats.numMatches++;
|
||||
#endif
|
||||
|
||||
// printf("HERE %zu\n", cctx.ip - cctx.ibase);
|
||||
/**
|
||||
* Catch up: look back to extend the match backwards from the found match.
|
||||
*/
|
||||
|
@ -565,6 +567,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
cctx.ip[-1] == match[-1]) {
|
||||
cctx.ip--;
|
||||
match--;
|
||||
backwardsMatchLen++;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -575,8 +578,9 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
const U32 literalLength = cctx.ip - cctx.anchor;
|
||||
const U32 offset = cctx.ip - match;
|
||||
const U32 matchLength = LDM_countMatchLength(
|
||||
cctx.ip + LDM_MIN_MATCH_LENGTH, match + LDM_MIN_MATCH_LENGTH,
|
||||
cctx.ihashLimit);
|
||||
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
|
||||
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
|
||||
cctx.ihashLimit) + backwardsMatchLen;
|
||||
|
||||
LDM_outputBlock(&cctx, literalLength, offset, matchLength);
|
||||
|
||||
|
|
|
@ -12,18 +12,17 @@
|
|||
|
||||
// Defines the size of the hash table.
// Currently this should be less than WINDOW_SIZE_LOG + 4?
// Each tuning macro below must be defined exactly once; defining it again
// with a different value is a macro redefinition.
#define LDM_MEMORY_USAGE 23

// Alternative lag settings kept for experimentation.
//#define LDM_LAG (1 << 23)
//#define LDM_LAG (1 << 20)
#define LDM_LAG (0)

// The window size is 1 << LDM_WINDOW_SIZE_LOG.
#define LDM_WINDOW_SIZE_LOG 28
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))

// These should be multiples of four (and perhaps set to the same values?).
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_LENGTH 64
|
||||
|
||||
typedef struct LDM_compressStats LDM_compressStats;
|
||||
typedef struct LDM_CCtx LDM_CCtx;
|
||||
|
|
|
@ -40,7 +40,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
|||
const hash_t hash,
|
||||
const U32 checksum,
|
||||
const BYTE *pIn,
|
||||
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize);
|
||||
|
||||
hash_t HASH_hashU32(U32 value);
|
||||
|
||||
|
|
Loading…
Reference in New Issue