Switch to using ZSTD_count instead of function pointer

dev
Stella Lau 2017-07-18 18:05:10 -07:00
parent 19258f51c1
commit 1fa223859f
5 changed files with 194 additions and 20 deletions

View File

@ -36,14 +36,38 @@ LDM_hashEntry *HASH_getEntryFromHash(
return getBucket(table, hash);
}
/**
 * Decides whether the candidate at pMatch is an acceptable match for pIn.
 *
 * The candidate is rejected when the distance (pIn - pMatch) is greater
 * than maxWindowSize, or when any of the first minMatchLength bytes differ.
 *
 * pIn:            current input position.
 * pMatch:         candidate match position (presumably at or before pIn --
 *                 TODO confirm against callers).
 * minMatchLength: number of leading bytes that must be identical.
 * maxWindowSize:  largest accepted distance between pIn and pMatch.
 *
 * Returns 1 if the candidate is valid, 0 otherwise.
 */
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
                        U32 minMatchLength, U32 maxWindowSize) {
  U32 lengthLeft = minMatchLength;
  const BYTE *curIn = pIn;
  const BYTE *curMatch = pMatch;

  if (pIn - pMatch > maxWindowSize) {
    return 0;
  }

  // Compare four bytes at a time while at least four bytes remain.
  for (; lengthLeft >= 4; lengthLeft -= 4) {
    if (MEM_read32(curIn) != MEM_read32(curMatch)) {
      return 0;
    }
    curIn += 4;
    curMatch += 4;
  }

  // Compare the 0-3 leftover bytes one by one, so that a minMatchLength
  // that is not a multiple of four is still verified in full.
  for (; lengthLeft > 0; --lengthLeft) {
    if (*curIn != *curMatch) {
      return 0;
    }
    ++curIn;
    ++curMatch;
  }
  return 1;
}
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize) {
LDM_hashEntry *entry = getBucket(table, hash);
(void)checksum;
if ((*isValid)(pIn, entry->offset + table->offsetBase)) {
(void)pEnd;
if (isValidMatch(pIn, entry->offset + table->offsetBase,
minMatchLength, maxWindowSize)) {
return entry;
}
return NULL;

View File

@ -9,11 +9,14 @@
// Number of elements per hash bucket.
// HASH_BUCKET_SIZE_LOG defined in ldm.h
#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
// TODO: rename. Log2 of the number of hash buckets.
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
//#define TMP_ZSTDTOGGLE
struct LDM_hashTable {
U32 size; // Number of buckets
U32 maxEntries; // Rename...
@ -39,20 +42,162 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
}
#ifdef TMP_ZSTDTOGGLE
/**
 * Given the XOR of two machine words read from memory, returns how many
 * bytes of those words were identical, counted from the lowest address.
 *
 * val must be non-zero (ZSTD_count only calls this with a non-zero
 * difference); the bit-scan builtins used below are undefined for zero.
 *
 * Little-endian: the lowest-addressed byte is the least significant one, so
 * the result is (number of trailing zero bits) / 8.
 * Big-endian: the lowest-addressed byte is the most significant one, so the
 * result is (number of leading zero bits) / 8.
 */
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            /* Portable fallback: isolate the lowest set bit and look its
             * byte position up through a De Bruijn multiplication. */
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            /* _BitScanReverse64 yields the index of the highest set bit
             * (counted from bit 0), not the number of leading zeros, so
             * convert before dividing by 8. */
            return (unsigned)((63 - r) >> 3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            /* Same conversion as above: highest-set-bit index to
             * leading-zero count. */
            return (unsigned)((31 - r) >> 3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}
// From lib/compress/zstd_compress.c
/**
 * Returns the number of leading bytes that are equal at pIn and pMatch.
 *
 * pIn is never read at or beyond pInLimit. The bulk of the comparison is
 * done one size_t word at a time; the last few bytes are handled with
 * progressively narrower reads (4, 2, then 1 byte).
 *
 * pMatch is presumably an earlier position in the same buffer, so that its
 * reads stay in bounds whenever the pIn reads do -- TODO confirm.
 */
static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch,
                         const BYTE *const pInLimit) {
  const BYTE *const begin = pIn;
  // Last position (exclusive) from which a full word can still be read.
  const BYTE *const wordLimit = pInLimit - (sizeof(size_t) - 1);

  for (; pIn < wordLimit; pIn += sizeof(size_t), pMatch += sizeof(size_t)) {
    size_t const mismatch = MEM_readST(pMatch) ^ MEM_readST(pIn);
    if (mismatch != 0) {
      // The words differ: add the equal bytes inside this word and stop.
      pIn += ZSTD_NbCommonBytes(mismatch);
      return (size_t)(pIn - begin);
    }
  }

  // Tail: fewer than sizeof(size_t) bytes may remain before pInLimit.
  if (MEM_64bits() &&
      pIn < pInLimit - 3 &&
      MEM_read32(pMatch) == MEM_read32(pIn)) {
    pIn += 4;
    pMatch += 4;
  }
  if (pIn < pInLimit - 1 && MEM_read16(pMatch) == MEM_read16(pIn)) {
    pIn += 2;
    pMatch += 2;
  }
  if (pIn < pInLimit && *pMatch == *pIn) {
    ++pIn;
  }
  return (size_t)(pIn - begin);
}
#else
/**
 * Decides whether the candidate at pMatch is an acceptable match for pIn.
 *
 * The candidate is rejected when the distance (pIn - pMatch) is greater
 * than maxWindowSize, or when any of the first minMatchLength bytes differ.
 *
 * pIn:            current input position.
 * pMatch:         candidate match position (presumably at or before pIn --
 *                 TODO confirm against callers).
 * minMatchLength: number of leading bytes that must be identical.
 * maxWindowSize:  largest accepted distance between pIn and pMatch.
 *
 * Returns 1 if the candidate is valid, 0 otherwise.
 */
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
                        U32 minMatchLength, U32 maxWindowSize) {
  U32 lengthLeft = minMatchLength;
  const BYTE *curIn = pIn;
  const BYTE *curMatch = pMatch;

  if (pIn - pMatch > maxWindowSize) {
    return 0;
  }

  // Compare four bytes at a time while at least four bytes remain.
  for (; lengthLeft >= 4; lengthLeft -= 4) {
    if (MEM_read32(curIn) != MEM_read32(curMatch)) {
      return 0;
    }
    curIn += 4;
    curMatch += 4;
  }

  // Compare the 0-3 leftover bytes one by one, so that a minMatchLength
  // that is not a multiple of four is still verified in full.
  for (; lengthLeft > 0; --lengthLeft) {
    if (*curIn != *curMatch) {
      return 0;
    }
    ++curIn;
    ++curMatch;
  }
  return 1;
}
#endif // TMP_ZSTDTOGGLE
/**
 * Scans the bucket selected by `hash` and returns the first entry whose
 * stored checksum equals `checksum` and whose position is accepted as a
 * match for pIn. Returns NULL when no entry in the bucket qualifies.
 *
 * With TMP_ZSTDTOGGLE defined, an entry is accepted when it lies within
 * maxWindowSize of pIn and ZSTD_count() (bounded by pEnd) reports at least
 * minMatchLength common bytes. Otherwise the decision is delegated to
 * isValidMatch().
 */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize) {
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
// TODO: in order of recency?
// Visit each of the HASH_BUCKET_SIZE slots of the bucket in storage order.
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
// Check checksum for faster check.
// Entries store an offset; add the table's base to get the position.
const BYTE *pMatch = cur->offset + table->offsetBase;
#ifdef TMP_ZSTDTOGGLE
if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
// Count the common bytes; only a long enough match is returned.
U32 matchLength = ZSTD_count(pIn, pMatch, pEnd);
if (matchLength >= minMatchLength) {
return cur;
}
}
#else
// pEnd is not read on this path.
(void)pEnd;
(void)minMatchLength;
(void)maxWindowSize;
if (cur->checksum == checksum &&
(*isValid)(pIn, cur->offset + table->offsetBase)) {
isValidMatch(pIn, pMatch, minMatchLength, maxWindowSize)) {
return cur;
}
#endif
}
// No slot in the bucket qualified.
return NULL;
}

View File

@ -91,6 +91,7 @@ struct LDM_CCtx {
hash_t lagHash;
U32 lagSum;
U64 numHashInserts;
// DEBUG
const BYTE *DEBUG_setNextHash;
};
@ -164,7 +165,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
}
printf("\n");
printf("=====================\n");
}
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
@ -376,7 +376,7 @@ void LDM_outputConfiguration(void) {
printf("Min match, hash length: %d, %d\n",
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY);
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
printf("LDM_LAG %d\n", LDM_LAG);
printf("=====================\n");
}
@ -456,8 +456,10 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
#ifdef HASH_CHECK
entry = HASH_getEntryFromHash(cctx->hashTable, h, sum);
#else
entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
&LDM_isValidMatch);
entry = HASH_getValidEntry(cctx->hashTable, h, sum,
cctx->ip, cctx->iend,
LDM_MIN_MATCH_LENGTH,
LDM_WINDOW_SIZE);
#endif
if (entry != NULL) {
@ -534,9 +536,10 @@ size_t LDM_compress(const void *src, size_t srcSize,
LDM_CCtx cctx;
const BYTE *match = NULL;
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
LDM_outputConfiguration();
/* Hash the first position and put it into the hash table. */
LDM_putHashOfCurrentPosition(&cctx);
@ -553,11 +556,10 @@ size_t LDM_compress(const void *src, size_t srcSize,
* and encode the final literals.
*/
while (LDM_findBestMatch(&cctx, &match) == 0) {
U32 backwardsMatchLen = 0;
#ifdef COMPUTE_STATS
cctx.stats.numMatches++;
#endif
// printf("HERE %zu\n", cctx.ip - cctx.ibase);
/**
* Catch up: look back to extend the match backwards from the found match.
*/
@ -565,6 +567,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
cctx.ip[-1] == match[-1]) {
cctx.ip--;
match--;
backwardsMatchLen++;
}
/**
@ -575,8 +578,9 @@ size_t LDM_compress(const void *src, size_t srcSize,
const U32 literalLength = cctx.ip - cctx.anchor;
const U32 offset = cctx.ip - match;
const U32 matchLength = LDM_countMatchLength(
cctx.ip + LDM_MIN_MATCH_LENGTH, match + LDM_MIN_MATCH_LENGTH,
cctx.ihashLimit);
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
cctx.ihashLimit) + backwardsMatchLen;
LDM_outputBlock(&cctx, literalLength, offset, matchLength);

View File

@ -12,18 +12,17 @@
// Defines the size of the hash table.
// Currently this should be less than WINDOW_SIZE_LOG + 4?
#define LDM_MEMORY_USAGE 24
#define LDM_MEMORY_USAGE 23
//#define LDM_LAG (1 << 23)
//#define LDM_LAG (1 << 20)
#define LDM_LAG 0
#define LDM_LAG (0)
#define LDM_WINDOW_SIZE_LOG 28
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
// These should be multiples of four (and perhaps set to the same value?).
#define LDM_MIN_MATCH_LENGTH 512
#define LDM_HASH_LENGTH 512
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_LENGTH 64
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;

View File

@ -40,7 +40,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize);
hash_t HASH_hashU32(U32 value);