better support for large dictionaries (> 128 KB)
This commit is contained in:
parent
07626dfa51
commit
c154d9d6a2
@ -230,4 +230,29 @@ void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
|
||||
void ZSTD_defaultFreeFunction(void* opaque, void* address);
|
||||
static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
|
||||
|
||||
/*====== common function ======*/
|
||||
|
||||
MEM_STATIC U32 ZSTD_highbit32(U32 val)
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
_BitScanReverse(&r, val);
|
||||
return (unsigned)r;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return 31 - __builtin_clz(val);
|
||||
# else /* Software version */
|
||||
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
int r;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
|
||||
return r;
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
#endif /* ZSTD_CCOMMON_H_MODULE */
|
||||
|
@ -73,27 +73,6 @@ static const U32 g_searchStrength = 8; /* control skip over incompressible dat
|
||||
***************************************/
|
||||
size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
|
||||
|
||||
static U32 ZSTD_highbit32(U32 val)
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
_BitScanReverse(&r, val);
|
||||
return (unsigned)r;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return 31 - __builtin_clz(val);
|
||||
# else /* Software version */
|
||||
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
||||
U32 v = val;
|
||||
int r;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
|
||||
return r;
|
||||
# endif
|
||||
}
|
||||
|
||||
/*-*************************************
|
||||
* Sequence storage
|
||||
|
@ -660,7 +660,7 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
|
||||
}
|
||||
|
||||
|
||||
#define OFFCODE_MAX 18 /* only applicable to first block */
|
||||
#define OFFCODE_MAX 30 /* only applicable to first block */
|
||||
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
unsigned compressionLevel,
|
||||
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
||||
@ -670,6 +670,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
|
||||
U32 offcodeCount[OFFCODE_MAX+1];
|
||||
short offcodeNCount[OFFCODE_MAX+1];
|
||||
U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
|
||||
U32 matchLengthCount[MaxML+1];
|
||||
short matchLengthNCount[MaxML+1];
|
||||
U32 litLengthCount[MaxLL+1];
|
||||
@ -686,8 +687,9 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
BYTE* dstPtr = (BYTE*)dstBuffer;
|
||||
|
||||
/* init */
|
||||
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
||||
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
||||
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
|
||||
for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
|
||||
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
||||
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
||||
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
||||
@ -866,8 +868,8 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
||||
|
||||
#define DIB_MINSAMPLESSIZE 512
|
||||
/*! ZDICT_trainFromBuffer_unsafe() :
|
||||
* `samplesBuffer` must be followed by noisy guard band.
|
||||
* @return : size of dictionary.
|
||||
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
||||
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
||||
*/
|
||||
size_t ZDICT_trainFromBuffer_unsafe(
|
||||
void* dictBuffer, size_t maxDictSize,
|
||||
@ -973,23 +975,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_params_t params)
|
||||
{
|
||||
size_t result;
|
||||
void* newBuff;
|
||||
size_t sBuffSize;
|
||||
size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
||||
if (sBuffSize < DIB_MINSAMPLESSIZE) return 0; /* not enough content => no dictionary */
|
||||
|
||||
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
|
||||
if (sBuffSize==0) return 0; /* empty content => no dictionary */
|
||||
newBuff = malloc(sBuffSize + NOISELENGTH);
|
||||
if (!newBuff) return ERROR(memory_allocation);
|
||||
|
||||
memcpy(newBuff, samplesBuffer, sBuffSize);
|
||||
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||
|
||||
{ size_t const result = ZDICT_trainFromBuffer_unsafe(
|
||||
result = ZDICT_trainFromBuffer_unsafe(
|
||||
dictBuffer, dictBufferCapacity,
|
||||
newBuff, samplesSizes, nbSamples,
|
||||
params);
|
||||
free(newBuff);
|
||||
return result; }
|
||||
free(newBuff);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user