Fixed zdict
more dictionary compression tests
This commit is contained in:
parent
74bd11954b
commit
b44be74244
92
lib/zdict.c
92
lib/zdict.c
@ -574,7 +574,6 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|||||||
{
|
{
|
||||||
unsigned acc = PRIME1;
|
unsigned acc = PRIME1;
|
||||||
size_t p=0;;
|
size_t p=0;;
|
||||||
|
|
||||||
for (p=0; p<length; p++) {
|
for (p=0; p<length; p++) {
|
||||||
acc *= PRIME2;
|
acc *= PRIME2;
|
||||||
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
||||||
@ -594,30 +593,37 @@ static void ZDICT_countEStats(EStats_ress_t esr,
|
|||||||
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
|
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
|
||||||
const void* src, size_t srcSize)
|
const void* src, size_t srcSize)
|
||||||
{
|
{
|
||||||
const BYTE* bytePtr;
|
const seqStore_t* seqStorePtr;
|
||||||
const U32* u32Ptr;
|
|
||||||
seqStore_t seqStore;
|
|
||||||
|
|
||||||
if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
|
if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
|
||||||
ZSTD_copyCCtx(esr.zc, esr.ref);
|
ZSTD_copyCCtx(esr.zc, esr.ref);
|
||||||
ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
||||||
seqStore = ZSTD_copySeqStore(esr.zc);
|
seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
||||||
|
|
||||||
/* count stats */
|
/* literals stats */
|
||||||
for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++)
|
{ const BYTE* bytePtr;
|
||||||
|
for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
|
||||||
countLit[*bytePtr]++;
|
countLit[*bytePtr]++;
|
||||||
for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) {
|
|
||||||
BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1;
|
|
||||||
if (*u32Ptr==0) offcode=0;
|
|
||||||
offsetcodeCount[offcode]++;
|
|
||||||
}
|
}
|
||||||
(void)matchlengthCount; (void)litlengthCount;
|
|
||||||
/*
|
/* seqStats */
|
||||||
for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
|
{ size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
|
||||||
matchlengthCount[*bytePtr]++;
|
ZSTD_seqToCodes(seqStorePtr, nbSeq);
|
||||||
for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
|
|
||||||
litlengthCount[*bytePtr]++;
|
{ const BYTE* codePtr = seqStorePtr->offCodeStart;
|
||||||
*/
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
{ const BYTE* codePtr = seqStorePtr->mlCodeStart;
|
||||||
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
{ const BYTE* codePtr = seqStorePtr->llCodeStart;
|
||||||
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
|
||||||
|
} }
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
||||||
@ -636,13 +642,13 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
const void* dictBuffer, size_t dictBufferSize)
|
const void* dictBuffer, size_t dictBufferSize)
|
||||||
{
|
{
|
||||||
U32 countLit[256];
|
U32 countLit[256];
|
||||||
U32 offcodeCount[MaxOff+1];
|
|
||||||
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
|
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
|
||||||
short offcodeNCount[MaxOff+1];
|
U32 offcodeCount[OFFCODE_MAX+1];
|
||||||
|
short offcodeNCount[OFFCODE_MAX+1];
|
||||||
U32 matchLengthCount[MaxML+1];
|
U32 matchLengthCount[MaxML+1];
|
||||||
short matchLengthNCount[MaxML+1];
|
short matchLengthNCount[MaxML+1];
|
||||||
U32 litlengthCount[MaxLL+1];
|
U32 litLengthCount[MaxLL+1];
|
||||||
short litlengthNCount[MaxLL+1];
|
short litLengthNCount[MaxLL+1];
|
||||||
EStats_ress_t esr;
|
EStats_ress_t esr;
|
||||||
ZSTD_parameters params;
|
ZSTD_parameters params;
|
||||||
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
|
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
|
||||||
@ -653,7 +659,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
||||||
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
|
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
|
||||||
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
||||||
for (u=0; u<=MaxLL; u++) litlengthCount[u]=1;
|
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
||||||
esr.ref = ZSTD_createCCtx();
|
esr.ref = ZSTD_createCCtx();
|
||||||
esr.zc = ZSTD_createCCtx();
|
esr.zc = ZSTD_createCCtx();
|
||||||
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
||||||
@ -670,7 +676,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
/* collect stats on all files */
|
/* collect stats on all files */
|
||||||
for (u=0; u<nbFiles; u++) {
|
for (u=0; u<nbFiles; u++) {
|
||||||
ZDICT_countEStats(esr,
|
ZDICT_countEStats(esr,
|
||||||
countLit, offcodeCount, matchLengthCount, litlengthCount,
|
countLit, offcodeCount, matchLengthCount, litLengthCount,
|
||||||
(const char*)srcBuffer + pos, fileSizes[u]);
|
(const char*)srcBuffer + pos, fileSizes[u]);
|
||||||
pos += fileSizes[u];
|
pos += fileSizes[u];
|
||||||
}
|
}
|
||||||
@ -702,11 +708,11 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
}
|
}
|
||||||
mlLog = (U32)errorCode;
|
mlLog = (U32)errorCode;
|
||||||
|
|
||||||
total=0; for (u=0; u<=MaxLL; u++) total+=litlengthCount[u];
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
||||||
errorCode = FSE_normalizeCount(litlengthNCount, llLog, litlengthCount, total, MaxLL);
|
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with litlengthCount");
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
llLog = (U32)errorCode;
|
llLog = (U32)errorCode;
|
||||||
@ -742,7 +748,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
maxDstSize -= errorCode;
|
maxDstSize -= errorCode;
|
||||||
eSize += errorCode;
|
eSize += errorCode;
|
||||||
|
|
||||||
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litlengthNCount, MaxLL, llLog);
|
errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
||||||
@ -801,12 +807,17 @@ static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
|
||||||
|
/*! ZDICT_trainFromBuffer_unsafe() :
|
||||||
|
* `samplesBuffer` must be followed by a noisy guard band.
|
||||||
|
* @return : size of dictionary.
|
||||||
|
*/
|
||||||
size_t ZDICT_trainFromBuffer_unsafe(
|
size_t ZDICT_trainFromBuffer_unsafe(
|
||||||
void* dictBuffer, size_t maxDictSize,
|
void* dictBuffer, size_t maxDictSize,
|
||||||
const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t params)
|
ZDICT_params_t params)
|
||||||
{
|
{
|
||||||
const U32 dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
|
U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
|
||||||
dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
||||||
unsigned selectivity = params.selectivityLevel;
|
unsigned selectivity = params.selectivityLevel;
|
||||||
unsigned compressionLevel = params.compressionLevel;
|
unsigned compressionLevel = params.compressionLevel;
|
||||||
@ -816,10 +827,11 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
|
|
||||||
/* checks */
|
/* checks */
|
||||||
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
|
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
|
||||||
|
if (!dictList) return ERROR(memory_allocation);
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
|
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
|
||||||
if (!dictList) return ERROR(memory_allocation);
|
if (sBuffSize < DIB_MINSAMPLESSIZE) return 0; /* not enough source to create dictionary */
|
||||||
ZDICT_initDictItem(dictList);
|
ZDICT_initDictItem(dictList);
|
||||||
g_displayLevel = params.notificationLevel;
|
g_displayLevel = params.notificationLevel;
|
||||||
if (selectivity==0) selectivity = g_selectivity_default;
|
if (selectivity==0) selectivity = g_selectivity_default;
|
||||||
@ -834,9 +846,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
|
|
||||||
/* display best matches */
|
/* display best matches */
|
||||||
if (g_displayLevel>= 3) {
|
if (g_displayLevel>= 3) {
|
||||||
const U32 nb = 25;
|
U32 const nb = 25;
|
||||||
|
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
||||||
U32 u;
|
U32 u;
|
||||||
U32 dictContentSize = ZDICT_dictSize(dictList);
|
|
||||||
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
||||||
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
||||||
for (u=1; u<=nb; u++) {
|
for (u=1; u<=nb; u++) {
|
||||||
@ -850,8 +862,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
} } }
|
} } }
|
||||||
|
|
||||||
/* create dictionary */
|
/* create dictionary */
|
||||||
{
|
{ U32 dictContentSize = ZDICT_dictSize(dictList);
|
||||||
U32 dictContentSize = ZDICT_dictSize(dictList);
|
|
||||||
size_t hSize;
|
size_t hSize;
|
||||||
BYTE* ptr;
|
BYTE* ptr;
|
||||||
U32 u;
|
U32 u;
|
||||||
@ -896,31 +907,32 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* issue : samplesBuffer needs to be followed by a noisy guard band.
|
||||||
|
* workaround : duplicate the buffer, and append the noise after it */
|
||||||
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_params_t params)
|
ZDICT_params_t params)
|
||||||
{
|
{
|
||||||
size_t sBuffSize;
|
|
||||||
void* newBuff;
|
void* newBuff;
|
||||||
size_t result;
|
size_t sBuffSize;
|
||||||
|
|
||||||
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
|
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
|
||||||
|
if (sBuffSize==0) return 0; /* empty content => no dictionary */
|
||||||
newBuff = malloc(sBuffSize + NOISELENGTH);
|
newBuff = malloc(sBuffSize + NOISELENGTH);
|
||||||
if (!newBuff) return ERROR(memory_allocation);
|
if (!newBuff) return ERROR(memory_allocation);
|
||||||
|
|
||||||
memcpy(newBuff, samplesBuffer, sBuffSize);
|
memcpy(newBuff, samplesBuffer, sBuffSize);
|
||||||
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||||
|
|
||||||
result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity,
|
{ size_t const result = ZDICT_trainFromBuffer_unsafe(
|
||||||
|
dictBuffer, dictBufferCapacity,
|
||||||
newBuff, samplesSizes, nbSamples,
|
newBuff, samplesSizes, nbSamples,
|
||||||
params);
|
params);
|
||||||
free(newBuff);
|
free(newBuff);
|
||||||
return result;
|
return result; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* issue : samplesBuffer need to be followed by a noisy guard band.
|
|
||||||
* work around : duplicate the buffer, and add the noise ? */
|
|
||||||
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
||||||
{
|
{
|
||||||
|
@ -127,9 +127,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
|
|||||||
return 0; /* reserved as a potential error code in the future */
|
return 0; /* reserved as a potential error code in the future */
|
||||||
}
|
}
|
||||||
|
|
||||||
seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
|
||||||
{
|
{
|
||||||
return ctx->seqStore;
|
return &(ctx->seqStore);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -569,11 +569,59 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
|
|||||||
ostart[4] = (BYTE)(cLitSize);
|
ostart[4] = (BYTE)(cLitSize);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return lhSize+cLitSize;
|
return lhSize+cLitSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq)
|
||||||
|
{
|
||||||
|
/* LL codes */
|
||||||
|
{ static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 16, 17, 17, 18, 18, 19, 19,
|
||||||
|
20, 20, 20, 20, 21, 21, 21, 21,
|
||||||
|
22, 22, 22, 22, 22, 22, 22, 22,
|
||||||
|
23, 23, 23, 23, 23, 23, 23, 23,
|
||||||
|
24, 24, 24, 24, 24, 24, 24, 24,
|
||||||
|
24, 24, 24, 24, 24, 24, 24, 24 };
|
||||||
|
const BYTE LL_deltaCode = 19;
|
||||||
|
U16* const llTable = seqStorePtr->litLengthStart;
|
||||||
|
BYTE* const llCodeTable = seqStorePtr->llCodeStart;
|
||||||
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) {
|
||||||
|
U32 ll = llTable[u];
|
||||||
|
if (llTable[u] == 65535) { ll = seqStorePtr->longLength; llTable[u] = (U16)ll; }
|
||||||
|
llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
|
||||||
|
} }
|
||||||
|
|
||||||
|
/* Offset codes */
|
||||||
|
{ const U32* const offsetTable = seqStorePtr->offsetStart;
|
||||||
|
BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
|
||||||
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ML codes */
|
||||||
|
{ static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
|
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
|
||||||
|
38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
|
||||||
|
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
||||||
|
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
|
||||||
|
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
|
||||||
|
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
|
||||||
|
const BYTE ML_deltaCode = 36;
|
||||||
|
U16* const mlTable = seqStorePtr->matchLengthStart;
|
||||||
|
BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
|
||||||
|
size_t u;
|
||||||
|
for (u=0; u<nbSeq; u++) {
|
||||||
|
U32 ml = mlTable[u];
|
||||||
|
if (mlTable[u] == 65535) { ml = seqStorePtr->longLength; mlTable[u] = (U16)ml; }
|
||||||
|
mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
|
||||||
|
} }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
size_t srcSize)
|
size_t srcSize)
|
||||||
@ -619,22 +667,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|||||||
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
||||||
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
||||||
|
|
||||||
/* LL codes */
|
/* convert length/distances into codes */
|
||||||
{ static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
|
ZSTD_seqToCodes(seqStorePtr, nbSeq);
|
||||||
8, 9, 10, 11, 12, 13, 14, 15,
|
|
||||||
16, 16, 17, 17, 18, 18, 19, 19,
|
|
||||||
20, 20, 20, 20, 21, 21, 21, 21,
|
|
||||||
22, 22, 22, 22, 22, 22, 22, 22,
|
|
||||||
23, 23, 23, 23, 23, 23, 23, 23,
|
|
||||||
24, 24, 24, 24, 24, 24, 24, 24,
|
|
||||||
24, 24, 24, 24, 24, 24, 24, 24 };
|
|
||||||
const BYTE LL_deltaCode = 19;
|
|
||||||
size_t u;
|
|
||||||
for (u=0; u<nbSeq; u++) {
|
|
||||||
U32 ll = llTable[u];
|
|
||||||
if (llTable[u] == 65535) { ll = seqStorePtr->longLength; llTable[u] = (U16)ll; }
|
|
||||||
llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
|
|
||||||
} }
|
|
||||||
|
|
||||||
/* CTable for Literal Lengths */
|
/* CTable for Literal Lengths */
|
||||||
{ U32 max = MaxLL;
|
{ U32 max = MaxLL;
|
||||||
@ -660,9 +694,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|||||||
LLtype = FSE_ENCODING_DYNAMIC;
|
LLtype = FSE_ENCODING_DYNAMIC;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
/* Offset codes */
|
/* CTable for Offsets */
|
||||||
{ size_t i; for (i=0; i<nbSeq; i++) ofCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]); }
|
|
||||||
|
|
||||||
{ U32 max = MaxOff;
|
{ U32 max = MaxOff;
|
||||||
size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
|
size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
|
||||||
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
|
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
|
||||||
@ -686,23 +718,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|||||||
Offtype = FSE_ENCODING_DYNAMIC;
|
Offtype = FSE_ENCODING_DYNAMIC;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
/* ML codes */
|
|
||||||
{ static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
|
||||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
|
||||||
32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
|
|
||||||
38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
|
|
||||||
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
|
||||||
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
|
|
||||||
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
|
|
||||||
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
|
|
||||||
const BYTE ML_deltaCode = 36;
|
|
||||||
size_t u;
|
|
||||||
for (u=0; u<nbSeq; u++) {
|
|
||||||
U32 ml = mlTable[u];
|
|
||||||
if (mlTable[u] == 65535) { ml = seqStorePtr->longLength; mlTable[u] = (U16)ml; }
|
|
||||||
mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
|
|
||||||
} }
|
|
||||||
|
|
||||||
/* CTable for MatchLengths */
|
/* CTable for MatchLengths */
|
||||||
{ U32 max = MaxML;
|
{ U32 max = MaxML;
|
||||||
size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
|
size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
|
||||||
|
@ -236,7 +236,8 @@ typedef struct {
|
|||||||
#endif
|
#endif
|
||||||
} seqStore_t;
|
} seqStore_t;
|
||||||
|
|
||||||
seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx);
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
|
||||||
|
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
|
||||||
|
|
||||||
|
|
||||||
#endif /* ZSTD_CCOMMON_H_MODULE */
|
#endif /* ZSTD_CCOMMON_H_MODULE */
|
||||||
|
@ -333,28 +333,30 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
|||||||
{
|
{
|
||||||
FILE* srcFile = ress.srcFile;
|
FILE* srcFile = ress.srcFile;
|
||||||
FILE* dstFile = ress.dstFile;
|
FILE* dstFile = ress.dstFile;
|
||||||
U64 filesize = 0;
|
U64 readsize = 0;
|
||||||
U64 compressedfilesize = 0;
|
U64 compressedfilesize = 0;
|
||||||
size_t dictSize = ress.dictBufferSize;
|
size_t dictSize = ress.dictBufferSize;
|
||||||
size_t sizeCheck, errorCode;
|
size_t sizeCheck, errorCode;
|
||||||
ZSTD_parameters params;
|
ZSTD_parameters params;
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
filesize = MAX(FIO_getFileSize(srcFileName),dictSize);
|
{ U64 const filesize = FIO_getFileSize(srcFileName);
|
||||||
params = ZSTD_getParams(cLevel, filesize);
|
U64 const levelsize = MAX(FIO_getFileSize(srcFileName), dictSize);
|
||||||
|
params = ZSTD_getParams(cLevel, levelsize);
|
||||||
params.srcSize = filesize;
|
params.srcSize = filesize;
|
||||||
|
}
|
||||||
if (g_maxWLog) if (params.windowLog > g_maxWLog) params.windowLog = g_maxWLog;
|
if (g_maxWLog) if (params.windowLog > g_maxWLog) params.windowLog = g_maxWLog;
|
||||||
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params);
|
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params);
|
||||||
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
|
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
|
||||||
|
|
||||||
/* Main compression loop */
|
/* Main compression loop */
|
||||||
filesize = 0;
|
readsize = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
/* Fill input Buffer */
|
/* Fill input Buffer */
|
||||||
size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
|
size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
|
||||||
if (inSize==0) break;
|
if (inSize==0) break;
|
||||||
filesize += inSize;
|
readsize += inSize;
|
||||||
DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20));
|
DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(readsize>>20));
|
||||||
|
|
||||||
{ /* Compress using buffered streaming */
|
{ /* Compress using buffered streaming */
|
||||||
size_t usedInSize = inSize;
|
size_t usedInSize = inSize;
|
||||||
@ -371,13 +373,12 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
|||||||
if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
|
if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
|
||||||
compressedfilesize += cSize;
|
compressedfilesize += cSize;
|
||||||
}
|
}
|
||||||
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
|
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of Frame */
|
/* End of Frame */
|
||||||
{
|
{ size_t cSize = ress.dstBufferSize;
|
||||||
size_t cSize = ress.dstBufferSize;
|
size_t const result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
|
||||||
size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
|
|
||||||
if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end");
|
if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end");
|
||||||
|
|
||||||
sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile);
|
sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile);
|
||||||
@ -388,7 +389,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
|||||||
/* Status */
|
/* Status */
|
||||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||||
DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
|
DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
|
||||||
(unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
|
(unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -25,7 +25,7 @@ roundTripTest() {
|
|||||||
|
|
||||||
echo "\n**** simple tests **** "
|
echo "\n**** simple tests **** "
|
||||||
./datagen > tmp
|
./datagen > tmp
|
||||||
$ZSTD tmp
|
$ZSTD -f tmp
|
||||||
$ZSTD -99 tmp && die "too large compression level undetected"
|
$ZSTD -99 tmp && die "too large compression level undetected"
|
||||||
$ZSTD tmp -c > tmpCompressed
|
$ZSTD tmp -c > tmpCompressed
|
||||||
$ZSTD tmp --stdout > tmpCompressed
|
$ZSTD tmp --stdout > tmpCompressed
|
||||||
@ -71,6 +71,11 @@ echo "\n**** dictionary tests **** "
|
|||||||
./datagen -g1M | md5sum > tmp1
|
./datagen -g1M | md5sum > tmp1
|
||||||
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2
|
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2
|
||||||
diff -q tmp1 tmp2
|
diff -q tmp1 tmp2
|
||||||
|
$ZSTD --train *.c *.h -o tmpDict
|
||||||
|
$ZSTD xxhash.c -D tmpDict -of tmp
|
||||||
|
$ZSTD -d tmp -D tmpDict -of result
|
||||||
|
diff xxhash.c result
|
||||||
|
|
||||||
|
|
||||||
echo "\n**** multiple files tests **** "
|
echo "\n**** multiple files tests **** "
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user