Merge branch 'dev' into fracFse
This commit is contained in:
commit
b5ef32fea7
@ -603,6 +603,7 @@ Let's call its first byte `byte0`.
|
|||||||
- `if (byte0 == 0)` : there are no sequences.
|
- `if (byte0 == 0)` : there are no sequences.
|
||||||
The sequence section stops there.
|
The sequence section stops there.
|
||||||
Decompressed content is defined entirely as Literals Section content.
|
Decompressed content is defined entirely as Literals Section content.
|
||||||
|
The FSE tables used in `Repeat_Mode` aren't updated.
|
||||||
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
|
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
|
||||||
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
|
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
|
||||||
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
|
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
|
||||||
@ -631,7 +632,7 @@ They follow the same enumeration :
|
|||||||
No distribution table will be present.
|
No distribution table will be present.
|
||||||
- `RLE_Mode` : The table description consists of a single byte.
|
- `RLE_Mode` : The table description consists of a single byte.
|
||||||
This code will be repeated for all sequences.
|
This code will be repeated for all sequences.
|
||||||
- `Repeat_Mode` : The table used in the previous `Compressed_Block` will be used again,
|
- `Repeat_Mode` : The table used in the previous `Compressed_Block` with `Number_of_Sequences > 0` will be used again,
|
||||||
or if this is the first block, table in the dictionary will be used
|
or, if this is the first block, the table in the dictionary will be used.
|
||||||
No distribution table will be present.
|
No distribution table will be present.
|
||||||
Note that this includes `RLE_mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.
|
Note that this includes `RLE_Mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.
|
||||||
|
@ -72,7 +72,19 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||||||
unsigned charnum = 0;
|
unsigned charnum = 0;
|
||||||
int previous0 = 0;
|
int previous0 = 0;
|
||||||
|
|
||||||
if (hbSize < 4) return ERROR(srcSize_wrong);
|
if (hbSize < 4) {
|
||||||
|
/* This function only works when hbSize >= 4 */
|
||||||
|
char buffer[4];
|
||||||
|
memset(buffer, 0, sizeof(buffer));
|
||||||
|
memcpy(buffer, headerBuffer, hbSize);
|
||||||
|
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
|
||||||
|
buffer, sizeof(buffer));
|
||||||
|
if (FSE_isError(countSize)) return countSize;
|
||||||
|
if (countSize > hbSize) return ERROR(corruption_detected);
|
||||||
|
return countSize;
|
||||||
|
} }
|
||||||
|
assert(hbSize >= 4);
|
||||||
|
|
||||||
bitStream = MEM_readLE32(ip);
|
bitStream = MEM_readLE32(ip);
|
||||||
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
|
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
|
||||||
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
|
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
|
||||||
@ -105,6 +117,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
|||||||
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
|
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
|
||||||
while (charnum < n0) normalizedCounter[charnum++] = 0;
|
while (charnum < n0) normalizedCounter[charnum++] = 0;
|
||||||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
|
||||||
|
assert((bitCount >> 3) <= 3); /* For first condition to work */
|
||||||
ip += bitCount>>3;
|
ip += bitCount>>3;
|
||||||
bitCount &= 7;
|
bitCount &= 7;
|
||||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||||
|
@ -591,8 +591,9 @@ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
|
|||||||
* Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
|
* Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
|
||||||
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
||||||
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
||||||
MEM_STATIC U32 FSE_bitCost(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue, U32 accuracyLog)
|
MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
|
||||||
{
|
{
|
||||||
|
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
|
||||||
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
|
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
|
||||||
U32 const threshold = (minNbBits+1) << 16;
|
U32 const threshold = (minNbBits+1) << 16;
|
||||||
assert(tableLog < 16);
|
assert(tableLog < 16);
|
||||||
|
@ -946,10 +946,10 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
|||||||
int i;
|
int i;
|
||||||
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
||||||
bs->rep[i] = repStartValue[i];
|
bs->rep[i] = repStartValue[i];
|
||||||
bs->entropy.hufCTable_repeatMode = HUF_repeat_none;
|
bs->entropy.huf.repeatMode = HUF_repeat_none;
|
||||||
bs->entropy.offcode_repeatMode = FSE_repeat_none;
|
bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
|
||||||
bs->entropy.matchlength_repeatMode = FSE_repeat_none;
|
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
|
||||||
bs->entropy.litlength_repeatMode = FSE_repeat_none;
|
bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! ZSTD_invalidateMatchState()
|
/*! ZSTD_invalidateMatchState()
|
||||||
@ -963,6 +963,7 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
|
|||||||
ms->nextToUpdate = ms->window.dictLimit + 1;
|
ms->nextToUpdate = ms->window.dictLimit + 1;
|
||||||
ms->loadedDictEnd = 0;
|
ms->loadedDictEnd = 0;
|
||||||
ms->opt.litLengthSum = 0; /* force reset of btopt stats */
|
ms->opt.litLengthSum = 0; /* force reset of btopt stats */
|
||||||
|
ms->dictMatchState = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! ZSTD_continueCCtx() :
|
/*! ZSTD_continueCCtx() :
|
||||||
@ -1203,42 +1204,80 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
|
|||||||
U64 pledgedSrcSize,
|
U64 pledgedSrcSize,
|
||||||
ZSTD_buffered_policy_e zbuff)
|
ZSTD_buffered_policy_e zbuff)
|
||||||
{
|
{
|
||||||
|
/* We have a choice between copying the dictionary context into the working
|
||||||
|
* context, or referencing the dictionary context from the working context
|
||||||
|
* in-place. We decide here which strategy to use. */
|
||||||
|
const int attachDict = ( pledgedSrcSize <= 8 KB
|
||||||
|
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN )
|
||||||
|
&& !params.forceWindow /* dictMatchState isn't correctly
|
||||||
|
* handled in _enforceMaxDist */
|
||||||
|
&& cdict->cParams.strategy == ZSTD_fast
|
||||||
|
&& ZSTD_equivalentCParams(cctx->appliedParams.cParams,
|
||||||
|
cdict->cParams);
|
||||||
|
|
||||||
|
|
||||||
{ unsigned const windowLog = params.cParams.windowLog;
|
{ unsigned const windowLog = params.cParams.windowLog;
|
||||||
assert(windowLog != 0);
|
assert(windowLog != 0);
|
||||||
/* Copy only compression parameters related to tables. */
|
/* Copy only compression parameters related to tables. */
|
||||||
params.cParams = cdict->cParams;
|
params.cParams = cdict->cParams;
|
||||||
params.cParams.windowLog = windowLog;
|
params.cParams.windowLog = windowLog;
|
||||||
ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff);
|
ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
||||||
|
attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset,
|
||||||
|
zbuff);
|
||||||
assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
|
assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
|
||||||
assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
|
assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
|
||||||
assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
|
assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy tables */
|
if (attachDict) {
|
||||||
{ size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
|
const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc
|
||||||
size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
|
- cdict->matchState.window.base);
|
||||||
size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
|
if (cdictLen == 0) {
|
||||||
assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
|
/* don't even attach dictionaries with no contents */
|
||||||
assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
|
DEBUGLOG(4, "skipping attaching empty dictionary");
|
||||||
assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
|
} else {
|
||||||
assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
|
DEBUGLOG(4, "attaching dictionary into context");
|
||||||
memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
|
cctx->blockState.matchState.dictMatchState = &cdict->matchState;
|
||||||
}
|
|
||||||
/* Zero the hashTable3, since the cdict never fills it */
|
/* prep working match state so dict matches never have negative indices
|
||||||
{ size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
|
* when they are translated to the working context's index space. */
|
||||||
assert(cdict->matchState.hashLog3 == 0);
|
if (cctx->blockState.matchState.window.dictLimit < cdictLen) {
|
||||||
memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
|
cctx->blockState.matchState.window.nextSrc =
|
||||||
|
cctx->blockState.matchState.window.base + cdictLen;
|
||||||
|
ZSTD_window_clear(&cctx->blockState.matchState.window);
|
||||||
|
}
|
||||||
|
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DEBUGLOG(4, "copying dictionary into context");
|
||||||
|
/* copy tables */
|
||||||
|
{ size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
|
||||||
|
size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
|
||||||
|
size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
|
||||||
|
assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
|
||||||
|
assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
|
||||||
|
assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
|
||||||
|
assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
|
||||||
|
memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Zero the hashTable3, since the cdict never fills it */
|
||||||
|
{ size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
|
||||||
|
assert(cdict->matchState.hashLog3 == 0);
|
||||||
|
memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy dictionary offsets */
|
||||||
|
{
|
||||||
|
ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
|
||||||
|
ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
|
||||||
|
dstMatchState->window = srcMatchState->window;
|
||||||
|
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
|
||||||
|
dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
|
||||||
|
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy dictionary offsets */
|
|
||||||
{
|
|
||||||
ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
|
|
||||||
ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
|
|
||||||
dstMatchState->window = srcMatchState->window;
|
|
||||||
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
|
|
||||||
dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
|
|
||||||
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
|
|
||||||
}
|
|
||||||
cctx->dictID = cdict->dictID;
|
cctx->dictID = cdict->dictID;
|
||||||
|
|
||||||
/* copy block state */
|
/* copy block state */
|
||||||
@ -1455,8 +1494,8 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
|
|||||||
|
|
||||||
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
|
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
|
||||||
|
|
||||||
static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
|
static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||||
ZSTD_entropyCTables_t* nextEntropy,
|
ZSTD_hufCTables_t* nextHuf,
|
||||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
@ -1473,27 +1512,25 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
|
|||||||
disableLiteralCompression);
|
disableLiteralCompression);
|
||||||
|
|
||||||
/* Prepare nextEntropy assuming reusing the existing table */
|
/* Prepare nextEntropy assuming reusing the existing table */
|
||||||
nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
|
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||||
memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
|
|
||||||
sizeof(prevEntropy->hufCTable));
|
|
||||||
|
|
||||||
if (disableLiteralCompression)
|
if (disableLiteralCompression)
|
||||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||||
|
|
||||||
/* small ? don't even attempt compression (speed opt) */
|
/* small ? don't even attempt compression (speed opt) */
|
||||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||||
{ size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||||
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
|
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
|
||||||
{ HUF_repeat repeat = prevEntropy->hufCTable_repeatMode;
|
{ HUF_repeat repeat = prevHuf->repeatMode;
|
||||||
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
||||||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
||||||
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
||||||
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2)
|
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
|
||||||
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
||||||
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2);
|
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||||
if (repeat != HUF_repeat_none) {
|
if (repeat != HUF_repeat_none) {
|
||||||
/* reused the existing table */
|
/* reused the existing table */
|
||||||
hType = set_repeat;
|
hType = set_repeat;
|
||||||
@ -1501,17 +1538,17 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
||||||
memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
|
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||||
}
|
}
|
||||||
if (cLitSize==1) {
|
if (cLitSize==1) {
|
||||||
memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
|
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hType == set_compressed) {
|
if (hType == set_compressed) {
|
||||||
/* using a newly constructed table */
|
/* using a newly constructed table */
|
||||||
nextEntropy->hufCTable_repeatMode = HUF_repeat_check;
|
nextHuf->repeatMode = HUF_repeat_check;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Build header */
|
/* Build header */
|
||||||
@ -1561,6 +1598,137 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|||||||
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Fixed-point lookup table for -log2(x / 256), indexed by x in [0, 256).
 *   x == 0 : entry is 0
 *   x >= 1 : entry is floor(-log2(x / 256) * 256)
 * Entries carry 8 fractional bits, e.g. [1] == 2048 (8.0 bits),
 * [64] == 512 (2.0 bits), [128] == 256 (1.0 bit), [255] == 1.
 */
static unsigned const kInverseProbabiltyLog256[256] = {
    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
    5,    4,    2,    1,
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the cost in bits of encoding the distribution described by count
|
||||||
|
* using the entropy bound.
|
||||||
|
*/
|
||||||
|
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
|
||||||
|
{
|
||||||
|
unsigned cost = 0;
|
||||||
|
unsigned s;
|
||||||
|
for (s = 0; s <= max; ++s) {
|
||||||
|
unsigned norm = (unsigned)((256 * count[s]) / total);
|
||||||
|
if (count[s] != 0 && norm == 0)
|
||||||
|
norm = 1;
|
||||||
|
assert(count[s] < total);
|
||||||
|
cost += count[s] * kInverseProbabiltyLog256[norm];
|
||||||
|
}
|
||||||
|
return cost >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the cost in bits of encoding the distribution in count using the
|
||||||
|
* table described by norm. The max symbol support by norm is assumed >= max.
|
||||||
|
* norm must be valid for every symbol with non-zero probability in count.
|
||||||
|
*/
|
||||||
|
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
||||||
|
unsigned const* count, unsigned const max)
|
||||||
|
{
|
||||||
|
unsigned const shift = 8 - accuracyLog;
|
||||||
|
size_t cost = 0;
|
||||||
|
unsigned s;
|
||||||
|
assert(accuracyLog <= 8);
|
||||||
|
for (s = 0; s <= max; ++s) {
|
||||||
|
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
|
||||||
|
unsigned const norm256 = normAcc << shift;
|
||||||
|
assert(norm256 > 0);
|
||||||
|
assert(norm256 < 256);
|
||||||
|
cost += count[s] * kInverseProbabiltyLog256[norm256];
|
||||||
|
}
|
||||||
|
return cost >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Reads the max symbol value recorded in an FSE_CTable.
 * The value is taken from the second 16-bit word of the table.
 * NOTE(review): presumably this mirrors the header layout written when the
 * CTable is built (tableLog first, maxSymbolValue second) - confirm against
 * the FSE table builder.
 */
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
    /* go through void* so the table can be viewed as 16-bit words */
    void const* ptr = ctable;
    U16 const* u16ptr = (U16 const*)ptr;
    U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
    return maxSymbolValue;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the cost in bits of encoding the distribution in count using ctable.
|
||||||
|
* Returns an error if ctable cannot represent all the symbols in count.
|
||||||
|
*/
|
||||||
|
static size_t ZSTD_fseBitCost(
|
||||||
|
FSE_CTable const* ctable,
|
||||||
|
unsigned const* count,
|
||||||
|
unsigned const max)
|
||||||
|
{
|
||||||
|
unsigned const kAccuracyLog = 8;
|
||||||
|
size_t cost = 0;
|
||||||
|
unsigned s;
|
||||||
|
FSE_CState_t cstate;
|
||||||
|
FSE_initCState(&cstate, ctable);
|
||||||
|
if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
|
||||||
|
DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
|
||||||
|
ZSTD_getFSEMaxSymbolValue(ctable), max);
|
||||||
|
return ERROR(GENERIC);
|
||||||
|
}
|
||||||
|
for (s = 0; s <= max; ++s) {
|
||||||
|
unsigned const tableLog = cstate.stateLog;
|
||||||
|
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
|
||||||
|
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
|
||||||
|
if (count[s] == 0)
|
||||||
|
continue;
|
||||||
|
if (bitCost >= badCost) {
|
||||||
|
DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
|
||||||
|
return ERROR(GENERIC);
|
||||||
|
}
|
||||||
|
cost += count[s] * bitCost;
|
||||||
|
}
|
||||||
|
return cost >> kAccuracyLog;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the cost in bytes of encoding the normalized count header.
|
||||||
|
* Returns an error if any of the helper functions return an error.
|
||||||
|
*/
|
||||||
|
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
||||||
|
size_t const nbSeq, unsigned const FSELog)
|
||||||
|
{
|
||||||
|
BYTE wksp[FSE_NCOUNTBOUND];
|
||||||
|
S16 norm[MaxSeq + 1];
|
||||||
|
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||||
|
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
|
||||||
|
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
ZSTD_defaultDisallowed = 0,
|
ZSTD_defaultDisallowed = 0,
|
||||||
ZSTD_defaultAllowed = 1
|
ZSTD_defaultAllowed = 1
|
||||||
@ -1568,37 +1736,73 @@ typedef enum {
|
|||||||
|
|
||||||
MEM_STATIC
|
MEM_STATIC
|
||||||
symbolEncodingType_e ZSTD_selectEncodingType(
|
symbolEncodingType_e ZSTD_selectEncodingType(
|
||||||
FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
|
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
||||||
U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
|
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
||||||
|
FSE_CTable const* prevCTable,
|
||||||
|
short const* defaultNorm, U32 defaultNormLog,
|
||||||
|
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
||||||
|
ZSTD_strategy const strategy)
|
||||||
{
|
{
|
||||||
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
||||||
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
||||||
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
||||||
if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
|
if (mostFrequent == nbSeq) {
|
||||||
|
*repeatMode = FSE_repeat_none;
|
||||||
|
if (isDefaultAllowed && nbSeq <= 2) {
|
||||||
|
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
||||||
|
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
||||||
|
* If basic encoding isn't possible, always choose RLE.
|
||||||
|
*/
|
||||||
|
DEBUGLOG(5, "Selected set_basic");
|
||||||
|
return set_basic;
|
||||||
|
}
|
||||||
DEBUGLOG(5, "Selected set_rle");
|
DEBUGLOG(5, "Selected set_rle");
|
||||||
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
|
||||||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
|
||||||
* If basic encoding isn't possible, always choose RLE.
|
|
||||||
*/
|
|
||||||
*repeatMode = FSE_repeat_check;
|
|
||||||
return set_rle;
|
return set_rle;
|
||||||
}
|
}
|
||||||
if ( isDefaultAllowed
|
if (strategy < ZSTD_lazy) {
|
||||||
&& (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
|
if (isDefaultAllowed) {
|
||||||
DEBUGLOG(5, "Selected set_repeat");
|
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
|
||||||
return set_repeat;
|
DEBUGLOG(5, "Selected set_repeat");
|
||||||
}
|
return set_repeat;
|
||||||
if ( isDefaultAllowed
|
}
|
||||||
&& ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) {
|
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
|
||||||
DEBUGLOG(5, "Selected set_basic");
|
DEBUGLOG(5, "Selected set_basic");
|
||||||
/* The format allows default tables to be repeated, but it isn't useful.
|
/* The format allows default tables to be repeated, but it isn't useful.
|
||||||
* When using simple heuristics to select encoding type, we don't want
|
* When using simple heuristics to select encoding type, we don't want
|
||||||
* to confuse these tables with dictionaries. When running more careful
|
* to confuse these tables with dictionaries. When running more careful
|
||||||
* analysis, we don't need to waste time checking both repeating tables
|
* analysis, we don't need to waste time checking both repeating tables
|
||||||
* and default tables.
|
* and default tables.
|
||||||
*/
|
*/
|
||||||
*repeatMode = FSE_repeat_none;
|
*repeatMode = FSE_repeat_none;
|
||||||
return set_basic;
|
return set_basic;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
|
||||||
|
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
|
||||||
|
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
|
||||||
|
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
|
||||||
|
|
||||||
|
if (isDefaultAllowed) {
|
||||||
|
assert(!ZSTD_isError(basicCost));
|
||||||
|
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
|
||||||
|
}
|
||||||
|
assert(!ZSTD_isError(NCountCost));
|
||||||
|
assert(compressedCost < ERROR(maxCode));
|
||||||
|
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
|
||||||
|
(U32)basicCost, (U32)repeatCost, (U32)compressedCost);
|
||||||
|
if (basicCost <= repeatCost && basicCost <= compressedCost) {
|
||||||
|
DEBUGLOG(5, "Selected set_basic");
|
||||||
|
assert(isDefaultAllowed);
|
||||||
|
*repeatMode = FSE_repeat_none;
|
||||||
|
return set_basic;
|
||||||
|
}
|
||||||
|
if (repeatCost <= compressedCost) {
|
||||||
|
DEBUGLOG(5, "Selected set_repeat");
|
||||||
|
assert(!ZSTD_isError(repeatCost));
|
||||||
|
return set_repeat;
|
||||||
|
}
|
||||||
|
assert(compressedCost < basicCost && compressedCost < repeatCost);
|
||||||
}
|
}
|
||||||
DEBUGLOG(5, "Selected set_compressed");
|
DEBUGLOG(5, "Selected set_compressed");
|
||||||
*repeatMode = FSE_repeat_check;
|
*repeatMode = FSE_repeat_check;
|
||||||
@ -1803,10 +2007,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
const int bmi2)
|
const int bmi2)
|
||||||
{
|
{
|
||||||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
||||||
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||||
U32 count[MaxSeq+1];
|
U32 count[MaxSeq+1];
|
||||||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
||||||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
||||||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
||||||
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
|
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
|
||||||
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
||||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||||
@ -1817,6 +2022,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
BYTE* op = ostart;
|
BYTE* op = ostart;
|
||||||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||||
BYTE* seqHead;
|
BYTE* seqHead;
|
||||||
|
BYTE* lastNCount = NULL;
|
||||||
|
|
||||||
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
||||||
|
|
||||||
@ -1824,7 +2030,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
{ const BYTE* const literals = seqStorePtr->litStart;
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
||||||
size_t const litSize = seqStorePtr->lit - literals;
|
size_t const litSize = seqStorePtr->lit - literals;
|
||||||
size_t const cSize = ZSTD_compressLiterals(
|
size_t const cSize = ZSTD_compressLiterals(
|
||||||
prevEntropy, nextEntropy,
|
&prevEntropy->huf, &nextEntropy->huf,
|
||||||
cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
|
cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
|
||||||
op, dstCapacity,
|
op, dstCapacity,
|
||||||
literals, litSize,
|
literals, litSize,
|
||||||
@ -1844,13 +2050,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
else
|
else
|
||||||
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
||||||
if (nbSeq==0) {
|
if (nbSeq==0) {
|
||||||
memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
|
/* Copy the old tables over as if we repeated them */
|
||||||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
|
||||||
memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
|
return op - ostart;
|
||||||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
|
||||||
memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
|
|
||||||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
|
||||||
return op - ostart;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* seqHead : flags for FSE encoding type */
|
/* seqHead : flags for FSE encoding type */
|
||||||
@ -1862,13 +2064,17 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
{ U32 max = MaxLL;
|
{ U32 max = MaxLL;
|
||||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
|
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
|
||||||
DEBUGLOG(5, "Building LL table");
|
DEBUGLOG(5, "Building LL table");
|
||||||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
|
||||||
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
|
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
|
||||||
|
assert(set_basic < set_compressed && set_rle < set_compressed);
|
||||||
|
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
||||||
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||||
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
|
||||||
workspace, HUF_WORKSPACE_SIZE);
|
workspace, HUF_WORKSPACE_SIZE);
|
||||||
if (ZSTD_isError(countSize)) return countSize;
|
if (ZSTD_isError(countSize)) return countSize;
|
||||||
|
if (LLtype == set_compressed)
|
||||||
|
lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
} }
|
} }
|
||||||
/* build CTable for Offsets */
|
/* build CTable for Offsets */
|
||||||
@ -1877,26 +2083,32 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||||
DEBUGLOG(5, "Building OF table");
|
DEBUGLOG(5, "Building OF table");
|
||||||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
|
||||||
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
|
Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
|
||||||
|
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
||||||
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||||
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
|
||||||
workspace, HUF_WORKSPACE_SIZE);
|
workspace, HUF_WORKSPACE_SIZE);
|
||||||
if (ZSTD_isError(countSize)) return countSize;
|
if (ZSTD_isError(countSize)) return countSize;
|
||||||
|
if (Offtype == set_compressed)
|
||||||
|
lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
} }
|
} }
|
||||||
/* build CTable for MatchLengths */
|
/* build CTable for MatchLengths */
|
||||||
{ U32 max = MaxML;
|
{ U32 max = MaxML;
|
||||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
|
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
|
||||||
DEBUGLOG(5, "Building ML table");
|
DEBUGLOG(5, "Building ML table");
|
||||||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
|
||||||
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
|
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
|
||||||
|
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
||||||
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||||
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
|
||||||
workspace, HUF_WORKSPACE_SIZE);
|
workspace, HUF_WORKSPACE_SIZE);
|
||||||
if (ZSTD_isError(countSize)) return countSize;
|
if (ZSTD_isError(countSize)) return countSize;
|
||||||
|
if (MLtype == set_compressed)
|
||||||
|
lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
@ -1911,6 +2123,21 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
longOffsets, bmi2);
|
longOffsets, bmi2);
|
||||||
if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
|
if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
|
||||||
op += bitstreamSize;
|
op += bitstreamSize;
|
||||||
|
/* zstd versions <= 1.3.4 mistakenly report corruption when
|
||||||
|
* FSE_readNCount() receives a buffer < 4 bytes.
|
||||||
|
* Fixed by https://github.com/facebook/zstd/pull/1146.
|
||||||
|
* This can happen when the last set_compressed table present is 2
|
||||||
|
* bytes and the bitstream is only one byte.
|
||||||
|
* In this exceedingly rare case, we will simply emit an uncompressed
|
||||||
|
* block, since it isn't worth optimizing.
|
||||||
|
*/
|
||||||
|
if (lastNCount && (op - lastNCount) < 4) {
|
||||||
|
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
|
||||||
|
assert(op - lastNCount == 3);
|
||||||
|
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
|
||||||
|
"emitting an uncompressed block.");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return op - ostart;
|
return op - ostart;
|
||||||
@ -1926,6 +2153,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|||||||
size_t const cSize = ZSTD_compressSequences_internal(
|
size_t const cSize = ZSTD_compressSequences_internal(
|
||||||
seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
|
seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
|
||||||
workspace, bmi2);
|
workspace, bmi2);
|
||||||
|
if (cSize == 0) return 0;
|
||||||
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
|
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
|
||||||
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
|
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
|
||||||
*/
|
*/
|
||||||
@ -1942,8 +2170,8 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|||||||
* block. After the first block, the offcode table might not have large
|
* block. After the first block, the offcode table might not have large
|
||||||
* enough codes to represent the offsets in the data.
|
* enough codes to represent the offsets in the data.
|
||||||
*/
|
*/
|
||||||
if (nextEntropy->offcode_repeatMode == FSE_repeat_valid)
|
if (nextEntropy->fse.offcode_repeatMode == FSE_repeat_valid)
|
||||||
nextEntropy->offcode_repeatMode = FSE_repeat_check;
|
nextEntropy->fse.offcode_repeatMode = FSE_repeat_check;
|
||||||
|
|
||||||
return cSize;
|
return cSize;
|
||||||
}
|
}
|
||||||
@ -1951,9 +2179,9 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
|||||||
/* ZSTD_selectBlockCompressor() :
|
/* ZSTD_selectBlockCompressor() :
|
||||||
* Not static, but internal use only (used by long distance matcher)
|
* Not static, but internal use only (used by long distance matcher)
|
||||||
* assumption : strat is a valid strategy */
|
* assumption : strat is a valid strategy */
|
||||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
|
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
|
||||||
{
|
{
|
||||||
static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
|
static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
|
||||||
{ ZSTD_compressBlock_fast /* default for 0 */,
|
{ ZSTD_compressBlock_fast /* default for 0 */,
|
||||||
ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
|
ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
|
||||||
ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
|
ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
|
||||||
@ -1961,13 +2189,19 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict
|
|||||||
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
|
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
|
||||||
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
|
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
|
||||||
ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
|
ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
|
||||||
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
|
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict },
|
||||||
|
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
|
||||||
|
ZSTD_compressBlock_fast_dictMatchState,
|
||||||
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ }
|
||||||
};
|
};
|
||||||
|
ZSTD_blockCompressor selectedCompressor;
|
||||||
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
|
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
|
||||||
|
|
||||||
assert((U32)strat >= (U32)ZSTD_fast);
|
assert((U32)strat >= (U32)ZSTD_fast);
|
||||||
assert((U32)strat <= (U32)ZSTD_btultra);
|
assert((U32)strat <= (U32)ZSTD_btultra);
|
||||||
return blockCompressor[extDict!=0][(U32)strat];
|
selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
|
||||||
|
assert(selectedCompressor != NULL);
|
||||||
|
return selectedCompressor;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
|
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
|
||||||
@ -1999,6 +2233,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
ZSTD_resetSeqStore(&(zc->seqStore));
|
ZSTD_resetSeqStore(&(zc->seqStore));
|
||||||
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
|
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
|
||||||
|
|
||||||
|
/* a gap between an attached dict and the current window is not safe,
|
||||||
|
* they must remain adjacent, and when that stops being the case, the dict
|
||||||
|
* must be unset */
|
||||||
|
assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
|
||||||
|
|
||||||
/* limited update after a very long match */
|
/* limited update after a very long match */
|
||||||
{ const BYTE* const base = ms->window.base;
|
{ const BYTE* const base = ms->window.base;
|
||||||
const BYTE* const istart = (const BYTE*)src;
|
const BYTE* const istart = (const BYTE*)src;
|
||||||
@ -2009,7 +2248,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* select and store sequences */
|
/* select and store sequences */
|
||||||
{ U32 const extDict = ZSTD_window_hasExtDict(ms->window);
|
{ ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
|
||||||
size_t lastLLSize;
|
size_t lastLLSize;
|
||||||
{ int i;
|
{ int i;
|
||||||
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
||||||
@ -2023,7 +2262,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
ms, &zc->seqStore,
|
ms, &zc->seqStore,
|
||||||
zc->blockState.nextCBlock->rep,
|
zc->blockState.nextCBlock->rep,
|
||||||
&zc->appliedParams.cParams,
|
&zc->appliedParams.cParams,
|
||||||
src, srcSize, extDict);
|
src, srcSize);
|
||||||
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
|
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
|
||||||
} else if (zc->appliedParams.ldmParams.enableLdm) {
|
} else if (zc->appliedParams.ldmParams.enableLdm) {
|
||||||
rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
|
rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
|
||||||
@ -2040,10 +2279,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
ms, &zc->seqStore,
|
ms, &zc->seqStore,
|
||||||
zc->blockState.nextCBlock->rep,
|
zc->blockState.nextCBlock->rep,
|
||||||
&zc->appliedParams.cParams,
|
&zc->appliedParams.cParams,
|
||||||
src, srcSize, extDict);
|
src, srcSize);
|
||||||
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
||||||
} else { /* not long range mode */
|
} else { /* not long range mode */
|
||||||
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
|
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
|
||||||
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
|
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
|
||||||
}
|
}
|
||||||
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
|
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
|
||||||
@ -2110,8 +2349,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
|
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
|
||||||
else ms->nextToUpdate -= correction;
|
else ms->nextToUpdate -= correction;
|
||||||
ms->loadedDictEnd = 0;
|
ms->loadedDictEnd = 0;
|
||||||
|
ms->dictMatchState = NULL;
|
||||||
}
|
}
|
||||||
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
|
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
||||||
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
||||||
|
|
||||||
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
|
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
|
||||||
@ -2384,7 +2624,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
dictPtr += 4;
|
dictPtr += 4;
|
||||||
|
|
||||||
{ unsigned maxSymbolValue = 255;
|
{ unsigned maxSymbolValue = 255;
|
||||||
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
|
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
|
||||||
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
||||||
if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
|
if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
|
||||||
dictPtr += hufHeaderSize;
|
dictPtr += hufHeaderSize;
|
||||||
@ -2396,7 +2636,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
||||||
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
||||||
/* fill all offset symbols to avoid garbage at end of table */
|
/* fill all offset symbols to avoid garbage at end of table */
|
||||||
CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
|
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
|
||||||
dictionary_corrupted);
|
dictionary_corrupted);
|
||||||
dictPtr += offcodeHeaderSize;
|
dictPtr += offcodeHeaderSize;
|
||||||
}
|
}
|
||||||
@ -2408,7 +2648,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
|
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
|
||||||
/* Every match length code must have non-zero probability */
|
/* Every match length code must have non-zero probability */
|
||||||
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
|
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
|
||||||
CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
|
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
|
||||||
dictionary_corrupted);
|
dictionary_corrupted);
|
||||||
dictPtr += matchlengthHeaderSize;
|
dictPtr += matchlengthHeaderSize;
|
||||||
}
|
}
|
||||||
@ -2420,7 +2660,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
|
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
|
||||||
/* Every literal length code must have non-zero probability */
|
/* Every literal length code must have non-zero probability */
|
||||||
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
|
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
|
||||||
CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
|
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
|
||||||
dictionary_corrupted);
|
dictionary_corrupted);
|
||||||
dictPtr += litlengthHeaderSize;
|
dictPtr += litlengthHeaderSize;
|
||||||
}
|
}
|
||||||
@ -2446,10 +2686,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
|
if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
|
||||||
} }
|
} }
|
||||||
|
|
||||||
bs->entropy.hufCTable_repeatMode = HUF_repeat_valid;
|
bs->entropy.huf.repeatMode = HUF_repeat_valid;
|
||||||
bs->entropy.offcode_repeatMode = FSE_repeat_valid;
|
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
|
||||||
bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
|
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
|
||||||
bs->entropy.litlength_repeatMode = FSE_repeat_valid;
|
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
|
||||||
CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
|
CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
|
||||||
return dictID;
|
return dictID;
|
||||||
}
|
}
|
||||||
|
@ -53,14 +53,22 @@ typedef struct ZSTD_prefixDict_s {
|
|||||||
} ZSTD_prefixDict;
|
} ZSTD_prefixDict;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
|
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||||
|
HUF_repeat repeatMode;
|
||||||
|
} ZSTD_hufCTables_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
|
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
|
||||||
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
|
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
|
||||||
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
|
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
|
||||||
HUF_repeat hufCTable_repeatMode;
|
|
||||||
FSE_repeat offcode_repeatMode;
|
FSE_repeat offcode_repeatMode;
|
||||||
FSE_repeat matchlength_repeatMode;
|
FSE_repeat matchlength_repeatMode;
|
||||||
FSE_repeat litlength_repeatMode;
|
FSE_repeat litlength_repeatMode;
|
||||||
|
} ZSTD_fseCTables_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
ZSTD_hufCTables_t huf;
|
||||||
|
ZSTD_fseCTables_t fse;
|
||||||
} ZSTD_entropyCTables_t;
|
} ZSTD_entropyCTables_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -114,7 +122,8 @@ typedef struct {
|
|||||||
U32 lowLimit; /* below that point, no more data */
|
U32 lowLimit; /* below that point, no more data */
|
||||||
} ZSTD_window_t;
|
} ZSTD_window_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
||||||
|
struct ZSTD_matchState_t {
|
||||||
ZSTD_window_t window; /* State for window round buffer management */
|
ZSTD_window_t window; /* State for window round buffer management */
|
||||||
U32 loadedDictEnd; /* index of end of dictionary */
|
U32 loadedDictEnd; /* index of end of dictionary */
|
||||||
U32 nextToUpdate; /* index from which to continue table update */
|
U32 nextToUpdate; /* index from which to continue table update */
|
||||||
@ -124,7 +133,8 @@ typedef struct {
|
|||||||
U32* hashTable3;
|
U32* hashTable3;
|
||||||
U32* chainTable;
|
U32* chainTable;
|
||||||
optState_t opt; /* optimal parser state */
|
optState_t opt; /* optimal parser state */
|
||||||
} ZSTD_matchState_t;
|
const ZSTD_matchState_t *dictMatchState;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
ZSTD_compressedBlockState_t* prevCBlock;
|
ZSTD_compressedBlockState_t* prevCBlock;
|
||||||
@ -240,10 +250,13 @@ struct ZSTD_CCtx_s {
|
|||||||
|
|
||||||
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
||||||
|
|
||||||
|
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
|
||||||
|
|
||||||
|
|
||||||
typedef size_t (*ZSTD_blockCompressor) (
|
typedef size_t (*ZSTD_blockCompressor) (
|
||||||
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
||||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
|
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
|
||||||
|
|
||||||
|
|
||||||
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
||||||
@ -500,6 +513,20 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
|
|||||||
return window.lowLimit < window.dictLimit;
|
return window.lowLimit < window.dictLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ZSTD_matchState_dictMode():
|
||||||
|
* Inspects the provided matchState and figures out what dictMode should be
|
||||||
|
* passed to the compressor.
|
||||||
|
*/
|
||||||
|
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
||||||
|
{
|
||||||
|
return ZSTD_window_hasExtDict(ms->window) ?
|
||||||
|
ZSTD_extDict :
|
||||||
|
ms->dictMatchState != NULL ?
|
||||||
|
ZSTD_dictMatchState :
|
||||||
|
ZSTD_noDict;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ZSTD_window_needOverflowCorrection():
|
* ZSTD_window_needOverflowCorrection():
|
||||||
* Returns non-zero if the indices are getting too large and need overflow
|
* Returns non-zero if the indices are getting too large and need overflow
|
||||||
@ -567,18 +594,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|||||||
* ZSTD_window_enforceMaxDist():
|
* ZSTD_window_enforceMaxDist():
|
||||||
* Updates lowLimit so that:
|
* Updates lowLimit so that:
|
||||||
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
|
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
|
||||||
|
*
|
||||||
* This allows a simple check that index >= lowLimit to see if index is valid.
|
* This allows a simple check that index >= lowLimit to see if index is valid.
|
||||||
* This must be called before a block compression call, with srcEnd as the block
|
* This must be called before a block compression call, with srcEnd as the block
|
||||||
* source end.
|
* source end.
|
||||||
|
*
|
||||||
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
|
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
|
||||||
* This is because dictionaries are allowed to be referenced as long as the last
|
* This is because dictionaries are allowed to be referenced as long as the last
|
||||||
* byte of the dictionary is in the window, but once they are out of range,
|
* byte of the dictionary is in the window, but once they are out of range,
|
||||||
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
|
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
|
||||||
* loadedDictEnd == 0.
|
* loadedDictEnd == 0.
|
||||||
|
*
|
||||||
|
* In normal dict mode, the dict is between lowLimit and dictLimit. In
|
||||||
|
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
|
||||||
|
* is below them. forceWindow and dictMatchState are therefore incompatible.
|
||||||
*/
|
*/
|
||||||
MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||||
void const* srcEnd, U32 maxDist,
|
void const* srcEnd, U32 maxDist,
|
||||||
U32* loadedDictEndPtr)
|
U32* loadedDictEndPtr,
|
||||||
|
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||||
{
|
{
|
||||||
U32 const current = (U32)((BYTE const*)srcEnd - window->base);
|
U32 const current = (U32)((BYTE const*)srcEnd - window->base);
|
||||||
U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
|
U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
|
||||||
@ -592,6 +626,8 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
|||||||
}
|
}
|
||||||
if (loadedDictEndPtr)
|
if (loadedDictEndPtr)
|
||||||
*loadedDictEndPtr = 0;
|
*loadedDictEndPtr = 0;
|
||||||
|
if (dictMatchStatePtr)
|
||||||
|
*dictMatchStatePtr = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,26 +45,57 @@ FORCE_INLINE_TEMPLATE
|
|||||||
size_t ZSTD_compressBlock_fast_generic(
|
size_t ZSTD_compressBlock_fast_generic(
|
||||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
void const* src, size_t srcSize,
|
void const* src, size_t srcSize,
|
||||||
U32 const hlog, U32 const stepSize, U32 const mls)
|
U32 const hlog, U32 const stepSize, U32 const mls,
|
||||||
|
ZSTD_dictMode_e const dictMode)
|
||||||
{
|
{
|
||||||
U32* const hashTable = ms->hashTable;
|
U32* const hashTable = ms->hashTable;
|
||||||
const BYTE* const base = ms->window.base;
|
const BYTE* const base = ms->window.base;
|
||||||
const BYTE* const istart = (const BYTE*)src;
|
const BYTE* const istart = (const BYTE*)src;
|
||||||
const BYTE* ip = istart;
|
const BYTE* ip = istart;
|
||||||
const BYTE* anchor = istart;
|
const BYTE* anchor = istart;
|
||||||
const U32 lowestIndex = ms->window.dictLimit;
|
const U32 prefixLowestIndex = ms->window.dictLimit;
|
||||||
const BYTE* const lowest = base + lowestIndex;
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
||||||
const BYTE* const iend = istart + srcSize;
|
const BYTE* const iend = istart + srcSize;
|
||||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||||
U32 offset_1=rep[0], offset_2=rep[1];
|
U32 offset_1=rep[0], offset_2=rep[1];
|
||||||
U32 offsetSaved = 0;
|
U32 offsetSaved = 0;
|
||||||
|
|
||||||
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||||
|
const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
|
||||||
|
dms->hashTable : NULL;
|
||||||
|
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
|
||||||
|
dms->window.dictLimit : 0;
|
||||||
|
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||||
|
dms->window.base : NULL;
|
||||||
|
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
|
||||||
|
dictBase + dictLowestIndex : NULL;
|
||||||
|
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||||
|
dms->window.nextSrc : NULL;
|
||||||
|
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||||
|
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||||
|
0;
|
||||||
|
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
|
||||||
|
|
||||||
|
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||||
|
|
||||||
|
/* otherwise, we would get index underflow when translating a dict index
|
||||||
|
* into a local index */
|
||||||
|
assert(dictMode != ZSTD_dictMatchState
|
||||||
|
|| prefixLowestIndex >= (U32)(dictEnd - dictBase));
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
ip += (ip==lowest);
|
ip += (dictAndPrefixLength == 0);
|
||||||
{ U32 const maxRep = (U32)(ip-lowest);
|
if (dictMode == ZSTD_noDict) {
|
||||||
|
U32 const maxRep = (U32)(ip - prefixLowest);
|
||||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||||
}
|
}
|
||||||
|
if (dictMode == ZSTD_dictMatchState) {
|
||||||
|
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||||
|
* disabling. */
|
||||||
|
assert(offset_1 <= dictAndPrefixLength);
|
||||||
|
assert(offset_2 <= dictAndPrefixLength);
|
||||||
|
}
|
||||||
|
|
||||||
/* Main Search Loop */
|
/* Main Search Loop */
|
||||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||||
@ -73,26 +104,62 @@ size_t ZSTD_compressBlock_fast_generic(
|
|||||||
U32 const current = (U32)(ip-base);
|
U32 const current = (U32)(ip-base);
|
||||||
U32 const matchIndex = hashTable[h];
|
U32 const matchIndex = hashTable[h];
|
||||||
const BYTE* match = base + matchIndex;
|
const BYTE* match = base + matchIndex;
|
||||||
|
const U32 repIndex = current + 1 - offset_1;
|
||||||
|
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||||
|
&& repIndex < prefixLowestIndex) ?
|
||||||
|
dictBase + (repIndex - dictIndexDelta) :
|
||||||
|
base + repIndex;
|
||||||
hashTable[h] = current; /* update hash table */
|
hashTable[h] = current; /* update hash table */
|
||||||
|
|
||||||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
if (dictMode == ZSTD_dictMatchState
|
||||||
|
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||||
|
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||||
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
||||||
|
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4;
|
||||||
|
ip++;
|
||||||
|
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||||
|
} else if ( dictMode == ZSTD_noDict
|
||||||
|
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||||
ip++;
|
ip++;
|
||||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||||
} else {
|
} else if ( (matchIndex <= prefixLowestIndex)
|
||||||
if ( (matchIndex <= lowestIndex)
|
|| (MEM_read32(match) != MEM_read32(ip)) ) {
|
||||||
|| (MEM_read32(match) != MEM_read32(ip)) ) {
|
if (dictMode == ZSTD_dictMatchState) {
|
||||||
|
U32 const dictMatchIndex = dictHashTable[h];
|
||||||
|
const BYTE* dictMatch = dictBase + dictMatchIndex;
|
||||||
|
if (dictMatchIndex <= dictLowestIndex ||
|
||||||
|
MEM_read32(dictMatch) != MEM_read32(ip)) {
|
||||||
|
assert(stepSize >= 1);
|
||||||
|
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
/* found a dict match */
|
||||||
|
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
|
||||||
|
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4;
|
||||||
|
while (((ip>anchor) & (dictMatch>dictLowest))
|
||||||
|
&& (ip[-1] == dictMatch[-1])) {
|
||||||
|
ip--; dictMatch--; mLength++;
|
||||||
|
} /* catch up */
|
||||||
|
offset_2 = offset_1;
|
||||||
|
offset_1 = offset;
|
||||||
|
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
assert(stepSize >= 1);
|
assert(stepSize >= 1);
|
||||||
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
/* found a regular match */
|
||||||
|
U32 const offset = (U32)(ip-match);
|
||||||
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
|
||||||
{ U32 const offset = (U32)(ip-match);
|
while (((ip>anchor) & (match>prefixLowest))
|
||||||
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||||
offset_2 = offset_1;
|
offset_2 = offset_1;
|
||||||
offset_1 = offset;
|
offset_1 = offset;
|
||||||
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||||
} }
|
}
|
||||||
|
|
||||||
/* match found */
|
/* match found */
|
||||||
ip += mLength;
|
ip += mLength;
|
||||||
@ -102,19 +169,43 @@ size_t ZSTD_compressBlock_fast_generic(
|
|||||||
/* Fill Table */
|
/* Fill Table */
|
||||||
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
|
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
|
||||||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
|
||||||
|
|
||||||
/* check immediate repcode */
|
/* check immediate repcode */
|
||||||
while ( (ip <= ilimit)
|
if (dictMode == ZSTD_dictMatchState) {
|
||||||
&& ( (offset_2>0)
|
while (ip <= ilimit) {
|
||||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
U32 const current2 = (U32)(ip-base);
|
||||||
/* store sequence */
|
U32 const repIndex2 = current2 - offset_2;
|
||||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
||||||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
dictBase - dictIndexDelta + repIndex2 :
|
||||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
|
base + repIndex2;
|
||||||
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||||
ip += rLength;
|
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||||
anchor = ip;
|
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||||
continue; /* faster when present ... (?) */
|
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4;
|
||||||
} } }
|
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||||
|
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
|
||||||
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||||
|
ip += repLength2;
|
||||||
|
anchor = ip;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dictMode == ZSTD_noDict) {
|
||||||
|
while ( (ip <= ilimit)
|
||||||
|
&& ( (offset_2>0)
|
||||||
|
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||||
|
/* store sequence */
|
||||||
|
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||||
|
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||||
|
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
|
||||||
|
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
|
||||||
|
ip += rLength;
|
||||||
|
anchor = ip;
|
||||||
|
continue; /* faster when present ... (?) */
|
||||||
|
} } } }
|
||||||
|
|
||||||
/* save reps for next block */
|
/* save reps for next block */
|
||||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||||
@ -132,17 +223,40 @@ size_t ZSTD_compressBlock_fast(
|
|||||||
U32 const hlog = cParams->hashLog;
|
U32 const hlog = cParams->hashLog;
|
||||||
U32 const mls = cParams->searchLength;
|
U32 const mls = cParams->searchLength;
|
||||||
U32 const stepSize = cParams->targetLength;
|
U32 const stepSize = cParams->targetLength;
|
||||||
|
assert(ms->dictMatchState == NULL);
|
||||||
switch(mls)
|
switch(mls)
|
||||||
{
|
{
|
||||||
default: /* includes case 3 */
|
default: /* includes case 3 */
|
||||||
case 4 :
|
case 4 :
|
||||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict);
|
||||||
case 5 :
|
case 5 :
|
||||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict);
|
||||||
case 6 :
|
case 6 :
|
||||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict);
|
||||||
case 7 :
|
case 7 :
|
||||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||||
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
|
||||||
|
{
|
||||||
|
U32 const hlog = cParams->hashLog;
|
||||||
|
U32 const mls = cParams->searchLength;
|
||||||
|
U32 const stepSize = cParams->targetLength;
|
||||||
|
assert(ms->dictMatchState != NULL);
|
||||||
|
switch(mls)
|
||||||
|
{
|
||||||
|
default: /* includes case 3 */
|
||||||
|
case 4 :
|
||||||
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState);
|
||||||
|
case 5 :
|
||||||
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState);
|
||||||
|
case 6 :
|
||||||
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState);
|
||||||
|
case 7 :
|
||||||
|
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|||||||
size_t ZSTD_compressBlock_fast(
|
size_t ZSTD_compressBlock_fast(
|
||||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
||||||
|
size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||||
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
||||||
size_t ZSTD_compressBlock_fast_extDict(
|
size_t ZSTD_compressBlock_fast_extDict(
|
||||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
||||||
|
@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences(
|
|||||||
* * Try invalidation after the sequence generation and test the
|
* * Try invalidation after the sequence generation and test the
|
||||||
* the offset against maxDist directly.
|
* the offset against maxDist directly.
|
||||||
*/
|
*/
|
||||||
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
|
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
|
||||||
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
||||||
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
||||||
ldmState, sequences, params, chunkStart, chunkSize);
|
ldmState, sequences, params, chunkStart, chunkSize);
|
||||||
@ -591,12 +591,12 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
|||||||
|
|
||||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
|
||||||
int const extDict)
|
|
||||||
{
|
{
|
||||||
unsigned const minMatch = cParams->searchLength;
|
unsigned const minMatch = cParams->searchLength;
|
||||||
ZSTD_blockCompressor const blockCompressor =
|
ZSTD_blockCompressor const blockCompressor =
|
||||||
ZSTD_selectBlockCompressor(cParams->strategy, extDict);
|
ZSTD_selectBlockCompressor(cParams->strategy,
|
||||||
|
ZSTD_matchState_dictMode(ms));
|
||||||
BYTE const* const base = ms->window.base;
|
BYTE const* const base = ms->window.base;
|
||||||
/* Input bounds */
|
/* Input bounds */
|
||||||
BYTE const* const istart = (BYTE const*)src;
|
BYTE const* const istart = (BYTE const*)src;
|
||||||
|
@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences(
|
|||||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||||
ZSTD_compressionParameters const* cParams,
|
ZSTD_compressionParameters const* cParams,
|
||||||
void const* src, size_t srcSize,
|
void const* src, size_t srcSize);
|
||||||
int const extDict);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ZSTD_ldm_skipSequences():
|
* ZSTD_ldm_skipSequences():
|
||||||
|
@ -39,7 +39,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
optPtr->priceType = zop_predef;
|
optPtr->priceType = zop_predef;
|
||||||
|
|
||||||
assert(optPtr->symbolCosts != NULL);
|
assert(optPtr->symbolCosts != NULL);
|
||||||
if (optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
|
||||||
if (srcSize <= 8192) /* heuristic */
|
if (srcSize <= 8192) /* heuristic */
|
||||||
optPtr->priceType = zop_static;
|
optPtr->priceType = zop_static;
|
||||||
else {
|
else {
|
||||||
@ -52,7 +52,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
{ unsigned lit;
|
{ unsigned lit;
|
||||||
for (lit=0; lit<=MaxLit; lit++) {
|
for (lit=0; lit<=MaxLit; lit++) {
|
||||||
U32 const scaleLog = 11; /* scale to 2K */
|
U32 const scaleLog = 11; /* scale to 2K */
|
||||||
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit);
|
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
|
||||||
assert(bitCost <= scaleLog);
|
assert(bitCost <= scaleLog);
|
||||||
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
||||||
optPtr->litSum += optPtr->litFreq[lit];
|
optPtr->litSum += optPtr->litFreq[lit];
|
||||||
@ -60,7 +60,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
|
|
||||||
{ unsigned ll;
|
{ unsigned ll;
|
||||||
FSE_CState_t llstate;
|
FSE_CState_t llstate;
|
||||||
FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable);
|
FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
|
||||||
optPtr->litLengthSum = 0;
|
optPtr->litLengthSum = 0;
|
||||||
for (ll=0; ll<=MaxLL; ll++) {
|
for (ll=0; ll<=MaxLL; ll++) {
|
||||||
U32 const scaleLog = 10; /* scale to 1K */
|
U32 const scaleLog = 10; /* scale to 1K */
|
||||||
@ -72,7 +72,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
|
|
||||||
{ unsigned ml;
|
{ unsigned ml;
|
||||||
FSE_CState_t mlstate;
|
FSE_CState_t mlstate;
|
||||||
FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable);
|
FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
|
||||||
optPtr->matchLengthSum = 0;
|
optPtr->matchLengthSum = 0;
|
||||||
for (ml=0; ml<=MaxML; ml++) {
|
for (ml=0; ml<=MaxML; ml++) {
|
||||||
U32 const scaleLog = 10;
|
U32 const scaleLog = 10;
|
||||||
@ -84,7 +84,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
|
|
||||||
{ unsigned of;
|
{ unsigned of;
|
||||||
FSE_CState_t ofstate;
|
FSE_CState_t ofstate;
|
||||||
FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable);
|
FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
|
||||||
optPtr->offCodeSum = 0;
|
optPtr->offCodeSum = 0;
|
||||||
for (of=0; of<=MaxOff; of++) {
|
for (of=0; of<=MaxOff; of++) {
|
||||||
U32 const scaleLog = 10;
|
U32 const scaleLog = 10;
|
||||||
@ -180,9 +180,9 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|||||||
if (optPtr->priceType == zop_static) {
|
if (optPtr->priceType == zop_static) {
|
||||||
U32 u, cost;
|
U32 u, cost;
|
||||||
assert(optPtr->symbolCosts != NULL);
|
assert(optPtr->symbolCosts != NULL);
|
||||||
assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid);
|
assert(optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid);
|
||||||
for (u=0, cost=0; u < litLength; u++)
|
for (u=0, cost=0; u < litLength; u++)
|
||||||
cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]);
|
cost += HUF_getNbBits(optPtr->symbolCosts->huf.CTable, literals[u]);
|
||||||
return cost * BITCOST_MULTIPLIER;
|
return cost * BITCOST_MULTIPLIER;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,7 +202,7 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|||||||
if (optPtr->priceType == zop_static) {
|
if (optPtr->priceType == zop_static) {
|
||||||
U32 const llCode = ZSTD_LLcode(litLength);
|
U32 const llCode = ZSTD_LLcode(litLength);
|
||||||
FSE_CState_t cstate;
|
FSE_CState_t cstate;
|
||||||
FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable);
|
FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
|
||||||
{ U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode);
|
{ U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode);
|
||||||
DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER);
|
DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER);
|
||||||
return price;
|
return price;
|
||||||
@ -234,7 +234,7 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con
|
|||||||
if (optPtr->priceType == zop_static) {
|
if (optPtr->priceType == zop_static) {
|
||||||
U32 const llCode = ZSTD_LLcode(litLength);
|
U32 const llCode = ZSTD_LLcode(litLength);
|
||||||
FSE_CState_t cstate;
|
FSE_CState_t cstate;
|
||||||
FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable);
|
FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
|
||||||
return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
|
return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
|
||||||
+ BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode)
|
+ BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode)
|
||||||
- BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0);
|
- BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0);
|
||||||
@ -284,8 +284,8 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength,
|
|||||||
if (optPtr->priceType == zop_static) {
|
if (optPtr->priceType == zop_static) {
|
||||||
U32 const mlCode = ZSTD_MLcode(mlBase);
|
U32 const mlCode = ZSTD_MLcode(mlBase);
|
||||||
FSE_CState_t mlstate, offstate;
|
FSE_CState_t mlstate, offstate;
|
||||||
FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable);
|
FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
|
||||||
FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable);
|
FSE_initCState(&offstate, optPtr->symbolCosts->fse.offcodeCTable);
|
||||||
return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER
|
return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER
|
||||||
+ BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER;
|
+ BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user