Merge branch 'dev' into fracFse

Yann Collet 2018-05-24 14:09:49 -07:00
commit b5ef32fea7
10 changed files with 567 additions and 160 deletions


@@ -603,6 +603,7 @@ Let's call its first byte `byte0`.
- `if (byte0 == 0)` : there are no sequences.
  The sequence section stops there.
  Decompressed content is defined entirely as Literals Section content.
  The FSE tables used in `Repeat_Mode` aren't updated.
- `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte.
- `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes.
- `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes.
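
As a quick illustration of the three cases above, a decoder's header parse could look like the following (a minimal sketch; `decodeNbSeq` and its interface are hypothetical, not part of the spec or this patch):

    /* Hypothetical sketch of the Number_of_Sequences parse described above.
     * ip points at byte0; *headerSize receives the number of bytes consumed. */
    static size_t decodeNbSeq(const unsigned char* ip, size_t* headerSize)
    {
        unsigned char const byte0 = ip[0];
        if (byte0 == 0)  { *headerSize = 1; return 0; }    /* no sequences */
        if (byte0 < 128) { *headerSize = 1; return byte0; }
        if (byte0 < 255) { *headerSize = 2; return ((size_t)(byte0 - 128) << 8) + ip[1]; }
        *headerSize = 3;                                   /* byte0 == 255 */
        return (size_t)ip[1] + ((size_t)ip[2] << 8) + 0x7F00;
    }
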
@@ -631,7 +632,7 @@ They follow the same enumeration :
  No distribution table will be present.
- `RLE_Mode` : The table description consists of a single byte.
  This code will be repeated for all sequences.
- `Repeat_Mode` : The table used in the previous `Compressed_Block` with `Number_of_Sequences > 0` will be used again,
  or, if this is the first block, the table in the dictionary will be used.
  No distribution table will be present.
  Note that this includes `RLE_mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated.


@@ -72,7 +72,19 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
unsigned charnum = 0;
int previous0 = 0;
if (hbSize < 4) {
/* This function only works when hbSize >= 4 */
char buffer[4];
memset(buffer, 0, sizeof(buffer));
memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
if (FSE_isError(countSize)) return countSize;
if (countSize > hbSize) return ERROR(corruption_detected);
return countSize;
} }
assert(hbSize >= 4);
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -105,6 +117,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
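
The fix above works by zero-padding short headers into a 4-byte stack buffer and recursing, then rejecting the result if the parser claims to have consumed more bytes than the caller really had. The same pattern in isolation (a minimal sketch; readLE32_min4/readLE32_any are hypothetical stand-ins, not zstd API):

    #include <string.h>

    /* Stand-in for a reader that is only safe when >= 4 bytes are present. */
    static unsigned readLE32_min4(const unsigned char* p)
    {
        return (unsigned)p[0] | ((unsigned)p[1] << 8)
             | ((unsigned)p[2] << 16) | ((unsigned)p[3] << 24);
    }

    /* Wrapper accepting any size: shorter inputs are zero-extended into a
     * 4-byte stack buffer, as in the FSE_readNCount() fix above. A real
     * parser must additionally verify that the bytes it consumed all came
     * from the caller's buffer (the countSize > hbSize check above). */
    static unsigned readLE32_any(const void* src, size_t srcSize)
    {
        unsigned char buffer[4] = { 0, 0, 0, 0 };
        if (srcSize >= 4) return readLE32_min4((const unsigned char*)src);
        memcpy(buffer, src, srcSize);
        return readLE32_min4(buffer);
    }
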


@@ -591,8 +591,9 @@ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
* Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
{
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
U32 const threshold = (minNbBits+1) << 16;
assert(tableLog < 16);


@@ -946,10 +946,10 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
int i;
for (i = 0; i < ZSTD_REP_NUM; ++i)
bs->rep[i] = repStartValue[i];
bs->entropy.huf.repeatMode = HUF_repeat_none;
bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}
/*! ZSTD_invalidateMatchState()
@@ -963,6 +963,7 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
ms->nextToUpdate = ms->window.dictLimit + 1;
ms->loadedDictEnd = 0;
ms->opt.litLengthSum = 0; /* force reset of btopt stats */
ms->dictMatchState = NULL;
}
/*! ZSTD_continueCCtx() :
@@ -1203,17 +1204,52 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
/* We have a choice between copying the dictionary context into the working
* context, or referencing the dictionary context from the working context
* in-place. We decide here which strategy to use. */
const int attachDict = ( pledgedSrcSize <= 8 KB
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN )
&& !params.forceWindow /* dictMatchState isn't correctly
* handled in _enforceMaxDist */
&& cdict->cParams.strategy == ZSTD_fast
&& ZSTD_equivalentCParams(cctx->appliedParams.cParams,
cdict->cParams);
{ unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Copy only compression parameters related to tables. */
params.cParams = cdict->cParams;
params.cParams.windowLog = windowLog;
ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset,
zbuff);
assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
}
if (attachDict) {
const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc
- cdict->matchState.window.base);
if (cdictLen == 0) {
/* don't even attach dictionaries with no contents */
DEBUGLOG(4, "skipping attaching empty dictionary");
} else {
DEBUGLOG(4, "attaching dictionary into context");
cctx->blockState.matchState.dictMatchState = &cdict->matchState;
/* prep working match state so dict matches never have negative indices
* when they are translated to the working context's index space. */
if (cctx->blockState.matchState.window.dictLimit < cdictLen) {
cctx->blockState.matchState.window.nextSrc =
cctx->blockState.matchState.window.base + cdictLen;
ZSTD_window_clear(&cctx->blockState.matchState.window);
}
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
}
} else {
DEBUGLOG(4, "copying dictionary into context");
/* copy tables */
{ size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
@@ -1224,6 +1260,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
}
/* Zero the hashTable3, since the cdict never fills it */
{ size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
assert(cdict->matchState.hashLog3 == 0);
@@ -1239,6 +1276,8 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
}
cctx->dictID = cdict->dictID;
/* copy block state */
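
To make the attach-path window adjustment above concrete, a worked example under the indexing scheme visible in this patch: suppose the attached cdict holds 1000 bytes, so cdictLen == 1000 and the dictionary occupies indices [0, 1000) in its own window. The code above advances the working window so that its dictLimit is at least 1000, and the first source byte then gets index >= 1000. In ZSTD_compressBlock_fast_generic() further down, dictIndexDelta = prefixLowestIndex - (dictEnd - dictBase) = 1000 - 1000 = 0, so a dictionary match at dict index d translates to working index d + dictIndexDelta = d >= 0 — exactly the non-negativity the "dict matches never have negative indices" comment promises, and what the assert on prefixLowestIndex >= (dictEnd - dictBase) checks.
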
@@ -1455,8 +1494,8 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -1473,27 +1512,25 @@ static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
disableLiteralCompression);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;
@@ -1501,17 +1538,17 @@ static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
if (hType == set_compressed) {
/* using a newly constructed table */
nextHuf->repeatMode = HUF_repeat_check;
}
/* Build header */
@@ -1561,6 +1598,137 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
/**
* -log2(x / 256) lookup table for x in [0, 256).
* If x == 0: Return 0
* Else: Return floor(-log2(x / 256) * 256)
*/
static unsigned const kInverseProbabiltyLog256[256] = {
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
5, 4, 2, 1,
};
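
The table can be regenerated offline with a few lines of C (a sketch, assuming C99 math.h; boundary entries could in principle differ by one unit of rounding from the committed values):

    #include <math.h>
    #include <stdio.h>

    /* Prints floor(-log2(x/256) * 256) for x in [0, 256), with x == 0
     * mapped to 0 by convention, matching kInverseProbabiltyLog256.
     * The cast to unsigned truncates, which equals floor for positive values. */
    int main(void)
    {
        int x;
        for (x = 0; x < 256; ++x) {
            unsigned const v = (x == 0)
                ? 0
                : (unsigned)(-log2((double)x / 256.0) * 256.0);
            printf("%4u,%s", v, (x % 12 == 11) ? "\n" : " ");
        }
        return 0;
    }
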
/**
* Returns the cost in bits of encoding the distribution described by count
* using the entropy bound.
*/
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
{
unsigned cost = 0;
unsigned s;
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
norm = 1;
assert(count[s] < total);
cost += count[s] * kInverseProbabiltyLog256[norm];
}
return cost >> 8;
}
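
A small worked example of the fixed-point arithmetic above: for counts {4, 2, 2} with total == 8, the scaled probabilities are 256*4/8 == 128 and 256*2/8 == 64, and the table gives kInverseProbabiltyLog256[128] == 256 (1.0 bits in 8.8 fixed point) and kInverseProbabiltyLog256[64] == 512 (2.0 bits). The accumulated cost is 4*256 + 2*512 + 2*512 == 3072, and 3072 >> 8 == 12 bits, matching the exact Shannon bound 4*1 + 2*2 + 2*2.
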
/**
* Returns the cost in bits of encoding the distribution in count using the
* table described by norm. The max symbol supported by norm is assumed >= max.
* norm must be valid for every symbol with non-zero probability in count.
*/
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
{
unsigned const shift = 8 - accuracyLog;
size_t cost = 0;
unsigned s;
assert(accuracyLog <= 8);
for (s = 0; s <= max; ++s) {
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
unsigned const norm256 = normAcc << shift;
assert(norm256 > 0);
assert(norm256 < 256);
cost += count[s] * kInverseProbabiltyLog256[norm256];
}
return cost >> 8;
}
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
void const* ptr = ctable;
U16 const* u16ptr = (U16 const*)ptr;
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
return maxSymbolValue;
}
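
This works because FSE_buildCTable() lays out the CTable with a two-U16 header, tableLog followed by maxSymbolValue, so reading the second U16 recovers the largest symbol the table can encode (a layout detail of the FSE implementation, not a public contract).
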
/**
* Returns the cost in bits of encoding the distribution in count using ctable.
* Returns an error if ctable cannot represent all the symbols in count.
*/
static size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max)
{
unsigned const kAccuracyLog = 8;
size_t cost = 0;
unsigned s;
FSE_CState_t cstate;
FSE_initCState(&cstate, ctable);
if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
ZSTD_getFSEMaxSymbolValue(ctable), max);
return ERROR(GENERIC);
}
for (s = 0; s <= max; ++s) {
unsigned const tableLog = cstate.stateLog;
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
if (count[s] == 0)
continue;
if (bitCost >= badCost) {
DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
return ERROR(GENERIC);
}
cost += count[s] * bitCost;
}
return cost >> kAccuracyLog;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
*/
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
size_t const nbSeq, unsigned const FSELog)
{
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
@@ -1568,28 +1736,36 @@ typedef enum {
MEM_STATIC
symbolEncodingType_e ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
DEBUGLOG(5, "Selected set_basic");
return set_basic;
}
DEBUGLOG(5, "Selected set_rle");
return set_rle;
}
if (strategy < ZSTD_lazy) {
if (isDefaultAllowed) {
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
DEBUGLOG(5, "Selected set_repeat");
return set_repeat;
}
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
DEBUGLOG(5, "Selected set_basic");
/* The format allows default tables to be repeated, but it isn't useful.
* When using simple heuristics to select encoding type, we don't want
@@ -1600,6 +1776,34 @@ symbolEncodingType_e ZSTD_selectEncodingType(
*repeatMode = FSE_repeat_none;
return set_basic;
}
}
} else {
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
if (isDefaultAllowed) {
assert(!ZSTD_isError(basicCost));
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
}
assert(!ZSTD_isError(NCountCost));
assert(compressedCost < ERROR(maxCode));
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
(U32)basicCost, (U32)repeatCost, (U32)compressedCost);
if (basicCost <= repeatCost && basicCost <= compressedCost) {
DEBUGLOG(5, "Selected set_basic");
assert(isDefaultAllowed);
*repeatMode = FSE_repeat_none;
return set_basic;
}
if (repeatCost <= compressedCost) {
DEBUGLOG(5, "Selected set_repeat");
assert(!ZSTD_isError(repeatCost));
return set_repeat;
}
assert(compressedCost < basicCost && compressedCost < repeatCost);
}
DEBUGLOG(5, "Selected set_compressed");
*repeatMode = FSE_repeat_check;
return set_compressed;
@@ -1803,10 +2007,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
U32 count[MaxSeq+1];
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
const seqDef* const sequences = seqStorePtr->sequencesStart;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1817,6 +2022,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
BYTE* op = ostart;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* seqHead;
BYTE* lastNCount = NULL;
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
@@ -1824,7 +2030,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = seqStorePtr->lit - literals;
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
op, dstCapacity,
literals, litSize,
@@ -1844,12 +2050,8 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */
memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
return op - ostart;
}
@@ -1862,13 +2064,17 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
{ U32 max = MaxLL;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building LL table");
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
if (LLtype == set_compressed)
lastNCount = op;
op += countSize;
} }
/* build CTable for Offsets */
@@ -1877,26 +2083,32 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
if (Offtype == set_compressed)
lastNCount = op;
op += countSize;
} }
/* build CTable for MatchLengths */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building ML table");
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
if (MLtype == set_compressed)
lastNCount = op;
op += countSize;
} }
@@ -1911,6 +2123,21 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
longOffsets, bmi2);
if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
op += bitstreamSize;
/* zstd versions <= 1.3.4 mistakenly report corruption when
* FSE_readNCount() receives a buffer < 4 bytes.
* Fixed by https://github.com/facebook/zstd/pull/1146.
* This can happen when the last set_compressed table present is 2
* bytes and the bitstream is only one byte.
* In this exceedingly rare case, we will simply emit an uncompressed
* block, since it isn't worth optimizing.
*/
if (lastNCount && (op - lastNCount) < 4) {
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
assert(op - lastNCount == 3);
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
"emitting an uncompressed block.");
return 0;
}
}
return op - ostart;
@@ -1926,6 +2153,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
size_t const cSize = ZSTD_compressSequences_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
workspace, bmi2);
if (cSize == 0) return 0;
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
@@ -1942,8 +2170,8 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
* block. After the first block, the offcode table might not have large
* enough codes to represent the offsets in the data.
*/
if (nextEntropy->fse.offcode_repeatMode == FSE_repeat_valid)
nextEntropy->fse.offcode_repeatMode = FSE_repeat_check;
return cSize;
}
@@ -1951,9 +2179,9 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
/* ZSTD_selectBlockCompressor() :
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
@@ -1961,13 +2189,19 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict },
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState,
NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ }
};
ZSTD_blockCompressor selectedCompressor;
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
assert((U32)strat >= (U32)ZSTD_fast);
assert((U32)strat <= (U32)ZSTD_btultra);
selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
assert(selectedCompressor != NULL);
return selectedCompressor;
}
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
@@ -1999,6 +2233,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
ZSTD_resetSeqStore(&(zc->seqStore));
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
/* a gap between an attached dict and the current window is not safe,
* they must remain adjacent, and when that stops being the case, the dict
* must be unset */
assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
/* limited update after a very long match */
{ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
@@ -2009,7 +2248,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
}
/* select and store sequences */
{ ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
size_t lastLLSize;
{ int i;
for (i = 0; i < ZSTD_REP_NUM; ++i)
@@ -2023,7 +2262,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
&zc->appliedParams.cParams,
src, srcSize);
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
} else if (zc->appliedParams.ldmParams.enableLdm) {
rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
@@ -2040,10 +2279,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
&zc->appliedParams.cParams,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
}
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
@@ -2110,8 +2349,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
else ms->nextToUpdate -= correction;
ms->loadedDictEnd = 0;
ms->dictMatchState = NULL;
}
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
@@ -2384,7 +2624,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
dictPtr += 4;
{ unsigned maxSymbolValue = 255;
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
@@ -2396,7 +2636,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
/* fill all offset symbols to avoid garbage at end of table */
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@@ -2408,7 +2648,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@@ -2420,7 +2660,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
@@ -2446,10 +2686,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
} }
bs->entropy.huf.repeatMode = HUF_repeat_valid;
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
return dictID;
}


@@ -53,14 +53,22 @@ typedef struct ZSTD_prefixDict_s {
} ZSTD_prefixDict;
typedef struct {
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_repeat repeatMode;
} ZSTD_hufCTables_t;
typedef struct {
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
FSE_repeat offcode_repeatMode;
FSE_repeat matchlength_repeatMode;
FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;
typedef struct {
ZSTD_hufCTables_t huf;
ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
typedef struct {
@@ -114,7 +122,8 @@ typedef struct {
U32 lowLimit; /* below that point, no more data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 nextToUpdate; /* index from which to continue table update */
@@ -124,7 +133,8 @@ typedef struct {
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t *dictMatchState;
};
typedef struct {
ZSTD_compressedBlockState_t* prevCBlock;
@@ -240,10 +250,13 @@ struct ZSTD_CCtx_s {
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -500,6 +513,20 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
return window.lowLimit < window.dictLimit;
}
/**
* ZSTD_matchState_dictMode():
* Inspects the provided matchState and figures out what dictMode should be
* passed to the compressor.
*/
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
return ZSTD_window_hasExtDict(ms->window) ?
ZSTD_extDict :
ms->dictMatchState != NULL ?
ZSTD_dictMatchState :
ZSTD_noDict;
}
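
The nested ternary encodes a priority: an extDict window takes precedence over an attached dictMatchState. Unrolled into an equivalent if-chain for readability (same logic; the function name is hypothetical, not part of the patch):

    MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode_unrolled(const ZSTD_matchState_t* ms)
    {
        if (ZSTD_window_hasExtDict(ms->window)) return ZSTD_extDict; /* rolling-window ext dict wins */
        if (ms->dictMatchState != NULL) return ZSTD_dictMatchState;  /* attached cdict */
        return ZSTD_noDict;
    }
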
/**
* ZSTD_window_needOverflowCorrection():
* Returns non-zero if the indices are getting too large and need overflow
@@ -567,18 +594,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* ZSTD_window_enforceMaxDist():
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd, U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const current = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
@@ -592,6 +626,8 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
}
}


@@ -45,26 +45,57 @@ FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const hlog, U32 const stepSize, U32 const mls,
ZSTD_dictMode_e const dictMode)
{
U32* const hashTable = ms->hashTable;
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
dms->hashTable : NULL;
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* otherwise, we would get index underflow when translating a dict index
* into a local index */
assert(dictMode != ZSTD_dictMatchState
|| prefixLowestIndex >= (U32)(dictEnd - dictBase));
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
}
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -73,26 +104,62 @@ size_t ZSTD_compressBlock_fast_generic(
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = current + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = current; /* update hash table */
if (dictMode == ZSTD_dictMatchState
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else if ( dictMode == ZSTD_noDict
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixLowestIndex)
|| (MEM_read32(match) != MEM_read32(ip)) ) {
if (dictMode == ZSTD_dictMatchState) {
U32 const dictMatchIndex = dictHashTable[h];
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictLowestIndex ||
MEM_read32(dictMatch) != MEM_read32(ip)) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
} else {
/* found a dict match */
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4;
while (((ip>anchor) & (dictMatch>dictLowest))
&& (ip[-1] == dictMatch[-1])) {
ip--; dictMatch--; mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else {
assert(stepSize >= 1); assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize; ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue; continue;
} }
} else {
/* found a regular match */
U32 const offset = (U32)(ip-match);
mLength = ZSTD_count(ip+4, match+4, iend) + 4; mLength = ZSTD_count(ip+4, match+4, iend) + 4;
{ U32 const offset = (U32)(ip-match); while (((ip>anchor) & (match>prefixLowest))
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} } }
/* match found */ /* match found */
ip += mLength; ip += mLength;
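The `(U32)((prefixLowestIndex-1) - repIndex) >= 3` guard above is a single unsigned comparison doing two jobs: it accepts a `repIndex` that is either at least 4 positions below the dict/prefix boundary (so the 4-byte probe stays inside the dictionary segment) or at/above the boundary (the subtraction underflows to a huge value). My reading of it, as a standalone sketch:

```c
#include <stdint.h>
#include <stdio.h>

/* One unsigned compare: true iff repIndex is >= 4 below `boundary`
 * (probe fully in the dictionary) or >= boundary (probe fully in the
 * prefix, via deliberate unsigned underflow). */
static int rep_is_safe(uint32_t boundary, uint32_t repIndex)
{
    return (uint32_t)((boundary - 1) - repIndex) >= 3;
}

int main(void)
{
    uint32_t const boundary = 100;
    printf("%d\n", rep_is_safe(boundary, 96));   /* 1: reads bytes 96..99, inside dict */
    printf("%d\n", rep_is_safe(boundary, 98));   /* 0: a 4-byte read would straddle    */
    printf("%d\n", rep_is_safe(boundary, 100));  /* 1: underflow -> huge -> accepted   */
    return 0;
}
```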
@@ -102,19 +169,43 @@ size_t ZSTD_compressBlock_fast_generic(
        /* Fill Table */
        hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
        hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

        /* check immediate repcode */
        if (dictMode == ZSTD_dictMatchState) {
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase - dictIndexDelta + repIndex2 :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }

        if (dictMode == ZSTD_noDict) {
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }
    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
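`ZSTD_count_2segments`, used in both repcode paths above, measures a match that begins in the dictionary buffer and may run past its end into the current prefix. A simplified sketch of the contract (not zstd's implementation, which compares word-at-a-time):

```c
#include <stddef.h>

/* Count matching bytes: follow `match` through the first segment until
 * mEnd; if the whole segment matched, resume the comparison against the
 * start of the second segment (iStart). */
static size_t count_2segments(const unsigned char* ip, const unsigned char* match,
                              const unsigned char* iEnd, const unsigned char* mEnd,
                              const unsigned char* iStart)
{
    size_t n = 0;
    while (match < mEnd && ip < iEnd && *ip == *match) { ip++; match++; n++; }
    if (match == mEnd)   /* ran off segment 1: continue in segment 2 */
        while (ip < iEnd && *ip == *iStart) { ip++; iStart++; n++; }
    return n;
}
```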
@@ -132,17 +223,40 @@ size_t ZSTD_compressBlock_fast(
    U32 const hlog = cParams->hashLog;
    U32 const mls = cParams->searchLength;
    U32 const stepSize = cParams->targetLength;
    assert(ms->dictMatchState == NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict);
    }
}
size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
    U32 const hlog = cParams->hashLog;
    U32 const mls = cParams->searchLength;
    U32 const stepSize = cParams->targetLength;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState);
    }
}
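Both wrappers above call the same `_generic` body with `dictMode` (and `mls`) as compile-time constants, so each instantiation keeps only its own branches. A toy illustration of this specialization-by-constant pattern (names hypothetical, not zstd's code):

```c
typedef enum { noDict = 0, dictMatchState = 1 } dict_mode_e;

/* When `mode` is a literal constant at each call site and the function is
 * inlined, dead-code elimination removes the untaken branch entirely. */
static inline int block_body(int x, dict_mode_e const mode)
{
    if (mode == dictMatchState)
        return x + 1;   /* dict-aware path */
    return x;           /* fast path */
}

static int block_noDict(int x)         { return block_body(x, noDict); }
static int block_dictMatchState(int x) { return block_body(x, dictMatchState); }
```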

View File

@@ -24,6 +24,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);

View File

@@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences(
         * * Try invalidation after the sequence generation and test the
         *   offset against maxDist directly.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,12 +591,12 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
    unsigned const minMatch = cParams->searchLength;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy,
                                   ZSTD_matchState_dictMode(ms));
    BYTE const* const base = ms->window.base;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;

View File

@@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences(
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
            ZSTD_compressionParameters const* cParams,
            void const* src, size_t srcSize);

/**
 * ZSTD_ldm_skipSequences():

View File

@@ -39,7 +39,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
    optPtr->priceType = zop_predef;

    assert(optPtr->symbolCosts != NULL);
    if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
        if (srcSize <= 8192) /* heuristic */
            optPtr->priceType = zop_static;
        else {
@@ -52,7 +52,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
        {   unsigned lit;
            for (lit=0; lit<=MaxLit; lit++) {
                U32 const scaleLog = 11;   /* scale to 2K */
                U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
                assert(bitCost <= scaleLog);
                optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                optPtr->litSum += optPtr->litFreq[lit];
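The `1 << (scaleLog - bitCost)` line inverts the cost/probability relation: a symbol whose Huffman code is b bits long is assigned a pseudo-frequency of 2^(scaleLog-b) out of 2^scaleLog, i.e. probability 2^-b, so that -log2(p) recovers exactly b bits. A standalone numeric check:

```c
#include <stdio.h>

int main(void)
{
    unsigned const scaleLog = 11;   /* total mass 2^11 = 2048, as above */
    unsigned bitCost;
    for (bitCost = 1; bitCost <= 11; bitCost++) {
        unsigned const freq = 1u << (scaleLog - bitCost);
        /* frequency freq out of 2048 means -log2(freq/2048) == bitCost bits */
        printf("cost %2u bits -> pseudo-frequency %4u / 2048\n", bitCost, freq);
    }
    return 0;
}
```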
@@ -60,7 +60,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
        {   unsigned ll;
            FSE_CState_t llstate;
            FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
            optPtr->litLengthSum = 0;
            for (ll=0; ll<=MaxLL; ll++) {
                U32 const scaleLog = 10;   /* scale to 1K */
@@ -72,7 +72,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
        {   unsigned ml;
            FSE_CState_t mlstate;
            FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
            optPtr->matchLengthSum = 0;
            for (ml=0; ml<=MaxML; ml++) {
                U32 const scaleLog = 10;
@@ -84,7 +84,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
        {   unsigned of;
            FSE_CState_t ofstate;
            FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
            optPtr->offCodeSum = 0;
            for (of=0; of<=MaxOff; of++) {
                U32 const scaleLog = 10;
@@ -180,9 +180,9 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
    if (optPtr->priceType == zop_static) {
        U32 u, cost;
        assert(optPtr->symbolCosts != NULL);
        assert(optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid);
        for (u=0, cost=0; u < litLength; u++)
            cost += HUF_getNbBits(optPtr->symbolCosts->huf.CTable, literals[u]);
        return cost * BITCOST_MULTIPLIER;
    }
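Throughout these price functions, costs are fixed-point "fractional bits": whole-bit Huffman costs are multiplied by `BITCOST_MULTIPLIER` so they share a scale with FSE costs, which are generally non-integral. A sketch of the convention, assuming 8 fractional bits (the multiplier's value here is my assumption):

```c
#include <stdio.h>

#define BITCOST_ACCURACY   8                       /* assumption: 8 fractional bits */
#define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

int main(void)
{
    unsigned const wholeBits = 5;                            /* e.g. a Huffman code length */
    unsigned const price = wholeBits * BITCOST_MULTIPLIER;   /* 5.00 bits -> 1280 */
    printf("%.2f bits\n", (double)price / BITCOST_MULTIPLIER);
    return 0;
}
```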
@@ -202,7 +202,7 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
    if (optPtr->priceType == zop_static) {
        U32 const llCode = ZSTD_LLcode(litLength);
        FSE_CState_t cstate;
        FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
        {   U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode);
            DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER);
            return price;
@@ -234,7 +234,7 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con
    if (optPtr->priceType == zop_static) {
        U32 const llCode = ZSTD_LLcode(litLength);
        FSE_CState_t cstate;
        FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
        return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
             + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode)
             - BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0);
@@ -284,8 +284,8 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength,
    if (optPtr->priceType == zop_static) {
        U32 const mlCode = ZSTD_MLcode(mlBase);
        FSE_CState_t mlstate, offstate;
        FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
        FSE_initCState(&offstate, optPtr->symbolCosts->fse.offcodeCTable);
        return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER
             + BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER;
    }
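The `offCode*BITCOST_MULTIPLIER` term relies on a zstd convention: an offset code equals the position of the value's highest set bit, and exactly that many raw extra bits follow to reconstruct the lower bits. A quick standalone check of the relationship (the highbit helper is a portable stand-in, not zstd's `ZSTD_highbit32`):

```c
#include <stdio.h>

/* Position of the highest set bit (stand-in for ZSTD_highbit32). */
static unsigned highbit32(unsigned v)
{
    unsigned n = 0;
    while (v >>= 1) n++;
    return n;
}

int main(void)
{
    unsigned v;
    for (v = 1; v <= 16; v++) {
        unsigned const offCode = highbit32(v);
        /* offCode extra bits carry the value below its top bit */
        printf("value %2u -> code %u, %u extra bits\n", v, offCode, offCode);
    }
    return 0;
}
```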