diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 66819d13..d9547544 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -603,6 +603,7 @@ Let's call its first byte `byte0`. - `if (byte0 == 0)` : there are no sequences. The sequence section stops there. Decompressed content is defined entirely as Literals Section content. + The FSE tables used in `Repeat_Mode` aren't updated. - `if (byte0 < 128)` : `Number_of_Sequences = byte0` . Uses 1 byte. - `if (byte0 < 255)` : `Number_of_Sequences = ((byte0-128) << 8) + byte1` . Uses 2 bytes. - `if (byte0 == 255)`: `Number_of_Sequences = byte1 + (byte2<<8) + 0x7F00` . Uses 3 bytes. @@ -631,7 +632,7 @@ They follow the same enumeration : No distribution table will be present. - `RLE_Mode` : The table description consists of a single byte. This code will be repeated for all sequences. -- `Repeat_Mode` : The table used in the previous `Compressed_Block` will be used again, +- `Repeat_Mode` : The table used in the previous `Compressed_Block` with `Number_of_Sequences > 0` will be used again, or if this is the first block, table in the dictionary will be used No distribution table will be present. Note that this includes `RLE_mode`, so if `Repeat_Mode` follows `RLE_Mode`, the same symbol will be repeated. 
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 344c3236..33fd04bd 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -72,7 +72,19 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t unsigned charnum = 0; int previous0 = 0; - if (hbSize < 4) return ERROR(srcSize_wrong); + if (hbSize < 4) { + /* This function only works when hbSize >= 4 */ + char buffer[4]; + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 4); + bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -105,6 +117,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); while (charnum < n0) normalizedCounter[charnum++] = 0; if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; bitStream = MEM_readLE32(ip) >> bitCount; diff --git a/lib/common/fse.h b/lib/common/fse.h index cd810c7d..8d703369 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -591,8 +591,9 @@ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) * note 1 : assume symbolValue is valid (<= maxSymbolValue) * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_bitCost(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue, U32 accuracyLog) +MEM_STATIC U32 
FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) { + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; U32 const threshold = (minNbBits+1) << 16; assert(tableLog < 16); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d8420a8a..538319bc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -946,10 +946,10 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) int i; for (i = 0; i < ZSTD_REP_NUM; ++i) bs->rep[i] = repStartValue[i]; - bs->entropy.hufCTable_repeatMode = HUF_repeat_none; - bs->entropy.offcode_repeatMode = FSE_repeat_none; - bs->entropy.matchlength_repeatMode = FSE_repeat_none; - bs->entropy.litlength_repeatMode = FSE_repeat_none; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; } /*! ZSTD_invalidateMatchState() @@ -963,6 +963,7 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) ms->nextToUpdate = ms->window.dictLimit + 1; ms->loadedDictEnd = 0; ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; } /*! ZSTD_continueCCtx() : @@ -1203,42 +1204,80 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + /* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
*/ + const int attachDict = ( pledgedSrcSize <= 8 KB + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) + && !params.forceWindow /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ + && cdict->cParams.strategy == ZSTD_fast + && ZSTD_equivalentCParams(cctx->appliedParams.cParams, + cdict->cParams); + + { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = cdict->cParams; params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff); + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset, + zbuff); assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); } - /* copy tables */ - { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cdict->cParams.chainLog); - size_t const hSize = (size_t)1 << cdict->cParams.hashLog; - size_t const tableSpace = (chainSize + hSize) * sizeof(U32); - assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); - assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); - memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ - } - /* Zero the hashTable3, since the cdict never fills it */ - { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; - assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + if (attachDict) { + const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictLen) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictLen; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } + } else { + DEBUGLOG(4, "copying dictionary into context"); + /* copy tables */ + { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); + size_t const hSize = (size_t)1 << cdict->cParams.hashLog; + size_t const tableSpace = (chainSize + hSize) * sizeof(U32); + assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); + assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); + memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* Zero the hashTable3, since the cdict never fills it */ + { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + /* copy dictionary offsets */ + { + ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } } - /* copy dictionary offsets */ - { - ZSTD_matchState_t 
const* srcMatchState = &cdict->matchState; - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } cctx->dictID = cdict->dictID; /* copy block state */ @@ -1455,8 +1494,8 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } -static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, +static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1473,27 +1512,25 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, disableLiteralCompression); /* Prepare nextEntropy assuming reusing the existing table */ - nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode; - memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, - sizeof(prevEntropy->hufCTable)); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); if (disableLiteralCompression) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); /* small ? don't even attempt compression (speed opt) */ # define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ - { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode; + { HUF_repeat repeat = prevHuf->repeatMode; int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2) + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2); + workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ hType = set_repeat; @@ -1501,17 +1538,17 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, } if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (cLitSize==1) { - memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); } if (hType == set_compressed) { /* using a newly constructed table */ - nextEntropy->hufCTable_repeatMode = HUF_repeat_check; + nextHuf->repeatMode = HUF_repeat_check; } /* Build header */ @@ -1561,6 +1598,137 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 
mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabiltyLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. 
+ */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabiltyLog256[norm]; + } + return cost >> 8; +} + + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol supported by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabiltyLog256[norm256]; + } + return cost >> 8; +} + + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
+ */ +static size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. 
+ */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + + typedef enum { ZSTD_defaultDisallowed = 0, ZSTD_defaultAllowed = 1 @@ -1568,37 +1736,73 @@ typedef enum { MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( - FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, - U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) { #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } DEBUGLOG(5, "Selected set_rle"); - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. 
- */ - *repeatMode = FSE_repeat_check; return set_rle; } - if ( isDefaultAllowed - && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( isDefaultAllowed - && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? 
ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (U32)basicCost, (U32)repeatCost, (U32)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); } DEBUGLOG(5, "Selected set_compressed"); *repeatMode = FSE_repeat_check; @@ -1803,10 +2007,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, const int bmi2) { const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; U32 count[MaxSeq+1]; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; @@ -1817,6 +2022,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, BYTE* op = 
ostart; size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; BYTE* seqHead; + BYTE* lastNCount = NULL; ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; size_t const litSize = seqStorePtr->lit - literals; size_t const cSize = ZSTD_compressLiterals( - prevEntropy, nextEntropy, + &prevEntropy->huf, &nextEntropy->huf, cctxParams->cParams.strategy, cctxParams->disableLiteralCompression, op, dstCapacity, literals, litSize, @@ -1844,13 +2050,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) { - memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable)); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable)); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - return op - ostart; + /* Copy the old tables over as if we repeated them */ + memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return op - ostart; } /* seqHead : flags for FSE encoding type */ @@ -1862,13 +2064,17 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = 
ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; + if (LLtype == set_compressed) + lastNCount = op; op += countSize; } } /* build CTable for Offsets */ @@ -1877,26 +2083,32 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; + if (Offtype == set_compressed) + lastNCount = op; op += countSize; } } /* build CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); DEBUGLOG(5, "Building ML table"); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy 
tables */ { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; + if (MLtype == set_compressed) + lastNCount = op; op += countSize; } } @@ -1911,6 +2123,21 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, longOffsets, bmi2); if (ZSTD_isError(bitstreamSize)) return bitstreamSize; op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } } return op - ostart; @@ -1926,6 +2153,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, size_t const cSize = ZSTD_compressSequences_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, workspace, bmi2); + if (cSize == 0) return 0; /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. */ @@ -1942,8 +2170,8 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, * block. 
After the first block, the offcode table might not have large * enough codes to represent the offsets in the data. */ - if (nextEntropy->offcode_repeatMode == FSE_repeat_valid) - nextEntropy->offcode_repeatMode = FSE_repeat_check; + if (nextEntropy->fse.offcode_repeatMode == FSE_repeat_valid) + nextEntropy->fse.offcode_repeatMode = FSE_repeat_check; return cSize; } @@ -1951,9 +2179,9 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) { - static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { + static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, @@ -1961,13 +2189,19 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict { ZSTD_compressBlock_fast_extDict /* default for 0 */, ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, - ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict } + ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ } }; + ZSTD_blockCompressor selectedCompressor; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert((U32)strat >= (U32)ZSTD_fast); 
assert((U32)strat <= (U32)ZSTD_btultra); - return blockCompressor[extDict!=0][(U32)strat]; + selectedCompressor = blockCompressor[(int)dictMode][(U32)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; } static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, @@ -1999,6 +2233,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ZSTD_resetSeqStore(&(zc->seqStore)); ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, and when that stops being the case, the dict + * must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + /* limited update after a very long match */ { const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; @@ -2009,7 +2248,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } /* select and store sequences */ - { U32 const extDict = ZSTD_window_hasExtDict(ms->window); + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); size_t lastLLSize; { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -2023,7 +2262,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, - src, srcSize, extDict); + src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm) { rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; @@ -2040,10 +2279,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, - src, srcSize, extDict); + src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); + 
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; @@ -2110,8 +2349,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; else ms->nextToUpdate -= correction; ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; } - ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd); + ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; { size_t cSize = ZSTD_compressBlock_internal(cctx, @@ -2384,7 +2624,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, dictPtr += 4; { unsigned maxSymbolValue = 255; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); if (maxSymbolValue < 255) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; @@ -2396,7 +2636,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ /* fill all offset symbols to avoid garbage at end of table */ - CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE), + CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE), 
dictionary_corrupted); dictPtr += offcodeHeaderSize; } @@ -2408,7 +2648,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); /* Every match length code must have non-zero probability */ CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); - CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE), + CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE), dictionary_corrupted); dictPtr += matchlengthHeaderSize; } @@ -2420,7 +2660,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); /* Every literal length code must have non-zero probability */ CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); - CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE), + CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE), dictionary_corrupted); dictPtr += litlengthHeaderSize; } @@ -2446,10 +2686,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); } } - bs->entropy.hufCTable_repeatMode = HUF_repeat_valid; - bs->entropy.offcode_repeatMode = FSE_repeat_valid; - bs->entropy.matchlength_repeatMode = FSE_repeat_valid; - bs->entropy.litlength_repeatMode = FSE_repeat_valid; + bs->entropy.huf.repeatMode = HUF_repeat_valid; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; 
CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); return dictID; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0f1830a5..a7666d5c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -53,14 +53,22 @@ typedef struct ZSTD_prefixDict_s { } ZSTD_prefixDict; typedef struct { - U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; + U32 CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - HUF_repeat hufCTable_repeatMode; FSE_repeat offcode_repeatMode; FSE_repeat matchlength_repeatMode; FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; typedef struct { @@ -114,7 +122,8 @@ typedef struct { U32 lowLimit; /* below that point, no more data */ } ZSTD_window_t; -typedef struct { +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary */ U32 nextToUpdate; /* index from which to continue table update */ @@ -124,7 +133,8 @@ typedef struct { U32* hashTable3; U32* chainTable; optState_t opt; /* optimal parser state */ -} ZSTD_matchState_t; + const ZSTD_matchState_t *dictMatchState; +}; typedef struct { ZSTD_compressedBlockState_t* prevCBlock; @@ -240,10 +250,13 @@ struct ZSTD_CCtx_s { typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; +typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e; + + typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -500,6 +513,20 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) return window.lowLimit < window.dictLimit; } +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + ZSTD_dictMatchState : + ZSTD_noDict; +} + /** * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow @@ -567,18 +594,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * ZSTD_window_enforceMaxDist(): * Updates lowLimit so that: * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * * This allows a simple check that index >= lowLimit to see if index is valid. * This must be called before a block compression call, with srcEnd as the block * source end. + * * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. * This is because dictionaries are allowed to be referenced as long as the last * byte of the dictionary is in the window, but once they are out of range, * they cannot be referenced. If loadedDictEndPtr is NULL, we use * loadedDictEnd == 0. + * + * In normal dict mode, the dict is between lowLimit and dictLimit. In + * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary + * is below them. forceWindow and dictMatchState are therefore incompatible. 
*/ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, void const* srcEnd, U32 maxDist, - U32* loadedDictEndPtr) + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) { U32 const current = (U32)((BYTE const*)srcEnd - window->base); U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0; @@ -592,6 +626,8 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, } if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) + *dictMatchStatePtr = NULL; } } diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 22b84d1c..3bac2bdd 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -45,26 +45,57 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const hlog, U32 const stepSize, U32 const mls) + U32 const hlog, U32 const stepSize, U32 const mls, + ZSTD_dictMode_e const dictMode) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowestIndex = ms->window.dictLimit; - const BYTE* const lowest = base + lowestIndex; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? 
+ dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* otherwise, we would get index underflow when translating a dict index + * into a local index */ + assert(dictMode != ZSTD_dictMatchState + || prefixLowestIndex >= (U32)(dictEnd - dictBase)); + /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -73,26 +104,62 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; hashTable[h] = current; /* update hash table */ - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ( (matchIndex <= lowestIndex) - || (MEM_read32(match) != MEM_read32(ip)) ) { + } else if ( (matchIndex <= prefixLowestIndex) + || (MEM_read32(match) != MEM_read32(ip)) ) { + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndex = dictHashTable[h]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictLowestIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; + while (((ip>anchor) & (dictMatch>dictLowest)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); mLength = ZSTD_count(ip+4, match+4, iend) + 4; - { U32 const offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } } + while (((ip>anchor) & (match>prefixLowest)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* 
catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } /* match found */ ip += mLength; @@ -102,19 +169,43 @@ size_t ZSTD_compressBlock_fast_generic( /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ?
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } } /* save reps for next block */ rep[0] = offset_1 ? offset_1 : offsetSaved; @@ -132,17 +223,40 @@ size_t ZSTD_compressBlock_fast( U32 const hlog = cParams->hashLog; U32 const mls = cParams->searchLength; U32 const stepSize = cParams->targetLength; + assert(ms->dictMatchState == NULL); switch(mls) { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); + 
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict); + } +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState); } } diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index 746849fc..7e7435f8 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -24,6 +24,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 9d825e69..03d1f54c 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -508,7 +508,7 @@ size_t 
ZSTD_ldm_generateSequences( * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL); + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ newLeftoverSize = ZSTD_ldm_generateSequences_internal( ldmState, sequences, params, chunkStart, chunkSize); @@ -591,12 +591,12 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, - int const extDict) + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) { unsigned const minMatch = cParams->searchLength; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, extDict); + ZSTD_selectBlockCompressor(cParams->strategy, + ZSTD_matchState_dictMode(ms)); BYTE const* const base = ms->window.base; /* Input bounds */ BYTE const* const istart = (BYTE const*)src; diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 0c3789ff..96588adb 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences( size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, - void const* src, size_t srcSize, - int const extDict); + void const* src, size_t srcSize); /** * ZSTD_ldm_skipSequences(): diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 3a48187c..521fbbf3 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -39,7 +39,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->priceType = zop_predef; assert(optPtr->symbolCosts != NULL); - if 
(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ if (srcSize <= 8192) /* heuristic */ optPtr->priceType = zop_static; else { @@ -52,7 +52,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, { unsigned lit; for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit); + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); assert(bitCost <= scaleLog); optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; optPtr->litSum += optPtr->litFreq[lit]; @@ -60,7 +60,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, { unsigned ll; FSE_CState_t llstate; - FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable); + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); optPtr->litLengthSum = 0; for (ll=0; ll<=MaxLL; ll++) { U32 const scaleLog = 10; /* scale to 1K */ @@ -72,7 +72,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, { unsigned ml; FSE_CState_t mlstate; - FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); optPtr->matchLengthSum = 0; for (ml=0; ml<=MaxML; ml++) { U32 const scaleLog = 10; @@ -84,7 +84,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, { unsigned of; FSE_CState_t ofstate; - FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable); + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); optPtr->offCodeSum = 0; for (of=0; of<=MaxOff; of++) { U32 const scaleLog = 10; @@ -180,9 +180,9 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, if (optPtr->priceType == zop_static) { U32 u, cost; assert(optPtr->symbolCosts != NULL); - 
assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid); + assert(optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid); for (u=0, cost=0; u < litLength; u++) - cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]); + cost += HUF_getNbBits(optPtr->symbolCosts->huf.CTable, literals[u]); return cost * BITCOST_MULTIPLIER; } @@ -202,7 +202,7 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP if (optPtr->priceType == zop_static) { U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; - FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); + FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable); { U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode); DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER); return price; @@ -234,7 +234,7 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con if (optPtr->priceType == zop_static) { U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; - FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); + FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable); return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode) - BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0); @@ -284,8 +284,8 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, if (optPtr->priceType == zop_static) { U32 const mlCode = ZSTD_MLcode(mlBase); FSE_CState_t mlstate, offstate; - FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); - FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + FSE_initCState(&offstate, optPtr->symbolCosts->fse.offcodeCTable); return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER + 
BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER; }