[libzstd] Handle uncompressed literals

This commit is contained in:
Nick Terrell 2019-02-15 10:29:03 -08:00
parent a96e67af6c
commit 3d7377b874
4 changed files with 40 additions and 20 deletions

View File

@ -402,7 +402,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_minMatch:
case ZSTD_c_targetLength:
case ZSTD_c_strategy:
case ZSTD_c_literalCompressionMode:
return 1;
case ZSTD_c_format:
@ -421,6 +420,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_ldmBucketSizeLog:
case ZSTD_c_ldmHashRateLog:
case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode:
default:
return 0;
}
@ -2677,7 +2677,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
goto out; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
/* required for optimal parser to read stats from dictionary */
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
/* tell the optimal parser how we expect to compress literals */
ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
/* a gap between an attached dict and the current window is not safe,
* they must remain adjacent,

View File

@ -107,6 +107,7 @@ typedef struct {
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;
typedef struct {

View File

@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price)
}
#endif
/* ZSTD_compressedLiterals() :
 * Reports whether the optimal parser should treat literals as compressed.
 * @return : 0 when the literal compression mode recorded in `optPtr`
 *           is ZSTD_lcm_uncompressed, 1 for any other mode. */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    if (optPtr->literalCompressionMode == ZSTD_lcm_uncompressed)
        return 0;   /* literals will be stored raw */
    return 1;
}
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
if (ZSTD_compressedLiterals(optPtr))
optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
const BYTE* const src, size_t const srcSize,
int const optLevel)
{
int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic;
@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
/* huffman table presumed generated by dictionary */
optPtr->priceType = zop_dynamic;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
{ unsigned lit;
if (compressedLiterals) {
unsigned lit;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
} else { /* not a dictionary */
assert(optPtr->litFreq != NULL);
{ unsigned lit = MaxLit;
if (compressedLiterals) {
unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
}
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
{ unsigned ll;
for (ll=0; ll<=MaxLL; ll++)
@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
} else { /* new block : re-use previous statistics, scaled down */
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
if (compressedLiterals)
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
int optLevel)
{
if (litLength == 0) return 0;
if (!ZSTD_compressedLiterals(optPtr))
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
if (optPtr->priceType == zop_predef)
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
U32 offsetCode, U32 matchLength)
{
/* literals */
{ U32 u;
if (ZSTD_compressedLiterals(optPtr)) {
U32 u;
for (u=0; u < litLength; u++)
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
if (ZSTD_compressedLiterals(optPtr))
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);

View File

@ -179,7 +179,7 @@ silesia, small hash log, advanced one
silesia, small chain log, advanced one pass, 4931093
silesia, explicit params, advanced one pass, 4815369
silesia, uncompressed literals, advanced one pass, 5155424
silesia, uncompressed literals optimal, advanced one pass, 4426654
silesia, uncompressed literals optimal, advanced one pass, 4325427
silesia, huffman literals, advanced one pass, 5341356
silesia.tar, level -5, advanced one pass, 7160438
silesia.tar, level -3, advanced one pass, 6789024
@ -204,7 +204,7 @@ silesia.tar, small hash log, advanced one
silesia.tar, small chain log, advanced one pass, 4943255
silesia.tar, explicit params, advanced one pass, 4829974
silesia.tar, uncompressed literals, advanced one pass, 5157992
silesia.tar, uncompressed literals optimal, advanced one pass, 4372744
silesia.tar, uncompressed literals optimal, advanced one pass, 4321094
silesia.tar, huffman literals, advanced one pass, 5358079
github, level -5, advanced one pass, 232744
github, level -5 with dict, advanced one pass, 46718
@ -243,7 +243,7 @@ github, small hash log, advanced one
github, small chain log, advanced one pass, 136314
github, explicit params, advanced one pass, 137670
github, uncompressed literals, advanced one pass, 167004
github, uncompressed literals optimal, advanced one pass, 164600
github, uncompressed literals optimal, advanced one pass, 156824
github, huffman literals, advanced one pass, 143457
silesia, level -5, advanced one pass small out, 7152294
silesia, level -3, advanced one pass small out, 6789969
@ -268,7 +268,7 @@ silesia, small hash log, advanced one
silesia, small chain log, advanced one pass small out, 4931093
silesia, explicit params, advanced one pass small out, 4815369
silesia, uncompressed literals, advanced one pass small out, 5155424
silesia, uncompressed literals optimal, advanced one pass small out, 4426654
silesia, uncompressed literals optimal, advanced one pass small out, 4325427
silesia, huffman literals, advanced one pass small out, 5341356
silesia.tar, level -5, advanced one pass small out, 7160438
silesia.tar, level -3, advanced one pass small out, 6789024
@ -293,7 +293,7 @@ silesia.tar, small hash log, advanced one
silesia.tar, small chain log, advanced one pass small out, 4943255
silesia.tar, explicit params, advanced one pass small out, 4829974
silesia.tar, uncompressed literals, advanced one pass small out, 5157992
silesia.tar, uncompressed literals optimal, advanced one pass small out, 4372744
silesia.tar, uncompressed literals optimal, advanced one pass small out, 4321094
silesia.tar, huffman literals, advanced one pass small out, 5358079
github, level -5, advanced one pass small out, 232744
github, level -5 with dict, advanced one pass small out, 46718
@ -332,7 +332,7 @@ github, small hash log, advanced one
github, small chain log, advanced one pass small out, 136314
github, explicit params, advanced one pass small out, 137670
github, uncompressed literals, advanced one pass small out, 167004
github, uncompressed literals optimal, advanced one pass small out, 164600
github, uncompressed literals optimal, advanced one pass small out, 156824
github, huffman literals, advanced one pass small out, 143457
silesia, level -5, advanced streaming, 7152294
silesia, level -3, advanced streaming, 6789973
@ -357,7 +357,7 @@ silesia, small hash log, advanced str
silesia, small chain log, advanced streaming, 4931093
silesia, explicit params, advanced streaming, 4815380
silesia, uncompressed literals, advanced streaming, 5155424
silesia, uncompressed literals optimal, advanced streaming, 4426654
silesia, uncompressed literals optimal, advanced streaming, 4325427
silesia, huffman literals, advanced streaming, 5341357
silesia.tar, level -5, advanced streaming, 7160440
silesia.tar, level -3, advanced streaming, 6789026
@ -382,7 +382,7 @@ silesia.tar, small hash log, advanced str
silesia.tar, small chain log, advanced streaming, 4943260
silesia.tar, explicit params, advanced streaming, 4830002
silesia.tar, uncompressed literals, advanced streaming, 5157995
silesia.tar, uncompressed literals optimal, advanced streaming, 4372744
silesia.tar, uncompressed literals optimal, advanced streaming, 4321094
silesia.tar, huffman literals, advanced streaming, 5358083
github, level -5, advanced streaming, 232744
github, level -5 with dict, advanced streaming, 46718
@ -421,7 +421,7 @@ github, small hash log, advanced str
github, small chain log, advanced streaming, 136314
github, explicit params, advanced streaming, 137670
github, uncompressed literals, advanced streaming, 167004
github, uncompressed literals optimal, advanced streaming, 164600
github, uncompressed literals optimal, advanced streaming, 156824
github, huffman literals, advanced streaming, 143457
silesia, level -5, old streaming, 7152294
silesia, level -3, old streaming, 6789973

1 Data Config Method Total compressed size
179 silesia small chain log advanced one pass 4931093
180 silesia explicit params advanced one pass 4815369
181 silesia uncompressed literals advanced one pass 5155424
182 silesia uncompressed literals optimal advanced one pass 4426654 4325427
183 silesia huffman literals advanced one pass 5341356
184 silesia.tar level -5 advanced one pass 7160438
185 silesia.tar level -3 advanced one pass 6789024
204 silesia.tar small chain log advanced one pass 4943255
205 silesia.tar explicit params advanced one pass 4829974
206 silesia.tar uncompressed literals advanced one pass 5157992
207 silesia.tar uncompressed literals optimal advanced one pass 4372744 4321094
208 silesia.tar huffman literals advanced one pass 5358079
209 github level -5 advanced one pass 232744
210 github level -5 with dict advanced one pass 46718
243 github small chain log advanced one pass 136314
244 github explicit params advanced one pass 137670
245 github uncompressed literals advanced one pass 167004
246 github uncompressed literals optimal advanced one pass 164600 156824
247 github huffman literals advanced one pass 143457
248 silesia level -5 advanced one pass small out 7152294
249 silesia level -3 advanced one pass small out 6789969
268 silesia small chain log advanced one pass small out 4931093
269 silesia explicit params advanced one pass small out 4815369
270 silesia uncompressed literals advanced one pass small out 5155424
271 silesia uncompressed literals optimal advanced one pass small out 4426654 4325427
272 silesia huffman literals advanced one pass small out 5341356
273 silesia.tar level -5 advanced one pass small out 7160438
274 silesia.tar level -3 advanced one pass small out 6789024
293 silesia.tar small chain log advanced one pass small out 4943255
294 silesia.tar explicit params advanced one pass small out 4829974
295 silesia.tar uncompressed literals advanced one pass small out 5157992
296 silesia.tar uncompressed literals optimal advanced one pass small out 4372744 4321094
297 silesia.tar huffman literals advanced one pass small out 5358079
298 github level -5 advanced one pass small out 232744
299 github level -5 with dict advanced one pass small out 46718
332 github small chain log advanced one pass small out 136314
333 github explicit params advanced one pass small out 137670
334 github uncompressed literals advanced one pass small out 167004
335 github uncompressed literals optimal advanced one pass small out 164600 156824
336 github huffman literals advanced one pass small out 143457
337 silesia level -5 advanced streaming 7152294
338 silesia level -3 advanced streaming 6789973
357 silesia small chain log advanced streaming 4931093
358 silesia explicit params advanced streaming 4815380
359 silesia uncompressed literals advanced streaming 5155424
360 silesia uncompressed literals optimal advanced streaming 4426654 4325427
361 silesia huffman literals advanced streaming 5341357
362 silesia.tar level -5 advanced streaming 7160440
363 silesia.tar level -3 advanced streaming 6789026
382 silesia.tar small chain log advanced streaming 4943260
383 silesia.tar explicit params advanced streaming 4830002
384 silesia.tar uncompressed literals advanced streaming 5157995
385 silesia.tar uncompressed literals optimal advanced streaming 4372744 4321094
386 silesia.tar huffman literals advanced streaming 5358083
387 github level -5 advanced streaming 232744
388 github level -5 with dict advanced streaming 46718
421 github small chain log advanced streaming 136314
422 github explicit params advanced streaming 137670
423 github uncompressed literals advanced streaming 167004
424 github uncompressed literals optimal advanced streaming 164600 156824
425 github huffman literals advanced streaming 143457
426 silesia level -5 old streaming 7152294
427 silesia level -3 old streaming 6789973