Don't shrink window log when streaming with a dictionary
Fixes #2442. 1. When creating a dictionary keep the same behavior as before. Assume the source size is 513 bytes when adjusting parameters. 2. When calling ZSTD_getCParams() or ZSTD_adjustCParams() keep the same behavior as before. 3. When attaching a dictionary keep the same behavior of ignoring the dictionary size. When streaming this will select the largest parameters and not adjust them down. But, the CDict will use the correctly sized parameters, which seems like the right tradeoff. 4. When not attaching a dictionary (either forced not to, or using a prefix dictionary) we select parameters based on the dictionary size + source size, and assume the source size is small, which is the same behavior as before. But, now we don't adjust the window log (and hash and chain log) down when the source size is unknown. When the source size is unknown all cdicts should attach, except when the user disables attaching, or `forceWindow` is used. This means that when streaming with a CDict we end up in the good case where we get small CDict parameters, and large source parameters. TODO: Add a streaming + dictionary regression test case.
This commit is contained in:
parent
a98a6e2091
commit
9d31c704d5
@ -1188,15 +1188,30 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
||||
assert(ZSTD_checkCParams(cPar)==0);
|
||||
|
||||
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
srcSize = minSrcSize;
|
||||
|
||||
switch (mode) {
|
||||
case ZSTD_cpm_noAttachDict:
|
||||
/* If we don't know the source size, don't make any
|
||||
* assumptions about it. We will already have selected
|
||||
* smaller parameters if a dictionary is in use.
|
||||
*/
|
||||
break;
|
||||
case ZSTD_cpm_unknown:
|
||||
/* Keep the legacy behavior of assuming small source
|
||||
* sizes when the cparam mode is unkown.
|
||||
*/
|
||||
/* fall-through */
|
||||
case ZSTD_cpm_createCDict:
|
||||
/* Assume a small source size when creating a dictionary
|
||||
* with an unkown source size.
|
||||
*/
|
||||
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
srcSize = minSrcSize;
|
||||
break;
|
||||
case ZSTD_cpm_attachDict:
|
||||
/* Dictionary has its own dedicated parameters which have
|
||||
* already been selected. We are selecting parameters
|
||||
* for only the source.
|
||||
*/
|
||||
dictSize = 0;
|
||||
break;
|
||||
default:
|
||||
@ -1213,7 +1228,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
ZSTD_highbit32(tSize-1) + 1;
|
||||
if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
|
||||
}
|
||||
{ U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
|
||||
if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
|
||||
U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
|
||||
if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
|
||||
if (cycleLog > dictAndWindowLog)
|
||||
@ -2955,9 +2971,9 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
||||
}
|
||||
|
||||
/* ZSTD_writeSkippableFrame_advanced() :
|
||||
* Writes out a skippable frame with the specified magic number variant (16 are supported),
|
||||
* Writes out a skippable frame with the specified magic number variant (16 are supported),
|
||||
* from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
|
||||
*
|
||||
*
|
||||
* Returns the total number of bytes written, or a ZSTD error code.
|
||||
*/
|
||||
size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
|
||||
|
@ -216,7 +216,7 @@ github.tar, level 16 with dict, zstdcli,
|
||||
github.tar, level 19, zstdcli, 32841
|
||||
github.tar, level 19 with dict, zstdcli, 32899
|
||||
github.tar, no source size, zstdcli, 38442
|
||||
github.tar, no source size with dict, zstdcli, 213380
|
||||
github.tar, no source size with dict, zstdcli, 38004
|
||||
github.tar, long distance mode, zstdcli, 39680
|
||||
github.tar, multithreaded, zstdcli, 38445
|
||||
github.tar, multithreaded long distance mode, zstdcli, 39680
|
||||
@ -618,7 +618,7 @@ github.tar, level 16 with dict, advanced
|
||||
github.tar, level 19, advanced streaming, 32837
|
||||
github.tar, level 19 with dict, advanced streaming, 32895
|
||||
github.tar, no source size, advanced streaming, 38438
|
||||
github.tar, no source size with dict, advanced streaming, 214381
|
||||
github.tar, no source size with dict, advanced streaming, 38000
|
||||
github.tar, long distance mode, advanced streaming, 39676
|
||||
github.tar, multithreaded, advanced streaming, 38441
|
||||
github.tar, multithreaded long distance mode, advanced streaming, 39676
|
||||
@ -695,7 +695,7 @@ github, level 16 with dict, old stre
|
||||
github, level 19, old streaming, 134064
|
||||
github, level 19 with dict, old streaming, 37576
|
||||
github, no source size, old streaming, 140632
|
||||
github, no source size with dict, old streaming, 40678
|
||||
github, no source size with dict, old streaming, 40654
|
||||
github, uncompressed literals, old streaming, 136335
|
||||
github, uncompressed literals optimal, old streaming, 134064
|
||||
github, huffman literals, old streaming, 175568
|
||||
@ -728,7 +728,7 @@ github.tar, level 16 with dict, old stre
|
||||
github.tar, level 19, old streaming, 32837
|
||||
github.tar, level 19 with dict, old streaming, 32895
|
||||
github.tar, no source size, old streaming, 38438
|
||||
github.tar, no source size with dict, old streaming, 214384
|
||||
github.tar, no source size with dict, old streaming, 38000
|
||||
github.tar, uncompressed literals, old streaming, 38441
|
||||
github.tar, uncompressed literals optimal, old streaming, 32837
|
||||
github.tar, huffman literals, old streaming, 42465
|
||||
@ -813,7 +813,7 @@ github, level 16 with dict, old stre
|
||||
github, level 19, old streaming advanced, 134064
|
||||
github, level 19 with dict, old streaming advanced, 37576
|
||||
github, no source size, old streaming advanced, 140632
|
||||
github, no source size with dict, old streaming advanced, 40643
|
||||
github, no source size with dict, old streaming advanced, 40608
|
||||
github, long distance mode, old streaming advanced, 141104
|
||||
github, multithreaded, old streaming advanced, 141104
|
||||
github, multithreaded long distance mode, old streaming advanced, 141104
|
||||
@ -854,7 +854,7 @@ github.tar, level 16 with dict, old stre
|
||||
github.tar, level 19, old streaming advanced, 32837
|
||||
github.tar, level 19 with dict, old streaming advanced, 32876
|
||||
github.tar, no source size, old streaming advanced, 38438
|
||||
github.tar, no source size with dict, old streaming advanced, 214384
|
||||
github.tar, no source size with dict, old streaming advanced, 38015
|
||||
github.tar, long distance mode, old streaming advanced, 38441
|
||||
github.tar, multithreaded, old streaming advanced, 38441
|
||||
github.tar, multithreaded long distance mode, old streaming advanced, 38441
|
||||
@ -880,7 +880,7 @@ github, level 9 with dict, old stre
|
||||
github, level 13 with dict, old streaming cdcit, 39743
|
||||
github, level 16 with dict, old streaming cdcit, 37577
|
||||
github, level 19 with dict, old streaming cdcit, 37576
|
||||
github, no source size with dict, old streaming cdcit, 40678
|
||||
github, no source size with dict, old streaming cdcit, 40654
|
||||
github.tar, level -5 with dict, old streaming cdcit, 45018
|
||||
github.tar, level -3 with dict, old streaming cdcit, 41886
|
||||
github.tar, level -1 with dict, old streaming cdcit, 41636
|
||||
@ -895,7 +895,7 @@ github.tar, level 9 with dict, old stre
|
||||
github.tar, level 13 with dict, old streaming cdcit, 36372
|
||||
github.tar, level 16 with dict, old streaming cdcit, 39353
|
||||
github.tar, level 19 with dict, old streaming cdcit, 32676
|
||||
github.tar, no source size with dict, old streaming cdcit, 214384
|
||||
github.tar, no source size with dict, old streaming cdcit, 38000
|
||||
github, level -5 with dict, old streaming advanced cdict, 49562
|
||||
github, level -3 with dict, old streaming advanced cdict, 44956
|
||||
github, level -1 with dict, old streaming advanced cdict, 42383
|
||||
@ -910,7 +910,7 @@ github, level 9 with dict, old stre
|
||||
github, level 13 with dict, old streaming advanced cdict, 39731
|
||||
github, level 16 with dict, old streaming advanced cdict, 40789
|
||||
github, level 19 with dict, old streaming advanced cdict, 37576
|
||||
github, no source size with dict, old streaming advanced cdict, 40643
|
||||
github, no source size with dict, old streaming advanced cdict, 40608
|
||||
github.tar, level -5 with dict, old streaming advanced cdict, 44307
|
||||
github.tar, level -3 with dict, old streaming advanced cdict, 41359
|
||||
github.tar, level -1 with dict, old streaming advanced cdict, 41322
|
||||
@ -925,4 +925,4 @@ github.tar, level 9 with dict, old stre
|
||||
github.tar, level 13 with dict, old streaming advanced cdict, 36035
|
||||
github.tar, level 16 with dict, old streaming advanced cdict, 38736
|
||||
github.tar, level 19 with dict, old streaming advanced cdict, 32876
|
||||
github.tar, no source size with dict, old streaming advanced cdict, 214384
|
||||
github.tar, no source size with dict, old streaming advanced cdict, 38015
|
||||
|
|
Loading…
x
Reference in New Issue
Block a user