From ca77822ddfa51c3618ab006a133752cdc13dfd7a Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 25 Apr 2018 16:32:29 -0700 Subject: [PATCH] Fix parameter adjustment with dictionary The new advanced API basically set `requestedParams = appliedParams` when using a dictionary. This halted all parameter adjustment, which can hurt compression ratio if, for example, the window log is small for the first call, but the rest of the files are large. This patch fixes the bug, and checks that the `requestedParams` don't change in the new advanced API when using a dictionary, and generally in the fuzzer. --- lib/compress/zstd_compress.c | 16 ++++------ tests/zstreamtest.c | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7a504328..f3a49e67 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1195,18 +1195,16 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, - unsigned windowLog, - ZSTD_frameParameters fParams, + ZSTD_CCtx_params params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - { ZSTD_CCtx_params params = cctx->requestedParams; + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = cdict->cParams; - if (windowLog) params.cParams.windowLog = windowLog; - params.fParams = fParams; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_noMemset, zbuff); + params.cParams.windowLog = windowLog; + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff); assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); @@ -2495,9 +2493,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (cdict && cdict->dictContentSize>0) { - cctx->requestedParams = params; - return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog, - params.fParams, pledgedSrcSize, zbuff); + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 14412f4b..ffed955a 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -110,6 +110,20 @@ unsigned int FUZ_rand(unsigned int* seedPtr) #f, ZSTD_getErrorName(err)); \ } +#define CHECK_RET(ret, cond, ...) { \ + if (cond) { \ + DISPLAY("Error %llu => ", (unsigned long long)ret); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (line %u)\n", __LINE__); \ + return ret; \ +} } + +#define CHECK_RET_Z(f) { \ + size_t const err = f; \ + CHECK_RET(err, ZSTD_isError(err), "%s : %s ", \ + #f, ZSTD_getErrorName(err)); \ +} + /*====================================================== * Basic Unit tests @@ -207,6 +221,42 @@ static size_t SEQ_generateRoundTrip(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, return 0; } +static size_t getCCtxParams(ZSTD_CCtx* zc, ZSTD_parameters* savedParams) +{ + unsigned value; + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_windowLog, &savedParams->cParams.windowLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_hashLog, &savedParams->cParams.hashLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_chainLog, &savedParams->cParams.chainLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_searchLog, &savedParams->cParams.searchLog)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_minMatch, &savedParams->cParams.searchLength)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_targetLength, &savedParams->cParams.targetLength)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_compressionStrategy, &value)); + savedParams->cParams.strategy = value; + + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_checksumFlag, &savedParams->fParams.checksumFlag)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_contentSizeFlag, &savedParams->fParams.contentSizeFlag)); + CHECK_RET_Z(ZSTD_CCtx_getParameter(zc, ZSTD_p_dictIDFlag, &value)); + savedParams->fParams.noDictIDFlag = !value; + return 0; +} + +static U32 badParameters(ZSTD_CCtx* zc, ZSTD_parameters const savedParams) +{ + ZSTD_parameters params; + if (ZSTD_isError(getCCtxParams(zc, ¶ms))) return 10; + CHECK_RET(1, params.cParams.windowLog != savedParams.cParams.windowLog, "windowLog"); + CHECK_RET(2, params.cParams.hashLog != savedParams.cParams.hashLog, "hashLog"); + CHECK_RET(3, params.cParams.chainLog != savedParams.cParams.chainLog, "chainLog"); + CHECK_RET(4, params.cParams.searchLog != savedParams.cParams.searchLog, "searchLog"); + CHECK_RET(5, params.cParams.searchLength != savedParams.cParams.searchLength, "searchLength"); + CHECK_RET(6, params.cParams.targetLength != savedParams.cParams.targetLength, "targetLength"); + + CHECK_RET(7, params.fParams.checksumFlag != savedParams.fParams.checksumFlag, "checksumFlag"); + CHECK_RET(8, params.fParams.contentSizeFlag != savedParams.fParams.contentSizeFlag, "contentSizeFlag"); + CHECK_RET(9, params.fParams.noDictIDFlag != savedParams.fParams.noDictIDFlag, "noDictIDFlag"); + return 0; +} + static int basicUnitTests(U32 seed, double compressibility) { size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH; @@ -606,6 +656,8 @@ static int basicUnitTests(U32 seed, double compressibility) for (size = 512; size <= maxSize; size <<= 1) { U64 const crcOrig = XXH64(CNBuffer, size, 0); ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_parameters savedParams; + getCCtxParams(cctx, &savedParams); outBuff.dst = compressedBuffer; outBuff.size = compressedBufferSize; outBuff.pos = 0; @@ -614,6 +666,7 @@ static int basicUnitTests(U32 seed, double compressibility) inBuff.pos = 0; CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); CHECK_Z(ZSTD_compress_generic(cctx, &outBuff, &inBuff, ZSTD_e_end)); + CHECK(badParameters(cctx, savedParams), "Bad CCtx params"); if (inBuff.pos != inBuff.size) goto _output_error; { ZSTD_outBuffer decOut = {decodedBuffer, size, 0}; ZSTD_inBuffer decIn = {outBuff.dst, outBuff.pos, 0}; @@ -1575,6 +1628,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double U64 crcOrig; U32 resetAllowed = 1; size_t maxTestSize; + ZSTD_parameters savedParams; /* init */ if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } @@ -1737,6 +1791,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double } } } + CHECK_Z(getCCtxParams(zc, &savedParams)); + /* multi-segments compression test */ XXH64_reset(&xxhState, 0); { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; @@ -1779,6 +1835,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize); } + CHECK(badParameters(zc, savedParams), "CCtx params are wrong"); + /* multi - fragments decompression test */ if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { DISPLAYLEVEL(5, "resetting DCtx (dict:%08X) \n", (U32)(size_t)dict);