From 1c8e1942974b3440a4f34da24d679d0406afb17e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Jan 2016 16:31:22 +0100 Subject: [PATCH] modified streaming compression API --- lib/zstd_buffered.c | 11 +++++------ lib/zstd_buffered.h | 18 ++++++++++-------- lib/zstd_buffered_static.h | 4 ++-- lib/zstd_compress.c | 39 ++++++++++++++++---------------------- lib/zstd_static.h | 28 ++++++++------------------- programs/bench.c | 3 +-- programs/fileio.c | 13 ++++--------- programs/fuzzer.c | 14 ++++---------- programs/zbufftest.c | 6 ++---- 9 files changed, 52 insertions(+), 84 deletions(-) diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index aab83e60..dc7b5749 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -119,7 +119,7 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) #define MIN(a,b) ( ((a)<(b)) ? (a) : (b) ) #define BLOCKSIZE (128 * 1024) /* a bit too "magic", should come from reference */ -size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params) +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, ZSTD_parameters params) { size_t neededInBuffSize; @@ -143,7 +143,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params) if (zbc->outBuff == NULL) return ERROR(memory_allocation); } - zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, params); + zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params); if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize; zbc->inToCompress = 0; @@ -156,14 +156,13 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params) size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) { - return ZBUFF_compressInit_advanced(zbc, ZSTD_getParams(compressionLevel, 0)); + return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); } -ZSTDLIB_API size_t ZBUFF_compressWithDictionary(ZBUFF_CCtx* zbc, const void* src, size_t srcSize) 
+ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compress_insertDictionary(zbc->zc, src, srcSize); - return 0; + return ZBUFF_compressInit_advanced(zbc, dict, dictSize, ZSTD_getParams(compressionLevel, 0)); } diff --git a/lib/zstd_buffered.h b/lib/zstd_buffered.h index 63101a10..1e8830b1 100644 --- a/lib/zstd_buffered.h +++ b/lib/zstd_buffered.h @@ -69,7 +69,8 @@ ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void); ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZBUFF_compressWithDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr); ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDstSizePtr); @@ -79,11 +80,11 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst * * A ZBUFF_CCtx object is required to track streaming operation. * Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. -* Use ZBUFF_compressInit() to start a new compression operation. * ZBUFF_CCtx objects can be reused multiple times. * -* Optionally, a reference to a static dictionary can be created with ZBUFF_compressWithDictionary() -* Note that the dictionary content must remain accessible during the compression process. +* Start by initializing ZBUFF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. * * Use ZBUFF_compressContinue() repetitively to consume input stream. 
* *srcSizePtr and *maxDstSizePtr can be any size. @@ -93,9 +94,10 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) * or an error code, which can be tested using ZBUFF_isError(). * -* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. -* Note that it will not output more than *maxDstSizePtr. -* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). +* The nb of bytes written into `dst` will be reported into *maxDstSizePtr. +* Note that the function cannot output more than the size of `dst` buffer (initial value of *maxDstSizePtr). +* Therefore, some content might still be left into internal buffer if dst buffer is too small. * @return : nb of bytes still present into internal buffer (0 if it's empty) * or an error code, which can be tested using ZBUFF_isError(). * @@ -108,7 +110,7 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* maxDst * or an error code, which can be tested using ZBUFF_isError(). * * Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize -* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value. +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering). * output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. * By using both, you ensure that input will be entirely consumed, and output will always contain the result. 
* **************************************************/ diff --git a/lib/zstd_buffered_static.h b/lib/zstd_buffered_static.h index 7d9ee27d..5052f4c3 100644 --- a/lib/zstd_buffered_static.h +++ b/lib/zstd_buffered_static.h @@ -45,14 +45,14 @@ extern "C" { /* ************************************* * Includes ***************************************/ -#include "zstd_static.h" +#include "zstd_static.h" /* ZSTD_parameters */ #include "zstd_buffered.h" /* ************************************* * Advanced Streaming functions ***************************************/ -ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, ZSTD_parameters params); +ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params); #if defined (__cplusplus) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 4124e4f2..8ebd58be 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2167,7 +2167,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t return hufHeaderSize; } -size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) { if (dict && dictSize) { @@ -2186,22 +2186,23 @@ size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t di /*! 
ZSTD_compressBegin_advanced * @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, + const void* dict, size_t dictSize, ZSTD_parameters params) { size_t errorCode; ZSTD_validateParams(&params); - errorCode = ZSTD_resetCCtx_advanced(ctx, params); + errorCode = ZSTD_resetCCtx_advanced(zc, params); if (ZSTD_isError(errorCode)) return errorCode; - MEM_writeLE32(ctx->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */ - ((BYTE*)ctx->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); - ctx->hbSize = ZSTD_frameHeaderSize_min; - ctx->stage = 0; + MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */ + ((BYTE*)zc->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); + zc->hbSize = ZSTD_frameHeaderSize_min; + zc->stage = 0; - return 0; + return ZSTD_compress_insertDictionary(zc, dict, dictSize); } @@ -2219,15 +2220,14 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint) return result; } -/* to do -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, int compressionLevel) +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) { - return 0; -}*/ + return ZSTD_compressBegin_advanced(zc, dict, dictSize, ZSTD_getParams(compressionLevel, 0)); +} -size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, int compressionLevel) +size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) { - return ZSTD_compressBegin_advanced(ctx, ZSTD_getParams(compressionLevel, 0)); + return ZSTD_compressBegin_advanced(zc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); } @@ -2269,17 +2269,10 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, BYTE* op = ostart; size_t oSize; - /* Header */ - oSize = ZSTD_compressBegin_advanced(ctx, params); + /* Init */ + oSize = ZSTD_compressBegin_advanced(ctx, dict, dictSize, params); if(ZSTD_isError(oSize)) return oSize; - /* 
dictionary */ - if (dict) - { - oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize); - if (ZSTD_isError(oSize)) return oSize; - } - /* body (compression) */ oSize = ZSTD_compressContinue (ctx, op, maxDstSize, src, srcSize); if(ZSTD_isError(oSize)) return oSize; diff --git a/lib/zstd_static.h b/lib/zstd_static.h index fd4131ae..b985ce1e 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -132,12 +132,8 @@ ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx( ****************************************/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, int compressionLevel); -//ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params); - -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, ZSTD_parameters params); - -ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params); +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx); ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); @@ -149,18 +145,10 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. ZSTD_CCtx object can be re-used multiple times within successive compression operations. - First operation is to start a new frame. - Use ZSTD_compressBegin(). - You may also prefer the advanced derivative ZSTD_compressBegin_advanced(), for finer parameter control. 
- - It's then possible to add a dictionary with ZSTD_compress_insertDictionary() - Note that dictionary presence is a "hidden" information, - the decoder needs to be aware that it is required for proper decoding, or decoding will fail. - - If you want to compress a lot of messages using same dictionary, - it can be beneficial to duplicate compression context rather than reloading dictionary each time. - In such case, use ZSTD_duplicateCCtx(), which will need an already created ZSTD_CCtx, - in order to duplicate compression context into it. + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). The interface is synchronous, so all input will be consumed and produce a compressed output. @@ -168,7 +156,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz Worst case evaluation is provided by ZSTD_compressBound(). Finish a frame with ZSTD_compressEnd(), which will write the epilogue. - Without it, the frame will be considered incomplete by decoders. + Without the epilogue, frames will be considered incomplete by decoder. You can then reuse ZSTD_CCtx to compress some new frame. 
*/ @@ -176,7 +164,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx); +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize); diff --git a/programs/bench.c b/programs/bench.c index f5b6f8d5..1f35301f 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -295,8 +295,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, milliTime = BMK_GetMilliStart(); while (BMK_GetMilliSpan(milliTime) < TIMELOOP) { - ZSTD_compressBegin_advanced(refCtx, ZSTD_getParams(cLevel, dictBufferSize+largestBlockSize)); - ZSTD_compress_insertDictionary(refCtx, dictBuffer, dictBufferSize); + ZSTD_compressBegin_advanced(refCtx, dictBuffer, dictBufferSize, ZSTD_getParams(cLevel, dictBufferSize+largestBlockSize)); for (blockNb=0; blockNb>20)); diff --git a/programs/fuzzer.c b/programs/fuzzer.c index b72f7e57..93a029ec 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -203,9 +203,7 @@ static int basicUnitTests(U32 seed, double compressibility) size_t cSizeOrig; DISPLAYLEVEL(4, "test%3i : load dictionary into context : ", testNb++); - result = ZSTD_compressBegin(ctxOrig, 2); - if (ZSTD_isError(result)) goto _output_error; - result = ZSTD_compress_insertDictionary(ctxOrig, CNBuffer, dictSize); + result = ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2); if (ZSTD_isError(result)) goto _output_error; result = ZSTD_copyCCtx(ctxDuplicated, ctxOrig); if (ZSTD_isError(result)) goto _output_error; @@ -293,9 +291,7 @@ static int basicUnitTests(U32 seed, double compressibility) /* dictionary block compression */ DISPLAYLEVEL(4, "test%3i : Dictionary Block compression test : ", 
testNb++); - result = ZSTD_compressBegin(cctx, 5); - if (ZSTD_isError(result)) goto _output_error; - result = ZSTD_compress_insertDictionary(cctx, CNBuffer, dictSize); + result = ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5); if (ZSTD_isError(result)) goto _output_error; cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize); if (ZSTD_isError(cSize)) goto _output_error; @@ -569,10 +565,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit dict = srcBuffer + sampleStart; dictSize = sampleSize; - errorCode = ZSTD_compressBegin(refCtx, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); - CHECK (ZSTD_isError(errorCode), "start streaming error : %s", ZSTD_getErrorName(errorCode)); - errorCode = ZSTD_compress_insertDictionary(refCtx, dict, dictSize); - CHECK (ZSTD_isError(errorCode), "dictionary insertion error : %s", ZSTD_getErrorName(errorCode)); + errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); + CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); errorCode = ZSTD_copyCCtx(ctx, refCtx); CHECK (ZSTD_isError(errorCode), "context duplication error : %s", ZSTD_getErrorName(errorCode)); totalTestSize = 0; cSize = 0; diff --git a/programs/zbufftest.c b/programs/zbufftest.c index 4c1b7ba4..a8257dfc 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -158,10 +158,9 @@ static int basicUnitTests(U32 seed, double compressibility) /* Basic compression test */ DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); - ZBUFF_compressInit(zc, 1); + ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1); readSize = CNBufferSize; genSize = compressedBufferSize; - ZBUFF_compressWithDictionary(zc, CNBuffer, 128 KB); result = ZBUFF_compressContinue(zc, compressedBuffer, &genSize, CNBuffer, &readSize); if 
(ZBUFF_isError(result)) goto _output_error; if (readSize != CNBufferSize) goto _output_error; /* entire input should be consumed */ @@ -317,7 +316,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog; maxTestSize = (size_t)1 << sampleSizeLog; maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1); - ZBUFF_compressInit(zc, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog; sampleSize = (size_t)1 << sampleSizeLog; @@ -325,7 +323,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize); dict = srcBuffer + sampleStart; dictSize = sampleSize; - ZBUFF_compressWithDictionary(zc, dict, dictSize); + ZBUFF_compressInitDictionary(zc, dict, dictSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); totalTestSize = 0; cSize = 0;