From f97c2dbd395e64bb4d58948622f15961181307c0 Mon Sep 17 00:00:00 2001
From: Yann Collet These functions give the current memory usage of selected object.
- Object memory usage can evolve if it's re-used multiple times.
+ Object memory usage can evolve when re-used multiple times.
Introduction
@@ -399,7 +399,7 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
size_t ZSTD_estimateCCtxSize(int compressionLevel);
@@ -646,12 +646,12 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
@return : 0, or an error code (which can be tested using ZSTD_isError())
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; -ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +Advanced Streaming decompression functions
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);/**< same as ZSTD_initStaticDCtx() */ +typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); -size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ -size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -783,8 +783,29 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
typedef enum { + ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder will not be able to recognise this format, requiring instructions. */ + ZSTD_f_zstd1_headerless, /* Variant of zstd frame format, without any frame header; + * Other metadata, like block size or frame checksum, are still generated. + * Useful to save between 6 and ZSTD_frameHeaderSize_max bytes per generated frame. + * However, required decoding parameters will have to be saved or known by some mechanism. + * Decoder will not be able to recognise this format, requiring instructions and parameters. */ + ZSTD_f_zstd1_block /* Generate a zstd compressed block, without any metadata. + * Note that size of block content must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. + * See ZSTD_compressBlock() for more details. + * Resulting compressed block can be decoded with ZSTD_decompressBlock(). */ +} ZSTD_format_e; +
typedef enum { + /* compression format */ + ZSTD_p_format = 10, /* See ZSTD_format_e enum definition. + * Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */ + /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table * Default level is ZSTD_CLEVEL_DEFAULT==3. @@ -949,7 +970,7 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_ttypedef enum { ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */ ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */ - ZSTD_e_end /* flush any remaining data and ends current frame. Any future compression starts a new frame. */ + ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */ } ZSTD_EndDirective;
size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, @@ -959,8 +980,8 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_tBehave about the same as ZSTD_compressStream. To note : - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter() - Compression parameters cannot be changed once compression is started. - - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize - - *dspPos and *srcPos will be updated. They are guaranteed to remain below their respective limit. + - output->pos must be <= dstCapacity, input->pos must be <= srcSize + - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - @return provides the minimum amount of data still to flush from internal buffers or an error code, which can be tested using ZSTD_isError(). if @return != 0, flush is not fully completed, there is some data left within internal buffers. @@ -976,6 +997,7 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t Useful after an error, or to interrupt an ongoing compression job and start a new one. Any internal data not yet flushed is cancelled. Dictionary (if any) is dropped. + All parameters are back to default values. It's possible to modify compression parameters after a reset.
@@ -987,26 +1009,30 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t ZSTD_EndDirective endOp);Same as ZSTD_compress_generic(), but using only integral types as arguments. - Argument list is larger and less expressive than ZSTD_{in,out}Buffer, + Argument list is larger than ZSTD_{in,out}Buffer, but can be helpful for binders from dynamic languages which have troubles handling structures containing memory pointers.
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void); ++Quick howto : - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - - ZSTD_CCtxParam_setParameter() : Push parameters one by one into an - existing ZSTD_CCtx_params structure. This is similar to - ZSTD_CCtx_setParameter(). - - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to an existing CCtx. These - parameters will be applied to all subsequent compression jobs. + - ZSTD_CCtxParam_setParameter() : Push parameters one by one into + an existing ZSTD_CCtx_params structure. + This is similar to + ZSTD_CCtx_setParameter(). + - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + an existing CCtx. + These parameters will be applied to + all subsequent compression jobs. - ZSTD_compress_generic() : Do compression using the CCtx. - ZSTD_freeCCtxParams() : Free the memory. - This can be used with ZSTD_estimateCCtxSize_opaque() for static allocation - for single-threaded compression. + This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + for static allocation for single-threaded compression. -
size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params);Reset params to default, with the default compression level. @@ -1030,22 +1056,84 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t Set one compression parameter, selected by enum ZSTD_cParameter. Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). Note : when `value` is an enum, cast it to unsigned for proper type checking. - @result : 0, or an error code (which can be tested with ZSTD_isError()). + @result : 0, or an error code (which can be tested with ZSTD_isError()).
size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -Apply a set of ZSTD_CCtx_params to the compression context. - This must be done before the dictionary is loaded. - The pledgedSrcSize is treated as unknown. - Multithreading parameters are applied only if nbThreads > 1. +
Apply a set of ZSTD_CCtx_params to the compression context. + This must be done before the dictionary is loaded. + The pledgedSrcSize is treated as unknown. + Multithreading parameters are applied only if nbThreads > 1.
- Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). +Advanced parameters for decompression API
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); +Create an internal DDict from dict buffer, + to be used to decompress next frames. + @result : 0, or an error code (which can be tested with ZSTD_isError()). + Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + meaning "return to no-dictionary mode". + Note 1 : `dict` content will be copied internally. + Use ZSTD_DCtx_loadDictionary_byReference() + to reference dictionary content instead. + In which case, the dictionary buffer must outlive its users. + Note 2 : Loading a dictionary involves building tables, + which has a non-negligible impact on CPU usage and latency. + Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select + how dictionary content will be interpreted and loaded. + +
+ +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +Reference a prepared dictionary, to be used to decompress next frames. + The dictionary remains active for decompression of future frames using same DCtx. + @result : 0, or an error code (which can be tested with ZSTD_isError()). + Note 1 : Currently, only one dictionary can be managed. + Referencing a new dictionary effectively "discards" any previous one. + Special : adding a NULL DDict means "return to no-dictionary mode". + Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + +
+ +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); +Reference a prefix (single-usage dictionary) for next decompression job. + Prefix is **only used once**. It must be explicitly referenced before each frame. + If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead. + @result : 0, or an error code (which can be tested with ZSTD_isError()). + Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + Note 2 : Prefix buffer is referenced. It must outlive decompression job. + Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode. + Note 4 : Referencing a raw content prefix costs almost nothing cpu and memory wise. + +
+ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + This is useful to prevent a decoder context from reserving too much memory for itself (potential attack scenario). + This parameter is only useful in streaming mode, since no internal buffer is allocated in direct mode. + By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_MAX) + @return : 0, or an error code (which can be tested using ZSTD_isError()). + +
+ +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +Instruct the decoder context about what kind of data to decode next. + This instruction is mandatory to decode data without a fully-formed header, + such as ZSTD_f_zstd1_magicless for example. + @return : 0, or an error code (which can be tested using ZSTD_isError()). + +
+ +=== Block level API ===
+ ++Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. A few rules to respect : @@ -1055,7 +1143,7 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t + compression : any ZSTD_compressBegin*() variant, including with dictionary + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + If input is larger than a block size, it's necessary to split input data into multiple blocks + For inputs larger than a single block size, consider using the regular ZSTD_compress() instead. Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. @@ -1066,7 +1154,7 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t + In case of multiple successive blocks, should some of them be uncompressed, decoder must be informed of their existence in order to follow proper history. Use ZSTD_insertBlock() for such a case. -
size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index cd0dbcc2..8a24d42f 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -282,6 +282,7 @@ typedef struct { } ZSTD_entropyCTables_t; struct ZSTD_CCtx_params_s { + ZSTD_format_e format; ZSTD_compressionParameters cParams; ZSTD_frameParameters fParams; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c58d3044..88eb51dc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -256,6 +256,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v switch(param) { + case ZSTD_p_format : + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_p_compressionLevel: if (value == 0) return 0; /* special value : 0 means "don't change anything" */ if (cctx->cdict) return ERROR(stage_wrong); @@ -326,6 +329,12 @@ size_t ZSTD_CCtxParam_setParameter( { switch(param) { + case ZSTD_p_format : + if (value > (unsigned)ZSTD_f_zstd1_block) + return ERROR(parameter_unsupported); + params->format = (ZSTD_format_e)value; + return 0; + case ZSTD_p_compressionLevel : if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); if (value == 0) return 0; diff --git a/lib/zstd.h b/lib/zstd.h index 81158a20..655fa29d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -486,7 +486,7 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); /*! ZSTD_sizeof_*() : * These functions give the current memory usage of selected object. - * Object memory usage can evolve if it's re-used multiple times. */ + * Object memory usage can evolve when re-used multiple times. 
*/ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); @@ -747,12 +747,12 @@ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledg /*===== Advanced Streaming decompression functions =====*/ -typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ +typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ @@ -908,17 +908,17 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -/*=== New advanced API (experimental, and compression only) ===*/ +/** === New advanced API (experimental) === **/ /* notes on API design : - * In this proposal, parameters are pushed one by one into an existing CCtx, + * In this proposal, parameters are 
pushed one by one into an existing context, * and then applied on all subsequent compression jobs. * When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT. * * This API is intended to replace all others experimental API. * It can basically do all other use cases, and even new ones. - * It stands a good chance to become "stable", - * after a reasonable testing period. + * In contrast with _advanced() variants, it stands a reasonable chance to become "stable", + * after a testing period. */ /* note on naming convention : @@ -934,19 +934,25 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * All enum will be manually set to explicit values before reaching "stable API" status */ typedef enum { - ZSTD_f_zstd1, /* Normal (default) zstd frame format, as specified in zstd_compression_format.md */ - ZSTD_f_zstd1_magicLess, /* Almost zstd frame format, but without initial 4-bytes magic number */ - ZSTD_f_zstd1_headerless, /* Almost zstd frame format, but without any frame header; - * Other metadata, like block size or frame checksum, are still generated */ - ZSTD_f_zstd_block /* Pure zstd compressed block, without any metadata. - * Note that size of uncompressed block must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. - * See ZSTD_compressBlock() for more details. */ -} ZSTD_format; + ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder will not be able to recognise this format, requiring instructions. */ + ZSTD_f_zstd1_headerless, /* Variant of zstd frame format, without any frame header; + * Other metadata, like block size or frame checksum, are still generated. + * Useful to save between 6 and ZSTD_frameHeaderSize_max bytes per generated frame. 
+ * However, required decoding parameters will have to be saved or known by some mechanism. + * Decoder will not be able to recognise this format, requiring instructions and parameters. */ + ZSTD_f_zstd1_block /* Generate a zstd compressed block, without any metadata. + * Note that size of block content must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. + * See ZSTD_compressBlock() for more details. + * Resulting compressed block can be decoded with ZSTD_decompressBlock(). */ +} ZSTD_format_e; typedef enum { /* compression format */ - ZSTD_p_format = 10, /* See ZSTD_format enum definition. - * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility. */ + ZSTD_p_format = 10, /* See ZSTD_format_e enum definition. + * Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */ /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table @@ -1116,15 +1122,15 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre typedef enum { ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */ ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */ - ZSTD_e_end /* flush any remaining data and ends current frame. Any future compression starts a new frame. */ + ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */ } ZSTD_EndDirective; /*! ZSTD_compress_generic() : * Behave about the same as ZSTD_compressStream. To note : * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter() * - Compression parameters cannot be changed once compression is started. - * - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize - * - *dspPos and *srcPos will be updated. 
They are guaranteed to remain below their respective limit. + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. * - @return provides the minimum amount of data still to flush from internal buffers * or an error code, which can be tested using ZSTD_isError(). * if @return != 0, flush is not fully completed, there is some data left within internal buffers. @@ -1143,6 +1149,7 @@ ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, * Useful after an error, or to interrupt an ongoing compression job and start a new one. * Any internal data not yet flushed is cancelled. * Dictionary (if any) is dropped. + * All parameters are back to default values. * It's possible to modify compression parameters after a reset. */ ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */ @@ -1151,7 +1158,7 @@ ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */ /*! ZSTD_compress_generic_simpleArgs() : * Same as ZSTD_compress_generic(), * but using only integral types as arguments. - * Argument list is larger and less expressive than ZSTD_{in,out}Buffer, + * Argument list is larger than ZSTD_{in,out}Buffer, * but can be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. */ @@ -1162,19 +1169,22 @@ size_t ZSTD_compress_generic_simpleArgs ( ZSTD_EndDirective endOp); -/** ZSTD_CCtx_params - * +/*! ZSTD_CCtx_params : + * Quick howto : * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into an - * existing ZSTD_CCtx_params structure. This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to an existing CCtx. These - * parameters will be applied to all subsequent compression jobs. 
+ * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent compression jobs. * - ZSTD_compress_generic() : Do compression using the CCtx. * - ZSTD_freeCCtxParams() : Free the memory. * - * This can be used with ZSTD_estimateCCtxSize_opaque() for static allocation - * for single-threaded compression. + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation for single-threaded compression. */ ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); @@ -1202,22 +1212,96 @@ ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); * Set one compression parameter, selected by enum ZSTD_cParameter. * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). * Note : when `value` is an enum, cast it to unsigned for proper type checking. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This must be done before the dictionary is loaded. - * The pledgedSrcSize is treated as unknown. - * Multithreading parameters are applied only if nbThreads > 1. + * Apply a set of ZSTD_CCtx_params to the compression context. + * This must be done before the dictionary is loaded. + * The pledgedSrcSize is treated as unknown. + * Multithreading parameters are applied only if nbThreads > 1. 
*/ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -/** - Block functions +/*=== Advanced parameters for decompression API ===*/ + +/* The following parameters must be set after creating a ZSTD_DCtx* (or ZSTD_DStream*) object, + * but before starting decompression of a frame. + */ + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : `dict` content will be copied internally. + * Use ZSTD_DCtx_loadDictionary_byReference() + * to reference dictionary content instead. + * In which case, the dictionary buffer must outlive its users. + * Note 2 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select + * how dictionary content will be interpreted and loaded. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); + + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special : adding a NULL DDict means "return to no-dictionary mode". 
+ * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next decompression job. + * Prefix is **only used once**. It must be explicitly referenced before each frame. + * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It must outlive decompression job. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode. + * Note 4 : Referencing a raw content prefix costs almost nothing cpu and memory wise. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); + + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This is useful to prevent a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in direct mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_MAX) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. 
+ * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + + + +/** === Block level API === **/ + +/*! Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. From 96f0cde31a33f2abc9f782c3b802126a0d235ab3 Mon Sep 17 00:00:00 2001 From: Yann ColletDate: Sun, 24 Sep 2017 16:47:02 -0700 Subject: [PATCH 03/22] minor function rename ZSTD_estimateCStreamSize_advanced_usingCParams -> ZSTD_estimateCStreamSize_usingCParams _usingX is clear. _advanced feels redundant --- doc/zstd_manual.html | 36 +++++++++++++++---------------- examples/streaming_memory_usage.c | 4 ++-- lib/compress/zstd_compress.c | 24 ++++++++++----------- lib/zstd.h | 36 +++++++++++++++---------------- tests/paramgrill.c | 4 ++-- tests/zstreamtest.c | 10 ++++----- 6 files changed, 57 insertions(+), 57 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 64870229..1cb08328 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -403,29 +403,29 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
size_t ZSTD_estimateCCtxSize(int compressionLevel); -size_t ZSTD_estimateCCtxSize_advanced_usingCParams(ZSTD_compressionParameters cParams); -size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params); +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); size_t ZSTD_estimateDCtxSize(void);These functions make it possible to estimate memory usage of a future {D,C}Ctx, before its creation. ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. It will also consider src size to be arbitrarily "large", which is worst case. - If srcSize is known to always be small, ZSTD_estimateCCtxSize_advanced_usingCParams() can provide a tighter estimation. - ZSTD_estimateCCtxSize_advanced_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - ZSTD_estimateCCtxSize_advanced_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. + If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. Note : CCtx estimation is only correct for single-threaded compression
size_t ZSTD_estimateCStreamSize(int compressionLevel); -size_t ZSTD_estimateCStreamSize_advanced_usingCParams(ZSTD_compressionParameters cParams); -size_t ZSTD_estimateCStreamSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params); +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); size_t ZSTD_estimateDStreamSize(size_t windowSize); size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. It will also consider src size to be arbitrarily "large", which is worst case. - If srcSize is known to always be small, ZSTD_estimateCStreamSize_advanced_usingCParams() can provide a tighter estimation. - ZSTD_estimateCStreamSize_advanced_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - ZSTD_estimateCStreamSize_advanced_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. + If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. Note : CStream estimation is only correct for single-threaded compression. ZSTD_DStream memory budget depends on window Size. This information can be passed manually, using ZSTD_estimateDStreamSize, @@ -436,8 +436,8 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
typedef enum { - ZSTD_dlm_byCopy = 0, /* Copy dictionary content internally. */ - ZSTD_dlm_byRef, /* Reference dictionary content -- the dictionary buffer must outlives its users. */ + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ } ZSTD_dictLoadMethod_e;
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); @@ -656,8 +656,8 @@ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression para
This is an advanced API, giving full control over buffer management, for users which need direct control over memory. - But it's also a complex one, with many restrictions (documented below). - Prefer using normal streaming API for an easier experience + But it's also a complex one, with several restrictions, documented below. + Prefer normal streaming API for an easier experience.@@ -673,8 +673,8 @@ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression para Then, consume your input using ZSTD_compressContinue(). There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. - - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. Worst case evaluation is provided by ZSTD_compressBound(). ZSTD_compressContinue() doesn't guarantee recover after a failed compression. @@ -685,9 +685,9 @@ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression para Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames will be considered unfinished (corrupted) by decoders. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c index b709f50b..b056c2a5 100644 --- a/examples/streaming_memory_usage.c +++ b/examples/streaming_memory_usage.c @@ -85,7 +85,7 @@ int main(int argc, char const *argv[]) { return 1; } } - + size_t compressedSize; { ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 }; ZSTD_outBuffer outBuff = { compressedData, sizeof(compressedData), 0 }; @@ -133,7 +133,7 @@ int main(int argc, char const *argv[]) { size_t const cstreamSize = ZSTD_sizeof_CStream(cstream); size_t const cstreamEstimatedSize = wLog ? - ZSTD_estimateCStreamSize_advanced_usingCParams(params.cParams) : + ZSTD_estimateCStreamSize_usingCParams(params.cParams) : ZSTD_estimateCStreamSize(compressionLevel); size_t const dstreamSize = ZSTD_sizeof_DStream(dstream); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 88eb51dc..abb27961 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -673,7 +673,7 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); } -size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params) +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { /* Estimate CCtx size is supported for single-threaded compression only. 
*/ if (params->nbThreads > 1) { return ERROR(GENERIC); } @@ -710,22 +710,22 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa } } -size_t ZSTD_estimateCCtxSize_advanced_usingCParams(ZSTD_compressionParameters cParams) +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) { ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_advanced_usingCCtxParams(¶ms); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); } size_t ZSTD_estimateCCtxSize(int compressionLevel) { ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); - return ZSTD_estimateCCtxSize_advanced_usingCParams(cParams); + return ZSTD_estimateCCtxSize_usingCParams(cParams); } -size_t ZSTD_estimateCStreamSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params) +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { if (params->nbThreads > 1) { return ERROR(GENERIC); } - { size_t const CCtxSize = ZSTD_estimateCCtxSize_advanced_usingCCtxParams(params); + { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; @@ -735,15 +735,15 @@ size_t ZSTD_estimateCStreamSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* } } -size_t ZSTD_estimateCStreamSize_advanced_usingCParams(ZSTD_compressionParameters cParams) +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) { ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_advanced_usingCCtxParams(¶ms); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); } size_t ZSTD_estimateCStreamSize(int compressionLevel) { ZSTD_compressionParameters const cParams = 
ZSTD_getCParams(compressionLevel, 0, 0); - return ZSTD_estimateCStreamSize_advanced_usingCParams(cParams); + return ZSTD_estimateCStreamSize_usingCParams(cParams); } static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, @@ -2182,8 +2182,8 @@ size_t ZSTD_estimateCDictSize_advanced( { DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict)); DEBUGLOG(5, "CCtx estimate : %u", - (U32)ZSTD_estimateCCtxSize_advanced_usingCParams(cParams)); - return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_advanced_usingCParams(cParams) + (U32)ZSTD_estimateCCtxSize_usingCParams(cParams)); + return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_usingCParams(cParams) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); } @@ -2308,7 +2308,7 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams) { - size_t const cctxSize = ZSTD_estimateCCtxSize_advanced_usingCParams(cParams); + size_t const cctxSize = ZSTD_estimateCCtxSize_usingCParams(cParams); size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) + cctxSize; ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; diff --git a/lib/zstd.h b/lib/zstd.h index 655fa29d..047f905b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -499,21 +499,21 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); * of a future {D,C}Ctx, before its creation. * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCCtxSize_advanced_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_advanced_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_advanced_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). 
Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. + * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. * Note : CCtx estimation is only correct for single-threaded compression */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_advanced_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_advanced_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_advanced_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_advanced_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. 
+ * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. * Note : CStream estimation is only correct for single-threaded compression. * ZSTD_DStream memory budget depends on window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, @@ -522,14 +522,14 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); * an internal ?Dict will be created, which additional size is not estimated here. * In this case, get total size by adding ZSTD_estimate?DictSize */ ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_advanced_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); typedef enum { - ZSTD_dlm_byCopy = 0, /* Copy dictionary content internally. */ - ZSTD_dlm_byRef, /* Reference dictionary content -- the dictionary buffer must outlives its users. */ + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ } ZSTD_dictLoadMethod_e; /*! 
ZSTD_estimate?DictSize() : @@ -760,8 +760,8 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompress * Buffer-less and synchronous inner streaming functions * * This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with many restrictions (documented below). -* Prefer using normal streaming API for an easier experience +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. ********************************************************************* */ /** @@ -778,8 +778,8 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompress Then, consume your input using ZSTD_compressContinue(). There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. - - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. Worst case evaluation is provided by ZSTD_compressBound(). ZSTD_compressContinue() doesn't guarantee recover after a failed compression. @@ -790,9 +790,9 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompress Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames will be considered unfinished (corrupted) by decoders. 
+ Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. */ /*===== Buffer-less streaming compression functions =====*/ diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 1bc48f40..317ec461 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -391,8 +391,8 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); - size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize_advanced_usingCParams(params); - size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize_advanced_usingCParams(winners[cLevel].params); + size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(params); + size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize_usingCParams(winners[cLevel].params); double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index e52335da..613a879b 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -200,11 +200,11 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo /* context size functions */ DISPLAYLEVEL(3, "test%3i : estimate CStream size : ", testNb++); { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictSize); - size_t const s = ZSTD_estimateCStreamSize_advanced_usingCParams(cParams) - /* uses ZSTD_initCStream_usingDict() */ - + ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); - if (ZSTD_isError(s)) goto 
_output_error; - DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); + size_t const cstreamSize = ZSTD_estimateCStreamSize_usingCParams(cParams); + size_t const cdictSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); /* uses ZSTD_initCStream_usingDict() */ + if (ZSTD_isError(cstreamSize)) goto _output_error; + if (ZSTD_isError(cdictSize)) goto _output_error; + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)(cstreamSize + cdictSize)); } DISPLAYLEVEL(3, "test%3i : check actual CStream size : ", testNb++); From 62568c9a426316a063c70e4f1eb1241eaad83fec Mon Sep 17 00:00:00 2001 From: Yann ColletDate: Mon, 25 Sep 2017 14:26:26 -0700 Subject: [PATCH 04/22] added capability to generate magic-less frames decoder not implemented yet --- lib/compress/zstd_compress.c | 12 ++- lib/decompress/zstd_decompress.c | 134 +++++++++++++++++++++++-------- lib/zstd.h | 28 +++++-- programs/Makefile | 2 +- programs/fileio.c | 2 +- tests/fuzzer.c | 39 ++++++++- tests/playTests.sh | 6 +- 7 files changed, 172 insertions(+), 51 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index abb27961..16137bb7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1690,14 +1690,18 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, U32 const fcsCode = params.fParams.contentSizeFlag ? 
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); - size_t pos; + size_t pos=0; if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); - DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", !params.fParams.noDictIDFlag, dictID, dictIDSizeCode); - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = frameHeaderDecriptionByte; pos=5; + if (params.format == ZSTD_f_zstd1) { + DEBUGLOG(4, "writing zstd magic number"); + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDecriptionByte; if (!singleSegment) op[pos++] = windowLogByte; switch(dictIDSizeCode) { diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index cd64f3bb..47627037 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -110,6 +110,7 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; U32 dictID; + ZSTD_format_e format; const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; @@ -264,32 +265,57 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) } -/** ZSTD_frameHeaderSize() : -* srcSize must be >= ZSTD_frameHeaderSize_prefix. -* @return : size of the Frame Header */ -size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { - if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); - { BYTE const fhd = ((const BYTE*)src)[4]; + size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - 4 /* magic number size */ : + ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); + assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + if (srcSize < minInputSize) return ERROR(srcSize_wrong); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; U32 const singleSegment = (fhd >> 5) & 1; U32 const fcsId = fhd >> 6; - return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] - + (singleSegment && !fcsId); + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); } } +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} -/** ZSTD_getFrameHeader() : -* decode Frame Header, or require larger `srcSize`. -* @return : 0, `zfhPtr` is correctly filled, -* >0, `srcSize` is too small, result is expected `srcSize`, -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) + +/** ZSTD_getFrameHeader_internal() : + * decode Frame Header, or require larger `srcSize`. 
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) { const BYTE* ip = (const BYTE*)src; - if (srcSize < ZSTD_frameHeaderSize_prefix) return ZSTD_frameHeaderSize_prefix; + size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - 4 /* magic number size */ : + ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); + assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + if (srcSize < minInputSize) return minInputSize; + + if (format != ZSTD_f_zstd1_magicless) if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ @@ -304,13 +330,13 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src } /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); if (srcSize < fhsize) return fhsize; zfhPtr->headerSize = (U32)fhsize; } - { BYTE const fhdByte = ip[4]; - size_t pos = 5; + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; U32 const dictIDSizeCode = fhdByte&3; U32 const checksumFlag = (fhdByte>>2)&1; U32 const singleSegment = (fhdByte>>5)&1; @@ -357,6 +383,18 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src return 0; } +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. 
+ * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_internal(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + /** ZSTD_getFrameContentSize() : * compatible with legacy mode * @return : decompressed size of the single frame pointed to be `src` if known, otherwise @@ -390,7 +428,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) unsigned long long totalDstSize = 0; while (srcSize >= ZSTD_frameHeaderSize_prefix) { - const U32 magicNumber = MEM_readLE32(src); + U32 const magicNumber = MEM_readLE32(src); if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { size_t skippableSize; @@ -422,11 +460,9 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) src = (const BYTE *)src + frameSrcSize; srcSize -= frameSrcSize; } - } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - if (srcSize) { - return ZSTD_CONTENTSIZE_ERROR; - } + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; return totalDstSize; } @@ -442,7 +478,8 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - return ret >= ZSTD_CONTENTSIZE_ERROR ? 0 : ret; + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 
0 : ret; } @@ -452,8 +489,8 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { size_t const result = ZSTD_getFrameHeader(&(dctx->fParams), src, headerSize); - if (ZSTD_isError(result)) return result; /* invalid header */ - if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ + if (ZSTD_isError(result)) return result; /* invalid header */ + if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); @@ -499,7 +536,7 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, } /*! ZSTD_decodeLiteralsBlock() : - @return : nb of bytes read from src (< srcSize ) */ + * @return : nb of bytes read from src (< srcSize ) */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { @@ -700,9 +737,9 @@ static const FSE_decode_t4 OF_defaultDTable[(1< dstCapacity) return ERROR(dstSize_tooSmall); memset(dst, byte, length); @@ -1607,6 +1644,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, #endif magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); if (magicNumber != ZSTD_MAGICNUMBER) { if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { size_t skippableSize; @@ -1716,7 +1755,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { DEBUGLOG(5, "ZSTD_decompressContinue"); /* Sanity check */ - if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* unauthorized */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */ if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) @@ -2244,14 +2283,38 @@ size_t 
ZSTD_resetDStream(ZSTD_DStream* zds) size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue) { + ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init); + if ((unsigned)zds->streamStage > (unsigned)zdss_loadHeader) + return ERROR(stage_wrong); switch(paramType) { default : return ERROR(parameter_unsupported); - case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; + case DStream_p_maxWindowSize : + DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10); + zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); + break; } return 0; } +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init); + if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader) + return ERROR(stage_wrong); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init); + if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader) + return ERROR(stage_wrong); + dctx->format = format; + return 0; +} + size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) { @@ -2276,7 +2339,7 @@ size_t ZSTD_estimateDStreamSize(size_t windowSize) return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; } -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) { U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable */ ZSTD_frameHeader zfh; @@ -2389,7 +2452,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } /* control buffer memory usage */ - DEBUGLOG(4, "Control max buffer memory usage"); + DEBUGLOG(4, "Control max buffer memory usage (max %u KB)", + (U32)(zds->maxWindowSize >> 10)); 
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); diff --git a/lib/zstd.h b/lib/zstd.h index 047f905b..5b654d73 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1247,9 +1247,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select * how dictionary content will be interpreted and loaded. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); /* not implemented */ /*! ZSTD_DCtx_refDDict() : @@ -1261,7 +1261,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void * Special : adding a NULL DDict means "return to no-dictionary mode". * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /* not implemented */ /*! ZSTD_DCtx_refPrefix() : @@ -1273,10 +1273,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); * Note 2 : Prefix buffer is referenced. It must outlive compression job. 
* Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. - * Note 4 : Referencing a raw content prefix costs almost nothing cpu and memory wise. + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); /* not implemented */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); /* not implemented */ /*! ZSTD_DCtx_setMaxWindowSize() : @@ -1295,9 +1295,21 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /* implemented, but not functional */ +/* How to decompress ? + * + * currently, use ZSTD_decompressStream(). + * We could also create a ZSTD_decompress_generic(), + * for an API experience similar to the compression one. + * It would effectively works exactly the same as ZSTD_decompressStream(). + * + * Also : to re-init a decoding context, use ZSTD_initDStream(). + * Here also, for a similar API logic, we could create ZSTD_DCtx_reset(). + * It would behave the same. 
+ */ + /** === Block level API === **/ diff --git a/programs/Makefile b/programs/Makefile index b13629df..179c1f62 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -40,7 +40,7 @@ CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -DZSTD_NEWAPI \ -DXXH_NAMESPACE=ZSTD_ # because xxhash.o already compiled with this macro from library CFLAGS ?= -O3 -DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ +DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ diff --git a/programs/fileio.c b/programs/fileio.c index 623c4f4d..8ce904fc 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1036,7 +1036,7 @@ static dRess_t FIO_createDResources(const char* dictFileName) /* Allocation */ ress.dctx = ZSTD_createDStream(); if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream"); - ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit); + CHECK( ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit) ); ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index bfa290c6..92d2f91b 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -918,6 +918,43 @@ static int basicUnitTests(U32 seed, double compressibility) ZSTD_freeCCtx(cctx); } + /* custom formats tests */ + { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + static const size_t inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ + + /* basic block compression */ + DISPLAYLEVEL(4, "test%3i : magic-less format test : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_format, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; + ZSTD_outBuffer out = { compressedBuffer, 
ZSTD_compressBound(inputSize), 0 }; + size_t const result = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + cSize = out.pos; + } + DISPLAYLEVEL(4, "OK (compress : %u -> %u bytes)\n", (U32)inputSize, (U32)cSize); + + DISPLAYLEVEL(4, "test%3i : decompress normally (should fail) : ", testNb++); + { size_t const decodeResult = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (ZSTD_getErrorCode(decodeResult) != ZSTD_error_prefix_unknown) goto _output_error; + DISPLAYLEVEL(4, "OK : %s \n", ZSTD_getErrorName(decodeResult)); + } + + DISPLAYLEVEL(4, "test%3i : decompress with magic-less instruction : ", testNb++); + CHECK( ZSTD_initDStream(dctx) ); + CHECK( ZSTD_DCtx_setFormat(dctx, ZSTD_f_zstd1_magicless) ); + { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 }; + ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 }; + size_t const result = ZSTD_decompressStream(dctx, &out, &in); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + if (out.pos != inputSize) goto _output_error; + DISPLAYLEVEL(4, "OK : regenerated %u bytes \n", (U32)out.pos); + } + + ZSTD_freeCCtx(cctx); + } + /* block API tests */ { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); static const size_t dictSize = 65 KB; @@ -961,8 +998,8 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); ZSTD_freeCCtx(cctx); - ZSTD_freeDCtx(dctx); } + ZSTD_freeDCtx(dctx); /* long rle test */ { size_t sampleSize = 0; diff --git a/tests/playTests.sh b/tests/playTests.sh index 38b7a196..819047e1 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -45,7 +45,6 @@ then fi isWindows=false -ECHO="echo -e" INTOVOID="/dev/null" case "$OS" in Windows*) @@ -66,6 +65,11 @@ case "$UNAME" in SunOS) DIFF="gdiff" ;; esac +ECHO="echo -e" +case "$UNAME" in + Darwin) ECHO="echo" ;; +esac + $ECHO "\nStarting playTests.sh isWindows=$isWindows 
ZSTD='$ZSTD'" [ -n "$ZSTD" ] || die "ZSTD variable must be defined!" From 044fb4c057c5cbfc4472b3da4aca6aed994805a4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 25 Sep 2017 15:12:09 -0700 Subject: [PATCH 05/22] implemented magic-less frame decoder --- lib/decompress/zstd_decompress.c | 62 ++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 47627037..c4d4654a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -152,7 +152,9 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { - dctx->expected = ZSTD_frameHeaderSize_prefix; + dctx->expected = (dctx->format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - 4 /* magic size */ : + ZSTD_frameHeaderSize_prefix; dctx->stage = ZSTDds_getFrameHeaderSize; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; @@ -182,6 +184,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->inBuffSize = 0; dctx->outBuffSize = 0; dctx->streamStage = zdss_init; + dctx->format = ZSTD_f_zstd1; } ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) @@ -488,7 +491,7 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { - size_t const result = ZSTD_getFrameHeader(&(dctx->fParams), src, headerSize); + size_t const result = ZSTD_getFrameHeader_internal(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) @@ -1755,33 +1758,31 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { 
DEBUGLOG(5, "ZSTD_decompressContinue"); /* Sanity check */ - if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */ if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) { case ZSTDds_getFrameHeaderSize : - if (srcSize != ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); /* unauthorized */ assert(src != NULL); - if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } - dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= 4); /* to read skippable magic number */ + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* magic number + skippable frame length */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { - dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - } - dctx->expected = 0; /* not necessary to copy more */ - /* fall-through */ + memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + case ZSTDds_decodeFrameHeader: assert(src != NULL); - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, 
src, dctx->expected); + memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; @@ -1813,6 +1814,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } return 0; } + case ZSTDds_decompressLastBlock: case ZSTDds_decompressBlock: DEBUGLOG(5, "case ZSTDds_decompressBlock"); @@ -1858,29 +1860,34 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } return rSize; } + case ZSTDds_checkChecksum: - DEBUGLOG(4, "case ZSTDds_checkChecksum"); assert(srcSize == 4); /* guaranteed by dctx->expected */ { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "calculated %08X :: %08X read", h32, check32); + DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32); if (check32 != h32) return ERROR(checksum_wrong); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; } + case ZSTDds_decodeSkippableHeader: - { assert(src != NULL); - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); + { size_t const skippableFrameHeaderSize = 8; + assert(src != NULL); + assert(srcSize <= skippableFrameHeaderSize); + memcpy(dctx->headerBuffer + (skippableFrameHeaderSize - srcSize), src, srcSize); + dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); /* note : expect can grow seriously large, beyond buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; } + case ZSTDds_skipFrame: { dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; } + default: return ERROR(GENERIC); /* impossible */ } @@ -2308,6 +2315,7 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { + DEBUGLOG(4, "ZSTD_DCtx_setFormat : 
%u", (unsigned)format); ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init); if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader) return ERROR(stage_wrong); @@ -2390,7 +2398,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB /* fall-through */ case zdss_loadHeader : - { size_t const hSize = ZSTD_getFrameHeader(&zds->fParams, zds->headerBuffer, zds->lhSize); + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); + { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + DEBUGLOG(5, "header size : %u", (U32)hSize); if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); From b8d4a3887fc12f4fcc769d6ce116e716e792139c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 25 Sep 2017 15:25:07 -0700 Subject: [PATCH 06/22] introduced constant ZSTD_frameIdSize within zstd_internal.h This is the size of magic number. Avoids using `4` directly in source code, which is a bit less meaningful. 
--- lib/common/zstd_internal.h | 2 ++ lib/decompress/zstd_decompress.c | 54 +++++++++++++++----------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 8a24d42f..69469872 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -105,6 +105,8 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; +static const size_t ZSTD_frameIdSize = 4; /* magic number */ + #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index c4d4654a..d2b85a4a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -153,7 +153,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { dctx->expected = (dctx->format==ZSTD_f_zstd1_magicless) ? - ZSTD_frameHeaderSize_prefix - 4 /* magic size */ : + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; dctx->stage = ZSTDds_getFrameHeaderSize; dctx->decodedSize = 0; @@ -256,7 +256,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) * Note 3 : Skippable Frame Identifiers are considered valid. 
*/ unsigned ZSTD_isFrame(const void* buffer, size_t size) { - if (size < 4) return 0; + if (size < ZSTD_frameIdSize) return 0; { U32 const magic = MEM_readLE32(buffer); if (magic == ZSTD_MAGICNUMBER) return 1; if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; @@ -276,8 +276,9 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? - ZSTD_frameHeaderSize_prefix - 4 /* magic number size */ : + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT(ZSTD_frameHeaderSize_prefix >= ZSTD_frameIdSize); ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ if (srcSize < minInputSize) return ERROR(srcSize_wrong); @@ -311,7 +312,7 @@ static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* { const BYTE* ip = (const BYTE*)src; size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? 
- ZSTD_frameHeaderSize_prefix - 4 /* magic number size */ : + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); @@ -325,7 +326,7 @@ static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + frame length */ memset(zfhPtr, 0, sizeof(*zfhPtr)); - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + 4); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize); zfhPtr->frameType = ZSTD_skippableFrame; return 0; } @@ -437,8 +438,8 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) return ERROR(srcSize_wrong); - skippableSize = MEM_readLE32((const BYTE *)src + 4) + - ZSTD_skippableHeaderSize; + skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; if (srcSize < skippableSize) { return ZSTD_CONTENTSIZE_ERROR; } @@ -1484,7 +1485,7 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) #endif if ( (srcSize >= ZSTD_skippableHeaderSize) && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) { - return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4); + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize); } else { const BYTE* ip = (const BYTE*)src; const BYTE* const ipstart = ip; @@ -1654,8 +1655,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) return ERROR(srcSize_wrong); - skippableSize = MEM_readLE32((const BYTE *)src + 4) + - ZSTD_skippableHeaderSize; + skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize) + + ZSTD_skippableHeaderSize; if (srcSize < skippableSize) return ERROR(srcSize_wrong); src = (const BYTE *)src + skippableSize; @@ 
-1766,10 +1767,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_getFrameHeaderSize : assert(src != NULL); if (dctx->format == ZSTD_f_zstd1) { /* allows header */ - assert(srcSize >= 4); /* to read skippable magic number */ + assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */ if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* magic number + skippable frame length */ + dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */ dctx->stage = ZSTDds_decodeSkippableHeader; return 0; } } @@ -1873,20 +1874,17 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } case ZSTDds_decodeSkippableHeader: - { size_t const skippableFrameHeaderSize = 8; - assert(src != NULL); - assert(srcSize <= skippableFrameHeaderSize); - memcpy(dctx->headerBuffer + (skippableFrameHeaderSize - srcSize), src, srcSize); - dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); /* note : expect can grow seriously large, beyond buffer size */ - dctx->stage = ZSTDds_skipFrame; - return 0; - } + assert(src != NULL); + assert(srcSize <= ZSTD_skippableHeaderSize); + memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; case ZSTDds_skipFrame: - { dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; default: return ERROR(GENERIC); /* impossible */ @@ -1968,7 +1966,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict if (magic != 
ZSTD_MAGIC_DICTIONARY) { return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ } } - dctx->dictID = MEM_readLE32((const char*)dict + 4); + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize); /* load entropy tables */ { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); @@ -2048,7 +2046,7 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict) { U32 const magic = MEM_readLE32(ddict->dictContent); if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */ } - ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4); + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize); /* load entropy tables */ CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted ); @@ -2169,7 +2167,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { if (dictSize < 8) return 0; if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dict + 4); + return MEM_readLE32((const char*)dict + ZSTD_frameIdSize); } /*! 
ZSTD_getDictID_fromDDict() : @@ -2453,7 +2451,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + 4); + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize); zds->stage = ZSTDds_skipFrame; } else { CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); From 6ee05a02b836ee3388d1ade5f671805732797c7c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 25 Sep 2017 15:41:48 -0700 Subject: [PATCH 07/22] added ZSTD_decompress_generic() same as ZSTD_decompressStream(), just for a similar feeling as the compression side, which uses ZSTD_compress_generic() --- doc/zstd_manual.html | 33 +++++++++++++++++++++----------- lib/compress/zstd_compress.c | 1 + lib/decompress/zstd_decompress.c | 9 +++++++++ lib/zstd.h | 32 ++++++++++++++++++++----------- 4 files changed, 53 insertions(+), 22 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 1cb08328..eebc2efb 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -27,8 +27,8 @@ Buffer-less and synchronous inner streaming functions Buffer-less streaming compression (synchronous mode) Buffer-less streaming decompression (synchronous mode) -=== New advanced API (experimental) === -=== Block level API === +New advanced API (experimental) +Block level API
Introduction
@@ -783,7 +783,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
typedef enum { ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ @@ -1070,9 +1070,9 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */ +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */ +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); /* not implemented */Create an internal DDict from dict buffer, to be used to decompress next frames. @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -1089,7 +1089,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size
-size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /* not implemented */Reference a prepared dictionary, to be used to decompress next frames. The dictionary remains active for decompression of future frames using same DCtx. @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -1100,8 +1100,8 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size
-size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); -size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); /* not implemented */ +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); /* not implemented */Reference a prefix (single-usage dictionary) for next compression job. Prefix is **only used once**. It must be explicitly referenced before each frame. If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead. @@ -1110,7 +1110,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t Note 2 : Prefix buffer is referenced. It must outlive compression job. Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. - Note 4 : Referencing a raw content prefix costs almost nothing cpu and memory wise. + Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
@@ -1131,7 +1131,18 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
-=== Block level API ===
+size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input); +Behave the same as ZSTD_decompressStream. + Decompression parameters cannot be changed once decompression is started. + @return : an error code, which can be tested using ZSTD_isError() + if >0, a hint, nb of expected input bytes for next invocation. + `0` means : a frame has just been fully decoded and flushed. + +
+ +Block level API
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 16137bb7..c9b8b3cb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2574,6 +2574,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, /** ZSTD_compressStream_generic(): * internal function for all *compressStream*() variants and *compress_generic() + * non-static, because can be called from zstdmt.c * @return : hint size for next input */ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, ZSTD_outBuffer* output, diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index d2b85a4a..634706a5 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2379,7 +2379,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB U32 someMoreWork = 1; DEBUGLOG(5, "ZSTD_decompressStream"); + if (input->pos > input->size) return ERROR(GENERIC); /* forbidden */ + if (output->pos > output->size) return ERROR(GENERIC); /* forbidden */ DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) if (zds->legacyVersion) { /* legacy support is incompatible with static dctx */ @@ -2590,3 +2593,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB return nextSrcSizeHint; } } + + +size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + return ZSTD_decompressStream(dctx, output, input); +} diff --git a/lib/zstd.h b/lib/zstd.h index 5b654d73..cdba0028 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -908,7 +908,9 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -/** === New advanced API (experimental) 
=== **/ +/* ============================================ */ +/** New advanced API (experimental) */ +/* ============================================ */ /* notes on API design : * In this proposal, parameters are pushed one by one into an existing context, @@ -1295,23 +1297,31 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /* implemented, but not functional */ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); -/* How to decompress ? - * - * currently, use ZSTD_decompressStream(). - * We could also create a ZSTD_decompress_generic(), - * for an API experience similar to the compression one. - * It would effectively works exactly the same as ZSTD_decompressStream(). - * +/*! ZSTD_decompress_generic() : + * Behave the same as ZSTD_decompressStream. + * Decompression parameters cannot be changed once decompression is started. + * @return : an error code, which can be tested using ZSTD_isError() + * if >0, a hint, nb of expected input bytes for next invocation. + * `0` means : a frame has just been fully decoded and flushed. + */ +ZSTDLIB_API size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input); + + +/* * Also : to re-init a decoding context, use ZSTD_initDStream(). - * Here also, for a similar API logic, we could create ZSTD_DCtx_reset(). + * Here for a similar API logic, we could create ZSTD_DCtx_reset(). * It would behave the same. */ -/** === Block level API === **/ +/* ============================ */ +/** Block level API */ +/* ============================ */ /*! Block functions produce and decode raw zstd blocks, without frame metadata. From f2a913862cd8b4176967967863c05ef2d28e0820 Mon Sep 17 00:00:00 2001 From: Yann Collet
Date: Mon, 25 Sep 2017 15:44:48 -0700 Subject: [PATCH 08/22] added ZSTD_decompress_generic_simpleArgs() --- doc/zstd_manual.html | 12 ++++++++++++ lib/decompress/zstd_decompress.c | 14 ++++++++++++++ lib/zstd.h | 15 ++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index eebc2efb..0847e64a 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1142,6 +1142,18 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
+size_t ZSTD_decompress_generic_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); +Same as ZSTD_decompress_generic(), + but using only integral types as arguments. + Argument list is larger than ZSTD_{in,out}Buffer, + but can be helpful for binders from dynamic languages + which have troubles handling structures containing memory pointers. + +
+Block level API
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 634706a5..78542502 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2599,3 +2599,17 @@ size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inB { return ZSTD_decompressStream(dctx, output, input); } + +size_t ZSTD_decompress_generic_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompress_generic(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} diff --git a/lib/zstd.h b/lib/zstd.h index cdba0028..494000e7 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1164,7 +1164,7 @@ ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */ * but can be helpful for binders from dynamic languages * which have troubles handling structures containing memory pointers. */ -size_t ZSTD_compress_generic_simpleArgs ( +ZSTDLIB_API size_t ZSTD_compress_generic_simpleArgs ( ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, size_t* dstPos, const void* src, size_t srcSize, size_t* srcPos, @@ -1312,6 +1312,19 @@ ZSTDLIB_API size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_inBuffer* input); +/*! ZSTD_decompress_generic_simpleArgs() : + * Same as ZSTD_decompress_generic(), + * but using only integral types as arguments. + * Argument list is larger than ZSTD_{in,out}Buffer, + * but can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. 
+ */ +ZSTDLIB_API size_t ZSTD_decompress_generic_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + /* * Also : to re-init a decoding context, use ZSTD_initDStream(). * Here for a similar API logic, we could create ZSTD_DCtx_reset(). From 52a1d1c6dc2b1ff1f9f732fec391483bf9e190fe Mon Sep 17 00:00:00 2001 From: Yann Collet
Date: Mon, 25 Sep 2017 16:21:17 -0700 Subject: [PATCH 09/22] added ZSTD_DCtx_reset() --- doc/zstd_manual.html | 11 ++++++++++- lib/decompress/zstd_decompress.c | 11 ++++++++++- lib/zstd.h | 34 +++++++++++++++++++++++--------- tests/fuzzer.c | 4 ++-- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 0847e64a..15b1afea 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -649,7 +649,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* Advanced Streaming decompression functions
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);/**< same as ZSTD_initStaticDCtx() */ typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; -size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); +size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ @@ -1154,6 +1154,15 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
+void ZSTD_DCtx_reset(ZSTD_DCtx* dctx); /* Not ready yet ! */ +Return a DCtx to clean state. + If a decompression was ongoing, any internal data not yet flushed is cancelled. + All parameters are back to default values, including sticky ones. + Dictionary (if any) is dropped. + Parameters can be modified again after a reset. + +
+Block level API
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 78542502..b9ccc516 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2262,13 +2262,15 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di return ZSTD_frameHeaderSize_prefix; } +/* note : this variant can't fail */ size_t ZSTD_initDStream(ZSTD_DStream* zds) { return ZSTD_initDStream_usingDict(zds, NULL, 0); } /* ZSTD_initDStream_usingDDict() : - * ddict will just be referenced, and must outlive decompression session */ + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict) { size_t const initResult = ZSTD_initDStream(zds); @@ -2613,3 +2615,10 @@ size_t ZSTD_decompress_generic_simpleArgs ( *srcPos = input.pos; return cErr; } + +void ZSTD_DCtx_reset(ZSTD_DCtx* dctx) +{ + (void)ZSTD_initDStream(dctx); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; +} diff --git a/lib/zstd.h b/lib/zstd.h index 494000e7..ca60088c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -750,7 +750,7 @@ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledg ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; -ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); +ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */ ZSTDLIB_API size_t 
ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ @@ -920,7 +920,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * This API is intended to replace all others experimental API. * It can basically do all other use cases, and even new ones. * In constrast with _advanced() variants, it stands a reasonable chance to become "stable", - * after a testing period. + * after a good testing period. */ /* note on naming convention : @@ -930,22 +930,34 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * It feels clearer in light of potential variants : * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter()) * ZSTD_CCtxParams_setParameter() (rather than ZSTD_setCCtxParamsParameter() ) + * etc... */ /* note on enum design : - * All enum will be manually set to explicit values before reaching "stable API" status */ + * All enum will be pinned to explicit values before reaching "stable API" status */ typedef enum { + /* should we have a ZSTD_f_auto ? + * for the time being, it would mean exactly the same as ZSTD_f_zstd1. + * But, in the future, if several formats are supported, + * on the compression side, it would mean "default format", + * and on the decompression side, it would mean "multi format" + * while ZSTD_f_zstd1 could be reserved to mean "accept only zstd frames". + * Another option could be to define different enums for compression and decompression. 
+ * This question could also be kept for later, but there is also the question of pinning the enum value, + * and pinning the value `0` is especially important */ ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number. * Useful to save 4 bytes per generated frame. * Decoder will not be able to recognise this format, requiring instructions. */ - ZSTD_f_zstd1_headerless, /* Variant of zstd frame format, without any frame header; + ZSTD_f_zstd1_headerless, /* Not Implemented Yet ! Complex decoder setting ! Might be removed before release */ + /* Variant of zstd frame format, without any frame header; * Other metadata, like block size or frame checksum, are still generated. * Useful to save between 6 and ZSTD_frameHeaderSize_max bytes per generated frame. * However, required decoding parameters will have to be saved or known by some mechanism. * Decoder will not be able to recognise this format, requiring instructions and parameters. */ - ZSTD_f_zstd1_block /* Generate a zstd compressed block, without any metadata. + ZSTD_f_zstd1_block /* Not Implemented Yet ! Might be removed before release */ + /* Generate a zstd compressed block, without any metadata. * Note that size of block content must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. * See ZSTD_compressBlock() for more details. * Resulting compressed block can be decoded with ZSTD_decompressBlock(). */ @@ -1325,11 +1337,15 @@ ZSTDLIB_API size_t ZSTD_decompress_generic_simpleArgs ( const void* src, size_t srcSize, size_t* srcPos); -/* - * Also : to re-init a decoding context, use ZSTD_initDStream(). - * Here for a similar API logic, we could create ZSTD_DCtx_reset(). - * It would behave the same. +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * If a decompression was ongoing, any internal data not yet flushed is cancelled. 
+ * All parameters are back to default values, including sticky ones. + * Dictionary (if any) is dropped. + * Parameters can be modified again after a reset. */ +ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx); + /* ============================ */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 92d2f91b..a341b598 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -941,11 +941,11 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(4, "test%3i : decompress with magic-less instruction : ", testNb++); - CHECK( ZSTD_initDStream(dctx) ); + ZSTD_DCtx_reset(dctx); CHECK( ZSTD_DCtx_setFormat(dctx, ZSTD_f_zstd1_magicless) ); { ZSTD_inBuffer in = { compressedBuffer, cSize, 0 }; ZSTD_outBuffer out = { decodedBuffer, CNBuffSize, 0 }; - size_t const result = ZSTD_decompressStream(dctx, &out, &in); + size_t const result = ZSTD_decompress_generic(dctx, &out, &in); if (result != 0) goto _output_error; if (in.pos != in.size) goto _output_error; if (out.pos != inputSize) goto _output_error; From df4e9bba250e8e506594c8af277b7bc4775168ad Mon Sep 17 00:00:00 2001 From: Yann Collet
Date: Tue, 26 Sep 2017 14:31:06 -0700 Subject: [PATCH 10/22] fixed constant errors for gcc in c99 mode C standard does not consider a `static const int` as a constant. This is a problem for initializer, and ZSTD_STATIC_ASSERT(). Replaced by macro values --- lib/common/zstd_internal.h | 3 ++- lib/decompress/zstd_decompress.c | 2 +- lib/zstd.h | 9 +++++---- tests/fuzzer.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 0e34dc4c..614bd1ea 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -105,7 +105,8 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -static const size_t ZSTD_frameIdSize = 4; /* magic number */ +#define ZSTD_FRAMEIDSIZE 4 +static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 7b26a733..ffb54275 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -278,7 +278,7 @@ static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZST size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? 
ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; - ZSTD_STATIC_ASSERT(ZSTD_frameHeaderSize_prefix >= ZSTD_frameIdSize); + ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ if (srcSize < minInputSize) return ERROR(srcSize_wrong); diff --git a/lib/zstd.h b/lib/zstd.h index ca60088c..3ec9af59 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -395,11 +395,12 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_LDM_MINMATCH_MAX 4096 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8 -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ -#define ZSTD_FRAMEHEADERSIZE_MIN 6 -static const size_t ZSTD_frameHeaderSize_prefix = 5; /* minimum input size to know frame header size */ -static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; +#define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN 6 +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +static const size_t ZSTD_frameHeaderSize_prefix = ZSTD_FRAMEHEADERSIZE_PREFIX; static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; +static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index a341b598..e77aab36 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -920,7 +920,7 @@ static int basicUnitTests(U32 seed, double compressibility) /* custom formats tests */ { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - static const size_t inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ + size_t const inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ /* 
basic block compression */ DISPLAYLEVEL(4, "test%3i : magic-less format test : ", testNb++); From 8d1e97ea9cf9edb24c65efaa3be74531f376565c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Sep 2017 15:06:30 -0700 Subject: [PATCH 11/22] minor fixes following @terrelln comments --- lib/decompress/zstd_decompress.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index ffb54275..331ba4c6 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -279,8 +279,8 @@ static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZST ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); - ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); - assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); if (srcSize < minInputSize) return ERROR(srcSize_wrong); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; @@ -315,12 +315,12 @@ static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; - ZSTD_STATIC_ASSERT((unsigned)ZSTD_f_zstd1 < (unsigned)ZSTD_f_zstd1_magicless); - assert((unsigned)format <= ZSTD_f_zstd1_magicless); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); if (srcSize < minInputSize) return minInputSize; - if (format != ZSTD_f_zstd1_magicless) - if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { + if ( (format != ZSTD_f_zstd1_magicless) + && 
(MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ if (srcSize < ZSTD_skippableHeaderSize) From 319c699991f08702a59e3e5f6c70753de0fb887d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Sep 2017 15:36:14 -0700 Subject: [PATCH 12/22] created ZSTD_startingInputLength() as suggested by @terrelln --- lib/decompress/zstd_decompress.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 331ba4c6..84192b58 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -150,11 +150,19 @@ size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? + ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : + ZSTD_frameHeaderSize_prefix; + ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + return startingInputLength; +} + size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { - dctx->expected = (dctx->format==ZSTD_f_zstd1_magicless) ? - ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : - ZSTD_frameHeaderSize_prefix; + dctx->expected = ZSTD_startingInputLength(dctx->format); dctx->stage = ZSTDds_getFrameHeaderSize; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; @@ -267,7 +275,6 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) return 0; } - /** ZSTD_frameHeaderSize_internal() : * srcSize must be large enough to reach header size fields. 
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless @@ -275,12 +282,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) * or an error code, which can be tested with ZSTD_isError() */ static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { - size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? - ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : - ZSTD_frameHeaderSize_prefix; - ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); - /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ - assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + size_t const minInputSize = ZSTD_startingInputLength(format); if (srcSize < minInputSize) return ERROR(srcSize_wrong); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; @@ -311,9 +313,7 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) { const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = (format==ZSTD_f_zstd1_magicless) ? 
- ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : - ZSTD_frameHeaderSize_prefix; + size_t const minInputSize = ZSTD_startingInputLength(format); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); From c0dd960363d0d4c1b437b326d2461f5c4005d954 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Sep 2017 15:36:57 -0700 Subject: [PATCH 13/22] switch assert() position --- lib/decompress/zstd_decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 84192b58..6c142919 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -157,6 +157,8 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : ZSTD_frameHeaderSize_prefix; ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); return startingInputLength; } @@ -315,8 +317,6 @@ static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); - /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ - assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); if (srcSize < minInputSize) return minInputSize; if ( (format != ZSTD_f_zstd1_magicless) From 4791561c4a0303d995dddd5f4990557c4fe012a7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Sep 2017 17:57:38 -0700 Subject: [PATCH 14/22] silence minor gcc warning -Wempty-body also silence fuzz test artefacts --- lib/compress/zstdmt_compress.c | 6 ++++-- tests/fuzz/.gitignore | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 tests/fuzz/.gitignore diff --git 
a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index ecb799ab..6c91d482 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -54,7 +54,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void) #define MUTEX_WAIT_TIME_DLEVEL 6 #define PTHREAD_MUTEX_LOCK(mutex) { \ - if (ZSTD_DEBUG>=MUTEX_WAIT_TIME_DLEVEL) { \ + if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) { \ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \ pthread_mutex_lock(mutex); \ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \ @@ -63,7 +63,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void) DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ elapsedTime, #mutex); \ } } \ - } else pthread_mutex_lock(mutex); \ + } else { \ + pthread_mutex_lock(mutex); \ + } \ } #else diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore new file mode 100644 index 00000000..4ff28de9 --- /dev/null +++ b/tests/fuzz/.gitignore @@ -0,0 +1,5 @@ +# test artefacts +corpora +block_decompress +block_round_trip +simple_round_trip From cd53ac831b833790aa4b216379387ad0faa34223 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 26 Sep 2017 18:26:09 -0700 Subject: [PATCH 15/22] fixed DCtx initialization error now relying on initialization of dctx->format first --- lib/decompress/zstd_decompress.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6c142919..dc6ab3f3 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -162,8 +162,10 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } +/* Note : this function cannot fail */ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { + assert(dctx != NULL); dctx->expected = ZSTD_startingInputLength(dctx->format); dctx->stage = 
ZSTDds_getFrameHeaderSize; dctx->decodedSize = 0; @@ -174,7 +176,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; - MEM_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; dctx->MLTptr = dctx->entropy.MLTable; @@ -185,6 +187,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { + dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ ZSTD_decompressBegin(dctx); /* cannot fail */ dctx->staticSize = 0; dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; @@ -194,7 +197,19 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->inBuffSize = 0; dctx->outBuffSize = 0; dctx->streamStage = zdss_init; - dctx->format = ZSTD_f_zstd1; +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; } ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) @@ -211,19 +226,6 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) } } -ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_DCtx* dctx = (ZSTD_DCtx*) workspace; - - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ - - ZSTD_initDCtx_internal(dctx); - dctx->staticSize = workspaceSize; - dctx->inBuff = 
(char*)(dctx+1); - return dctx; -} - ZSTD_DCtx* ZSTD_createDCtx(void) { return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); From ca306c1c84df962fbfc066cdba8b6d409f20e3c2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 00:39:41 -0700 Subject: [PATCH 16/22] fixed a bug in zstreamtest decoder output buffer would receive a wrong size. In previous version, ZSTD_decompressStream() would blindly trust the caller that pos <= size. In this version, this condition is actively checked, and the function returns an error code if this condition is not respected. This check could also be done with an assert(), but since this is a user-facing interface, it seems better to keep this check at runtime. --- lib/decompress/zstd_decompress.c | 12 ++++++++++-- tests/zstreamtest.c | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index dc6ab3f3..0380f6a1 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2404,8 +2404,16 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB U32 someMoreWork = 1; DEBUGLOG(5, "ZSTD_decompressStream"); - if (input->pos > input->size) return ERROR(GENERIC); /* forbidden */ - if (output->pos > output->size) return ERROR(GENERIC); /* forbidden */ + if (input->pos > input->size) { /* forbidden */ + DEBUGLOG(5, "in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + return ERROR(GENERIC); + } + if (output->pos > output->size) { /* forbidden */ + DEBUGLOG(5, "out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + return ERROR(GENERIC); + } DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 613a879b..1f682038 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -914,7 +914,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, 
unsigned startTest, double compres size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); inBuff.size = inBuff.pos + readCSrcSize; - outBuff.size = inBuff.pos + dstBuffSize; + outBuff.size = outBuff.pos + dstBuffSize; decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (ZSTD_getErrorCode(decompressionResult) == ZSTD_error_checksum_wrong) { DISPLAY("checksum error : \n"); From bfabd1d4dc4a9dea8fd9fb2d3e50560383aedc90 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 01:01:11 -0700 Subject: [PATCH 17/22] fixed zstreamtest decoding error same error (wrong output buffer size) was present on --mt and --new_api tests. --- tests/zstreamtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 1f682038..4bb486f8 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1178,7 +1178,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); inBuff.size = inBuff.pos + readCSrcSize; - outBuff.size = inBuff.pos + dstBuffSize; + outBuff.size = outBuff.pos + dstBuffSize; DISPLAYLEVEL(5, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize); decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); @@ -1505,7 +1505,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); inBuff.size = inBuff.pos + readCSrcSize; - outBuff.size = inBuff.pos + dstBuffSize; + outBuff.size = outBuff.pos + dstBuffSize; DISPLAYLEVEL(5, 
"ZSTD_decompressStream input %u bytes (pos:%u/%u)\n", (U32)readCSrcSize, (U32)inBuff.pos, (U32)cSize); decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); From d56a350402ca8ad895195d41f4855799e82ca720 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 10:29:31 -0700 Subject: [PATCH 18/22] removed unsupported formats --- lib/zstd.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 3ec9af59..ff780269 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -928,7 +928,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * Initially, the API favored names like ZSTD_setCCtxParameter() . * In this proposal, convention is changed towards ZSTD_CCtx_setParameter() . * The main driver is that it identifies more clearly the target object type. - * It feels clearer in light of potential variants : + * It feels clearer when considering multiple targets : * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter()) * ZSTD_CCtxParams_setParameter() (rather than ZSTD_setCCtxParamsParameter() ) * etc... @@ -938,30 +938,19 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * All enum will be pinned to explicit values before reaching "stable API" status */ typedef enum { - /* should we have a ZSTD_f_auto ? - * for the time being, it would mean exactly the same as ZSTD_f_zstd1. - * But, in the future, if several formats are supported, - * on the compression side, it would mean "default format", - * and on the decompression side, it would mean "multi format" - * while ZSTD_f_zstd1 could be reserved to mean "accept only zstd frames". - * Another option could be to define different enums for compression and decompression. 
- * This question could also be kept for later, but there is also the question of pinning the enum value, - * and pinning the value `0` is especially important */ - ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ + /* Question : should we have a format ZSTD_f_auto ? + * For the time being, it would mean exactly the same as ZSTD_f_zstd1. + * But, in the future, should several formats be supported, + * on the compression side, it would mean "default format". + * On the decompression side, it would mean "multi format", + * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames". + * Since meaning is a little different, another option could be to define different enums for compression and decompression. + * This question could be kept for later, when there are actually multiple formats to support, + * but there is also the question of pinning enum values, and pinning value `0` is especially important */ + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number. * Useful to save 4 bytes per generated frame. - * Decoder will not be able to recognise this format, requiring instructions. */ - ZSTD_f_zstd1_headerless, /* Not Implemented Yet ! Complex decoder setting ! Might be removed before release */ - /* Variant of zstd frame format, without any frame header; - * Other metadata, like block size or frame checksum, are still generated. - * Useful to save between 6 and ZSTD_frameHeaderSize_max bytes per generated frame. - * However, required decoding parameters will have to be saved or known by some mechanism. - * Decoder will not be able to recognise this format, requiring instructions and parameters. */ - ZSTD_f_zstd1_block /* Not Implemented Yet ! Might be removed before release */ - /* Generate a zstd compressed block, without any metadata. 
- * Note that size of block content must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. - * See ZSTD_compressBlock() for more details. - * Resulting compressed block can be decoded with ZSTD_decompressBlock(). */ + * Decoder cannot recognise automatically this format, requiring instructions. */ } ZSTD_format_e; typedef enum { From 9416195221552d6fc8b44b899360d3d14f7401ea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 10:35:56 -0700 Subject: [PATCH 19/22] changed error code when pos<=size condition is not respected Now pointing towards src_size or dst_size, instead of error_GENERIC. --- lib/common/error_private.c | 5 +++-- lib/common/zstd_errors.h | 3 ++- lib/decompress/zstd_decompress.c | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 8045e445..11f7cdab 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -30,14 +30,15 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src 
size is incorrect"; + /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(maxCode): diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h index bde4304c..4bcb7769 100644 --- a/lib/common/zstd_errors.h +++ b/lib/common/zstd_errors.h @@ -63,9 +63,10 @@ typedef enum { ZSTD_error_memory_allocation = 64, ZSTD_error_dstSize_tooSmall = 70, ZSTD_error_srcSize_wrong = 72, + /* following error codes are not stable and may be removed or changed in a future version */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it may change in future versions! Use ZSTD_isError() instead */ + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; /*! 
ZSTD_getErrorCode() : diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 0380f6a1..332bf860 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2407,12 +2407,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (input->pos > input->size) { /* forbidden */ DEBUGLOG(5, "in: pos: %u vs size: %u", (U32)input->pos, (U32)input->size); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (output->pos > output->size) { /* forbidden */ DEBUGLOG(5, "out: pos: %u vs size: %u", (U32)output->pos, (U32)output->size); - return ERROR(GENERIC); + return ERROR(dstSize_tooSmall); } DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); From ecf1778e23edc42ac6ddeb19151005ee4a08343c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 11:19:21 -0700 Subject: [PATCH 20/22] updated ZSTD_format_e value validation also updated manual --- doc/zstd_manual.html | 24 ++++++++++++------------ lib/compress/zstd_compress.c | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 15b1afea..f1d161f3 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -786,19 +786,19 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long New advanced API (experimental)
typedef enum { - ZSTD_f_zstd1 = 0, /* Normal zstd frame format, specified in zstd_compression_format.md (default) */ + /* Question : should we have a format ZSTD_f_auto ? + * For the time being, it would mean exactly the same as ZSTD_f_zstd1. + * But, in the future, should several formats be supported, + * on the compression side, it would mean "default format". + * On the decompression side, it would mean "multi format", + * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames". + * Since meaning is a little different, another option could be to define different enums for compression and decompression. + * This question could be kept for later, when there are actually multiple formats to support, + * but there is also the question of pinning enum values, and pinning value `0` is especially important */ + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ ZSTD_f_zstd1_magicless, /* Variant of zstd frame format, without initial 4-bytes magic number. * Useful to save 4 bytes per generated frame. - * Decoder will not be able to recognise this format, requiring instructions. */ - ZSTD_f_zstd1_headerless, /* Variant of zstd frame format, without any frame header; - * Other metadata, like block size or frame checksum, are still generated. - * Useful to save between 6 and ZSTD_frameHeaderSize_max bytes per generated frame. - * However, required decoding parameters will have to be saved or known by some mechanism. - * Decoder will not be able to recognise this format, requiring instructions and parameters. */ - ZSTD_f_zstd1_block /* Generate a zstd compressed block, without any metadata. - * Note that size of block content must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB. - * See ZSTD_compressBlock() for more details. - * Resulting compressed block can be decoded with ZSTD_decompressBlock(). */ + * Decoder cannot recognise automatically this format, requiring instructions. */ } ZSTD_format_e;
typedef enum { @@ -1154,7 +1154,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
-void ZSTD_DCtx_reset(ZSTD_DCtx* dctx); /* Not ready yet ! */ +void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);Return a DCtx to clean state. If a decompression was ongoing, any internal data not yet flushed is cancelled. All parameters are back to default values, including sticky ones. diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 47def6b3..7061d1f6 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -330,7 +330,7 @@ size_t ZSTD_CCtxParam_setParameter( switch(param) { case ZSTD_p_format : - if (value > (unsigned)ZSTD_f_zstd1_block) + if (value > (unsigned)ZSTD_f_zstd1) return ERROR(parameter_unsupported); params->format = (ZSTD_format_e)value; return 0; From c9949327883968153470809d1e55b3d954b685f5 Mon Sep 17 00:00:00 2001 From: Yann Collet
Date: Wed, 27 Sep 2017 12:22:22 -0700 Subject: [PATCH 21/22] fixed ZSTD_format_e value validation --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7061d1f6..8c9d4771 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -330,7 +330,7 @@ size_t ZSTD_CCtxParam_setParameter( switch(param) { case ZSTD_p_format : - if (value > (unsigned)ZSTD_f_zstd1) + if (value > (unsigned)ZSTD_f_zstd1_magicless) return ERROR(parameter_unsupported); params->format = (ZSTD_format_e)value; return 0; From ea1f50bf73a1f49bacfd4603e1e86d5bc0b9f31f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 27 Sep 2017 13:51:05 -0700 Subject: [PATCH 22/22] removed ZSTD_decompressBegin() from ZSTD_initDCtx_internal() It does not feel "right" from a dependency perspective. ZSTD_initDCtx_internal() is triggered once, on DCtx creation, while ZSTD_decompressBegin() is invoked at the beginning of each new frame, and is also a user-facing prototype. Downside : a DCtx must be init before first usage ! This was always the intention by the way, and is documented as such. This stage is automatically done within ZSTD_decompress() and variants, and also within ZSTD_decompressStream(). Only ZSTD_decompressContinue() is impacted, it must be preceded by a ZSTD_decompressBegin(), as detailed in doc. A test has been fixed, to no longer rely on undocumented assumption that ZSTD_decompressBegin() is invoked during init. 
--- lib/decompress/zstd_decompress.c | 50 ++++++++++++++++---------------- tests/fullbench.c | 1 + 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 332bf860..960d9332 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -162,33 +162,9 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } -/* Note : this function cannot fail */ -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) -{ - assert(dctx != NULL); - dctx->expected = ZSTD_startingInputLength(dctx->format); - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->decodedSize = 0; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return 0; -} - static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ - ZSTD_decompressBegin(dctx); /* cannot fail */ dctx->staticSize = 0; dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; @@ -542,7 +518,8 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, } /*! 
ZSTD_decodeLiteralsBlock() : - * @return : nb of bytes read from src (< srcSize ) */ + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { @@ -2003,6 +1980,29 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict return ZSTD_refDictContent(dctx, dict, dictSize); } +/* Note : this function cannot fail */ +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { CHECK_F( ZSTD_decompressBegin(dctx) ); diff --git a/tests/fullbench.c b/tests/fullbench.c index bd9dc613..db00ce21 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -376,6 +376,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) skippedSize = frameHeaderSize + ZSTD_blockHeaderSize; memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize); srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */ + ZSTD_decompressBegin(g_zdc); break; } case 32: /* ZSTD_decodeSeqHeaders */