diff --git a/Makefile b/Makefile
index ed8f1610..c73393cd 100644
--- a/Makefile
+++ b/Makefile
@@ -26,8 +26,7 @@ endif
default: lib zstd-release
.PHONY: all
-all: allmost
- CPPFLAGS=-I../lib LDFLAGS=-L../lib $(MAKE) -C examples/ $@
+all: | allmost examples manual
.PHONY: allmost
allmost:
@@ -68,6 +67,14 @@ zlibwrapper:
test:
$(MAKE) -C $(TESTDIR) $@
+.PHONY: examples
+examples: lib  # examples link against ../lib (see LDFLAGS below), so the library must be built first
+	CPPFLAGS=-I../lib LDFLAGS=-L../lib $(MAKE) -C examples/ all
+
+.PHONY: manual
+manual:  # delegates to the `manual` target of contrib/gen_html/Makefile
+ $(MAKE) -C contrib/gen_html $@
+
.PHONY: clean
clean:
@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
@@ -75,6 +82,7 @@ clean:
@$(MAKE) -C $(TESTDIR) $@ > $(VOID)
@$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID)
@$(MAKE) -C examples/ $@ > $(VOID)
+ @$(MAKE) -C contrib/gen_html $@ > $(VOID)
@$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
@echo Cleaning completed
@@ -197,16 +205,16 @@ cmaketest:
cd $(BUILDIR)/cmake/build ; cmake -DPREFIX:STRING=~/install_test_dir $(CMAKE_PARAMS) .. ; $(MAKE) install ; $(MAKE) uninstall
c90test: clean
- CFLAGS="-std=c90" $(MAKE) all # will fail, due to // and long long
+ CFLAGS="-std=c90" $(MAKE) allmost # will fail, due to missing support for `long long`
gnu90test: clean
- CFLAGS="-std=gnu90" $(MAKE) all
+ CFLAGS="-std=gnu90" $(MAKE) allmost
c99test: clean
CFLAGS="-std=c99" $(MAKE) allmost
gnu99test: clean
- CFLAGS="-std=gnu99" $(MAKE) all
+ CFLAGS="-std=gnu99" $(MAKE) allmost
c11test: clean
CFLAGS="-std=c11" $(MAKE) allmost
diff --git a/NEWS b/NEWS
index be334975..96ff25fd 100644
--- a/NEWS
+++ b/NEWS
@@ -2,7 +2,7 @@ v1.1.4
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski
cli : new : advanced benchmark command --priority=rt
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77
-API : new : ZSTD_getFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize(), by Sean Purcell
+API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize(), by Sean Purcell
API : change : ZSTD_compress*() with srcSize==0 create an empty-frame of known size
build:new : meson build system in contrib/meson, by Dima Krasner
doc : new : educational decoder, by Sean Purcell
diff --git a/circle.yml b/circle.yml
index 69c98854..3102633e 100644
--- a/circle.yml
+++ b/circle.yml
@@ -12,7 +12,7 @@ dependencies:
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-invalidDictionaries && make clean; fi
- |
if [[ "$CIRCLE_NODE_INDEX" == "0" ]]; then g++ -v; make gpptest && make clean; fi
- if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy && make clean; fi
+ if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-decodecorpus && make clean; fi
- |
if [[ "$CIRCLE_NODE_INDEX" == "0" ]]; then gcc -v; make gnu90test && make clean; fi
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-symbols && make clean; fi
diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile
index c68e560a..ea68b11f 100644
--- a/contrib/gen_html/Makefile
+++ b/contrib/gen_html/Makefile
@@ -7,12 +7,18 @@
# of patent rights can be found in the PATENTS file in the same directory.
# ##########################################################################
-
CFLAGS ?= -O3
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
CFLAGS += $(MOREFLAGS)
-FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+FLAGS = $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) $(LDFLAGS)
+ZSTDAPI = ../../lib/zstd.h
+ZSTDMANUAL = ../../doc/zstd_manual.html
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
+LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
+LIBVER := $(shell echo $(LIBVER_SCRIPT))
# Define *.exe as extension for Windows systems
@@ -23,14 +29,23 @@ EXT =
endif
-.PHONY: default gen_html
-
+.PHONY: default
default: gen_html
+.PHONY: all
+all: manual
+
gen_html: gen_html.cpp
- $(CXX) $(FLAGS) $^ -o $@$(EXT)
+ $(CXX) $(FLAGS) $^ -o $@$(EXT)
+$(ZSTDMANUAL): gen_html $(ZSTDAPI)  # regenerate when the generator or the public header changes
+	echo "Update zstd manual in /doc"
+	./gen_html$(EXT) $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL)
+.PHONY: manual
+manual: gen_html $(ZSTDMANUAL)
+
+.PHONY: clean
clean:
@$(RM) gen_html$(EXT)
@echo Cleaning completed
diff --git a/doc/educational_decoder/README.md b/doc/educational_decoder/README.md
index 2e2186e0..e3b9bf58 100644
--- a/doc/educational_decoder/README.md
+++ b/doc/educational_decoder/README.md
@@ -17,3 +17,13 @@ It also contains implementations of Huffman and FSE table decoding.
harness [dictionary]
+As an additional resource to be used with this decoder,
+see the `decodecorpus` tool in the [tests] directory.
+It generates valid Zstandard frames that can be used to verify
+a Zstandard decoder implementation.
+Note that to use the tool to verify this decoder implementation,
+the --content-size flag should be set,
+as this decoder does not handle streaming decoding,
+and so it must know the decompressed size in advance.
+
+[tests]: https://github.com/facebook/zstd/blob/dev/tests/
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 1badcbd7..23224d77 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
-zstd 1.1.2 Manual
+zstd 1.1.4 Manual
-zstd 1.1.2 Manual
+zstd 1.1.4 Manual
Contents
@@ -19,13 +19,15 @@
- Streaming decompression - HowTo
- START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
- Advanced types
-- Advanced compression functions
-- Advanced decompression functions
-- Advanced streaming functions
-- Buffer-less and synchronous inner streaming functions
-- Buffer-less streaming compression (synchronous mode)
-- Buffer-less streaming decompression (synchronous mode)
-- Block functions
+- Compressed size functions
+- Decompressed size functions
+- Advanced compression functions
+- Advanced decompression functions
+- Advanced streaming functions
+- Buffer-less and synchronous inner streaming functions
+- Buffer-less streaming compression (synchronous mode)
+- Buffer-less streaming decompression (synchronous mode)
+- Block functions
Introduction
@@ -63,7 +65,7 @@
size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
- `compressedSize` : must be the _exact_ size of a single compressed frame.
+
`compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
`dstCapacity` is an upper bound of originalSize.
If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
@return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
@@ -71,7 +73,16 @@
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
- 'src' is the start of a zstd compressed frame.
+
NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+ ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+ frame, but distinguishes empty frames from frames with an unknown size, or errors.
+
+ Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
+ concatenated frames in one buffer, and so is more general.
+ As a result however, it requires more computation and entire frames to be passed to it,
+ as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+
+ 'src' is the start of a zstd compressed frame.
@return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
When `return==0`, data to decompress could be any size.
@@ -88,21 +99,29 @@
note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
-Helper functions
int ZSTD_maxCLevel(void); /*!< maximum compression level available */
+Helper functions
int ZSTD_maxCLevel(void);
/*!< maximum compression level available */
size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
-
+
Explicit memory management
+Compression context
When compressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
+
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+
size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
-Decompression context
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+Decompression context
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTD_DCtx* ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
-
+
size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
@@ -131,11 +150,11 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Fast dictionary API
-ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
- `dict` can be released after ZSTD_CDict creation.
+ `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict
size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -151,9 +170,9 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Note that compression level is decided during dictionary creation.
-ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
Create a digested dictionary, ready to start decompression operation without startup delay.
- `dict` can be released after creation.
+ dictBuffer can be released after DDict creation, as its content is copied inside DDict
size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
@@ -271,9 +290,9 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
} ZSTD_compressionParameters;
typedef struct {
- unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
- unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
- unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
+ unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+ unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
+ unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
} ZSTD_frameParameters;
typedef struct {
@@ -281,11 +300,56 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_frameParameters fParams;
} ZSTD_parameters;
-Custom memory allocation functions
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+Custom memory allocation functions
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
-
-Advanced compression functions
+
+Compressed size functions
+
+size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+ `src` should point to the start of a ZSTD encoded frame
+ `srcSize` must be at least as large as the frame
+ @return : the compressed size of the frame pointed to by `src`, suitable to pass to
+ `ZSTD_decompress` or similar, or an error code if given invalid input.
+
+
+Decompressed size functions
+
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+ `src` should point to the start of a ZSTD encoded frame
+ `srcSize` must be at least as large as the frame header. A value greater than or equal
+ to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases.
+ @return : decompressed size of the frame pointed to by `src` if known, otherwise
+ - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+
+
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+ `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ `srcSize` must be the _exact_ size of this series
+ (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`)
+ @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_
+ - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+
+ note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ In which case, it's necessary to use streaming mode to decompress data.
+ Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+ (For example, data may be necessarily cut into blocks <= 16 KB).
+ note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ note 3 : decompressed size can be very large (64-bits value),
+ potentially larger than what local system can handle as a single memory segment.
+ In which case, it's necessary to use streaming mode to decompress data.
+ note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ Always ensure result fits within application's authorized limits.
+ Each application can set its own limits.
+ note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ read each contained frame header. This is efficient as most of the data is skipped,
+ however it does mean that all frame data must be present and valid.
+
+
+Advanced compression functions
size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
@@ -300,7 +364,22 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Gives the amount of memory used by a given ZSTD_CCtx
-ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+typedef enum {
+ ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
+} ZSTD_CCtxParameter;
+
+size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
+ Set advanced parameters, selected through enum ZSTD_CCtxParameter
+ @result : 0, or an error code (which can be tested with ZSTD_isError())
+
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+ Create a digested dictionary for compression
+ Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ It is important that dictBuffer outlives CDict; it must remain read accessible throughout the lifetime of CDict
+
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
ZSTD_parameters params, ZSTD_customMem customMem);
Create a ZSTD_CDict using external alloc and free, and customized compression parameters
@@ -336,7 +415,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter
-Advanced decompression functions
+Advanced decompression functions
unsigned ZSTD_isFrame(const void* buffer, size_t size);
Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -357,6 +436,12 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Gives the amount of memory used by a given ZSTD_DCtx
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+ Create a digested dictionary, ready to start decompression operation without startup delay.
+ Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ It is important that dictBuffer outlives DDict; it must remain read accessible throughout the lifetime of DDict
+
+
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
Gives the amount of memory used by a given ZSTD_DDict
@@ -385,33 +470,33 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code.
-Advanced streaming functions
+Advanced streaming functions
-Advanced Streaming compression functions
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+Advanced Streaming compression functions
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
/**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
- ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
+ ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
-
-Advanced Streaming decompression functions
typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
+
+Advanced Streaming decompression functions
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
/**< note: a dict will not be used if dict == NULL or dictSize < 8 */
size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
-
-Buffer-less and synchronous inner streaming functions
+
+Buffer-less and synchronous inner streaming functions
This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
But it's also a complex one, with many restrictions (documented below).
Prefer using normal streaming API for an easier experience
-Buffer-less streaming compression (synchronous mode)
+Buffer-less streaming compression (synchronous mode)
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -434,20 +519,21 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
In which case, it will "discard" the relevant memory section from its history.
Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
- It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
- Without last block mark, frames will be considered unfinished (broken) by decoders.
+ It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+ Without last block mark, frames will be considered unfinished (corrupted) by decoders.
- You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+ `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
-Buffer-less streaming compression functions
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+Buffer-less streaming compression functions
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size. If it is non-zero, it must be accurate. For 0 size frames, use compressBegin_advanced */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size. If it is non-zero, it must be accurate. For 0 size frames, use compressBegin_advanced */
size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-Buffer-less streaming decompression (synchronous mode)
+
+Buffer-less streaming decompression (synchronous mode)
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
A ZSTD_DCtx object can be re-used multiple times.
@@ -490,7 +576,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo
Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
This information is not required to properly decode a frame.
- == Special case : skippable frames ==
+ == Special case : skippable frames
Skippable frames allow integration of user-defined data into a flow of concatenated frames.
Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows :
@@ -509,7 +595,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo
unsigned checksumFlag;
} ZSTD_frameParams;
-Buffer-less streaming decompression functions
size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */
+Buffer-less streaming decompression functions
size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);
/**< doesn't consume input, see details below */
size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
@@ -517,8 +603,8 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-Block functions
+
+Block functions
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
User will have to take in charge required information to regenerate data, such as compressed and content sizes.
@@ -542,10 +628,10 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
Use ZSTD_insertBlock() in such a case.
-Raw zstd block functions
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+Raw zstd block functions
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);
/**< insert block into `dctx` history. Useful for uncompressed blocks */
-
+
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index eda8b9dd..e38ef79b 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -369,7 +369,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
totalDstSize += ret;
}
{
- size_t const frameSrcSize = ZSTD_getFrameCompressedSize(src, srcSize);
+ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
if (ZSTD_isError(frameSrcSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
@@ -1437,17 +1437,20 @@ size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t len
return length;
}
-/** ZSTD_getFrameCompressedSize() :
+/** ZSTD_findFrameCompressedSize() :
* compatible with legacy mode
- * `src` must point to the start of a ZSTD or ZSTD legacy frame
+ * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
* `srcSize` must be at least as large as the frame contained
* @return : the compressed size of the frame starting at `src` */
-size_t ZSTD_getFrameCompressedSize(const void *src, size_t srcSize)
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
- if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getFrameCompressedSizeLegacy(src, srcSize);
+ if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
#endif
- {
+    if (srcSize >= ZSTD_skippableHeaderSize &&
+            (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {  /* 32-bit mask: ignore the low 4 bits of the magic number */
+        return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4);  /* header size plus the 32-bit length field read at offset 4 */
+ } else {
const BYTE* ip = (const BYTE*)src;
const BYTE* const ipstart = ip;
size_t remainingSize = srcSize;
@@ -1576,7 +1579,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
- size_t const frameSize = ZSTD_getFrameCompressedSizeLegacy(src, srcSize);
+ size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
size_t decodedSize;
if (ZSTD_isError(frameSize)) return frameSize;
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index b0a7b71d..707e76f0 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -123,26 +123,26 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
}
}
-MEM_STATIC size_t ZSTD_getFrameCompressedSizeLegacy(const void *src,
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
size_t compressedSize)
{
U32 const version = ZSTD_isLegacy(src, compressedSize);
switch(version)
{
case 1 :
- return ZSTDv01_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv01_findFrameCompressedSize(src, compressedSize);
case 2 :
- return ZSTDv02_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv02_findFrameCompressedSize(src, compressedSize);
case 3 :
- return ZSTDv03_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv03_findFrameCompressedSize(src, compressedSize);
case 4 :
- return ZSTDv04_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv04_findFrameCompressedSize(src, compressedSize);
case 5 :
- return ZSTDv05_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv05_findFrameCompressedSize(src, compressedSize);
case 6 :
- return ZSTDv06_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv06_findFrameCompressedSize(src, compressedSize);
case 7 :
- return ZSTDv07_getFrameCompressedSize(src, compressedSize);
+ return ZSTDv07_findFrameCompressedSize(src, compressedSize);
default :
return ERROR(prefix_unknown);
}
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index a0c78a4b..bcacb8d5 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1992,7 +1992,7 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
-size_t ZSTDv01_getFrameCompressedSize(const void* src, size_t srcSize)
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h
index 21959fcd..13cb3acf 100644
--- a/lib/legacy/zstd_v01.h
+++ b/lib/legacy/zstd_v01.h
@@ -40,7 +40,7 @@ ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv01_isError())
*/
-size_t ZSTDv01_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 6cbf8023..2297b28c 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -3378,7 +3378,7 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
-static size_t ZSTD_getFrameCompressedSize(const void *src, size_t srcSize)
+static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
@@ -3524,9 +3524,9 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
-size_t ZSTDv02_getFrameCompressedSize(const void *src, size_t compressedSize)
+size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
{
- return ZSTD_getFrameCompressedSize(src, compressedSize);
+ return ZSTD_findFrameCompressedSize(src, compressedSize);
}
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h
index 9542fc0e..d14f0293 100644
--- a/lib/legacy/zstd_v02.h
+++ b/lib/legacy/zstd_v02.h
@@ -40,7 +40,7 @@ ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv02_isError())
*/
-size_t ZSTDv02_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index 98b93c49..ef654931 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -3019,7 +3019,7 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
-static size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize)
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
@@ -3165,9 +3165,9 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
-size_t ZSTDv03_getFrameCompressedSize(const void* src, size_t srcSize)
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
{
- return ZSTD_getFrameCompressedSize(src, srcSize);
+ return ZSTD_findFrameCompressedSize(src, srcSize);
}
ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h
index 46969410..07f7597b 100644
--- a/lib/legacy/zstd_v03.h
+++ b/lib/legacy/zstd_v03.h
@@ -40,7 +40,7 @@ ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv03_isError())
*/
-size_t ZSTDv03_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 8c929b05..09040e68 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -3326,7 +3326,7 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
return op-ostart;
}
-static size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize)
+static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
@@ -3782,9 +3782,9 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
#endif
}
-size_t ZSTDv04_getFrameCompressedSize(const void* src, size_t srcSize)
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
{
- return ZSTD_getFrameCompressedSize(src, srcSize);
+ return ZSTD_findFrameCompressedSize(src, srcSize);
}
size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h
index bcef1fe9..1b5439d3 100644
--- a/lib/legacy/zstd_v04.h
+++ b/lib/legacy/zstd_v04.h
@@ -40,7 +40,7 @@ ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv04_isError())
*/
-size_t ZSTDv04_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 9689b170..a6f5f5db 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -3583,7 +3583,7 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
#endif
}
-size_t ZSTDv05_getFrameCompressedSize(const void *src, size_t srcSize)
+size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h
index 157dbc57..8ce662fd 100644
--- a/lib/legacy/zstd_v05.h
+++ b/lib/legacy/zstd_v05.h
@@ -38,7 +38,7 @@ ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv05_isError())
*/
-size_t ZSTDv05_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
/* *************************************
* Helper functions
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index f586db22..a4258b67 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -3729,7 +3729,7 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
#endif
}
-size_t ZSTDv06_getFrameCompressedSize(const void* src, size_t srcSize)
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h
index ef1feb2f..10c9c772 100644
--- a/lib/legacy/zstd_v06.h
+++ b/lib/legacy/zstd_v06.h
@@ -47,7 +47,7 @@ ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv06_isError())
*/
-size_t ZSTDv06_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
/* *************************************
* Helper functions
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 07099d5a..e67916b3 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -3968,7 +3968,7 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
#endif
}
-size_t ZSTDv07_getFrameCompressedSize(const void* src, size_t srcSize)
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h
index a79cbb88..cc95c661 100644
--- a/lib/legacy/zstd_v07.h
+++ b/lib/legacy/zstd_v07.h
@@ -54,7 +54,7 @@ ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv07_isError())
*/
-size_t ZSTDv07_getFrameCompressedSize(const void* src, size_t compressedSize);
+size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
/*====== Helper functions ======*/
ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
diff --git a/lib/zstd.h b/lib/zstd.h
index c0a1c7d1..e597c5db 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -400,12 +400,12 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
* Compressed size functions
***************************************/
-/*! ZSTD_getFrameCompressedSize() :
- * `src` should point to the start of a ZSTD encoded frame
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD encoded frame or skippable frame
* `srcSize` must be at least as large as the frame
* @return : the compressed size of the frame pointed to by `src`, suitable to pass to
* `ZSTD_decompress` or similar, or an error code if given invalid input. */
-ZSTDLIB_API size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
/***************************************
* Decompressed size functions
@@ -700,6 +700,9 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
c) Frame Content - any content (User Data) of length equal to Frame Size
For skippable frames ZSTD_decompressContinue() always returns 0.
For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+ Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content.
+ For purposes of decompression, it is valid in both cases to skip the frame using
+ ZSTD_findFrameCompressedSize to find its size in bytes.
It also returns Frame Size as fparamsPtr->frameContentSize.
*/
diff --git a/tests/.gitignore b/tests/.gitignore
index b7ba51b6..dc468dee 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -17,6 +17,7 @@ roundTripCrash
longmatch
symbols
legacy
+decodecorpus
pool
invalidDictionaries
diff --git a/tests/Makefile b/tests/Makefile
index 17286a02..5b0e29c6 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -56,6 +56,7 @@ VOID = /dev/null
ZSTREAM_TESTTIME = -T2mn
FUZZERTEST ?= -T5mn
ZSTDRTTEST = --test-large-data
+DECODECORPUS_TESTTIME = -T30
.PHONY: default all all32 dll clean test test32 test-all namespaceTest versionsTest
@@ -154,6 +155,9 @@ legacy : CPPFLAGS+= -I$(ZSTDDIR)/legacy
legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
+# decodecorpus : random frame generator / decoder test tool.
+# zstd_compress.c is filtered out of the prerequisites because decodecorpus.c
+# #includes that file directly; -lm links the math library (the tool calls log()).
+decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) decodecorpus.c
+ $(CC) $(FLAGS) $^ -o $@$(EXT) -lm
+
symbols : symbols.c
$(MAKE) -C $(ZSTDDIR) libzstd
ifneq (,$(filter Windows%,$(OS)))
@@ -184,7 +188,8 @@ clean:
fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT)\
zstreamtest$(EXT) zstreamtest32$(EXT) \
datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
- symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) pool$(EXT)
+ symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) pool$(EXT) \
+ decodecorpus$(EXT)
@echo Cleaning completed
@@ -230,7 +235,7 @@ zstd-playTests: datagen
file $(ZSTD)
ZSTD="$(QEMU_SYS) $(ZSTD)" ./playTests.sh $(ZSTDRTTEST)
-test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy
+test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus
ifeq ($(QEMU_SYS),)
test: test-pool
endif
@@ -302,6 +307,9 @@ test-symbols: symbols
test-legacy: legacy
$(QEMU_SYS) ./legacy
+# Run decodecorpus in test mode (-t), passing the time option held in
+# DECODECORPUS_TESTTIME; the command is prefixed with $(QEMU_SYS).
+test-decodecorpus: decodecorpus
+ $(QEMU_SYS) ./decodecorpus -t $(DECODECORPUS_TESTTIME)
+
test-pool: pool
$(QEMU_SYS) ./pool
diff --git a/tests/README.md b/tests/README.md
index 79c067ab..24a28ab7 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -10,12 +10,14 @@ This directory contains the following programs and scripts:
- `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+)
- `zbufftest` : Test tool to check ZBUFF (a buffered streaming API) integrity
- `zstreamtest` : Fuzzer test tool for zstd streaming API
+- `legacy` : Test tool to test decoding of legacy zstd frames
+- `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations
#### `test-zstd-versions.py` - script for testing zstd interoperability between versions
This script creates `versionsTest` directory to which zstd repository is cloned.
-Then all taged (released) versions of zstd are compiled.
+Then all tagged (released) versions of zstd are compiled.
In the following step interoperability between zstd versions is checked.
@@ -64,3 +66,25 @@ optional arguments:
--sleepTime SLEEPTIME
frequency of repository checking in seconds
```
+
+#### `decodecorpus` - tool to generate Zstandard frames for decoder testing
+Command line tool to generate test .zst files.
+
+This tool will generate .zst files with checksums,
+as well as optionally output the corresponding correct uncompressed data for
+extra verification.
+
+Example:
+```
+./decodecorpus -ptestfiles -otestfiles -n10000 -s5
+```
+will generate 10,000 sample .zst files using a seed of 5 in the `testfiles` directory,
+with the zstd checksum field set,
+as well as the 10,000 original files for more detailed comparison of decompression results.
+
+```
+./decodecorpus -t -T1mn
+```
+will choose a random seed, and for 1 minute,
+generate random test frames and ensure that the
+zstd library correctly decompresses them in both simple and streaming modes.
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
new file mode 100644
index 00000000..df12dd56
--- /dev/null
+++ b/tests/decodecorpus.c
@@ -0,0 +1,1450 @@
+/**
+ * Copyright (c) 2017-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "zstd.h"
+#include "zstd_internal.h"
+#include "mem.h"
+
+// Direct access to internal compression functions is required
+#include "zstd_compress.c"
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h" /* XXH64 */
+
+#ifndef MIN
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef MAX_PATH
+ #ifdef PATH_MAX
+ #define MAX_PATH PATH_MAX
+ #else
+ #define MAX_PATH 256
+ #endif
+#endif
+
+/*-************************************
+* DISPLAY Macros
+**************************************/
+/* Print a printf-style message to stderr */
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+/* Print only when g_displayLevel is at least `l`.
+ * Expands to a bare `if` statement, so take care when placing it next to an
+ * unbraced if/else. */
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+/* Current verbosity threshold compared against by DISPLAYLEVEL */
+static U32 g_displayLevel = 0;
+
+/* Rate-limited DISPLAY: prints when more than g_refreshRate clock ticks have
+ * elapsed since the previous update, or unconditionally when
+ * g_displayLevel >= 4 (in which case stderr is also flushed). */
+#define DISPLAYUPDATE(...) \
+ do { \
+ if ((clockSpan(g_displayClock) > g_refreshRate) || \
+ (g_displayLevel >= 4)) { \
+ g_displayClock = clock(); \
+ DISPLAY(__VA_ARGS__); \
+ if (g_displayLevel >= 4) fflush(stderr); \
+ } \
+ } while (0)
+/* Minimum clock() interval between two DISPLAYUPDATE outputs (one sixth of CLOCKS_PER_SEC) */
+static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
+/* clock() value recorded at the last DISPLAYUPDATE output */
+static clock_t g_displayClock = 0;
+
+/* Processor time elapsed since `cStart`, as measured by clock(). */
+static clock_t clockSpan(clock_t cStart)
+{
+    clock_t const now = clock();
+    return now - cStart; /* works even when overflow; max span ~ 30mn */
+}
+
+/* Abort generation when `code` is a zstd error code: print the error name to
+ * stderr and exit(1). Note that `code` is evaluated more than once. */
+#define CHECKERR(code) \
+ do { \
+ if (ZSTD_isError(code)) { \
+ DISPLAY("Error occurred while generating data: %s\n", \
+ ZSTD_getErrorName(code)); \
+ exit(1); \
+ } \
+ } while (0)
+
+/*-*******************************************************
+* Random function
+*********************************************************/
+#define CLAMP(x, a, b) ((x) < (a) ? (a) : ((x) > (b) ? (b) : (x)))
+
+/* Advance the generator state stored in `*src` and return the next
+ * pseudo-random 32-bit value derived from it. */
+static unsigned RAND(unsigned* src)
+{
+    U32 const multiplier = 2654435761U;
+    U32 const increment = 2246822519U;
+    U32 state = *src;
+
+    state = state * multiplier + increment;
+    state = (state << 13) | (state >> 19);   /* rotate left by 13 */
+    *src = state;
+
+    return (state << 27) | (state >> 5);     /* rotate left by 27 */
+}
+
+#define DISTSIZE (8192)
+
+/* Fill `ptr` with `size` random bytes, each in the range [0, maxSymb] */
+static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
+{
+    BYTE* const out = ptr;
+    size_t pos = 0;
+
+    while (pos < size) {
+        out[pos] = RAND(seed) % (maxSymb + 1);
+        pos++;
+    }
+}
+
+/* Fill `ptr` with `size` random bytes: whole 32-bit little-endian words
+ * first, then the remaining 0-3 bytes one at a time */
+static void RAND_buffer(U32* seed, void* ptr, size_t size)
+{
+    BYTE* const out = ptr;
+    size_t const wordBytes = size - (size % 4);
+    size_t pos;
+
+    for (pos = 0; pos < wordBytes; pos += 4) {
+        MEM_writeLE32(out + pos, RAND(seed));
+    }
+    while (pos < size) {
+        out[pos] = RAND(seed) & 0xff;
+        pos++;
+    }
+}
+
+/* Fill `ptr` with `size` bytes, each picked from a random slot of the
+ * DISTSIZE-entry table `dist` */
+static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
+{
+    BYTE* const out = ptr;
+    size_t pos = 0;
+
+    while (pos < size) {
+        unsigned const slot = RAND(seed) % DISTSIZE;
+        out[pos] = dist[slot];
+        pos++;
+    }
+}
+
+/* Fill the DISTSIZE-entry table `dist` with runs of symbols whose run lengths
+ * shrink geometrically: each run takes `weight` times the entries still
+ * unassigned, plus one.
+ * `dist` must have room for at least DISTSIZE bytes */
+static void RAND_genDist(U32* seed, BYTE* dist, double weight)
+{
+    size_t filled = 0;
+    size_t statesLeft = DISTSIZE;
+    BYTE symb = RAND(seed) % 256;
+    BYTE const step = (RAND(seed) % 256) | 1; /* force it to be odd so it's relatively prime to 256 */
+
+    while (filled < DISTSIZE) {
+        size_t const states = ((size_t)(weight * statesLeft)) + 1;
+        size_t const room = DISTSIZE - filled;
+        size_t const run = (states < room) ? states : room;
+
+        /* assign the next `run` table entries to the current symbol */
+        memset(dist + filled, symb, run);
+        filled += run;
+
+        symb += step;
+        statesLeft -= states;
+    }
+}
+
+/* Return a random value in the half-open range [min, max); requires max > min */
+static inline U32 RAND_range(U32* seed, U32 min, U32 max)
+{
+    U32 const span = max - min;
+    return min + (RAND(seed) % span);
+}
+
+#define ROUND(x) ((U32)(x + 0.5))
+
+/* Generates a random number in an exponential distribution with mean `mean`.
+ * seed : generator state, advanced by one RAND() call
+ * @return : a finite, non-negative value for any finite, non-negative `mean` */
+static double RAND_exp(U32* seed, double mean)
+{
+    unsigned raw = RAND(seed);
+
+    /* u must stay strictly below 1 : with u == 1, log(1-u) is log(0), giving
+     * an infinite result (or NaN when mean == 0) that callers then convert
+     * to an integer. Only that single raw value is remapped, so every other
+     * output is unchanged. */
+    if (raw == UINT_MAX) raw = UINT_MAX - 1;
+
+    {   double const u = raw / (double) UINT_MAX;
+        return log(1-u) * (-mean);
+    }
+}
+
+/*-*******************************************************
+* Constants and Structs
+*********************************************************/
+/* Printable block type names (presumably indexed by block type code — confirm at the use site) */
+const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"};
+
+/* Generated content is at most 2^MAX_DECOMPRESSED_SIZE_LOG bytes */
+#define MAX_DECOMPRESSED_SIZE_LOG 20
+#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
+
+#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
+#define MAX_BLOCK_SIZE (128ULL * 1024)
+
+/* Shortest match length, and the resulting upper bound on sequences per block */
+#define MIN_SEQ_LEN (3)
+#define MAX_NB_SEQ ((MAX_BLOCK_SIZE + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
+
+/* Static work buffers shared by the generator functions */
+BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
+BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
+BYTE LITERAL_BUFFER[MAX_BLOCK_SIZE]; /* literals of the block being generated */
+
+/* Backing storage handed to the seqStore_t by initSeqStore() */
+seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
+BYTE SEQUENCE_LITERAL_BUFFER[MAX_BLOCK_SIZE]; /* storeSeq expects a place to copy literals to */
+BYTE SEQUENCE_LLCODE[MAX_BLOCK_SIZE];
+BYTE SEQUENCE_MLCODE[MAX_BLOCK_SIZE];
+BYTE SEQUENCE_OFCODE[MAX_BLOCK_SIZE];
+
+/* Scratch workspace passed to FSE_count_wksp() and HUF_buildCTable_wksp() */
+unsigned WKSP[1024];
+
+/* Frame-level parameters chosen by writeFrameHeader() */
+typedef struct {
+ size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
+ unsigned windowSize; /* contentSize >= windowSize means single segment */
+} frameHeader_t;
+
+/* For repeat modes : state carried from one compressed block to the next */
+typedef struct {
+ /* recent offsets history, updated by generateSequences() */
+ U32 rep[ZSTD_REP_NUM];
+
+ /* nonzero once a Huffman table has been generated for this frame */
+ int hufInit;
+ /* the distribution used in the previous block for repeat mode */
+ BYTE hufDist[DISTSIZE];
+ U32 hufTable [256]; /* HUF_CElt is an incomplete type */
+
+ /* presumably nonzero once the FSE tables below are valid — confirm in writeSequences() */
+ int fseInit;
+ FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+ FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+ FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+
+ /* Symbols that were present in the previous distribution, for use with
+ * set_repeat */
+ BYTE litlengthSymbolSet[36];
+ BYTE offsetSymbolSet[29];
+ BYTE matchlengthSymbolSet[53];
+} cblockStats_t;
+
+/* Generation state for one frame */
+typedef struct {
+ /* compressed output : `data` is the current write position (advanced by
+ * the write* functions), `dataEnd` is used as the end of the writable area */
+ void* data;
+ void* dataStart;
+ void* dataEnd;
+
+ /* regenerated (uncompressed) content : generateSequences() writes starting
+ * at `src` and limits match offsets using `srcStart` */
+ void* src;
+ void* srcStart;
+ void* srcEnd;
+
+ /* parameters chosen by writeFrameHeader() */
+ frameHeader_t header;
+
+ cblockStats_t stats;
+ cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
+} frame_t;
+
+/*-*******************************************************
+* Generator Functions
+*********************************************************/
+
+/* Global generation options; `contentSize` is OR-ed into the content size
+ * flag in writeFrameHeader(), so a nonzero value always emits the field */
+struct {
+ int contentSize; /* force the content size to be present */
+} opts; /* advanced options on generation */
+
+/* Generate and write a random frame header.
+ * Writes the magic number, the frame header descriptor, the window byte
+ * (unless single segment) and the content size field (when flagged) at
+ * frame->data, then advances frame->data past them and stores the chosen
+ * window/content sizes in frame->header.
+ * The output for a given seed depends on the exact order of RAND() calls. */
+static void writeFrameHeader(U32* seed, frame_t* frame)
+{
+ BYTE* const op = frame->data;
+ size_t pos = 0;
+ frameHeader_t fh;
+
+ BYTE windowByte = 0;
+
+ int singleSegment = 0;
+ int contentSizeFlag = 0;
+ int fcsCode = 0;
+
+ memset(&fh, 0, sizeof(fh));
+
+ /* generate window size */
+ {
+ /* Follow window algorithm from specification */
+ int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
+ int const mantissa = RAND(seed) % 8;
+ /* window byte : exponent above the low 3 bits, mantissa in the low 3 bits */
+ windowByte = (exponent << 3) | mantissa;
+ fh.windowSize = (1U << (exponent + 10));
+ fh.windowSize += fh.windowSize / 8 * mantissa;
+ }
+
+ {
+ /* Generate random content size */
+ size_t highBit;
+ if (RAND(seed) & 7) {
+ /* do content of at least 128 bytes */
+ highBit = 1ULL << RAND_range(seed, 7, MAX_DECOMPRESSED_SIZE_LOG);
+ } else if (RAND(seed) & 3) {
+ /* do small content */
+ highBit = 1ULL << RAND_range(seed, 0, 7);
+ } else {
+ /* 0 size frame */
+ highBit = 0;
+ }
+ /* size lies in [highBit, 2*highBit) */
+ fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
+
+ /* provide size sometimes */
+ contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
+
+ /* single segment is only chosen when the content size is written;
+ * an empty frame with a written size is always single segment */
+ if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
+ /* do single segment sometimes */
+ fh.windowSize = fh.contentSize;
+ singleSegment = 1;
+ }
+ }
+
+ if (contentSizeFlag) {
+ /* Determine how large fcs field has to be */
+ int minFcsCode = (fh.contentSize >= 256) +
+ (fh.contentSize >= 65536 + 256) +
+ (fh.contentSize > 0xFFFFFFFFU);
+ /* code 0 is only used together with single segment here (presumably
+ * because code 0 without it means "no size field" — confirm against
+ * the format specification) */
+ if (!singleSegment && !minFcsCode) {
+ minFcsCode = 1;
+ }
+ fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
+ /* the 2-byte field stores size - 256, so sizes below 256 cannot use it */
+ if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
+ }
+
+ /* write out the header */
+ MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
+ pos += 4;
+
+ {
+ /* bit 2 is always set (presumably the content checksum flag — confirm
+ * against the format specification) */
+ BYTE const frameHeaderDescriptor =
+ (fcsCode << 6) | (singleSegment << 5) | (1 << 2);
+ op[pos++] = frameHeaderDescriptor;
+ }
+
+ if (!singleSegment) {
+ op[pos++] = windowByte;
+ }
+
+ if (contentSizeFlag) {
+ /* field width is 1, 2, 4 or 8 bytes for fcsCode 0, 1, 2, 3 */
+ switch (fcsCode) {
+ default: /* Impossible */
+ case 0: op[pos++] = fh.contentSize; break;
+ case 1: MEM_writeLE16(op + pos, fh.contentSize - 256); pos += 2; break;
+ case 2: MEM_writeLE32(op + pos, fh.contentSize); pos += 4; break;
+ case 3: MEM_writeLE64(op + pos, fh.contentSize); pos += 8; break;
+ }
+ }
+
+ DISPLAYLEVEL(2, " frame content size:\t%zu\n", fh.contentSize);
+ DISPLAYLEVEL(2, " frame window size:\t%u\n", fh.windowSize);
+ DISPLAYLEVEL(2, " content size flag:\t%d\n", contentSizeFlag);
+ DISPLAYLEVEL(2, " single segment flag:\t%d\n", singleSegment);
+
+ frame->data = op + pos;
+ frame->header = fh;
+}
+
+/* Emit a literals section in raw (type 0) or RLE (type 1) form.
+ * The regenerated literals are left in LITERAL_BUFFER and frame->data is
+ * advanced past the written section.
+ * @return : the number of literals in the section */
+static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
+{
+    BYTE* out = (BYTE*)frame->data;
+    int const type = RAND(seed) % 2;
+    int const sizeFormatDesc = RAND(seed) % 8;
+    size_t formatLimit;
+    size_t sizeCap;
+    size_t litSize;
+
+    /* largest size allowed by the randomly picked size format */
+    if (sizeFormatDesc == 0) {
+        formatLimit = 31;        /* Size_FormatDesc = ?0 */
+    } else if (sizeFormatDesc <= 4) {
+        formatLimit = 4095;      /* Size_FormatDesc = 01 */
+    } else {
+        formatLimit = 1048575;   /* Size_Format = 11 */
+    }
+    sizeCap = MIN(contentSize, MAX_BLOCK_SIZE);
+    sizeCap = MIN(sizeCap, formatLimit);
+
+    litSize = RAND(seed) % (sizeCap + 1);
+    /* no empty literals if there's nothing preceding this block */
+    if (litSize == 0 && frame->src == frame->srcStart) litSize = 1;
+    /* no matches shorter than 3 are allowed : take the whole content instead */
+    if (litSize + 3 > contentSize) litSize = contentSize;
+
+    /* section header : use smallest size format that fits */
+    if (litSize >= 4096) {
+        out[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
+        out[1] = (litSize >> 4) & 0xff;
+        out[2] = (litSize >> 12) & 0xff;
+        out += 3;
+    } else if (litSize >= 32) {
+        out[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
+        out[1] = (litSize >> 4) & 0xff;
+        out += 2;
+    } else {
+        out[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
+        out += 1;
+    }
+
+    if (type != 0) {
+        /* RLE literals : a single byte repeated litSize times */
+        BYTE const symb = RAND(seed) % 256;
+
+        DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb);
+
+        memset(LITERAL_BUFFER, symb, litSize);
+        *out++ = symb;
+    } else {
+        /* Raw literals : random bytes copied verbatim */
+        DISPLAYLEVEL(4, " raw literals\n");
+
+        RAND_buffer(seed, LITERAL_BUFFER, litSize);
+        memcpy(out, LITERAL_BUFFER, litSize);
+        out += litSize;
+    }
+
+    frame->data = out;
+    return litSize;
+}
+
+/* Build a Huffman table for `src` into `hufTable` and write its description
+ * into `dst`.
+ * @return : size of the written table description,
+ *           or 0 when `src` holds a single symbol, is not compressible enough,
+ *           or the description is too large to be useful */
+static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
+                             const void* src, size_t srcSize)
+{
+    BYTE* const out = (BYTE*)dst;
+    const BYTE* const in = (const BYTE*)src;
+    unsigned const maxHuffLog = 11;
+    unsigned huffLog;
+    U32 maxSymbolValue = 255;
+    U32 count[HUF_SYMBOLVALUE_MAX+1];
+    size_t largest;
+    size_t maxBits;
+    size_t hSize;
+
+    /* Scan input and build symbol stats */
+    largest = FSE_count_wksp (count, &maxSymbolValue, in, srcSize, WKSP);
+    if (largest == srcSize) {   /* single symbol, rle */
+        out[0] = in[0];
+        return 0;
+    }
+    if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
+
+    /* Build Huffman Tree */
+    /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
+    huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, maxHuffLog+1);
+    DISPLAYLEVEL(6, " huffman log: %u\n", huffLog);
+    maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
+    CHECKERR(maxBits);
+    huffLog = (U32)maxBits;
+
+    /* Write table description header */
+    hSize = HUF_writeCTable (out, dstSize, hufTable, maxSymbolValue, huffLog);
+    if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
+
+    return hSize;
+}
+
+/* Write a Huffman coded literals block and return the literals size.
+ * Room for the section header is reserved first; the Huffman table
+ * description (unless repeating the previous table) and the compressed
+ * stream follow, and the header is filled in last once both sizes are known.
+ * Generation is retried until the section is smaller than the literals.
+ * @return : number of literals, or ERROR(GENERIC) when contentSize < 64 */
+static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
+{
+ BYTE* origop = (BYTE*)frame->data;
+ BYTE* opend = (BYTE*)frame->dataEnd;
+ BYTE* op;
+ BYTE* const ostart = origop;
+ int const sizeFormat = RAND(seed) % 4;
+ size_t litSize;
+ size_t hufHeaderSize = 0;
+ size_t compressedSize = 0;
+ size_t maxLitSize = MIN(contentSize-3, MAX_BLOCK_SIZE);
+
+ symbolEncodingType_e hType;
+
+ if (contentSize < 64) {
+ /* make sure we get reasonably-sized literals for compression */
+ return ERROR(GENERIC);
+ }
+
+ DISPLAYLEVEL(4, " compressed literals\n");
+
+ /* cap the literals size by the chosen size format and skip over the
+ * section header, which has 3, 4 or 5 bytes depending on the format */
+ switch (sizeFormat) {
+ case 0: /* fall through, size is the same as case 1 */
+ case 1:
+ maxLitSize = MIN(maxLitSize, 1023);
+ origop += 3;
+ break;
+ case 2:
+ maxLitSize = MIN(maxLitSize, 16383);
+ origop += 4;
+ break;
+ case 3:
+ maxLitSize = MIN(maxLitSize, 262143);
+ origop += 5;
+ break;
+ default:; /* impossible */
+ }
+
+ do {
+ op = origop;
+ do {
+ litSize = RAND(seed) % (maxLitSize + 1);
+ } while (litSize < 32); /* avoid small literal sizes */
+ /* NOTE(review): maxLitSize is at most contentSize-3, so this branch
+ * looks unreachable here — confirm before relying on it */
+ if (litSize + 3 > contentSize) {
+ litSize = contentSize; /* no matches shorter than 3 are allowed */
+ }
+
+ /* most of the time generate a new distribution */
+ if ((RAND(seed) & 3) || !frame->stats.hufInit) {
+ do {
+ if (RAND(seed) & 3) {
+ /* add 10 to ensure some compressibility */
+ double const weight = ((RAND(seed) % 90) + 10) / 100.0;
+
+ DISPLAYLEVEL(5, " distribution weight: %d%%\n",
+ (int)(weight * 100));
+
+ RAND_genDist(seed, frame->stats.hufDist, weight);
+ } else {
+ /* sometimes do restricted range literals to force
+ * non-huffman headers */
+ DISPLAYLEVEL(5, " small range literals\n");
+ RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
+ 15);
+ }
+ RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
+ litSize);
+
+ /* generate the header from the distribution instead of the
+ * actual data to avoid bugs with symbols that were in the
+ * distribution but never showed up in the output */
+ hufHeaderSize = writeHufHeader(
+ seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op,
+ frame->stats.hufDist, DISTSIZE);
+ CHECKERR(hufHeaderSize);
+ /* repeat until a valid header is written */
+ } while (hufHeaderSize == 0);
+ op += hufHeaderSize;
+ hType = set_compressed;
+
+ frame->stats.hufInit = 1;
+ } else {
+ /* repeat the distribution/table from last time */
+ DISPLAYLEVEL(5, " huffman repeat stats\n");
+ RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
+ litSize);
+ hufHeaderSize = 0;
+ hType = set_repeat;
+ }
+
+ /* sizeFormat 0 uses the single-stream encoder, the others the
+ * 4-stream encoder.
+ * NOTE(review): the inputs do not change between iterations, so a 0
+ * result would repeat forever — confirm it cannot happen with the
+ * output room available here. compressedSize is a size_t, so `<= 0`
+ * is the same as `== 0`. */
+ do {
+ compressedSize =
+ sizeFormat == 0
+ ? HUF_compress1X_usingCTable(
+ op, opend - op, LITERAL_BUFFER, litSize,
+ (HUF_CElt*)frame->stats.hufTable)
+ : HUF_compress4X_usingCTable(
+ op, opend - op, LITERAL_BUFFER, litSize,
+ (HUF_CElt*)frame->stats.hufTable);
+ CHECKERR(compressedSize);
+ /* this only occurs when it could not compress or similar */
+ } while (compressedSize <= 0);
+
+ op += compressedSize;
+
+ /* from here on compressedSize covers table description + stream */
+ compressedSize += hufHeaderSize;
+ DISPLAYLEVEL(5, " regenerated size: %zu\n", litSize);
+ DISPLAYLEVEL(5, " compressed size: %zu\n", compressedSize);
+ if (compressedSize >= litSize) {
+ DISPLAYLEVEL(5, " trying again\n");
+ /* if we have to try again, reset the stats so we don't accidentally
+ * try to repeat a distribution we just made */
+ frame->stats = frame->oldStats;
+ } else {
+ break;
+ }
+ } while (1);
+
+ /* write header */
+ /* layout : block type in bits 0-1, size format in bits 2-3, then the
+ * regenerated size followed by the compressed size, each 10, 14 or 18 bits */
+ switch (sizeFormat) {
+ case 0: /* fall through, size is the same as case 1 */
+ case 1: {
+ U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
+ ((U32)compressedSize << 14);
+ MEM_writeLE24(ostart, header);
+ break;
+ }
+ case 2: {
+ U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
+ ((U32)compressedSize << 18);
+ MEM_writeLE32(ostart, header);
+ break;
+ }
+ case 3: {
+ U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
+ ((U32)compressedSize << 22);
+ MEM_writeLE32(ostart, header);
+ /* only the low 10 bits of compressedSize fit in the 32-bit word;
+ * the remaining high bits go into the fifth header byte */
+ ostart[4] = (BYTE)(compressedSize >> 10);
+ break;
+ }
+ default:; /* impossible */
+ }
+
+ frame->data = op;
+ return litSize;
+}
+
+/* Write a literals section for up to `contentSize` bytes of content and
+ * return the number of literals it holds. Huffman-compressed literals are
+ * picked most of the time, but only for segments of at least 64 bytes. */
+static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
+{
+    /* the random draw happens unconditionally, whatever the content size */
+    int const preferCompressed = (RAND(seed) & 7) != 0;
+
+    /* only do compressed for larger segments to avoid compressibility issues */
+    if (preferCompressed && contentSize >= 64) {
+        return writeLiteralsBlockCompressed(seed, frame, contentSize);
+    }
+    return writeLiteralsBlockSimple(seed, frame, contentSize);
+}
+
+/* Point `seqStore` at the static sequence buffers, then reset it */
+static inline void initSeqStore(seqStore_t *seqStore)
+{
+    /* per-sequence code tables */
+    seqStore->llCode = SEQUENCE_LLCODE;
+    seqStore->mlCode = SEQUENCE_MLCODE;
+    seqStore->ofCode = SEQUENCE_OFCODE;
+
+    /* sequence and literal storage */
+    seqStore->sequencesStart = SEQUENCE_BUFFER;
+    seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
+
+    ZSTD_resetSeqStore(seqStore);
+}
+
+/* Randomly generate sequence commands.
+ * Splits `contentSize` bytes into the `literalsSize` literals found in
+ * LITERAL_BUFFER plus matches, writes the regenerated content starting at
+ * frame->src, records every sequence in `seqStore` and keeps the repeat
+ * offset history in frame->stats.rep up to date.
+ * seed : generator state; the output depends on the order of RAND() calls
+ * contentSize : number of content bytes this block regenerates
+ * literalsSize : number of literals available; callers provide either
+ *                literalsSize == contentSize or at least MIN_SEQ_LEN bytes of matches
+ * @return : the number of sequences generated (0 when the block is all literals) */
+static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
+                             size_t contentSize, size_t literalsSize)
+{
+    /* The total length of all the matches */
+    size_t const remainingMatch = contentSize - literalsSize;
+    /* set even when there are no sequences, so it is never left indeterminate */
+    size_t excessMatch = 0;
+    U32 i;
+
+    U32 numSequences;
+
+    const BYTE* literals = LITERAL_BUFFER;
+    BYTE* srcPtr = frame->src;
+
+    if (literalsSize == contentSize) {
+        numSequences = 0;
+    } else {
+        /* each match must be at least MIN_SEQ_LEN, so this is the maximum
+         * number of sequences we can have */
+        U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
+        numSequences = (RAND(seed) % maxSequences) + 1;
+
+        /* the extra match lengths we have to allocate to each sequence */
+        excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
+    }
+
+    DISPLAYLEVEL(5, " total match lengths: %zu\n", remainingMatch);
+
+    for (i = 0; i < numSequences; i++) {
+        /* Generate match and literal lengths by exponential distribution to
+         * ensure nice numbers */
+        U32 matchLen =
+                MIN_SEQ_LEN +
+                ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
+        U32 literalLen =
+                (RAND(seed) & 7)
+                        ? ROUND(RAND_exp(seed,
+                                         literalsSize /
+                                                 (double)(numSequences - i)))
+                        : 0;
+        /* actual offset, code to send, and point to copy up to when shifting
+         * codes in the repeat offsets history */
+        U32 offset, offsetCode, repIndex;
+
+        /* bounds checks */
+        matchLen = MIN(matchLen, excessMatch + MIN_SEQ_LEN);
+        literalLen = MIN(literalLen, literalsSize);
+        /* the very first sequence of a frame needs a literal to match against */
+        if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
+        /* the last sequence absorbs whatever match length is left */
+        if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + excessMatch;
+
+        memcpy(srcPtr, literals, literalLen);
+        srcPtr += literalLen;
+
+        /* draw offsets until one points inside the content generated so far */
+        do {
+            if (RAND(seed) & 7) {
+                /* do a normal offset */
+                offset = (RAND(seed) %
+                          MIN(frame->header.windowSize,
+                              (BYTE*)srcPtr - (BYTE*)frame->srcStart)) +
+                         1;
+                offsetCode = offset + ZSTD_REP_MOVE;
+                repIndex = 2;
+            } else {
+                /* do a repeat offset */
+                offsetCode = RAND(seed) % 3;
+                if (literalLen > 0) {
+                    offset = frame->stats.rep[offsetCode];
+                    repIndex = offsetCode;
+                } else {
+                    /* special case */
+                    offset = offsetCode == 2 ? frame->stats.rep[0] - 1
+                                             : frame->stats.rep[offsetCode + 1];
+                    repIndex = MIN(2, offsetCode + 1);
+                }
+            }
+        } while (offset > (BYTE*)srcPtr - (BYTE*)frame->srcStart || offset == 0);
+
+        /* byte-by-byte copy : the match may overlap the bytes being written */
+        {   size_t j;
+            for (j = 0; j < matchLen; j++) {
+                *srcPtr = *(srcPtr-offset);
+                srcPtr++;
+            }
+        }
+
+        /* shift the repeat offsets history down to repIndex, newest first */
+        {   int r;
+            for (r = repIndex; r > 0; r--) {
+                frame->stats.rep[r] = frame->stats.rep[r - 1];
+            }
+            frame->stats.rep[0] = offset;
+        }
+
+        DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen);
+        /* pointer differences are ptrdiff_t : cast to match the %zu conversion */
+        DISPLAYLEVEL(7, " srcPos: %8zu seqNb: %3u",
+                     (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i);
+        DISPLAYLEVEL(6, "\n");
+        if (offsetCode < 3) {
+            /* repIndex is a U32 : cast to match the %d conversion */
+            DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex);
+        }
+        /* use libzstd sequence handling */
+        ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
+                      matchLen - MINMATCH);
+
+        literalsSize -= literalLen;
+        excessMatch -= (matchLen - MIN_SEQ_LEN);
+        literals += literalLen;
+    }
+
+    /* literals not consumed by any sequence trail the last match */
+    memcpy(srcPtr, literals, literalsSize);
+    srcPtr += literalsSize;
+    DISPLAYLEVEL(6, " excess literals: %5zu", literalsSize);
+    DISPLAYLEVEL(7, " srcPos: %8zu", (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
+    DISPLAYLEVEL(6, "\n");
+
+    return numSequences;
+}
+
+/* Rebuild `set` as a membership table for `symbols` :
+ * entries 0..maxSymbolValue are cleared, then set[s] becomes 1 for every
+ * value s found among the `len` bytes of `symbols`. */
+static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
+{
+    size_t remaining;
+
+    memset(set, 0, (size_t)maxSymbolValue+1);
+
+    /* marking is order-independent, so walk the input from the back */
+    for (remaining = len; remaining > 0; remaining--) {
+        BYTE const symbol = symbols[remaining - 1];
+        set[symbol] = 1;
+    }
+}
+
+/* Tell whether every one of the `len` bytes in `symbols` is a member of `set`.
+ * A symbol larger than maxSymbolValue is never looked up and counts as absent.
+ * @return : 1 if all symbols are present (also when len == 0), 0 otherwise. */
+static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
+{
+    size_t pos = 0;
+
+    while (pos < len) {
+        BYTE const symbol = symbols[pos];
+        if (symbol > maxSymbolValue) return 0;   /* outside the table */
+        if (set[symbol] == 0) return 0;          /* not part of the set */
+        pos++;
+    }
+    return 1;
+}
+
+/* Encode the sequences held in `seqStorePtr` into the frame output buffer.
+ *
+ * Output is written starting at frame->data and bounded by frame->dataEnd;
+ * on success frame->data is moved to the end of what was written.
+ * For each of the three code streams (literal lengths, offsets, match
+ * lengths) an encoding mode is picked, partly at random through `seed` :
+ * RLE, repeat of the previous table, the default distribution, or a freshly
+ * described table.  The chosen tables live in frame->stats, and the symbol
+ * sets used by this block are stored there as well, together with fseInit.
+ *
+ * nbSeq   : number of sequences stored in `seqStorePtr`
+ * @return : 0 on success, or an error code built with ERROR() */
+static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
+ size_t nbSeq)
+{
+ /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
+ U32 count[MaxSeq+1];
+ S16 norm[MaxSeq+1];
+ FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
+ FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
+ U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
+ const seqDef* const sequences = seqStorePtr->sequencesStart;
+ const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+ const BYTE* const llCodeTable = seqStorePtr->llCode;
+ const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+ BYTE* const oend = (BYTE*)frame->dataEnd;
+ BYTE* op = (BYTE*)frame->data;
+ BYTE* seqHead;
+ BYTE scratchBuffer[1<>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+ else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+
+ /* seqHead : flags for FSE encoding type */
+ seqHead = op++;
+
+ /* NOTE(review): the seqHead byte is reserved above before this check, so an
+ * empty sequences section still advances the output by one byte that is
+ * never written on this path -- confirm this is intended */
+ if (nbSeq==0) {
+ frame->data = op;
+
+ return 0;
+ }
+
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr);
+
+ /* CTable for Literal Lengths */
+ { U32 max = MaxLL;
+ /* mostFrequent == nbSeq is treated as "every sequence uses the same code" */
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
+ if (mostFrequent == nbSeq) {
+ /* do RLE if we have the chance */
+ *op++ = llCodeTable[0];
+ FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+ LLtype = set_rle;
+ } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+ isSymbolSubset(llCodeTable, nbSeq,
+ frame->stats.litlengthSymbolSet, 35)) {
+ /* maybe do repeat mode if we're allowed to */
+ LLtype = set_repeat;
+ } else if (!(RAND(seed) & 3)) {
+ /* maybe use the default distribution */
+ FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
+ LLtype = set_basic;
+ } else {
+ /* fall back on a full table */
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+ /* the code of the last sequence is counted one time less when it occurs
+ * more than once (that sequence is the one used to initialise the state
+ * further below) */
+ if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
+ LLtype = set_compressed;
+ } }
+
+ /* CTable for Offsets */
+ /* see Literal Lengths for descriptions of mode choices */
+ { U32 max = MaxOff;
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
+ if (mostFrequent == nbSeq) {
+ *op++ = ofCodeTable[0];
+ FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+ Offtype = set_rle;
+ } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+ isSymbolSubset(ofCodeTable, nbSeq,
+ frame->stats.offsetSymbolSet, 28)) {
+ Offtype = set_repeat;
+ } else if (!(RAND(seed) & 3)) {
+ FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
+ Offtype = set_basic;
+ } else {
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+ if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
+ Offtype = set_compressed;
+ } }
+
+ /* CTable for MatchLengths */
+ /* see Literal Lengths for descriptions of mode choices */
+ { U32 max = MaxML;
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
+ if (mostFrequent == nbSeq) {
+ *op++ = *mlCodeTable;
+ FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+ MLtype = set_rle;
+ } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+ isSymbolSubset(mlCodeTable, nbSeq,
+ frame->stats.matchlengthSymbolSet, 52)) {
+ MLtype = set_repeat;
+ } else if (!(RAND(seed) & 3)) {
+ /* sometimes do default distribution */
+ FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
+ MLtype = set_basic;
+ } else {
+ /* fall back on table */
+ size_t nbSeq_1 = nbSeq;
+ const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+ if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
+ FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
+ { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
+ if (FSE_isError(NCountSize)) return ERROR(GENERIC);
+ op += NCountSize; }
+ FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
+ MLtype = set_compressed;
+ } }
+ /* record the symbols used by this block : the repeat-mode checks above
+ * compare a later block's codes against these sets */
+ frame->stats.fseInit = 1;
+ initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
+ initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
+ initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
+
+ DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype);
+
+ /* flags byte : LL mode in bits 7-6, OF mode in bits 5-4, ML mode in bits 3-2 */
+ *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+ /* Encoding Sequences */
+ { BIT_CStream_t blockStream;
+ FSE_CState_t stateMatchLength;
+ FSE_CState_t stateOffsetBits;
+ FSE_CState_t stateLitLength;
+
+ CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
+
+ /* first symbols */
+ /* the last sequence (index nbSeq-1) initialises the three states and is
+ * the first one whose extra bits are written */
+ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
+ FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
+ BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+ if (MEM_32bits()) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+ BIT_flushBits(&blockStream);
+
+ { size_t n;
+ for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_flushBits(&blockStream); /* (7)*/
+ BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+ if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+ BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+ if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
+ BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
+ BIT_flushBits(&blockStream); /* (7)*/
+ } }
+
+ /* write out the final value of each state */
+ FSE_flushCState(&blockStream, &stateMatchLength);
+ FSE_flushCState(&blockStream, &stateOffsetBits);
+ FSE_flushCState(&blockStream, &stateLitLength);
+
+ { size_t const streamSize = BIT_closeCStream(&blockStream);
+ if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
+ op += streamSize;
+ } }
+
+ /* publish the new end of the written data */
+ frame->data = op;
+
+ return 0;
+}
+
+/* Produce the sequences section of a compressed block :
+ * a fresh seqStore is initialised, filled with randomly generated sequences,
+ * and then serialised into the frame by writeSequences().
+ * The result of writeSequences() is passed through CHECKERR.
+ * @return : the number of sequences that were generated */
+static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
+                                  size_t literalsSize)
+{
+    seqStore_t store;
+    size_t nbSeq;
+
+    initSeqStore(&store);
+
+    /* step 1 : pick the sequences (this also regenerates the block content) */
+    nbSeq = generateSequences(seed, frame, &store, contentSize, literalsSize);
+
+    /* step 2 : encode them into the frame output */
+    CHECKERR(writeSequences(seed, frame, &store, nbSeq));
+
+    return nbSeq;
+}
+
+/* Write one compressed block body (literals section followed by sequences
+ * section) at frame->data.
+ * @return : number of bytes by which frame->data advanced */
+static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize)
+{
+    BYTE* const blockStart = (BYTE*)frame->data;
+
+    DISPLAYLEVEL(4, " compressed block:\n");
+
+    {   size_t const literalsSize = writeLiteralsBlock(seed, frame, contentSize);
+
+        DISPLAYLEVEL(4, " literals size: %zu\n", literalsSize);
+
+        {   size_t const nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize);
+
+            DISPLAYLEVEL(4, " number of sequences: %zu\n", nbSeq);
+        }
+    }
+
+    return (BYTE*)frame->data - blockStart;
+}
+
+/* Write one block (3-byte header + body) at frame->data and generate the
+ * matching `contentSize` bytes of content at frame->src.
+ *
+ * The block kind is drawn at random : 1/8 raw, 1/8 RLE, otherwise compressed.
+ * A compressed block whose body ends up larger than its content is replaced
+ * by a raw block, and the frame statistics are rolled back to their state
+ * before the attempt.
+ * On return frame->src has advanced by contentSize and frame->data points
+ * just past the block. */
+static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
+                       int lastBlock)
+{
+    int const blockTypeDesc = RAND(seed) % 8;
+    BYTE* const header = (BYTE*)frame->data;
+    BYTE* op = header + 3;   /* body starts right after the header */
+    size_t blockSize = contentSize;   /* value of the size field; only a
+                                       * successful compressed block changes it */
+    int blockType;
+
+    DISPLAYLEVEL(3, " block:\n");
+    DISPLAYLEVEL(3, " block content size: %zu\n", contentSize);
+    DISPLAYLEVEL(3, " last block: %s\n", lastBlock ? "yes" : "no");
+
+    switch (blockTypeDesc) {
+    case 0:
+        /* Raw data : random bytes, stored verbatim */
+        RAND_buffer(seed, frame->src, contentSize);
+        memcpy(op, frame->src, contentSize);
+        op += contentSize;
+        blockType = 0;
+        break;
+
+    case 1:
+        /* RLE : a single byte stands for the whole content */
+        {   BYTE const symbol = RAND(seed) & 0xff;
+            memset(frame->src, symbol, contentSize);
+            *op++ = symbol;
+        }
+        blockType = 1;
+        break;
+
+    default:
+        /* compressed, most common */
+        {   size_t compressedSize;
+
+            frame->oldStats = frame->stats;   /* snapshot, for the fallback */
+
+            frame->data = op;
+            compressedSize = writeCompressedBlock(seed, frame, contentSize);
+            if (compressedSize <= contentSize) {
+                blockType = 2;
+                blockSize = compressedSize;
+                op += compressedSize;
+            } else {
+                /* fall back on raw block if data doesn't compress */
+                blockType = 0;
+                memcpy(op, frame->src, contentSize);
+                op += contentSize;
+
+                frame->stats = frame->oldStats; /* don't update the stats */
+            }
+        }
+        break;
+    }
+
+    frame->src = (BYTE*)frame->src + contentSize;
+
+    DISPLAYLEVEL(3, " block type: %s\n", BLOCK_TYPES[blockType]);
+    DISPLAYLEVEL(3, " block size field: %zu\n", blockSize);
+
+    /* header : bit 0 = last block flag, bits 1-2 = type, remaining 21 bits = size */
+    header[0] = (lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff;
+    MEM_writeLE16(header + 1, blockSize >> 5);
+
+    frame->data = op;
+}
+
+/* Split frame->header.contentSize bytes of content into a random succession
+ * of blocks and write each of them with writeBlock().
+ * No block holds more than MIN(MAX_BLOCK_SIZE, windowSize) bytes.  The block
+ * flagged as last always carries everything that is left. */
+static void writeBlocks(U32* seed, frame_t* frame)
+{
+    size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize);
+    size_t remaining = frame->header.contentSize;
+    int lastBlock;
+
+    do {
+        size_t blockContentSize = 0;   /* default : some empty blocks */
+
+        /* once the rest fits in a single block : 1 in 4 chance of ending frame */
+        lastBlock = (remaining > maxBlockSize) ? 0 : !(RAND(seed) & 3);
+
+        if (lastBlock) {
+            blockContentSize = remaining;
+        } else if (remaining > 0 && (RAND(seed) & 7)) {
+            /* some variable size blocks */
+            blockContentSize = RAND(seed) % (MIN(maxBlockSize, remaining)+1);
+        } else if (remaining > maxBlockSize && (RAND(seed) & 1)) {
+            /* some full size blocks */
+            blockContentSize = maxBlockSize;
+        }
+
+        writeBlock(seed, frame, blockContentSize, lastBlock);
+
+        remaining -= blockContentSize;
+    } while (!lastBlock);
+}
+
+/* Append the frame checksum : the low 32 bits of XXH64 (seed 0) computed over
+ * all content generated so far (srcStart up to src), written little-endian at
+ * frame->data, which then advances by 4 bytes.
+ * It lets implementations verify their output. */
+static void writeChecksum(frame_t* frame)
+{
+    const BYTE* const contentStart = (const BYTE*)frame->srcStart;
+    const BYTE* const contentEnd = (const BYTE*)frame->src;
+    U64 const fullDigest = XXH64(frame->srcStart, contentEnd - contentStart, 0);
+    U32 const checksum = (U32)fullDigest;
+
+    DISPLAYLEVEL(2, " checksum: %08x\n", checksum);
+
+    MEM_writeLE32(frame->data, checksum);
+    frame->data = (BYTE*)frame->data + 4;
+}
+
+/* Write `size` bytes from `buf` to the file at `path`, or to stdout when
+ * `path` is NULL.  The file is created/truncated in binary mode.
+ * Any failure to open, write or close is reported on stderr and terminates
+ * the program with exit(1). */
+static void outputBuffer(const void* buf, size_t size, const char* const path)
+{
+    /* write data out to file */
+    const BYTE* ip = (const BYTE*)buf;
+    /* name used in diagnostics : never hand a NULL pointer to %s */
+    const char* const displayName = path ? path : "stdout";
+    FILE* out;
+    if (path) {
+        out = fopen(path, "wb");
+    } else {
+        out = stdout;
+    }
+    if (!out) {
+        fprintf(stderr, "Failed to open file at %s: ", displayName);
+        perror(NULL);
+        exit(1);
+    }
+
+    {
+        size_t fsize = size;
+        size_t written = 0;
+        /* fwrite may write less than requested : keep going until done */
+        while (written < fsize) {
+            written += fwrite(ip + written, 1, fsize - written, out);
+            if (ferror(out)) {
+                fprintf(stderr, "Failed to write to file at %s: ", displayName);
+                perror(NULL);
+                exit(1);
+            }
+        }
+    }
+
+    if (path) {
+        /* buffered data is only committed on close, so a failing fclose()
+         * is a write failure as well */
+        if (fclose(out) != 0) {
+            fprintf(stderr, "Failed to write to file at %s: ", displayName);
+            perror(NULL);
+            exit(1);
+        }
+    }
+}
+
+/* Reset `fr` to an empty frame : every field is zeroed, the compressed-data
+ * cursor is placed at the start of FRAME_BUFFER, the content cursor at the
+ * start of CONTENT_BUFFER, and the repeat offsets get the initial values
+ * 1, 4, 8. */
+static void initFrame(frame_t* fr)
+{
+    memset(fr, 0, sizeof(*fr));
+
+    /* compressed output */
+    fr->dataStart = FRAME_BUFFER;
+    fr->data = fr->dataStart;
+    fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
+
+    /* regenerated content */
+    fr->srcStart = CONTENT_BUFFER;
+    fr->src = fr->srcStart;
+    fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
+
+    /* init repeat codes */
+    fr->stats.rep[0] = 1;
+    fr->stats.rep[1] = 4;
+    fr->stats.rep[2] = 8;
+}
+
+/* Generate a complete frame into `fr` : header, blocks, then checksum.
+ * `seed` is the random state at entry.
+ * @return : the random state after generation (the final seed) */
+static U32 generateFrame(U32 seed, frame_t* fr)
+{
+    U32* const state = &seed;   /* the writers advance the seed in place */
+
+    DISPLAYLEVEL(1, "frame seed: %u\n", seed);
+
+    initFrame(fr);
+
+    writeFrameHeader(state, fr);
+    writeBlocks(state, fr);
+    writeChecksum(fr);
+
+    return *state;
+}
+
+/*-*******************************************************
+* Test Mode
+*********************************************************/
+
+BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
+
+/* Decode the generated frame with the one-shot ZSTD_decompress() API into
+ * DECOMPRESSED_BUFFER and compare the result with the generated content.
+ * @return : the value returned by ZSTD_decompress() (an error code if it
+ *           failed), or a corruption_detected error when the bytes differ */
+static size_t testDecodeSimple(frame_t* fr)
+{
+    size_t const frameSize = (BYTE*)fr->data - (BYTE*)fr->dataStart;
+    size_t const contentSize = (BYTE*)fr->src - (BYTE*)fr->srcStart;
+    size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
+                                       fr->dataStart, frameSize);
+
+    if (ZSTD_isError(ret)) return ret;
+
+    if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, contentSize) == 0) {
+        return ret;
+    }
+
+    return ERROR(corruption_detected);
+}
+
+/* Decode the generated frame with the streaming API into DECOMPRESSED_BUFFER
+ * and compare the result with the generated content.
+ * The output window starts at ZSTD_DStreamOutSize() bytes and is enlarged
+ * step by step (never beyond MAX_DECOMPRESSED_SIZE) so that decoding happens
+ * in several chunks.
+ * @return : number of bytes decoded on success, otherwise an error code
+ *           (memory_allocation, a decoder error, or corruption_detected when
+ *           the decoded bytes differ).  The stream is released on every path. */
+static size_t testDecodeStreaming(frame_t* fr)
+{
+    /* test decoding the generated data with the streaming API */
+    ZSTD_DStream* zd = ZSTD_createDStream();
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+    size_t ret;
+
+    if (!zd) return ERROR(memory_allocation);
+
+    in.src = fr->dataStart;
+    in.pos = 0;
+    in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
+
+    out.dst = DECOMPRESSED_BUFFER;
+    out.pos = 0;
+    out.size = ZSTD_DStreamOutSize();
+
+    /* initialisation reports failures through its return value */
+    ret = ZSTD_initDStream(zd);
+    if (ZSTD_isError(ret)) goto cleanup;
+
+    while (1) {
+        ret = ZSTD_decompressStream(zd, &out, &in);
+        if (ZSTD_isError(ret)) goto cleanup; /* error */
+        if (ret == 0) break; /* frame is done */
+
+        /* force decoding to be done in chunks */
+        out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
+    }
+
+    ret = out.pos;
+
+    if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
+        /* fall through to cleanup : returning here would leak the stream */
+        ret = ERROR(corruption_detected);
+    }
+
+cleanup:
+    ZSTD_freeDStream(zd);
+    return ret;
+}
+
+/* Test mode : generate frames and check that libzstd decodes each of them
+ * back to the generated content, with both the simple and the streaming API.
+ *
+ * seed          : seed of the first frame; each following frame continues
+ *                 from the random state left by the previous one
+ * numFiles      : minimum number of frames to test
+ * testDurationS : keep testing until this many seconds have elapsed
+ *                 (when both are 0, a single frame is tested)
+ *
+ * @return : 0 if every frame decoded correctly, 1 at the first failure.
+ *           The failure message shows the seed the failing frame was
+ *           generated from, so that it can be reproduced. */
+static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS)
+{
+    unsigned fnum;
+
+    clock_t const startClock = clock();
+    /* multiply in clock_t so that long durations do not wrap in `unsigned` */
+    clock_t const maxClockSpan = (clock_t)testDurationS * CLOCKS_PER_SEC;
+
+    if (numFiles == 0 && !testDurationS) numFiles = 1;
+
+    DISPLAY("seed: %u\n", seed);
+
+    for (fnum = 0; fnum < numFiles || clockSpan(startClock) < maxClockSpan; fnum++) {
+        frame_t fr;
+        /* generateFrame() returns the *final* random state : remember the
+         * seed this frame starts from before it gets overwritten */
+        U32 const frameSeed = seed;
+
+        if (fnum < numFiles)
+            DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
+        else
+            DISPLAYUPDATE("\r%u ", fnum);
+
+        seed = generateFrame(frameSeed, &fr);
+
+        {   size_t const r = testDecodeSimple(&fr);
+            if (ZSTD_isError(r)) {
+                DISPLAY("Error in simple mode on test seed %u: %s\n", frameSeed,
+                        ZSTD_getErrorName(r));
+                return 1;
+            }
+        }
+        {   size_t const r = testDecodeStreaming(&fr);
+            if (ZSTD_isError(r)) {
+                DISPLAY("Error in streaming mode on test seed %u: %s\n", frameSeed,
+                        ZSTD_getErrorName(r));
+                return 1;
+            }
+        }
+    }
+
+    DISPLAY("\r%u tests completed: ", fnum);
+    DISPLAY("OK\n");
+
+    return 0;
+}
+
+/*-*******************************************************
+* File I/O
+*********************************************************/
+
+/* Single-file mode : generate one frame from `seed` and write it to `path`
+ * (stdout when NULL).  When `origPath` is given, the matching original
+ * content is written there too.
+ * @return : always 0 */
+static int generateFile(U32 seed, const char* const path,
+                        const char* const origPath)
+{
+    frame_t fr;
+
+    DISPLAY("seed: %u\n", seed);
+
+    generateFrame(seed, &fr);
+
+    /* compressed frame */
+    {   size_t const frameSize = (BYTE*)fr.data - (BYTE*)fr.dataStart;
+        outputBuffer(fr.dataStart, frameSize, path);
+    }
+
+    /* original content, on request */
+    if (origPath != NULL) {
+        size_t const contentSize = (BYTE*)fr.src - (BYTE*)fr.srcStart;
+        outputBuffer(fr.srcStart, contentSize, origPath);
+    }
+
+    return 0;
+}
+
+/* Multiple-files mode : generate `numFiles` frames, each continuing from the
+ * random state left by the previous one.
+ * Frame number N is written to "<path>/zNNNNNN.zst" and, when `origPath` is
+ * given, its original content to "<origPath>/zNNNNNN".
+ * @return : 0 on success, 1 if a file name does not fit in MAX_PATH */
+static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
+                          const char* const origPath)
+{
+    char outPath[MAX_PATH];
+    unsigned fnum = 0;
+
+    DISPLAY("seed: %u\n", seed);
+
+    while (fnum < numFiles) {
+        frame_t fr;
+
+        DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
+
+        seed = generateFrame(seed, &fr);
+
+        /* compressed frame */
+        {   int const len = snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum);
+            if (len + 1 > MAX_PATH) {
+                DISPLAY("Error: path too long\n");
+                return 1;
+            }
+        }
+        outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
+
+        /* original content, on request */
+        if (origPath) {
+            int const len = snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum);
+            if (len + 1 > MAX_PATH) {
+                DISPLAY("Error: path too long\n");
+                return 1;
+            }
+            outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
+        }
+
+        fnum++;
+    }
+
+    DISPLAY("\r%u/%u \n", fnum, numFiles);
+
+    return 0;
+}
+
+
+/*_*******************************************************
+* Command line
+*********************************************************/
+/* Derive a default seed from the current time : the time value is hashed
+ * with XXH32 (seed 0) and reduced to the range 0..65535. */
+static U32 makeSeed(void)
+{
+    U32 now = time(NULL);
+    U32 const hashed = XXH32(&now, sizeof(now), 0);
+    return hashed % 65536;
+}
+
+/* Parse the run of decimal digits found at *argument.
+ * *argument is left on the first character that is not a digit.
+ * @return : the parsed value, 0 if there is no digit at all
+ *           (no overflow detection : the value wraps as `unsigned` does) */
+static unsigned readInt(const char** argument)
+{
+    const char* cursor = *argument;
+    unsigned result = 0;
+
+    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
+        result = result * 10 + (*cursor - '0');
+    }
+
+    *argument = cursor;
+    return result;
+}
+
+/* Print the basic command line help.
+ * Adjacent string literals are grouped by topic; each group stays well below
+ * the 509-character string limit of C90. */
+static void usage(const char* programName)
+{
+    /* synopsis */
+    DISPLAY( "Usage :\n"
+             " %s [args]\n"
+             "\n"
+             "Arguments :\n", programName);
+    /* output locations */
+    DISPLAY( " -p : select output path (default:stdout)\n"
+             " in multiple files mode this should be a directory\n"
+             " -o : select path to output original file (default:no output)\n"
+             " in multiple files mode this should be a directory\n");
+    /* generation parameters */
+    DISPLAY( " -s# : select seed (default:random based on time)\n"
+             " -n# : number of files to generate (default:1)\n");
+    /* test mode */
+    DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"
+             " -T# : length of time to run tests for\n");
+    /* miscellaneous */
+    DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"
+             " -h/H : display help/long help and exit\n");
+}
+
+/* Print the long help : the basic help followed by the advanced arguments. */
+static void advancedUsage(const char* programName)
+{
+    usage(programName);
+    DISPLAY( "\n"
+             "Advanced arguments :\n");
+    DISPLAY( " --content-size : always include the content size in the frame header\n");
+}
+
+int main(int argc, char** argv)
+{
+ U32 seed = 0;
+ int seedset = 0;
+ unsigned numFiles = 0;
+ unsigned testDuration = 0;
+ int testMode = 0;
+ const char* path = NULL;
+ const char* origPath = NULL;
+
+ int argNb;
+
+ /* Check command line */
+ for (argNb=1; argNb