diff --git a/Makefile b/Makefile index ed8f1610..c73393cd 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,7 @@ endif default: lib zstd-release .PHONY: all -all: allmost - CPPFLAGS=-I../lib LDFLAGS=-L../lib $(MAKE) -C examples/ $@ +all: | allmost examples manual .PHONY: allmost allmost: @@ -68,6 +67,14 @@ zlibwrapper: test: $(MAKE) -C $(TESTDIR) $@ +.PHONY: examples +examples: + CPPFLAGS=-I../lib LDFLAGS=-L../lib $(MAKE) -C examples/ all + +.PHONY: manual +manual: + $(MAKE) -C contrib/gen_html $@ + .PHONY: clean clean: @$(MAKE) -C $(ZSTDDIR) $@ > $(VOID) @@ -75,6 +82,7 @@ clean: @$(MAKE) -C $(TESTDIR) $@ > $(VOID) @$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID) @$(MAKE) -C examples/ $@ > $(VOID) + @$(MAKE) -C contrib/gen_html $@ > $(VOID) @$(RM) zstd$(EXT) zstdmt$(EXT) tmp* @echo Cleaning completed @@ -197,16 +205,16 @@ cmaketest: cd $(BUILDIR)/cmake/build ; cmake -DPREFIX:STRING=~/install_test_dir $(CMAKE_PARAMS) .. ; $(MAKE) install ; $(MAKE) uninstall c90test: clean - CFLAGS="-std=c90" $(MAKE) all # will fail, due to // and long long + CFLAGS="-std=c90" $(MAKE) allmost # will fail, due to missing support for `long long` gnu90test: clean - CFLAGS="-std=gnu90" $(MAKE) all + CFLAGS="-std=gnu90" $(MAKE) allmost c99test: clean CFLAGS="-std=c99" $(MAKE) allmost gnu99test: clean - CFLAGS="-std=gnu99" $(MAKE) all + CFLAGS="-std=gnu99" $(MAKE) allmost c11test: clean CFLAGS="-std=c11" $(MAKE) allmost diff --git a/NEWS b/NEWS index be334975..96ff25fd 100644 --- a/NEWS +++ b/NEWS @@ -2,7 +2,7 @@ v1.1.4 cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski cli : new : advanced benchmark command --priority=rt cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77 -API : new : ZSTD_getFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize(), by Sean Purcell +API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize(), by Sean Purcell API : change : ZSTD_compress*() 
with srcSize==0 create an empty-frame of known size build:new : meson build system in contrib/meson, by Dima Krasner doc : new : educational decoder, by Sean Purcell diff --git a/circle.yml b/circle.yml index 69c98854..3102633e 100644 --- a/circle.yml +++ b/circle.yml @@ -12,7 +12,7 @@ dependencies: if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-invalidDictionaries && make clean; fi - | if [[ "$CIRCLE_NODE_INDEX" == "0" ]]; then g++ -v; make gpptest && make clean; fi - if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy && make clean; fi + if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-decodecorpus && make clean; fi - | if [[ "$CIRCLE_NODE_INDEX" == "0" ]]; then gcc -v; make gnu90test && make clean; fi if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-symbols && make clean; fi diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile index c68e560a..ea68b11f 100644 --- a/contrib/gen_html/Makefile +++ b/contrib/gen_html/Makefile @@ -7,12 +7,18 @@ # of patent rights can be found in the PATENTS file in the same directory. 
# ########################################################################## - CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment CFLAGS += $(MOREFLAGS) -FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) +FLAGS = $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) $(LDFLAGS) +ZSTDAPI = ../../lib/zstd.h +ZSTDMANUAL = ../../doc/zstd_manual.html +LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` +LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` +LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)` +LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) +LIBVER := $(shell echo $(LIBVER_SCRIPT)) # Define *.exe as extension for Windows systems @@ -23,14 +29,23 @@ EXT = endif -.PHONY: default gen_html - +.PHONY: default default: gen_html +.PHONY: all +all: manual + gen_html: gen_html.cpp - $(CXX) $(FLAGS) $^ -o $@$(EXT) + $(CXX) $(FLAGS) $^ -o $@$(EXT) +$(ZSTDMANUAL): gen_html $(ZSTDAPI) + echo "Update zstd manual in /doc" + ./gen_html $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL) +.PHONY: manual +manual: gen_html $(ZSTDMANUAL) + +.PHONY: clean clean: @$(RM) gen_html$(EXT) @echo Cleaning completed diff --git a/doc/educational_decoder/README.md b/doc/educational_decoder/README.md index 2e2186e0..e3b9bf58 100644 --- a/doc/educational_decoder/README.md +++ b/doc/educational_decoder/README.md @@ -17,3 +17,13 @@ It also contains implementations of Huffman and FSE table decoding. harness [dictionary] +As an additional resource to be used with this decoder, +see the `decodecorpus` tool in the [tests] directory. +It generates valid Zstandard frames that can be used to verify +a Zstandard decoder implementation. 
+Note that to use the tool to verify this decoder implementation, +the --content-size flag should be set, +as this decoder does not handle streaming decoding, +and so it must know the decompressed size in advance. + +[tests]: https://github.com/facebook/zstd/blob/dev/tests/ diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 1badcbd7..23224d77 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1,10 +1,10 @@ -zstd 1.1.2 Manual +zstd 1.1.4 Manual -

zstd 1.1.2 Manual

+

zstd 1.1.4 Manual


Contents

    @@ -19,13 +19,15 @@
  1. Streaming decompression - HowTo
  2. START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
  3. Advanced types
  4. -
  5. Advanced compression functions
  6. -
  7. Advanced decompression functions
  8. -
  9. Advanced streaming functions
  10. -
  11. Buffer-less and synchronous inner streaming functions
  12. -
  13. Buffer-less streaming compression (synchronous mode)
  14. -
  15. Buffer-less streaming decompression (synchronous mode)
  16. -
  17. Block functions
  18. +
  19. Compressed size functions
  20. +
  21. Decompressed size functions
  22. +
  23. Advanced compression functions
  24. +
  25. Advanced decompression functions
  26. +
  27. Advanced streaming functions
  28. +
  29. Buffer-less and synchronous inner streaming functions
  30. +
  31. Buffer-less streaming compression (synchronous mode)
  32. +
  33. Buffer-less streaming decompression (synchronous mode)
  34. +
  35. Block functions

Introduction

@@ -63,7 +65,7 @@
 
 
size_t ZSTD_decompress( void* dst, size_t dstCapacity,
                               const void* src, size_t compressedSize);
-

`compressedSize` : must be the _exact_ size of a single compressed frame. +

`compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. `dstCapacity` is an upper bound of originalSize. If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), @@ -71,7 +73,16 @@


unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-

'src' is the start of a zstd compressed frame. +

NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize. + ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single + frame, but distinguishes empty frames from frames with an unknown size, or errors. + + Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple + concatenated frames in one buffer, and so is more general. + As a result however, it requires more computation and entire frames to be passed to it, + as opposed to ZSTD_getFrameContentSize which requires only a single frame's header. + + 'src' is the start of a zstd compressed frame. @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise. note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. When `return==0`, data to decompress could be any size. @@ -88,21 +99,29 @@ note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.


-

Helper functions

int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+

Helper functions

int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
 size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
 unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
 const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
-

+

Explicit memory management


 
+

Compression context

   When compressing many times,
+   it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+   This will make workload friendlier for system's memory.
+   Use one context per thread for parallel execution in multi-threaded environments. 
+
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+

size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
 

Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).


-

Decompression context

typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+

Decompression context

typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
 size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
-

+

size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 

Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).


@@ -131,11 +150,11 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);

Fast dictionary API


 
-
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
 

When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. - `dict` can be released after ZSTD_CDict creation. + `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -151,9 +170,9 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
    Note that compression level is decided during dictionary creation. 
 


-
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+
ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 

Create a digested dictionary, ready to start decompression operation without startup delay. - `dict` can be released after creation. + dictBuffer can be released after DDict creation, as its content is copied inside DDict


size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
@@ -271,9 +290,9 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 } ZSTD_compressionParameters;
 

typedef struct {
-    unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
-    unsigned checksumFlag;    /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    unsigned noDictIDFlag;    /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+    unsigned checksumFlag;    /**< 1: generate a 32-bits checksum at end of frame, for error detection */
+    unsigned noDictIDFlag;    /**< 1: no dictID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;
 

typedef struct {
@@ -281,11 +300,56 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 

-

Custom memory allocation functions

typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+

Custom memory allocation functions

typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
-

-

Advanced compression functions


+

+

Compressed size functions


+
+
size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+

`src` should point to the start of a ZSTD encoded frame + `srcSize` must be at least as large as the frame + @return : the compressed size of the frame pointed to by `src`, suitable to pass to + `ZSTD_decompress` or similar, or an error code if given invalid input. +


+ +

Decompressed size functions


+
+
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+

`src` should point to the start of a ZSTD encoded frame + `srcSize` must be at least as large as the frame header. A value greater than or equal + to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. + @return : decompressed size of the frame pointed to by `src` if known, otherwise + - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) +


+ +
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+

`src` should point to the start of a series of ZSTD encoded and/or skippable frames + `srcSize` must be the _exact_ size of this series + (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`) + @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_ + - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + - if an error occurred: ZSTD_CONTENTSIZE_ERROR + + note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + In which case, it's necessary to use streaming mode to decompress data. + Optionally, application can still use ZSTD_decompress() while relying on implied limits. + (For example, data may be necessarily cut into blocks <= 16 KB). + note 2 : decompressed size is always present when compression is done with ZSTD_compress() + note 3 : decompressed size can be very large (64-bits value), + potentially larger than what local system can handle as a single memory segment. + In which case, it's necessary to use streaming mode to decompress data. + note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + Always ensure result fits within application's authorized limits. + Each application can set its own limits. + note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + read each contained frame header. This is efficient as most of the data is skipped, + however it does mean that all frame data must be present and valid. +


+ +

Advanced compression functions


 
 
size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
 

Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters. @@ -300,7 +364,22 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v

Gives the amount of memory used by a given ZSTD_CCtx


-
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+
typedef enum {
+    ZSTD_p_forceWindow   /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
+} ZSTD_CCtxParameter;
+

+
size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
+

Set advanced parameters, selected through enum ZSTD_CCtxParameter + @result : 0, or an error code (which can be tested with ZSTD_isError()) +


+ +
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+

Create a digested dictionary for compression + Dictionary content is simply referenced, and therefore stays in dictBuffer. + It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict +


+ +
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
                                                   ZSTD_parameters params, ZSTD_customMem customMem);
 

Create a ZSTD_CDict using external alloc and free, and customized compression parameters


@@ -336,7 +415,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v

Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter


-

Advanced decompression functions


+

Advanced decompression functions


 
 
unsigned ZSTD_isFrame(const void* buffer, size_t size);
 

Tells if the content of `buffer` starts with a valid Frame Identifier. @@ -357,6 +436,12 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v

Gives the amount of memory used by a given ZSTD_DCtx


+
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+

Create a digested dictionary, ready to start decompression operation without startup delay. + Dictionary content is simply referenced, and therefore stays in dictBuffer. + It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict +


+
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 

Gives the amount of memory used by a given ZSTD_DDict


@@ -385,33 +470,33 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code.


-

Advanced streaming functions


+

Advanced streaming functions


 
-

Advanced Streaming compression functions

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct */
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+

Advanced Streaming compression functions

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
-                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown.  for a frame size of 0 use initCStream_advanced */
 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
-

-

Advanced Streaming decompression functions

typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
+

+

Advanced Streaming decompression functions

typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
-

-

Buffer-less and synchronous inner streaming functions

+

+

Buffer-less and synchronous inner streaming functions

   This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
   But it's also a complex one, with many restrictions (documented below).
   Prefer using normal streaming API for an easier experience
  
 
-

Buffer-less streaming compression (synchronous mode)

+

Buffer-less streaming compression (synchronous mode)

   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -434,20 +519,21 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
     In which case, it will "discard" the relevant memory section from its history.
 
   Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
-  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
-  Without last block mark, frames will be considered unfinished (broken) by decoders.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (corrupted) by decoders.
 
-  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
 
-

Buffer-less streaming compression functions

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+

Buffer-less streaming compression functions

size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size.  if it is non-zero, it must be accurate.  for 0 size frames, use compressBegin_advanced */
 size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-

-

Buffer-less streaming decompression (synchronous mode)

+

+

Buffer-less streaming decompression (synchronous mode)

   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
@@ -490,7 +576,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo
   Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
   This information is not required to properly decode a frame.
 
-  == Special case : skippable frames ==
+  == Special case : skippable frames ==
 
   Skippable frames allow integration of user-defined data into a flow of concatenated frames.
   Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows :
@@ -509,7 +595,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo
     unsigned checksumFlag;
 } ZSTD_frameParams;
 

-

Buffer-less streaming decompression functions

size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
+

Buffer-less streaming decompression functions

size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
@@ -517,8 +603,8 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
 ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-

-

Block functions

+

+

Block functions

     Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
@@ -542,10 +628,10 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
         Use ZSTD_insertBlock() in such a case.
 
-

Raw zstd block functions

size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+

Raw zstd block functions

size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
 size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful for uncompressed blocks */
-

+

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index eda8b9dd..e38ef79b 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -369,7 +369,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) totalDstSize += ret; } { - size_t const frameSrcSize = ZSTD_getFrameCompressedSize(src, srcSize); + size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); if (ZSTD_isError(frameSrcSize)) { return ZSTD_CONTENTSIZE_ERROR; } @@ -1437,17 +1437,20 @@ size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t len return length; } -/** ZSTD_getFrameCompressedSize() : +/** ZSTD_findFrameCompressedSize() : * compatible with legacy mode - * `src` must point to the start of a ZSTD or ZSTD legacy frame + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame * `srcSize` must be at least as large as the frame contained * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_getFrameCompressedSize(const void *src, size_t srcSize) +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isLegacy(src, srcSize)) return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); #endif - { + if (srcSize >= ZSTD_skippableHeaderSize && + (MEM_readLE32(src) & 0xFFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + 4); + } else { const BYTE* ip = (const BYTE*)src; const BYTE* const ipstart = ip; size_t remainingSize = srcSize; @@ -1576,7 +1579,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) { - size_t const frameSize = ZSTD_getFrameCompressedSizeLegacy(src, srcSize); + size_t const 
frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); size_t decodedSize; if (ZSTD_isError(frameSize)) return frameSize; diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h index b0a7b71d..707e76f0 100644 --- a/lib/legacy/zstd_legacy.h +++ b/lib/legacy/zstd_legacy.h @@ -123,26 +123,26 @@ MEM_STATIC size_t ZSTD_decompressLegacy( } } -MEM_STATIC size_t ZSTD_getFrameCompressedSizeLegacy(const void *src, +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t compressedSize) { U32 const version = ZSTD_isLegacy(src, compressedSize); switch(version) { case 1 : - return ZSTDv01_getFrameCompressedSize(src, compressedSize); + return ZSTDv01_findFrameCompressedSize(src, compressedSize); case 2 : - return ZSTDv02_getFrameCompressedSize(src, compressedSize); + return ZSTDv02_findFrameCompressedSize(src, compressedSize); case 3 : - return ZSTDv03_getFrameCompressedSize(src, compressedSize); + return ZSTDv03_findFrameCompressedSize(src, compressedSize); case 4 : - return ZSTDv04_getFrameCompressedSize(src, compressedSize); + return ZSTDv04_findFrameCompressedSize(src, compressedSize); case 5 : - return ZSTDv05_getFrameCompressedSize(src, compressedSize); + return ZSTDv05_findFrameCompressedSize(src, compressedSize); case 6 : - return ZSTDv06_getFrameCompressedSize(src, compressedSize); + return ZSTDv06_findFrameCompressedSize(src, compressedSize); case 7 : - return ZSTDv07_getFrameCompressedSize(src, compressedSize); + return ZSTDv07_findFrameCompressedSize(src, compressedSize); default : return ERROR(prefix_unknown); } diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index a0c78a4b..bcacb8d5 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -1992,7 +1992,7 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -size_t ZSTDv01_getFrameCompressedSize(const void* src, size_t srcSize) +size_t 
ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h index 21959fcd..13cb3acf 100644 --- a/lib/legacy/zstd_v01.h +++ b/lib/legacy/zstd_v01.h @@ -40,7 +40,7 @@ ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv01_isError()) */ -size_t ZSTDv01_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize); /** ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 6cbf8023..2297b28c 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -3378,7 +3378,7 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -static size_t ZSTD_getFrameCompressedSize(const void *src, size_t srcSize) +static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; @@ -3524,9 +3524,9 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize); } -size_t ZSTDv02_getFrameCompressedSize(const void *src, size_t compressedSize) +size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize) { - return ZSTD_getFrameCompressedSize(src, compressedSize); + return ZSTD_findFrameCompressedSize(src, compressedSize); } ZSTDv02_Dctx* ZSTDv02_createDCtx(void) diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h index 9542fc0e..d14f0293 100644 --- a/lib/legacy/zstd_v02.h +++ b/lib/legacy/zstd_v02.h @@ -40,7 +40,7 @@ ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with 
return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv02_isError()) */ -size_t ZSTDv02_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize); /** ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 98b93c49..ef654931 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -3019,7 +3019,7 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -static size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize) +static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; @@ -3165,9 +3165,9 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize); } -size_t ZSTDv03_getFrameCompressedSize(const void* src, size_t srcSize) +size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize) { - return ZSTD_getFrameCompressedSize(src, srcSize); + return ZSTD_findFrameCompressedSize(src, srcSize); } ZSTDv03_Dctx* ZSTDv03_createDCtx(void) diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h index 46969410..07f7597b 100644 --- a/lib/legacy/zstd_v03.h +++ b/lib/legacy/zstd_v03.h @@ -40,7 +40,7 @@ ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv03_isError()) */ -size_t ZSTDv03_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize); /** ZSTDv03_isError() : tells if the result of 
ZSTDv03_decompress() is an error diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index 8c929b05..09040e68 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -3326,7 +3326,7 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, return op-ostart; } -static size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize) +static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; @@ -3782,9 +3782,9 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t #endif } -size_t ZSTDv04_getFrameCompressedSize(const void* src, size_t srcSize) +size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize) { - return ZSTD_getFrameCompressedSize(src, srcSize); + return ZSTD_findFrameCompressedSize(src, srcSize); } size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); } diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h index bcef1fe9..1b5439d3 100644 --- a/lib/legacy/zstd_v04.h +++ b/lib/legacy/zstd_v04.h @@ -40,7 +40,7 @@ ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv04_isError()) */ -size_t ZSTDv04_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize); /** ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 9689b170..a6f5f5db 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -3583,7 +3583,7 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t #endif } -size_t ZSTDv05_getFrameCompressedSize(const void *src, size_t srcSize) +size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize) { const BYTE* ip 
= (const BYTE*)src; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h index 157dbc57..8ce662fd 100644 --- a/lib/legacy/zstd_v05.h +++ b/lib/legacy/zstd_v05.h @@ -38,7 +38,7 @@ ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ -size_t ZSTDv05_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize); /* ************************************* * Helper functions diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index f586db22..a4258b67 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -3729,7 +3729,7 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t #endif } -size_t ZSTDv06_getFrameCompressedSize(const void* src, size_t srcSize) +size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h index ef1feb2f..10c9c772 100644 --- a/lib/legacy/zstd_v06.h +++ b/lib/legacy/zstd_v06.h @@ -47,7 +47,7 @@ ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ -size_t ZSTDv06_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize); /* ************************************* * Helper functions diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 07099d5a..e67916b3 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -3968,7 +3968,7 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t #endif } -size_t 
ZSTDv07_getFrameCompressedSize(const void* src, size_t srcSize) +size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h index a79cbb88..cc95c661 100644 --- a/lib/legacy/zstd_v07.h +++ b/lib/legacy/zstd_v07.h @@ -54,7 +54,7 @@ ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame return : the number of bytes that would be read to decompress this frame or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ -size_t ZSTDv07_getFrameCompressedSize(const void* src, size_t compressedSize); +size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize); /*====== Helper functions ======*/ ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ diff --git a/lib/zstd.h b/lib/zstd.h index c0a1c7d1..e597c5db 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -400,12 +400,12 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v * Compressed size functions ***************************************/ -/*! ZSTD_getFrameCompressedSize() : - * `src` should point to the start of a ZSTD encoded frame +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD encoded frame or skippable frame * `srcSize` must be at least as large as the frame * @return : the compressed size of the frame pointed to by `src`, suitable to pass to * `ZSTD_decompress` or similar, or an error code if given invalid input. 
*/ -ZSTDLIB_API size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); /*************************************** * Decompressed size functions @@ -700,6 +700,9 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci c) Frame Content - any content (User Data) of length equal to Frame Size For skippable frames ZSTD_decompressContinue() always returns 0. For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable. + Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content. + For purposes of decompression, it is valid in both cases to skip the frame using + ZSTD_findFrameCompressedSize to find its size in bytes. It also returns Frame Size as fparamsPtr->frameContentSize. */ diff --git a/tests/.gitignore b/tests/.gitignore index b7ba51b6..dc468dee 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -17,6 +17,7 @@ roundTripCrash longmatch symbols legacy +decodecorpus pool invalidDictionaries diff --git a/tests/Makefile b/tests/Makefile index 17286a02..5b0e29c6 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -56,6 +56,7 @@ VOID = /dev/null ZSTREAM_TESTTIME = -T2mn FUZZERTEST ?= -T5mn ZSTDRTTEST = --test-large-data +DECODECORPUS_TESTTIME = -T30 .PHONY: default all all32 dll clean test test32 test-all namespaceTest versionsTest @@ -154,6 +155,9 @@ legacy : CPPFLAGS+= -I$(ZSTDDIR)/legacy legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c $(CC) $(FLAGS) $^ -o $@$(EXT) +decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) decodecorpus.c + $(CC) $(FLAGS) $^ -o $@$(EXT) -lm + symbols : symbols.c $(MAKE) -C $(ZSTDDIR) libzstd ifneq (,$(filter Windows%,$(OS))) @@ -184,7 +188,8 @@ clean: fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT)\ zstreamtest$(EXT) 
zstreamtest32$(EXT) \ datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \ - symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) pool$(EXT) + symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) pool$(EXT) \ + decodecorpus$(EXT) @echo Cleaning completed @@ -230,7 +235,7 @@ zstd-playTests: datagen file $(ZSTD) ZSTD="$(QEMU_SYS) $(ZSTD)" ./playTests.sh $(ZSTDRTTEST) -test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy +test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus ifeq ($(QEMU_SYS),) test: test-pool endif @@ -302,6 +307,9 @@ test-symbols: symbols test-legacy: legacy $(QEMU_SYS) ./legacy +test-decodecorpus: decodecorpus + $(QEMU_SYS) ./decodecorpus -t $(DECODECORPUS_TESTTIME) + test-pool: pool $(QEMU_SYS) ./pool diff --git a/tests/README.md b/tests/README.md index 79c067ab..24a28ab7 100644 --- a/tests/README.md +++ b/tests/README.md @@ -10,12 +10,14 @@ This directory contains the following programs and scripts: - `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+) - `zbufftest` : Test tool to check ZBUFF (a buffered streaming API) integrity - `zstreamtest` : Fuzzer test tool for zstd streaming API +- `legacy` : Test tool to test decoding of legacy zstd frames +- `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations #### `test-zstd-versions.py` - script for testing zstd interoperability between versions This script creates `versionsTest` directory to which zstd repository is cloned. -Then all taged (released) versions of zstd are compiled. +Then all tagged (released) versions of zstd are compiled. In the following step interoperability between zstd versions is checked. 
@@ -64,3 +66,25 @@ optional arguments: --sleepTime SLEEPTIME frequency of repository checking in seconds ``` + +#### `decodecorpus` - tool to generate Zstandard frames for decoder testing +Command line tool to generate test .zst files. + +This tool will generate .zst files with checksums, +as well as optionally output the corresponding correct uncompressed data for +extra verification. + +Example: +``` +./decodecorpus -ptestfiles -otestfiles -n10000 -s5 +``` +will generate 10,000 sample .zst files using a seed of 5 in the `testfiles` directory, +with the zstd checksum field set, +as well as the 10,000 original files for more detailed comparison of decompression results. + +``` +./decodecorpus -t -T1mn +``` +will choose a random seed, and for 1 minute, +generate random test frames and ensure that the +zstd library correctly decompresses them in both simple and streaming modes. diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c new file mode 100644 index 00000000..df12dd56 --- /dev/null +++ b/tests/decodecorpus.c @@ -0,0 +1,1450 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "zstd.h" +#include "zstd_internal.h" +#include "mem.h" + +// Direct access to internal compression functions is required +#include "zstd_compress.c" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + +#ifndef MIN + #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef MAX_PATH + #ifdef PATH_MAX + #define MAX_PATH PATH_MAX + #else + #define MAX_PATH 256 + #endif +#endif + +/*-************************************ +* DISPLAY Macros +**************************************/ +#define DISPLAY(...)
fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 0; + +#define DISPLAYUPDATE(...) \ + do { \ + if ((clockSpan(g_displayClock) > g_refreshRate) || \ + (g_displayLevel >= 4)) { \ + g_displayClock = clock(); \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel >= 4) fflush(stderr); \ + } \ + } while (0) +static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_displayClock = 0; + +static clock_t clockSpan(clock_t cStart) +{ + return clock() - cStart; /* works even when overflow; max span ~ 30mn */ +} + +#define CHECKERR(code) \ + do { \ + if (ZSTD_isError(code)) { \ + DISPLAY("Error occurred while generating data: %s\n", \ + ZSTD_getErrorName(code)); \ + exit(1); \ + } \ + } while (0) + +/*-******************************************************* +* Random function +*********************************************************/ +#define CLAMP(x, a, b) ((x) < (a) ? (a) : ((x) > (b) ? (b) : (x))) + +static unsigned RAND(unsigned* src) +{ +#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = RAND_rotl32(rand32, 13); + *src = rand32; + return RAND_rotl32(rand32, 27); +#undef RAND_rotl32 +} + +#define DISTSIZE (8192) + +/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ +static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = RAND(seed) % (maxSymb + 1); + } +} + +/* Write `size` random bytes into `ptr` */ +static void RAND_buffer(U32* seed, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i + 4 <= size; i += 4) { + MEM_writeLE32(op + i, RAND(seed)); + } + for (; i < size; i++) { + op[i] = RAND(seed) & 0xff; + } +} + +/* Write `size` bytes into `ptr` following the 
distribution `dist` */ +static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) +{ + size_t i; + BYTE* op = ptr; + + for (i = 0; i < size; i++) { + op[i] = dist[RAND(seed) % DISTSIZE]; + } +} + +/* Generate a random distribution where the frequency of each symbol follows a + * geometric distribution defined by `weight` + * `dist` should have size at least `DISTSIZE` */ +static void RAND_genDist(U32* seed, BYTE* dist, double weight) +{ + size_t i = 0; + size_t statesLeft = DISTSIZE; + BYTE symb = RAND(seed) % 256; + BYTE step = (RAND(seed) % 256) | 1; /* force it to be odd so it's relatively prime to 256 */ + + while (i < DISTSIZE) { + size_t states = ((size_t)(weight * statesLeft)) + 1; + size_t j; + for (j = 0; j < states && i < DISTSIZE; j++, i++) { + dist[i] = symb; + } + + symb += step; + statesLeft -= states; + } +} + +/* Generates a random number in the range [min, max) */ +static inline U32 RAND_range(U32* seed, U32 min, U32 max) +{ + return (RAND(seed) % (max-min)) + min; +} + +#define ROUND(x) ((U32)(x + 0.5)) + +/* Generates a random number in an exponential distribution with mean `mean` */ +static double RAND_exp(U32* seed, double mean) +{ + double const u = RAND(seed) / (double) UINT_MAX; + return log(1-u) * (-mean); +} + +/*-******************************************************* +* Constants and Structs +*********************************************************/ +const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"}; + +#define MAX_DECOMPRESSED_SIZE_LOG 20 +#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) + +#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ +#define MAX_BLOCK_SIZE (128ULL * 1024) + +#define MIN_SEQ_LEN (3) +#define MAX_NB_SEQ ((MAX_BLOCK_SIZE + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) + +BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; +BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; +BYTE LITERAL_BUFFER[MAX_BLOCK_SIZE]; + +seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; +BYTE 
SEQUENCE_LITERAL_BUFFER[MAX_BLOCK_SIZE]; /* storeSeq expects a place to copy literals to */ +BYTE SEQUENCE_LLCODE[MAX_BLOCK_SIZE]; +BYTE SEQUENCE_MLCODE[MAX_BLOCK_SIZE]; +BYTE SEQUENCE_OFCODE[MAX_BLOCK_SIZE]; + +unsigned WKSP[1024]; + +typedef struct { + size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */ + unsigned windowSize; /* contentSize >= windowSize means single segment */ +} frameHeader_t; + +/* For repeat modes */ +typedef struct { + U32 rep[ZSTD_REP_NUM]; + + int hufInit; + /* the distribution used in the previous block for repeat mode */ + BYTE hufDist[DISTSIZE]; + U32 hufTable [256]; /* HUF_CElt is an incomplete type */ + + int fseInit; + FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + + /* Symbols that were present in the previous distribution, for use with + * set_repeat */ + BYTE litlengthSymbolSet[36]; + BYTE offsetSymbolSet[29]; + BYTE matchlengthSymbolSet[53]; +} cblockStats_t; + +typedef struct { + void* data; + void* dataStart; + void* dataEnd; + + void* src; + void* srcStart; + void* srcEnd; + + frameHeader_t header; + + cblockStats_t stats; + cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ +} frame_t; + +/*-******************************************************* +* Generator Functions +*********************************************************/ + +struct { + int contentSize; /* force the content size to be present */ +} opts; /* advanced options on generation */ + +/* Generate and write a random frame header */ +static void writeFrameHeader(U32* seed, frame_t* frame) +{ + BYTE* const op = frame->data; + size_t pos = 0; + frameHeader_t fh; + + BYTE windowByte = 0; + + int singleSegment = 0; + int contentSizeFlag = 0; + int fcsCode = 0; + + memset(&fh, 0, sizeof(fh)); + + /* generate window size */ + { + /* Follow window algorithm 
from specification */ + int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); + int const mantissa = RAND(seed) % 8; + windowByte = (exponent << 3) | mantissa; + fh.windowSize = (1U << (exponent + 10)); + fh.windowSize += fh.windowSize / 8 * mantissa; + } + + { + /* Generate random content size */ + size_t highBit; + if (RAND(seed) & 7) { + /* do content of at least 128 bytes */ + highBit = 1ULL << RAND_range(seed, 7, MAX_DECOMPRESSED_SIZE_LOG); + } else if (RAND(seed) & 3) { + /* do small content */ + highBit = 1ULL << RAND_range(seed, 0, 7); + } else { + /* 0 size frame */ + highBit = 0; + } + fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; + + /* provide size sometimes */ + contentSizeFlag = opts.contentSize | (RAND(seed) & 1); + + if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { + /* do single segment sometimes */ + fh.windowSize = fh.contentSize; + singleSegment = 1; + } + } + + if (contentSizeFlag) { + /* Determine how large fcs field has to be */ + int minFcsCode = (fh.contentSize >= 256) + + (fh.contentSize >= 65536 + 256) + + (fh.contentSize > 0xFFFFFFFFU); + if (!singleSegment && !minFcsCode) { + minFcsCode = 1; + } + fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); + if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; + } + + /* write out the header */ + MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); + pos += 4; + + { + BYTE const frameHeaderDescriptor = + (fcsCode << 6) | (singleSegment << 5) | (1 << 2); + op[pos++] = frameHeaderDescriptor; + } + + if (!singleSegment) { + op[pos++] = windowByte; + } + + if (contentSizeFlag) { + switch (fcsCode) { + default: /* Impossible */ + case 0: op[pos++] = fh.contentSize; break; + case 1: MEM_writeLE16(op + pos, fh.contentSize - 256); pos += 2; break; + case 2: MEM_writeLE32(op + pos, fh.contentSize); pos += 4; break; + case 3: MEM_writeLE64(op + pos, fh.contentSize); pos += 8; break; + } + } + + DISPLAYLEVEL(2, " frame content size:\t%zu\n", fh.contentSize); + 
DISPLAYLEVEL(2, " frame window size:\t%u\n", fh.windowSize); + DISPLAYLEVEL(2, " content size flag:\t%d\n", contentSizeFlag); + DISPLAYLEVEL(2, " single segment flag:\t%d\n", singleSegment); + + frame->data = op + pos; + frame->header = fh; +} + +/* Write a literal block in either raw or RLE form, return the literals size */ +static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* op = (BYTE*)frame->data; + int const type = RAND(seed) % 2; + int const sizeFormatDesc = RAND(seed) % 8; + size_t litSize; + size_t maxLitSize = MIN(contentSize, MAX_BLOCK_SIZE); + + if (sizeFormatDesc == 0) { + /* Size_FormatDesc = ?0 */ + maxLitSize = MIN(maxLitSize, 31); + } else if (sizeFormatDesc <= 4) { + /* Size_FormatDesc = 01 */ + maxLitSize = MIN(maxLitSize, 4095); + } else { + /* Size_Format = 11 */ + maxLitSize = MIN(maxLitSize, 1048575); + } + + litSize = RAND(seed) % (maxLitSize + 1); + if (frame->src == frame->srcStart && litSize == 0) { + litSize = 1; /* no empty literals if there's nothing preceding this block */ + } + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + /* use smallest size format that fits */ + if (litSize < 32) { + op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; + op += 1; + } else if (litSize < 4096) { + op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op += 2; + } else { + op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; + op[1] = (litSize >> 4) & 0xff; + op[2] = (litSize >> 12) & 0xff; + op += 3; + } + + if (type == 0) { + /* Raw literals */ + DISPLAYLEVEL(4, " raw literals\n"); + + RAND_buffer(seed, LITERAL_BUFFER, litSize); + memcpy(op, LITERAL_BUFFER, litSize); + op += litSize; + } else { + /* RLE literals */ + BYTE const symb = RAND(seed) % 256; + + DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (U32)symb); + + memset(LITERAL_BUFFER, symb, litSize); + op[0] = symb; + op++; + } + + frame->data = op; + + return 
litSize; +} + +/* Generate a Huffman header for the given source. Returns the number of bytes written to `dst`, or 0 when no usable header was produced: the source is a single repeated symbol (that symbol is stored in dst[0]), the source fails the compressibility heuristic, or the table description is too large relative to srcSize. */ +static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, + const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + + unsigned huffLog = 11; + U32 maxSymbolValue = 255; + + U32 count[HUF_SYMBOLVALUE_MAX+1]; + + /* Scan input and build symbol stats */ + { size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } + + /* Build Huffman Tree */ + /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ + huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); + DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); + { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); + CHECKERR(maxBits); + huffLog = (U32)maxBits; + } + + /* Write table description header */ + { size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog); + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } + + return op - ostart; +} + +/* Write a Huffman coded literals block and return the literals size */ +static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* origop = (BYTE*)frame->data; + BYTE* opend = (BYTE*)frame->dataEnd; + BYTE* op; + BYTE* const ostart = origop; + int const sizeFormat = RAND(seed) % 4; + size_t litSize; + size_t hufHeaderSize = 0; + size_t compressedSize = 0; + size_t maxLitSize = MIN(contentSize-3, MAX_BLOCK_SIZE); + + symbolEncodingType_e hType; + + if (contentSize < 64) { + /* make sure we get reasonably-sized literals for compression */ + return ERROR(GENERIC); + } + + DISPLAYLEVEL(4, " compressed literals\n"); + + switch
(sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: + maxLitSize = MIN(maxLitSize, 1023); + origop += 3; + break; + case 2: + maxLitSize = MIN(maxLitSize, 16383); + origop += 4; + break; + case 3: + maxLitSize = MIN(maxLitSize, 262143); + origop += 5; + break; + default:; /* impossible */ + } + + do { + op = origop; + do { + litSize = RAND(seed) % (maxLitSize + 1); + } while (litSize < 32); /* avoid small literal sizes */ + if (litSize + 3 > contentSize) { + litSize = contentSize; /* no matches shorter than 3 are allowed */ + } + + /* most of the time generate a new distribution */ + if ((RAND(seed) & 3) || !frame->stats.hufInit) { + do { + if (RAND(seed) & 3) { + /* add 10 to ensure some compressibility */ + double const weight = ((RAND(seed) % 90) + 10) / 100.0; + + DISPLAYLEVEL(5, " distribution weight: %d%%\n", + (int)(weight * 100)); + + RAND_genDist(seed, frame->stats.hufDist, weight); + } else { + /* sometimes do restricted range literals to force + * non-huffman headers */ + DISPLAYLEVEL(5, " small range literals\n"); + RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, + 15); + } + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + + /* generate the header from the distribution instead of the + * actual data to avoid bugs with symbols that were in the + * distribution but never showed up in the output */ + hufHeaderSize = writeHufHeader( + seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op, + frame->stats.hufDist, DISTSIZE); + CHECKERR(hufHeaderSize); + /* repeat until a valid header is written */ + } while (hufHeaderSize == 0); + op += hufHeaderSize; + hType = set_compressed; + + frame->stats.hufInit = 1; + } else { + /* repeat the distribution/table from last time */ + DISPLAYLEVEL(5, " huffman repeat stats\n"); + RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, + litSize); + hufHeaderSize = 0; + hType = set_repeat; + } + + do { + compressedSize = + sizeFormat == 0 + ?
HUF_compress1X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable) + : HUF_compress4X_usingCTable( + op, opend - op, LITERAL_BUFFER, litSize, + (HUF_CElt*)frame->stats.hufTable); + CHECKERR(compressedSize); + /* this only occurs when it could not compress or similar */ + } while (compressedSize <= 0); + + op += compressedSize; + + compressedSize += hufHeaderSize; + DISPLAYLEVEL(5, " regenerated size: %zu\n", litSize); + DISPLAYLEVEL(5, " compressed size: %zu\n", compressedSize); + if (compressedSize >= litSize) { + DISPLAYLEVEL(5, " trying again\n"); + /* if we have to try again, reset the stats so we don't accidentally + * try to repeat a distribution we just made */ + frame->stats = frame->oldStats; + } else { + break; + } + } while (1); + + /* write header */ + switch (sizeFormat) { + case 0: /* fall through, size is the same as case 1 */ + case 1: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 14); + MEM_writeLE24(ostart, header); + break; + } + case 2: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 18); + MEM_writeLE32(ostart, header); + break; + } + case 3: { + U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | + ((U32)compressedSize << 22); + MEM_writeLE32(ostart, header); + ostart[4] = (BYTE)(compressedSize >> 10); + break; + } + default:; /* impossible */ + } + + frame->data = op; + return litSize; +} + +static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) +{ + /* only do compressed for larger segments to avoid compressibility issues */ + if (RAND(seed) & 7 && contentSize >= 64) { + return writeLiteralsBlockCompressed(seed, frame, contentSize); + } else { + return writeLiteralsBlockSimple(seed, frame, contentSize); + } +} + +static inline void initSeqStore(seqStore_t *seqStore) { + seqStore->sequencesStart = SEQUENCE_BUFFER; + seqStore->litStart = 
SEQUENCE_LITERAL_BUFFER; + seqStore->llCode = SEQUENCE_LLCODE; + seqStore->mlCode = SEQUENCE_MLCODE; + seqStore->ofCode = SEQUENCE_OFCODE; + + ZSTD_resetSeqStore(seqStore); +} + +/* Randomly generate sequence commands */ +static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, + size_t contentSize, size_t literalsSize) +{ + /* The total length of all the matches */ + size_t const remainingMatch = contentSize - literalsSize; + size_t excessMatch; + U32 i; + + U32 numSequences; + + const BYTE* literals = LITERAL_BUFFER; + BYTE* srcPtr = frame->src; + + if (literalsSize == contentSize) { + numSequences = 0; + } else { + /* each match must be at least MIN_SEQ_LEN, so this is the maximum + * number of sequences we can have */ + U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; + numSequences = (RAND(seed) % maxSequences) + 1; + + /* the extra match lengths we have to allocate to each sequence */ + excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; + } + + DISPLAYLEVEL(5, " total match lengths: %zu\n", remainingMatch); + + for (i = 0; i < numSequences; i++) { + /* Generate match and literal lengths by exponential distribution to + * ensure nice numbers */ + U32 matchLen = + MIN_SEQ_LEN + + ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i))); + U32 literalLen = + (RAND(seed) & 7) + ? 
ROUND(RAND_exp(seed, + literalsSize / + (double)(numSequences - i))) + : 0; + /* actual offset, code to send, and point to copy up to when shifting + * codes in the repeat offsets history */ + U32 offset, offsetCode, repIndex; + + /* bounds checks */ + matchLen = MIN(matchLen, excessMatch + MIN_SEQ_LEN); + literalLen = MIN(literalLen, literalsSize); + if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; + if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + excessMatch; + + memcpy(srcPtr, literals, literalLen); + srcPtr += literalLen; + + do { + if (RAND(seed) & 7) { + /* do a normal offset */ + offset = (RAND(seed) % + MIN(frame->header.windowSize, + (BYTE*)srcPtr - (BYTE*)frame->srcStart)) + + 1; + offsetCode = offset + ZSTD_REP_MOVE; + repIndex = 2; + } else { + /* do a repeat offset */ + offsetCode = RAND(seed) % 3; + if (literalLen > 0) { + offset = frame->stats.rep[offsetCode]; + repIndex = offsetCode; + } else { + /* special case */ + offset = offsetCode == 2 ? 
frame->stats.rep[0] - 1 + : frame->stats.rep[offsetCode + 1]; + repIndex = MIN(2, offsetCode + 1); + } + } + } while (offset > (BYTE*)srcPtr - (BYTE*)frame->srcStart || offset == 0); + + { size_t j; + for (j = 0; j < matchLen; j++) { + *srcPtr = *(srcPtr-offset); + srcPtr++; + } + } + + { int r; + for (r = repIndex; r > 0; r--) { + frame->stats.rep[r] = frame->stats.rep[r - 1]; + } + frame->stats.rep[0] = offset; + } + + DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen); + DISPLAYLEVEL(7, " srcPos: %8zu seqNb: %3u", + (BYTE*)srcPtr - (BYTE*)frame->srcStart, i); + DISPLAYLEVEL(6, "\n"); + if (offsetCode < 3) { + DISPLAYLEVEL(7, " repeat offset: %d\n", repIndex); + } + /* use libzstd sequence handling */ + ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode, + matchLen - MINMATCH); + + literalsSize -= literalLen; + excessMatch -= (matchLen - MIN_SEQ_LEN); + literals += literalLen; + } + + memcpy(srcPtr, literals, literalsSize); + srcPtr += literalsSize; + DISPLAYLEVEL(6, " excess literals: %5zu", literalsSize); + DISPLAYLEVEL(7, " srcPos: %8zu", (BYTE*)srcPtr - (BYTE*)frame->srcStart); + DISPLAYLEVEL(6, "\n"); + + return numSequences; +} + +static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + memset(set, 0, (size_t)maxSymbolValue+1); + + for (i = 0; i < len; i++) { + set[symbols[i]] = 1; + } +} + +static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { + return 0; + } + } + return 1; +} + +static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, + size_t nbSeq) +{ + /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; + FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; + 
FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const oend = (BYTE*)frame->dataEnd; + BYTE* op = (BYTE*)frame->data; + BYTE* seqHead; + BYTE scratchBuffer[1<>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + if (nbSeq==0) { + frame->data = op; + + return 0; + } + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + /* do RLE if we have the chance */ + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(llCodeTable, nbSeq, + frame->stats.litlengthSymbolSet, 35)) { + /* maybe do repeat mode if we're allowed to */ + LLtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + /* maybe use the default distribution */ + FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_basic; + } else { + /* fall back on a full table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + 
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + LLtype = set_compressed; + } } + + /* CTable for Offsets */ + /* see Literal Lengths for descriptions of mode choices */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(ofCodeTable, nbSeq, + frame->stats.offsetSymbolSet, 28)) { + Offtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + Offtype = set_compressed; + } } + + /* CTable for MatchLengths */ + /* see Literal Lengths for descriptions of mode choices */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); + if (mostFrequent == nbSeq) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; + } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + isSymbolSubset(mlCodeTable, nbSeq, + frame->stats.matchlengthSymbolSet, 52)) { + MLtype = set_repeat; + } else if (!(RAND(seed) & 3)) { + /* sometimes do default distribution */ + FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, 
MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_basic; + } else { + /* fall back on table */ + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); + MLtype = set_compressed; + } } + frame->stats.fseInit = 1; + initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); + initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); + initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); + + DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype); + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { 
size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + frame->data = op; + + return 0; +} + +static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, + size_t literalsSize) +{ + seqStore_t seqStore; + size_t numSequences; + + + initSeqStore(&seqStore); + + /* randomly generate sequences */ + numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize); + /* write them out to the frame data */ + CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); + + return numSequences; +} + +static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize) +{ + BYTE* const blockStart = (BYTE*)frame->data; + size_t literalsSize; + size_t nbSeq; + + DISPLAYLEVEL(4, " compressed block:\n"); + + literalsSize = writeLiteralsBlock(seed, frame, contentSize); + + DISPLAYLEVEL(4, " literals size: %zu\n", literalsSize); + + nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize); + + DISPLAYLEVEL(4, " number of sequences: %zu\n", nbSeq); + + return (BYTE*)frame->data - blockStart; +} + +static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, + int lastBlock) +{ + int const blockTypeDesc = RAND(seed) % 8; + size_t blockSize; + int blockType; + + BYTE *const 
header = (BYTE*)frame->data; + BYTE *op = header + 3; + + DISPLAYLEVEL(3, " block:\n"); + DISPLAYLEVEL(3, " block content size: %zu\n", contentSize); + DISPLAYLEVEL(3, " last block: %s\n", lastBlock ? "yes" : "no"); + + if (blockTypeDesc == 0) { + /* Raw data frame */ + + RAND_buffer(seed, frame->src, contentSize); + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockType = 0; + blockSize = contentSize; + } else if (blockTypeDesc == 1) { + /* RLE */ + BYTE const symbol = RAND(seed) & 0xff; + + op[0] = symbol; + memset(frame->src, symbol, contentSize); + + op++; + blockType = 1; + blockSize = contentSize; + } else { + /* compressed, most common */ + size_t compressedSize; + blockType = 2; + + frame->oldStats = frame->stats; + + frame->data = op; + compressedSize = writeCompressedBlock(seed, frame, contentSize); + if (compressedSize > contentSize) { + blockType = 0; + memcpy(op, frame->src, contentSize); + + op += contentSize; + blockSize = contentSize; /* fall back on raw block if data doesn't + compress */ + + frame->stats = frame->oldStats; /* don't update the stats */ + } else { + op += compressedSize; + blockSize = compressedSize; + } + } + frame->src = (BYTE*)frame->src + contentSize; + + DISPLAYLEVEL(3, " block type: %s\n", BLOCK_TYPES[blockType]); + DISPLAYLEVEL(3, " block size field: %zu\n", blockSize); + + header[0] = (lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff; + MEM_writeLE16(header + 1, blockSize >> 5); + + frame->data = op; +} + +static void writeBlocks(U32* seed, frame_t* frame) +{ + size_t contentLeft = frame->header.contentSize; + size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize); + while (1) { + /* 1 in 4 chance of ending frame */ + int const lastBlock = contentLeft > maxBlockSize ? 
0 : !(RAND(seed) & 3); + size_t blockContentSize; + if (lastBlock) { + blockContentSize = contentLeft; + } else { + if (contentLeft > 0 && (RAND(seed) & 7)) { + /* some variable size blocks */ + blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); + } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { + /* some full size blocks */ + blockContentSize = maxBlockSize; + } else { + /* some empty blocks */ + blockContentSize = 0; + } + } + + writeBlock(seed, frame, blockContentSize, lastBlock); + + contentLeft -= blockContentSize; + if (lastBlock) break; + } +} + +static void writeChecksum(frame_t* frame) +{ + /* write checksum so implementations can verify their output */ + U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); + DISPLAYLEVEL(2, " checksum: %08x\n", (U32)digest); + MEM_writeLE32(frame->data, (U32)digest); + frame->data = (BYTE*)frame->data + 4; +} + +static void outputBuffer(const void* buf, size_t size, const char* const path) +{ + /* write data out to file */ + const BYTE* ip = (const BYTE*)buf; + FILE* out; + if (path) { + out = fopen(path, "wb"); + } else { + out = stdout; + } + if (!out) { + fprintf(stderr, "Failed to open file at %s: ", path); + perror(NULL); + exit(1); + } + + { + size_t fsize = size; + size_t written = 0; + while (written < fsize) { + written += fwrite(ip + written, 1, fsize - written, out); + if (ferror(out)) { + fprintf(stderr, "Failed to write to file at %s: ", path); + perror(NULL); + exit(1); + } + } + } + + if (path) { + fclose(out); + } +} + +static void initFrame(frame_t* fr) +{ + memset(fr, 0, sizeof(*fr)); + fr->data = fr->dataStart = FRAME_BUFFER; + fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); + fr->src = fr->srcStart = CONTENT_BUFFER; + fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); + + /* init repeat codes */ + fr->stats.rep[0] = 1; + fr->stats.rep[1] = 4; + fr->stats.rep[2] = 8; +} + +/* Return the final seed */ +static U32 generateFrame(U32 seed, 
frame_t* fr) +{ + /* generate a complete frame */ + DISPLAYLEVEL(1, "frame seed: %u\n", seed); + + initFrame(fr); + + writeFrameHeader(&seed, fr); + writeBlocks(&seed, fr); + writeChecksum(fr); + + return seed; +} + +/*-******************************************************* +* Test Mode +*********************************************************/ + +BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; + +static size_t testDecodeSimple(frame_t* fr) +{ + /* test decoding the generated data with the simple API */ + size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, + fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); + + if (ZSTD_isError(ret)) return ret; + + if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, + (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { + return ERROR(corruption_detected); + } + + return ret; +} + +static size_t testDecodeStreaming(frame_t* fr) +{ + /* test decoding the generated data with the streaming API */ + ZSTD_DStream* zd = ZSTD_createDStream(); + ZSTD_inBuffer in; + ZSTD_outBuffer out; + size_t ret; + + if (!zd) return ERROR(memory_allocation); + + in.src = fr->dataStart; + in.pos = 0; + in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; + + out.dst = DECOMPRESSED_BUFFER; + out.pos = 0; + out.size = ZSTD_DStreamOutSize(); + + ZSTD_initDStream(zd); + while (1) { + ret = ZSTD_decompressStream(zd, &out, &in); + if (ZSTD_isError(ret)) goto cleanup; /* error */ + if (ret == 0) break; /* frame is done */ + + /* force decoding to be done in chunks */ + out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); + } + + ret = out.pos; + + if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { + return ERROR(corruption_detected); + } + +cleanup: + ZSTD_freeDStream(zd); + return ret; +} + +static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS) +{ + unsigned fnum; + + clock_t const startClock = clock(); + clock_t const maxClockSpan = testDurationS * CLOCKS_PER_SEC; + + if (numFiles == 0 && 
!testDurationS) numFiles = 1; + + DISPLAY("seed: %u\n", seed); + + for (fnum = 0; fnum < numFiles || clockSpan(startClock) < maxClockSpan; fnum++) { + frame_t fr; + + if (fnum < numFiles) + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + else + DISPLAYUPDATE("\r%u ", fnum); + + seed = generateFrame(seed, &fr); + + { size_t const r = testDecodeSimple(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in simple mode on test seed %u: %s\n", seed + fnum, + ZSTD_getErrorName(r)); + return 1; + } + } + { size_t const r = testDecodeStreaming(&fr); + if (ZSTD_isError(r)) { + DISPLAY("Error in streaming mode on test seed %u: %s\n", seed + fnum, + ZSTD_getErrorName(r)); + return 1; + } + } + } + + DISPLAY("\r%u tests completed: ", fnum); + DISPLAY("OK\n"); + + return 0; +} + +/*-******************************************************* +* File I/O +*********************************************************/ + +static int generateFile(U32 seed, const char* const path, + const char* const origPath) +{ + frame_t fr; + + DISPLAY("seed: %u\n", seed); + + generateFrame(seed, &fr); + + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); + if (origPath) { + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); + } + return 0; +} + +static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath) +{ + char outPath[MAX_PATH]; + unsigned fnum; + + DISPLAY("seed: %u\n", seed); + + for (fnum = 0; fnum < numFiles; fnum++) { + frame_t fr; + + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + + seed = generateFrame(seed, &fr); + + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); + + if (origPath) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.srcStart, 
(BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); + } + } + + DISPLAY("\r%u/%u \n", fnum, numFiles); + + return 0; +} + + +/*_******************************************************* +* Command line +*********************************************************/ +static U32 makeSeed(void) +{ + U32 t = time(NULL); + return XXH32(&t, sizeof(t), 0) % 65536; +} + +static unsigned readInt(const char** argument) +{ + unsigned val = 0; + while ((**argument>='0') && (**argument<='9')) { + val *= 10; + val += **argument - '0'; + (*argument)++; + } + return val; +} + +static void usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -p : select output path (default:stdout)\n"); + DISPLAY( " in multiple files mode this should be a directory\n"); + DISPLAY( " -o : select path to output original file (default:no output)\n"); + DISPLAY( " in multiple files mode this should be a directory\n"); + DISPLAY( " -s# : select seed (default:random based on time)\n"); + DISPLAY( " -n# : number of files to generate (default:1)\n"); + DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"); + DISPLAY( " -T# : length of time to run tests for\n"); + DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"); + DISPLAY( " -h/H : display help/long help and exit\n"); +} + +static void advancedUsage(const char* programName) +{ + usage(programName); + DISPLAY( "\n"); + DISPLAY( "Advanced arguments :\n"); + DISPLAY( " --content-size : always include the content size in the frame header\n"); +} + +int main(int argc, char** argv) +{ + U32 seed = 0; + int seedset = 0; + unsigned numFiles = 0; + unsigned testDuration = 0; + int testMode = 0; + const char* path = NULL; + const char* origPath = NULL; + + int argNb; + + /* Check command line */ + for (argNb=1; argNb