Merge branch 'dev' into ahmed_file
This commit is contained in:
commit
c71bd45a3b
3
.gitignore
vendored
3
.gitignore
vendored
@ -31,6 +31,7 @@ projects/
|
||||
bin/
|
||||
.buckd/
|
||||
buck-out/
|
||||
build-*
|
||||
|
||||
# Other files
|
||||
.directory
|
||||
@ -43,3 +44,5 @@ _zstdbench/
|
||||
googletest/
|
||||
*.d
|
||||
*.vscode
|
||||
compile_commands.json
|
||||
.clangd
|
25
.travis.yml
25
.travis.yml
@ -32,9 +32,10 @@ matrix:
|
||||
script:
|
||||
- make check
|
||||
|
||||
- name: Trusty (Test All)
|
||||
- name: make test (complete)
|
||||
script:
|
||||
- make test
|
||||
# DEVNULLRIGHTS : will request sudo rights to test permissions on /dev/null
|
||||
- DEVNULLRIGHTS=test make test
|
||||
|
||||
- name: gcc-6 + gcc-7 compilation
|
||||
script:
|
||||
@ -80,43 +81,43 @@ matrix:
|
||||
- make clean
|
||||
- make -j check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
|
||||
|
||||
- name: Trusty (CMake)
|
||||
- name: cmake test
|
||||
script:
|
||||
- make cmakebuild
|
||||
|
||||
- name: Trusty (Static Analyze)
|
||||
- name: static analyzer scanbuild
|
||||
script:
|
||||
- make staticAnalyze
|
||||
|
||||
- name: Trusty (gcc-8 + ASan + UBSan + Fuzz Test)
|
||||
- name: gcc-8 + ASan + UBSan + Fuzz Test
|
||||
script:
|
||||
- make gcc8install
|
||||
- CC=gcc-8 make clean uasan-fuzztest
|
||||
|
||||
- name: Trusty (gcc-6 + ASan + UBSan + Fuzz Test 32bit)
|
||||
- name: gcc-6 + ASan + UBSan + Fuzz Test 32bit
|
||||
script:
|
||||
- make gcc6install libc6install
|
||||
- CC=gcc-6 CFLAGS="-O2 -m32" make uasan-fuzztest # can complain about pointer overflow
|
||||
|
||||
- name: Trusty (clang-3.8 + MSan + Fuzz Test)
|
||||
- name: clang-3.8 + MSan + Fuzz Test
|
||||
script:
|
||||
- make clang38install
|
||||
- CC=clang-3.8 make clean msan-fuzztest
|
||||
|
||||
- name: Trusty (ASan + UBSan + MSan + Regression Test)
|
||||
- name: ASan + UBSan + MSan + Regression Test
|
||||
script:
|
||||
- make -j uasanregressiontest
|
||||
- make clean
|
||||
- make -j msanregressiontest
|
||||
|
||||
- name: Trusty (Valgrind + Fuzz Test Stack Mode)
|
||||
- name: Valgrind + Fuzz Test Stack Mode
|
||||
script:
|
||||
- make valgrindinstall
|
||||
- make -C tests clean valgrindTest
|
||||
- make clean
|
||||
- make -C tests test-fuzzer-stackmode
|
||||
|
||||
- name: Trusty (ARM + Fuzz Test)
|
||||
- name: Qemu ARM emulation + Fuzz Test
|
||||
script:
|
||||
- make arminstall
|
||||
- make armfuzz
|
||||
@ -127,12 +128,12 @@ matrix:
|
||||
- make arminstall
|
||||
- make aarch64fuzz
|
||||
|
||||
- name: Trusty (PPC + Fuzz Test)
|
||||
- name: PPC + Fuzz Test
|
||||
script:
|
||||
- make ppcinstall
|
||||
- make ppcfuzz
|
||||
|
||||
- name: Trusty (Versions Compatibility Test)
|
||||
- name: Versions Compatibility Test
|
||||
script:
|
||||
- make -C tests versionsTest
|
||||
|
||||
|
4
Makefile
4
Makefile
@ -351,9 +351,9 @@ cmakebuild:
|
||||
mkdir $(BUILDIR)/cmake/build
|
||||
cd $(BUILDIR)/cmake/build ; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) .. ; $(MAKE) install ; $(MAKE) uninstall
|
||||
|
||||
c90build: clean
|
||||
c89build: clean
|
||||
$(CC) -v
|
||||
CFLAGS="-std=c90 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
|
||||
CFLAGS="-std=c89 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
|
||||
|
||||
gnu90build: clean
|
||||
$(CC) -v
|
||||
|
@ -372,6 +372,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
|
||||
>
|
||||
@ -514,6 +518,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_fast.h"
|
||||
>
|
||||
|
@ -420,6 +420,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
|
||||
>
|
||||
@ -550,6 +554,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_fast.h"
|
||||
>
|
||||
|
@ -432,6 +432,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
|
||||
>
|
||||
@ -630,6 +634,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_fast.h"
|
||||
>
|
||||
|
@ -404,6 +404,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_fast.c"
|
||||
>
|
||||
@ -562,6 +566,10 @@
|
||||
RelativePath="..\..\..\lib\compress\zstd_cwksp.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\compress\zstd_fast.h"
|
||||
>
|
||||
|
@ -169,6 +169,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstdmt_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
|
||||
@ -198,6 +199,7 @@
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
|
||||
|
@ -169,6 +169,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
|
||||
@ -201,6 +202,7 @@
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
|
||||
|
@ -33,6 +33,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
|
||||
@ -83,6 +84,7 @@
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
|
||||
|
@ -33,6 +33,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
|
||||
@ -83,6 +84,7 @@
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
|
||||
|
@ -34,6 +34,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
|
||||
@ -80,6 +81,7 @@
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
|
||||
|
@ -49,6 +49,9 @@ target_link_libraries(fullbench libzstd_static)
|
||||
add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c)
|
||||
target_link_libraries(fuzzer libzstd_static)
|
||||
|
||||
add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
|
||||
target_link_libraries(zstreamtest libzstd_static)
|
||||
|
||||
if (UNIX)
|
||||
add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
|
||||
target_link_libraries(paramgrill libzstd_static m) #m is math library
|
||||
|
@ -30,6 +30,7 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_compress.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_compress_literals.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_compress_sequences.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_compress_superblock.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstdmt_compress.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_fast.c'),
|
||||
join_paths(zstd_rootdir, 'lib/compress/zstd_double_fast.c'),
|
||||
|
@ -16,7 +16,7 @@ Distribution of this document is unlimited.
|
||||
|
||||
### Version
|
||||
|
||||
0.3.4 (16/08/19)
|
||||
0.3.5 (13/11/19)
|
||||
|
||||
|
||||
Introduction
|
||||
@ -341,6 +341,8 @@ The structure of a block is as follows:
|
||||
|:--------------:|:---------------:|
|
||||
| 3 bytes | n bytes |
|
||||
|
||||
__`Block_Header`__
|
||||
|
||||
`Block_Header` uses 3 bytes, written using __little-endian__ convention.
|
||||
It contains 3 fields :
|
||||
|
||||
@ -385,17 +387,30 @@ There are 4 block types :
|
||||
__`Block_Size`__
|
||||
|
||||
The upper 21 bits of `Block_Header` represent the `Block_Size`.
|
||||
|
||||
When `Block_Type` is `Compressed_Block` or `Raw_Block`,
|
||||
`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.
|
||||
When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
|
||||
and `Block_Size` represents the number of times this byte must be repeated.
|
||||
A block can contain and decompress into any number of bytes (even zero),
|
||||
up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
|
||||
- Window_Size
|
||||
`Block_Size` is the size of `Block_Content` (hence excluding `Block_Header`).
|
||||
|
||||
When `Block_Type` is `RLE_Block`, since `Block_Content`’s size is always 1,
|
||||
`Block_Size` represents the number of times this byte must be repeated.
|
||||
|
||||
`Block_Size` is limited by `Block_Maximum_Size` (see below).
|
||||
|
||||
__`Block_Content`__ and __`Block_Maximum_Size`__
|
||||
|
||||
The size of `Block_Content` is limited by `Block_Maximum_Size`,
|
||||
which is the smallest of:
|
||||
- `Window_Size`
|
||||
- 128 KB
|
||||
|
||||
If this condition cannot be respected when generating a `Compressed_Block`,
|
||||
the block must be sent uncompressed instead (`Raw_Block`).
|
||||
`Block_Maximum_Size` is constant for a given frame.
|
||||
This maximum is applicable to both the decompressed size
|
||||
and the compressed size of any block in the frame.
|
||||
|
||||
The reasoning for this limit is that a decoder can read this information
|
||||
at the beginning of a frame and use it to allocate buffers.
|
||||
The guarantees on the size of blocks ensure that
|
||||
the buffers will be large enough for any following block of the valid frame.
|
||||
|
||||
|
||||
Compressed Blocks
|
||||
@ -1658,6 +1673,7 @@ or at least provide a meaningful error code explaining for which reason it canno
|
||||
|
||||
Version changes
|
||||
---------------
|
||||
- 0.3.5 : clarifications for Block_Maximum_Size
|
||||
- 0.3.4 : clarifications for FSE decoding table
|
||||
- 0.3.3 : clarifications for field Block_Size
|
||||
- 0.3.2 : remove additional block size restriction on compressed blocks
|
||||
|
@ -161,7 +161,7 @@ ifneq (,$(filter Windows%,$(OS)))
|
||||
LIBZSTD = dll\libzstd.dll
|
||||
$(LIBZSTD): $(ZSTD_FILES)
|
||||
@echo compiling dynamic library $(LIBVER)
|
||||
$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@
|
||||
$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@
|
||||
|
||||
else
|
||||
|
||||
|
@ -48,6 +48,7 @@ extern "C" {
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include "mem.h" /* unaligned access routines */
|
||||
#include "compiler.h" /* UNLIKELY() */
|
||||
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
|
||||
#include "error_private.h" /* error codes and messages */
|
||||
|
||||
@ -411,6 +412,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
return value;
|
||||
}
|
||||
|
||||
/*! BIT_reloadDStreamFast() :
|
||||
* Similar to BIT_reloadDStream(), but with two differences:
|
||||
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
|
||||
* 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
|
||||
* point you must use BIT_reloadDStream() to reload.
|
||||
*/
|
||||
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
|
||||
{
|
||||
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
|
||||
return BIT_DStream_overflow;
|
||||
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
|
||||
bitD->ptr -= bitD->bitsConsumed >> 3;
|
||||
bitD->bitsConsumed &= 7;
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
return BIT_DStream_unfinished;
|
||||
}
|
||||
|
||||
/*! BIT_reloadDStream() :
|
||||
* Refill `bitD` from buffer previously set in BIT_initDStream() .
|
||||
* This function is safe, it guarantees it will not read beyond src buffer.
|
||||
@ -422,10 +440,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
||||
return BIT_DStream_overflow;
|
||||
|
||||
if (bitD->ptr >= bitD->limitPtr) {
|
||||
bitD->ptr -= bitD->bitsConsumed >> 3;
|
||||
bitD->bitsConsumed &= 7;
|
||||
bitD->bitContainer = MEM_readLEST(bitD->ptr);
|
||||
return BIT_DStream_unfinished;
|
||||
return BIT_reloadDStreamFast(bitD);
|
||||
}
|
||||
if (bitD->ptr == bitD->start) {
|
||||
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
|
||||
|
@ -146,6 +146,19 @@
|
||||
# define DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
/* Tell the compiler that a branch is likely or unlikely.
|
||||
* Only use these macros if it causes the compiler to generate better code.
|
||||
* If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
|
||||
* and clang, please do.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define LIKELY(x) (__builtin_expect((x), 1))
|
||||
#define UNLIKELY(x) (__builtin_expect((x), 0))
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
/* disable warnings */
|
||||
#ifdef _MSC_VER /* Visual Studio */
|
||||
# include <intrin.h> /* For Visual 2005 */
|
||||
|
@ -208,6 +208,7 @@ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
|
||||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
|
||||
typedef enum {
|
||||
HUF_repeat_none, /**< Cannot use the previous table */
|
||||
|
@ -427,7 +427,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbo
|
||||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
|
||||
}
|
||||
|
||||
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
||||
{
|
||||
size_t nbBits = 0;
|
||||
int s;
|
||||
|
@ -28,11 +28,19 @@
|
||||
#include "zstd_lazy.h"
|
||||
#include "zstd_opt.h"
|
||||
#include "zstd_ldm.h"
|
||||
#include "zstd_compress_superblock.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Helper functions
|
||||
***************************************/
|
||||
/* ZSTD_compressBound()
|
||||
* Note that the result from this function is only compatible with the "normal"
|
||||
* full-block strategy.
|
||||
* When there are a lot of small blocks due to frequent flush in streaming mode
|
||||
* or targetCBlockSize, the overhead of headers can make the compressed data to
|
||||
* be larger than the return value of ZSTD_compressBound().
|
||||
*/
|
||||
size_t ZSTD_compressBound(size_t srcSize) {
|
||||
return ZSTD_COMPRESSBOUND(srcSize);
|
||||
}
|
||||
@ -249,12 +257,12 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
|
||||
/* ZSTD_assignParamsToCCtxParams() :
|
||||
* params is presumed valid at this stage */
|
||||
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
|
||||
const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
|
||||
const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
|
||||
{
|
||||
ZSTD_CCtx_params ret = *cctxParams;
|
||||
assert(!ZSTD_checkCParams(params.cParams));
|
||||
ret.cParams = params.cParams;
|
||||
ret.fParams = params.fParams;
|
||||
assert(!ZSTD_checkCParams(params->cParams));
|
||||
ret.cParams = params->cParams;
|
||||
ret.fParams = params->fParams;
|
||||
ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
|
||||
return ret;
|
||||
}
|
||||
@ -339,8 +347,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_overlapLog:
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
|
||||
bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
|
||||
#else
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 0;
|
||||
#endif
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_enableLongDistanceMatching:
|
||||
@ -845,7 +858,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
||||
{
|
||||
ZSTD_localDict* const dl = &cctx->localDict;
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
|
||||
&cctx->requestedParams, 0, dl->dictSize);
|
||||
&cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
|
||||
if (dl->dict == NULL) {
|
||||
/* No local dictionary. */
|
||||
assert(dl->dictBuffer == NULL);
|
||||
@ -1006,7 +1019,7 @@ static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
|
||||
* optimize `cPar` for a specified input (`srcSize` and `dictSize`).
|
||||
* mostly downsize to reduce memory consumption and initialization latency.
|
||||
* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
|
||||
* note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
|
||||
* note : `srcSize==0` means 0!
|
||||
* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
|
||||
static ZSTD_compressionParameters
|
||||
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
@ -1017,10 +1030,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
||||
assert(ZSTD_checkCParams(cPar)==0);
|
||||
|
||||
if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
|
||||
srcSize = minSrcSize; /* presumed small when there is a dictionary */
|
||||
else if (srcSize == 0)
|
||||
srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */
|
||||
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
||||
srcSize = minSrcSize;
|
||||
|
||||
/* resize windowLog if input is small enough, to use less memory */
|
||||
if ( (srcSize < maxWindowResize)
|
||||
@ -1049,9 +1060,13 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
||||
size_t dictSize)
|
||||
{
|
||||
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
||||
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
|
||||
}
|
||||
|
||||
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
|
||||
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
|
||||
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
|
||||
{
|
||||
@ -1059,7 +1074,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
|
||||
srcSizeHint = CCtxParams->srcSizeHint;
|
||||
}
|
||||
cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
||||
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
|
||||
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
|
||||
@ -1069,6 +1084,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
|
||||
if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
|
||||
assert(!ZSTD_checkCParams(cParams));
|
||||
/* srcSizeHint == 0 means 0 */
|
||||
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
|
||||
}
|
||||
|
||||
@ -1104,7 +1120,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
||||
{
|
||||
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
||||
{ ZSTD_compressionParameters const cParams =
|
||||
ZSTD_getCParamsFromCCtxParams(params, 0, 0);
|
||||
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
|
||||
U32 const divider = (cParams.minMatch==3) ? 3 : 4;
|
||||
size_t const maxNbSeq = blockSize / divider;
|
||||
@ -1136,7 +1152,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
||||
|
||||
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
return ZSTD_estimateCCtxSize_usingCParams(cParams);
|
||||
}
|
||||
|
||||
@ -1155,7 +1171,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
||||
{
|
||||
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
||||
{ ZSTD_compressionParameters const cParams =
|
||||
ZSTD_getCParamsFromCCtxParams(params, 0, 0);
|
||||
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
|
||||
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
|
||||
size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
|
||||
@ -1175,7 +1191,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
|
||||
|
||||
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
return ZSTD_estimateCStreamSize_usingCParams(cParams);
|
||||
}
|
||||
|
||||
@ -1243,7 +1259,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
|
||||
assert(cParams1.strategy == cParams2.strategy);
|
||||
}
|
||||
|
||||
static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
||||
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
||||
@ -1320,10 +1336,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
||||
|
||||
DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
|
||||
if (forceResetIndex == ZSTDirp_reset) {
|
||||
memset(&ms->window, 0, sizeof(ms->window));
|
||||
ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
|
||||
ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
||||
ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
|
||||
ZSTD_window_init(&ms->window);
|
||||
ZSTD_cwksp_mark_tables_dirty(ws);
|
||||
}
|
||||
|
||||
@ -1416,7 +1429,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1);
|
||||
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
|
||||
|
||||
ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue;
|
||||
ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset;
|
||||
|
||||
if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
|
||||
needsIndexReset = ZSTDirp_reset;
|
||||
@ -1541,11 +1554,12 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
|
||||
zc->maxNbLdmSequences = maxNbLdmSeq;
|
||||
|
||||
memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
|
||||
ZSTD_window_init(&zc->ldmState.window);
|
||||
ZSTD_window_clear(&zc->ldmState.window);
|
||||
}
|
||||
|
||||
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
||||
zc->initialized = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1603,6 +1617,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
||||
assert(windowLog != 0);
|
||||
/* Resize working context table params for input only, since the dict
|
||||
* has its own tables. */
|
||||
/* pledgeSrcSize == 0 means 0! */
|
||||
params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
|
||||
params.cParams.windowLog = windowLog;
|
||||
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
||||
@ -1889,16 +1904,6 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
|
||||
|
||||
/* See doc/zstd_compression_format.md for detailed format description */
|
||||
|
||||
static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
||||
{
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
|
||||
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
||||
dstSize_tooSmall);
|
||||
MEM_writeLE24(dst, cBlockHeader24);
|
||||
memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
||||
return ZSTD_blockHeaderSize + srcSize;
|
||||
}
|
||||
|
||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
||||
{
|
||||
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
||||
@ -1936,6 +1941,16 @@ static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_useTargetCBlockSize():
|
||||
* Returns if target compressed block size param is being used.
|
||||
* If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
|
||||
* Returns 1 if true, 0 otherwise. */
|
||||
static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
|
||||
return (cctxParams->targetCBlockSize != 0);
|
||||
}
|
||||
|
||||
/* ZSTD_compressSequences_internal():
|
||||
* actually compresses both literals and sequences */
|
||||
MEM_STATIC size_t
|
||||
@ -2435,6 +2450,70 @@ out:
|
||||
return cSize;
|
||||
}
|
||||
|
||||
static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
U32 lastBlock) {
|
||||
size_t cSize = 0;
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
|
||||
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
|
||||
|
||||
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
||||
FORWARD_IF_ERROR(bss);
|
||||
if (bss == ZSTDbss_compress) {
|
||||
cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, lastBlock);
|
||||
} }
|
||||
|
||||
/* Superblock compression may fail, in which case
|
||||
* encode using ZSTD_noCompressSuperBlock writing sub blocks
|
||||
* in uncompressed mode.
|
||||
*/
|
||||
if (cSize == 0) {
|
||||
cSize = ZSTD_noCompressSuperBlock(dst, dstCapacity, src, srcSize, zc->appliedParams.targetCBlockSize, lastBlock);
|
||||
/* In compression, there is an assumption that a compressed block is always
|
||||
* within the size of ZSTD_compressBound(). However, SuperBlock compression
|
||||
* can exceed the limit due to overhead of headers from SubBlocks.
|
||||
* This breaks in streaming mode where output buffer in compress context is
|
||||
* allocated ZSTD_compressBound() amount of memory, which may not be big
|
||||
* enough for SuperBlock compression.
|
||||
* In such case, fall back to normal compression. This is possible because
|
||||
* targetCBlockSize is best effort not a guarantee. */
|
||||
if (cSize != ERROR(dstSize_tooSmall)) return cSize;
|
||||
else {
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
/* If ZSTD_noCompressSuperBlock fails with dstSize_tooSmall,
|
||||
* compress normally.
|
||||
*/
|
||||
cSize = ZSTD_compressSequences(&zc->seqStore,
|
||||
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
ostart+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||
srcSize,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||
zc->bmi2);
|
||||
if (!ZSTD_isError(cSize) && cSize != 0) {
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(ostart, cBlockHeader24);
|
||||
cSize += ZSTD_blockHeaderSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ZSTD_isError(cSize) && cSize != 0) {
|
||||
/* confirm repcodes and entropy tables when emitting a compressed block */
|
||||
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
|
||||
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
|
||||
zc->blockState.nextCBlock = tmp;
|
||||
}
|
||||
/* We check that dictionaries have offset codes available for the first
|
||||
* block. After the first block, the offcode table might not have large
|
||||
* enough codes to represent the offsets in the data.
|
||||
*/
|
||||
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
||||
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
||||
|
||||
return cSize;
|
||||
}
|
||||
|
||||
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
||||
ZSTD_cwksp* ws,
|
||||
@ -2500,21 +2579,30 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
|
||||
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
||||
|
||||
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
|
||||
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||
ip, blockSize, 1 /* frame */);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
if (cSize == 0) { /* block is not compressible */
|
||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
||||
{ size_t cSize;
|
||||
int useTargetCBlockSize = ZSTD_useTargetCBlockSize(&cctx->appliedParams);
|
||||
if (useTargetCBlockSize) {
|
||||
cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
} else {
|
||||
const U32 cBlockHeader = cSize == 1 ?
|
||||
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
||||
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(op, cBlockHeader);
|
||||
cSize += ZSTD_blockHeaderSize;
|
||||
cSize = ZSTD_compressBlock_internal(cctx,
|
||||
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||
ip, blockSize, 1 /* frame */);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
|
||||
if (cSize == 0) { /* block is not compressible */
|
||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
} else {
|
||||
U32 const cBlockHeader = cSize == 1 ?
|
||||
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
||||
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(op, cBlockHeader);
|
||||
cSize += ZSTD_blockHeaderSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ip += blockSize;
|
||||
assert(remaining >= blockSize);
|
||||
remaining -= blockSize;
|
||||
@ -2763,37 +2851,13 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Dictionary format :
|
||||
* See :
|
||||
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
|
||||
*/
|
||||
/*! ZSTD_loadZstdDictionary() :
|
||||
* @return : dictID, or an error code
|
||||
* assumptions : magic number supposed already checked
|
||||
* dictSize supposed >= 8
|
||||
*/
|
||||
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
ZSTD_matchState_t* ms,
|
||||
ZSTD_cwksp* ws,
|
||||
ZSTD_CCtx_params const* params,
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictTableLoadMethod_e dtlm,
|
||||
void* workspace)
|
||||
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
||||
short* offcodeNCount, unsigned* offcodeMaxValue,
|
||||
const void* const dict, size_t dictSize)
|
||||
{
|
||||
const BYTE* dictPtr = (const BYTE*)dict;
|
||||
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
|
||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||
short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff;
|
||||
size_t dictID;
|
||||
|
||||
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
||||
assert(dictSize >= 8);
|
||||
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
||||
|
||||
dictPtr += 4; /* skip magic number */
|
||||
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
|
||||
dictPtr += 4;
|
||||
dictPtr += 8;
|
||||
|
||||
{ unsigned maxSymbolValue = 255;
|
||||
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
|
||||
@ -2803,7 +2867,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
}
|
||||
|
||||
{ unsigned offcodeLog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
|
||||
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
|
||||
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
||||
@ -2852,6 +2916,42 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
bs->rep[2] = MEM_readLE32(dictPtr+8);
|
||||
dictPtr += 12;
|
||||
|
||||
return dictPtr - (const BYTE*)dict;
|
||||
}
|
||||
|
||||
/* Dictionary format :
|
||||
* See :
|
||||
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
|
||||
*/
|
||||
/*! ZSTD_loadZstdDictionary() :
|
||||
* @return : dictID, or an error code
|
||||
* assumptions : magic number supposed already checked
|
||||
* dictSize supposed >= 8
|
||||
*/
|
||||
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
ZSTD_matchState_t* ms,
|
||||
ZSTD_cwksp* ws,
|
||||
ZSTD_CCtx_params const* params,
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictTableLoadMethod_e dtlm,
|
||||
void* workspace)
|
||||
{
|
||||
const BYTE* dictPtr = (const BYTE*)dict;
|
||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||
short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff;
|
||||
size_t dictID;
|
||||
size_t eSize;
|
||||
|
||||
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
||||
assert(dictSize >= 8);
|
||||
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
||||
|
||||
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
|
||||
eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
|
||||
FORWARD_IF_ERROR(eSize);
|
||||
dictPtr += eSize;
|
||||
|
||||
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
||||
U32 offcodeMax = MaxOff;
|
||||
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
||||
@ -2986,7 +3086,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
||||
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
||||
{
|
||||
ZSTD_CCtx_params const cctxParams =
|
||||
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
|
||||
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms);
|
||||
return ZSTD_compressBegin_advanced_internal(cctx,
|
||||
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
|
||||
NULL /*cdict*/,
|
||||
@ -2995,9 +3095,9 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
||||
|
||||
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
ZSTD_CCtx_params const cctxParams =
|
||||
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
|
||||
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms);
|
||||
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
|
||||
return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
|
||||
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
|
||||
@ -3081,7 +3181,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict,size_t dictSize,
|
||||
ZSTD_parameters params)
|
||||
const ZSTD_parameters* params)
|
||||
{
|
||||
ZSTD_CCtx_params const cctxParams =
|
||||
ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
|
||||
@ -3105,7 +3205,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
dict, dictSize,
|
||||
params);
|
||||
¶ms);
|
||||
}
|
||||
|
||||
/* Internal */
|
||||
@ -3129,8 +3229,8 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
|
||||
const void* dict, size_t dictSize,
|
||||
int compressionLevel)
|
||||
{
|
||||
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
|
||||
ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
|
||||
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
|
||||
ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms);
|
||||
assert(params.fParams.contentSizeFlag == 1);
|
||||
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
|
||||
}
|
||||
@ -3176,7 +3276,7 @@ size_t ZSTD_estimateCDictSize_advanced(
|
||||
|
||||
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
|
||||
}
|
||||
|
||||
@ -3287,7 +3387,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
|
||||
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
|
||||
ZSTD_dlm_byCopy, ZSTD_dct_auto,
|
||||
cParams, ZSTD_defaultCMem);
|
||||
@ -3298,7 +3398,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL
|
||||
|
||||
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
return ZSTD_createCDict_advanced(dict, dictSize,
|
||||
ZSTD_dlm_byRef, ZSTD_dct_auto,
|
||||
cParams, ZSTD_defaultCMem);
|
||||
@ -3590,7 +3690,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
|
||||
FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
|
||||
FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
|
||||
FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
|
||||
zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params);
|
||||
zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms);
|
||||
FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
|
||||
return 0;
|
||||
}
|
||||
@ -3655,11 +3755,11 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
ZSTD_EndDirective const flushMode)
|
||||
{
|
||||
const char* const istart = (const char*)input->src;
|
||||
const char* const iend = istart + input->size;
|
||||
const char* ip = istart + input->pos;
|
||||
const char* const iend = input->size != 0 ? istart + input->size : istart;
|
||||
const char* ip = input->pos != 0 ? istart + input->pos : istart;
|
||||
char* const ostart = (char*)output->dst;
|
||||
char* const oend = ostart + output->size;
|
||||
char* op = ostart + output->pos;
|
||||
char* const oend = output->size != 0 ? ostart + output->size : ostart;
|
||||
char* op = output->pos != 0 ? ostart + output->pos : ostart;
|
||||
U32 someMoreWork = 1;
|
||||
|
||||
/* check expectations */
|
||||
@ -3698,7 +3798,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
zcs->inBuff + zcs->inBuffPos, toLoad,
|
||||
ip, iend-ip);
|
||||
zcs->inBuffPos += loaded;
|
||||
ip += loaded;
|
||||
if (loaded != 0)
|
||||
ip += loaded;
|
||||
if ( (flushMode == ZSTD_e_continue)
|
||||
&& (zcs->inBuffPos < zcs->inBuffTarget) ) {
|
||||
/* not enough input to fill full block : stop here */
|
||||
@ -3758,7 +3859,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
|
||||
DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
|
||||
(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
|
||||
op += flushed;
|
||||
if (flushed)
|
||||
op += flushed;
|
||||
zcs->outBuffFlushedSize += flushed;
|
||||
if (toFlush!=flushed) {
|
||||
/* flush not fully completed, presumably because dst is too small */
|
||||
@ -4069,35 +4171,56 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
|
||||
},
|
||||
};
|
||||
|
||||
/*! ZSTD_getCParams() :
|
||||
/*! ZSTD_getCParams_internal() :
|
||||
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
|
||||
* Size values are optional, provide 0 if not known or unused */
|
||||
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
|
||||
* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
|
||||
* Use dictSize == 0 for unknown or unused. */
|
||||
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
|
||||
{
|
||||
size_t const addedSize = srcSizeHint ? 0 : 500;
|
||||
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */
|
||||
int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
|
||||
U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
|
||||
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
|
||||
int row = compressionLevel;
|
||||
DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
|
||||
DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
|
||||
if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
|
||||
if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
|
||||
if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
|
||||
{ ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
|
||||
if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
|
||||
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */
|
||||
/* refine parameters based on srcSize & dictSize */
|
||||
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_getCParams() :
|
||||
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
|
||||
* Size values are optional, provide 0 if not known or unused */
|
||||
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
|
||||
{
|
||||
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
|
||||
}
|
||||
|
||||
/*! ZSTD_getParams() :
|
||||
* same idea as ZSTD_getCParams()
|
||||
* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
|
||||
* Fields of `ZSTD_frameParameters` are set to default values */
|
||||
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
|
||||
ZSTD_parameters params;
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
|
||||
DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.cParams = cParams;
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
return params;
|
||||
}
|
||||
|
||||
/*! ZSTD_getParams() :
|
||||
* same idea as ZSTD_getCParams()
|
||||
* @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
|
||||
* Fields of `ZSTD_frameParameters` are set to default values */
|
||||
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
|
||||
ZSTD_parameters params;
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
|
||||
DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.cParams = cParams;
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
return params;
|
||||
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
|
||||
}
|
||||
|
@ -249,6 +249,7 @@ struct ZSTD_CCtx_s {
|
||||
size_t staticSize;
|
||||
SeqCollector seqCollector;
|
||||
int isFirstBlock;
|
||||
int initialized;
|
||||
|
||||
seqStore_t seqStore; /* sequences storage ptrs */
|
||||
ldmState_t ldmState; /* long distance matching state */
|
||||
@ -336,6 +337,20 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ZSTD_noCompressBlock() :
|
||||
* Writes uncompressed block to dst buffer from given src.
|
||||
* Returns the size of the block */
|
||||
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
|
||||
{
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
|
||||
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
|
||||
dstSize_tooSmall);
|
||||
MEM_writeLE24(dst, cBlockHeader24);
|
||||
memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
|
||||
return ZSTD_blockHeaderSize + srcSize;
|
||||
}
|
||||
|
||||
|
||||
/* ZSTD_minGain() :
|
||||
* minimum compression required
|
||||
* to generate a compress block or a compressed literals section.
|
||||
@ -844,6 +859,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
||||
} } }
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
||||
memset(window, 0, sizeof(*window));
|
||||
window->base = (BYTE const*)"";
|
||||
window->dictBase = (BYTE const*)"";
|
||||
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
|
||||
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
|
||||
window->nextSrc = window->base + 1; /* see issue #1241 */
|
||||
}
|
||||
|
||||
/**
|
||||
* ZSTD_window_update():
|
||||
* Updates the window by appending [src, src + srcSize) to the window.
|
||||
@ -857,6 +881,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
||||
BYTE const* const ip = (BYTE const*)src;
|
||||
U32 contiguous = 1;
|
||||
DEBUGLOG(5, "ZSTD_window_update");
|
||||
if (srcSize == 0)
|
||||
return contiguous;
|
||||
assert(window->base != NULL);
|
||||
assert(window->dictBase != NULL);
|
||||
/* Check if blocks follow each other */
|
||||
if (src != window->nextSrc) {
|
||||
/* not contiguous */
|
||||
@ -931,6 +959,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ===============================================================
|
||||
* Shared internal declarations
|
||||
* These prototypes may be called from sources not in lib/compress
|
||||
* =============================================================== */
|
||||
|
||||
/* ZSTD_loadCEntropy() :
|
||||
* dict : must point at beginning of a valid zstd dictionary.
|
||||
* return : size of dictionary header (size of magic number + dict ID + entropy tables)
|
||||
* assumptions : magic number supposed already checked
|
||||
* and dictSize >= 8 */
|
||||
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
||||
short* offcodeNCount, unsigned* offcodeMaxValue,
|
||||
const void* const dict, size_t dictSize);
|
||||
|
||||
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
||||
|
||||
/* ==============================================================
|
||||
* Private declarations
|
||||
@ -940,6 +983,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
/* ZSTD_getCParamsFromCCtxParams() :
|
||||
* cParams are built depending on compressionLevel, src size hints,
|
||||
* LDM and manually set compression parameters.
|
||||
* Note: srcSizeHint == 0 means 0!
|
||||
*/
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
|
||||
|
@ -102,11 +102,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
cLitSize = singleStream ?
|
||||
HUF_compress1X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
255, 11, entropyWorkspace, entropyWorkspaceSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
||||
HUF_compress4X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
255, 11, entropyWorkspace, entropyWorkspaceSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
|
@ -86,7 +86,7 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
|
||||
* Returns the cost in bits of encoding the distribution in count using ctable.
|
||||
* Returns an error if ctable cannot represent all the symbols in count.
|
||||
*/
|
||||
static size_t ZSTD_fseBitCost(
|
||||
size_t ZSTD_fseBitCost(
|
||||
FSE_CTable const* ctable,
|
||||
unsigned const* count,
|
||||
unsigned const max)
|
||||
@ -117,8 +117,8 @@ static size_t ZSTD_fseBitCost(
|
||||
* table described by norm. The max symbol support by norm is assumed >= max.
|
||||
* norm must be valid for every symbol with non-zero probability in count.
|
||||
*/
|
||||
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
||||
unsigned const* count, unsigned const max)
|
||||
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
||||
unsigned const* count, unsigned const max)
|
||||
{
|
||||
unsigned const shift = 8 - accuracyLog;
|
||||
size_t cost = 0;
|
||||
|
@ -44,4 +44,11 @@ size_t ZSTD_encodeSequences(
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
|
||||
|
||||
size_t ZSTD_fseBitCost(
|
||||
FSE_CTable const* ctable,
|
||||
unsigned const* count,
|
||||
unsigned const max);
|
||||
|
||||
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
||||
unsigned const* count, unsigned const max);
|
||||
#endif /* ZSTD_COMPRESS_SEQUENCES_H */
|
||||
|
742
lib/compress/zstd_compress_superblock.c
Normal file
742
lib/compress/zstd_compress_superblock.c
Normal file
@ -0,0 +1,742 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "zstd_compress_sequences.h"
|
||||
#include "zstd_compress_literals.h"
|
||||
#include "zstd_compress_superblock.h"
|
||||
|
||||
/*-*************************************
|
||||
* Superblock entropy buffer structs
|
||||
***************************************/
|
||||
/** ZSTD_hufCTablesMetadata_t :
|
||||
* Stores Literals Block Type for a super-block in hType, and
|
||||
* huffman tree description in hufDesBuffer.
|
||||
* hufDesSize refers to the size of huffman tree description in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e hType;
|
||||
BYTE hufDesBuffer[500]; // TODO give name to this value
|
||||
size_t hufDesSize;
|
||||
} ZSTD_hufCTablesMetadata_t;
|
||||
|
||||
/** ZSTD_fseCTablesMetadata_t :
|
||||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
||||
* fse tables in fseTablesBuffer.
|
||||
* fseTablesSize refers to the size of fse tables in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e llType;
|
||||
symbolEncodingType_e ofType;
|
||||
symbolEncodingType_e mlType;
|
||||
BYTE fseTablesBuffer[500]; // TODO give name to this value
|
||||
size_t fseTablesSize;
|
||||
size_t lastCountSize; // This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences()
|
||||
} ZSTD_fseCTablesMetadata_t;
|
||||
|
||||
typedef struct {
|
||||
ZSTD_hufCTablesMetadata_t hufMetadata;
|
||||
ZSTD_fseCTablesMetadata_t fseMetadata;
|
||||
} ZSTD_entropyCTablesMetadata_t;
|
||||
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy_literal() :
|
||||
* Builds entropy for the super-block literals.
|
||||
* Stores literals block type (raw, rle, compressed) and
|
||||
* huffman description table to hufMetadata.
|
||||
* Currently, this does not consider the option of reusing huffman table from
|
||||
* previous super-block. I think it would be a good improvement to add that option.
|
||||
* @return : size of huffman description table or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
||||
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
||||
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
||||
unsigned maxSymbolValue = 255;
|
||||
unsigned huffLog = 11;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) { hufMetadata->hType = set_basic; return 0; }
|
||||
}
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(largest);
|
||||
if (largest == srcSize) { hufMetadata->hType = set_rle; return 0; }
|
||||
if (largest <= (srcSize >> 7)+4) { hufMetadata->hType = set_basic; return 0; }
|
||||
}
|
||||
|
||||
|
||||
/* Build Huffman Tree */
|
||||
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
||||
maxSymbolValue, huffLog,
|
||||
nodeWksp, nodeWkspSize);
|
||||
FORWARD_IF_ERROR(maxBits);
|
||||
huffLog = (U32)maxBits;
|
||||
{ size_t cSize = HUF_estimateCompressedSize(
|
||||
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
||||
size_t hSize = HUF_writeCTable(
|
||||
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
||||
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
|
||||
if (cSize + hSize >= srcSize) { hufMetadata->hType = set_basic; return 0; }
|
||||
hufMetadata->hType = set_compressed;
|
||||
return hSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy_sequences() :
|
||||
* Builds entropy for the super-block sequences.
|
||||
* Stores symbol compression modes and fse table to fseMetadata.
|
||||
* @return : size of fse tables or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
||||
const ZSTD_fseCTables_t* prevEntropy,
|
||||
ZSTD_fseCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
||||
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
||||
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
||||
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
||||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
||||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
||||
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
||||
BYTE* op = ostart;
|
||||
|
||||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
||||
memset(workspace, 0, wkspSize);
|
||||
|
||||
fseMetadata->lastCountSize = 0;
|
||||
/* convert length/distances into codes */
|
||||
ZSTD_seqToCodes(seqStorePtr);
|
||||
/* build CTable for Literal Lengths */
|
||||
{ U32 LLtype;
|
||||
unsigned max = MaxLL;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building LL table");
|
||||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
||||
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
LLFSELog, prevEntropy->litlengthCTable,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(set_basic < set_compressed && set_rle < set_compressed);
|
||||
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
||||
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize);
|
||||
if (LLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
||||
} }
|
||||
/* build CTable for Offsets */
|
||||
{ U32 Offtype;
|
||||
unsigned max = MaxOff;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||
DEBUGLOG(5, "Building OF table");
|
||||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
||||
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
OffFSELog, prevEntropy->offcodeCTable,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
defaultPolicy, strategy);
|
||||
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
||||
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize);
|
||||
if (Offtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
||||
} }
|
||||
/* build CTable for MatchLengths */
|
||||
{ U32 MLtype;
|
||||
unsigned max = MaxML;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
||||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
||||
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
MLFSELog, prevEntropy->matchlengthCTable,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
||||
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize);
|
||||
if (MLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
||||
} }
|
||||
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_buildSuperBlockEntropy() :
|
||||
* Builds entropy for the super-block.
|
||||
* @return : 0 on success or error code */
|
||||
static size_t
|
||||
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
||||
entropyMetadata->hufMetadata.hufDesSize =
|
||||
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
||||
&prevEntropy->huf, &nextEntropy->huf,
|
||||
&entropyMetadata->hufMetadata,
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize);
|
||||
entropyMetadata->fseMetadata.fseTablesSize =
|
||||
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
||||
&prevEntropy->fse, &nextEntropy->fse,
|
||||
cctxParams,
|
||||
&entropyMetadata->fseMetadata,
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock_literal() :
|
||||
* Compresses literals section for a sub-block.
|
||||
* Compressed literal size needs to be less than uncompressed literal size.
|
||||
* ZSTD spec doesn't have this constaint. I will explain why I have this constraint here.
|
||||
* Literals section header size ranges from 1 to 5 bytes,
|
||||
* which is dictated by regenerated size and compressed size.
|
||||
* In order to figure out the memory address to start writing compressed literal,
|
||||
* it is necessary to figure out the literals section header size.
|
||||
* The challenge is that compressed size is only known after compression.
|
||||
* This is a chicken and egg problem.
|
||||
* I am simplifying the problem by assuming that
|
||||
* compressed size will always be less than or equal to regenerated size,
|
||||
* and using regenerated size to calculate literals section header size.
|
||||
* hufMetadata->hType has literals block type info.
|
||||
* If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
|
||||
* If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
|
||||
* If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
|
||||
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
|
||||
* @return : compressed size of literals section of a sub-block
|
||||
* Or 0 if it unable to compress.
|
||||
* Or error code */
|
||||
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const BYTE* literals, size_t litSize,
|
||||
void* dst, size_t dstSize,
|
||||
const int bmi2, int writeEntropy)
|
||||
{
|
||||
size_t const lhSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
BYTE* op = ostart + lhSize;
|
||||
U32 singleStream = litSize < 256;
|
||||
symbolEncodingType_e hType = writeEntropy ? set_compressed : set_repeat;
|
||||
size_t cLitSize = 0;
|
||||
|
||||
(void)bmi2; // TODO bmi2...
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
|
||||
|
||||
if (writeEntropy && litSize == 0) {
|
||||
/* Literals section cannot be compressed mode when litSize == 0.
|
||||
* (This seems to be decoder constraint.)
|
||||
* Entropy cannot be written if literals section is not compressed mode.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (litSize == 0 || hufMetadata->hType == set_basic) {
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
|
||||
return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
|
||||
} else if (hufMetadata->hType == set_rle) {
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
|
||||
}
|
||||
|
||||
if (lhSize == 3) singleStream = 1;
|
||||
if (writeEntropy) {
|
||||
memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
||||
op += hufMetadata->hufDesSize;
|
||||
cLitSize += hufMetadata->hufDesSize;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
||||
}
|
||||
|
||||
// TODO bmi2
|
||||
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
|
||||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
|
||||
op += cSize;
|
||||
cLitSize += cSize;
|
||||
if (cSize == 0 || ERR_isError(cSize)) {
|
||||
return 0;
|
||||
}
|
||||
if (cLitSize > litSize) {
|
||||
if (writeEntropy) return 0;
|
||||
else return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
|
||||
}
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
|
||||
}
|
||||
|
||||
/* Build header */
|
||||
switch(lhSize)
|
||||
{
|
||||
case 3: /* 2 - 2 - 10 - 10 */
|
||||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
||||
MEM_writeLE24(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 4: /* 2 - 2 - 14 - 14 */
|
||||
{ U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 5: /* 2 - 2 - 18 - 18 */
|
||||
{ U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
ostart[4] = (BYTE)(cLitSize >> 10);
|
||||
break;
|
||||
}
|
||||
default: /* not possible : lhSize is {3,4,5} */
|
||||
assert(0);
|
||||
}
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
static size_t ZSTD_seqDecompressedSize(const seqDef* sequences, size_t nbSeq, size_t litSize) {
|
||||
const seqDef* const sstart = sequences;
|
||||
const seqDef* const send = sequences + nbSeq;
|
||||
const seqDef* sp = sstart;
|
||||
size_t matchLengthSum = 0;
|
||||
while (send-sp > 0) {
|
||||
matchLengthSum += sp->matchLength + MINMATCH;
|
||||
sp++;
|
||||
}
|
||||
return matchLengthSum + litSize;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock_sequences() :
|
||||
* Compresses sequences section for a sub-block.
|
||||
* fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
|
||||
* symbol compression modes for the super-block.
|
||||
* First sub-block will have these in its header. The following sub-blocks
|
||||
* will always have repeat mode.
|
||||
* @return : compressed size of sequences section of a sub-block
|
||||
* Or 0 if it is unable to compress
|
||||
* Or error code. */
|
||||
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy)
|
||||
{
|
||||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstCapacity;
|
||||
BYTE* op = ostart;
|
||||
BYTE* seqHead;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
|
||||
|
||||
/* Sequences Header */
|
||||
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
|
||||
dstSize_tooSmall);
|
||||
if (nbSeq < 0x7F)
|
||||
*op++ = (BYTE)nbSeq;
|
||||
else if (nbSeq < LONGNBSEQ)
|
||||
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
|
||||
else
|
||||
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
||||
if (writeEntropy && nbSeq == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (nbSeq==0) {
|
||||
return op - ostart;
|
||||
}
|
||||
|
||||
/* seqHead : flags for FSE encoding type */
|
||||
seqHead = op++;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
|
||||
|
||||
if (writeEntropy) {
|
||||
const U32 LLtype = fseMetadata->llType;
|
||||
const U32 Offtype = fseMetadata->ofType;
|
||||
const U32 MLtype = fseMetadata->mlType;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
|
||||
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
|
||||
memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
||||
op += fseMetadata->fseTablesSize;
|
||||
} else {
|
||||
const U32 repeat = set_repeat;
|
||||
*seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
|
||||
}
|
||||
|
||||
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
||||
op, oend - op,
|
||||
fseTables->matchlengthCTable, mlCode,
|
||||
fseTables->offcodeCTable, ofCode,
|
||||
fseTables->litlengthCTable, llCode,
|
||||
sequences, nbSeq,
|
||||
longOffsets, bmi2);
|
||||
FORWARD_IF_ERROR(bitstreamSize);
|
||||
op += bitstreamSize;
|
||||
/* zstd versions <= 1.3.4 mistakenly report corruption when
|
||||
* FSE_readNCount() receives a buffer < 4 bytes.
|
||||
* Fixed by https://github.com/facebook/zstd/pull/1146.
|
||||
* This can happen when the last set_compressed table present is 2
|
||||
* bytes and the bitstream is only one byte.
|
||||
* In this exceedingly rare case, we will simply emit an uncompressed
|
||||
* block, since it isn't worth optimizing.
|
||||
*/
|
||||
if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
|
||||
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
|
||||
assert(fseMetadata->lastCountSize + bitstreamSize == 3);
|
||||
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
|
||||
"emitting an uncompressed block.");
|
||||
return 0;
|
||||
}
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
|
||||
}
|
||||
|
||||
/* zstd versions <= 1.4.0 mistakenly report error when
|
||||
* sequences section body size is less than 3 bytes.
|
||||
* Fixed by https://github.com/facebook/zstd/pull/1664.
|
||||
* This can happen when the previous sequences section block is compressed
|
||||
* with rle mode and the current block's sequences section is compressed
|
||||
* with repeat mode where sequences section body size can be 1 byte.
|
||||
*/
|
||||
if (op-seqHead < 4) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return op - ostart;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock() :
|
||||
* Compresses a single sub-block.
|
||||
* @return : compressed size of the sub-block
|
||||
* Or 0 if it failed to compress. */
|
||||
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
||||
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
const seqDef* sequences, size_t nbSeq,
|
||||
const BYTE* literals, size_t litSize,
|
||||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, int writeEntropy, U32 lastBlock)
|
||||
{
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstCapacity;
|
||||
BYTE* op = ostart + ZSTD_blockHeaderSize;
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeEntropy=%d, lastBlock=%d)",
|
||||
litSize, nbSeq, writeEntropy, lastBlock);
|
||||
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
|
||||
&entropyMetadata->hufMetadata, literals, litSize,
|
||||
op, oend-op, bmi2, writeEntropy);
|
||||
FORWARD_IF_ERROR(cLitSize);
|
||||
if (cLitSize == 0) return 0;
|
||||
op += cLitSize;
|
||||
}
|
||||
{ size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
|
||||
&entropyMetadata->fseMetadata,
|
||||
sequences, nbSeq,
|
||||
llCode, mlCode, ofCode,
|
||||
cctxParams,
|
||||
op, oend-op,
|
||||
bmi2, writeEntropy);
|
||||
FORWARD_IF_ERROR(cSeqSize);
|
||||
if (cSeqSize == 0) return 0;
|
||||
op += cSeqSize;
|
||||
}
|
||||
/* Write block header */
|
||||
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
|
||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||
MEM_writeLE24(ostart, cBlockHeader24);
|
||||
}
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
||||
const ZSTD_hufCTables_t* huf,
|
||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
void* workspace, size_t wkspSize,
|
||||
int writeEntropy)
|
||||
{
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
unsigned maxSymbolValue = 255;
|
||||
size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
|
||||
if (hufMetadata->hType == set_basic) return litSize;
|
||||
else if (hufMetadata->hType == set_rle) return 1;
|
||||
else if (hufMetadata->hType == set_compressed) {
|
||||
size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
|
||||
if (ZSTD_isError(largest)) return litSize;
|
||||
{ size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
|
||||
if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
|
||||
return cLitSizeEstimate + literalSectionHeaderSize;
|
||||
} }
|
||||
assert(0); /* impossible */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
const BYTE* codeTable, unsigned maxCode,
|
||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||
const U32* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const BYTE* ctp = codeTable;
|
||||
const BYTE* const ctStart = ctp;
|
||||
const BYTE* const ctEnd = ctStart + nbSeq;
|
||||
size_t cSymbolTypeSizeEstimateInBits = 0;
|
||||
unsigned max = maxCode;
|
||||
|
||||
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
if (type == set_basic) {
|
||||
cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
|
||||
} else if (type == set_rle) {
|
||||
cSymbolTypeSizeEstimateInBits = 0;
|
||||
} else if (type == set_compressed || type == set_repeat) {
|
||||
cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
|
||||
}
|
||||
if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
|
||||
while (ctp < ctEnd) {
|
||||
if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
|
||||
else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
|
||||
ctp++;
|
||||
}
|
||||
return cSymbolTypeSizeEstimateInBits / 8;
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
||||
const BYTE* llCodeTable,
|
||||
const BYTE* mlCodeTable,
|
||||
size_t nbSeq,
|
||||
const ZSTD_fseCTables_t* fseTables,
|
||||
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
void* workspace, size_t wkspSize,
|
||||
int writeEntropy)
|
||||
{
|
||||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
size_t cSeqSizeEstimate = 0;
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
||||
nbSeq, fseTables->offcodeCTable, NULL,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
|
||||
nbSeq, fseTables->litlengthCTable, LL_bits,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
workspace, wkspSize);
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
|
||||
nbSeq, fseTables->matchlengthCTable, ML_bits,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
workspace, wkspSize);
|
||||
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
||||
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
||||
}
|
||||
|
||||
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
||||
const BYTE* ofCodeTable,
|
||||
const BYTE* llCodeTable,
|
||||
const BYTE* mlCodeTable,
|
||||
size_t nbSeq,
|
||||
const ZSTD_entropyCTables_t* entropy,
|
||||
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize,
|
||||
int writeEntropy) {
|
||||
size_t cSizeEstimate = 0;
|
||||
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
|
||||
&entropy->huf, &entropyMetadata->hufMetadata,
|
||||
workspace, wkspSize, writeEntropy);
|
||||
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
||||
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
||||
workspace, wkspSize, writeEntropy);
|
||||
return cSizeEstimate + ZSTD_blockHeaderSize;
|
||||
}
|
||||
|
||||
/** ZSTD_compressSubBlock_multi() :
|
||||
* Breaks super-block into multiple sub-blocks and compresses them.
|
||||
* Entropy will be written to the first block.
|
||||
* The following blocks will use repeat mode to compress.
|
||||
* All sub-blocks are compressed blocks (no raw or rle blocks).
|
||||
* @return : compressed size of the super block (which is multiple ZSTD blocks)
|
||||
* Or 0 if it failed to compress. */
|
||||
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* entropy,
|
||||
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
void* dst, size_t dstCapacity,
|
||||
const int bmi2, U32 lastBlock,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
||||
const seqDef* const send = seqStorePtr->sequences;
|
||||
const seqDef* sp = sstart;
|
||||
const BYTE* const lstart = seqStorePtr->litStart;
|
||||
const BYTE* const lend = seqStorePtr->lit;
|
||||
const BYTE* lp = lstart;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstCapacity;
|
||||
BYTE* op = ostart;
|
||||
const BYTE* llCodePtr = seqStorePtr->llCode;
|
||||
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
||||
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
||||
size_t targetCBlockSize = cctxParams->targetCBlockSize;
|
||||
size_t litSize, seqCount;
|
||||
int writeEntropy = 1;
|
||||
size_t remaining = ZSTD_seqDecompressedSize(sstart, send-sstart, lend-lstart);
|
||||
size_t cBlockSizeEstimate = 0;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
|
||||
(unsigned)(lend-lp), (unsigned)(send-sstart));
|
||||
|
||||
litSize = 0;
|
||||
seqCount = 0;
|
||||
while (sp + seqCount < send) {
|
||||
const seqDef* const sequence = sp + seqCount;
|
||||
const U32 lastSequence = sequence+1 == send;
|
||||
litSize = (sequence == send) ? (size_t)(lend-lp) : litSize + sequence->litLength;
|
||||
seqCount++;
|
||||
/* I think there is an optimization opportunity here.
|
||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
||||
* since it recalculates estimate from scratch.
|
||||
* For example, it would recount literal distribution and symbol codes everytime.
|
||||
*/
|
||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
||||
entropy, entropyMetadata,
|
||||
workspace, wkspSize, writeEntropy);
|
||||
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
|
||||
const size_t decompressedSize = ZSTD_seqDecompressedSize(sp, seqCount, litSize);
|
||||
const size_t cSize = ZSTD_compressSubBlock(entropy, entropyMetadata,
|
||||
sp, seqCount,
|
||||
lp, litSize,
|
||||
llCodePtr, mlCodePtr, ofCodePtr,
|
||||
cctxParams,
|
||||
op, oend-op,
|
||||
bmi2, writeEntropy, lastBlock && lastSequence);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
if (cSize > 0 && cSize < decompressedSize) {
|
||||
assert(remaining >= decompressedSize);
|
||||
remaining -= decompressedSize;
|
||||
sp += seqCount;
|
||||
lp += litSize;
|
||||
op += cSize;
|
||||
llCodePtr += seqCount;
|
||||
mlCodePtr += seqCount;
|
||||
ofCodePtr += seqCount;
|
||||
litSize = 0;
|
||||
seqCount = 0;
|
||||
writeEntropy = 0; // Entropy only needs to be written once
|
||||
}
|
||||
}
|
||||
}
|
||||
if (remaining) {
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi failed to compress");
|
||||
return 0;
|
||||
}
|
||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
void* dst, size_t dstCapacity,
|
||||
unsigned lastBlock) {
|
||||
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
|
||||
&zc->blockState.prevCBlock->entropy,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
&entropyMetadata,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */));
|
||||
|
||||
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&entropyMetadata,
|
||||
&zc->appliedParams,
|
||||
dst, dstCapacity,
|
||||
zc->bmi2, lastBlock,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
}
|
||||
|
||||
size_t ZSTD_noCompressSuperBlock(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
size_t targetCBlockSize,
|
||||
unsigned lastBlock) {
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* ip = istart;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstCapacity;
|
||||
BYTE* op = ostart;
|
||||
DEBUGLOG(5, "ZSTD_noCompressSuperBlock (dstCapacity=%zu, srcSize=%zu, targetCBlockSize=%zu)",
|
||||
dstCapacity, srcSize, targetCBlockSize);
|
||||
while (ip < iend) {
|
||||
size_t remaining = iend-ip;
|
||||
unsigned lastSubBlock = remaining <= targetCBlockSize;
|
||||
size_t blockSize = lastSubBlock ? remaining : targetCBlockSize;
|
||||
size_t cSize = ZSTD_noCompressBlock(op, oend-op, ip, blockSize, lastSubBlock && lastBlock);
|
||||
FORWARD_IF_ERROR(cSize);
|
||||
ip += blockSize;
|
||||
op += cSize;
|
||||
}
|
||||
return op-ostart;
|
||||
}
|
41
lib/compress/zstd_compress_superblock.h
Normal file
41
lib/compress/zstd_compress_superblock.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_COMPRESS_ADVANCED_H
|
||||
#define ZSTD_COMPRESS_ADVANCED_H
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
|
||||
#include "zstd.h" /* ZSTD_CCtx */
|
||||
|
||||
/*-*************************************
|
||||
* Target Compressed Block Size
|
||||
***************************************/
|
||||
|
||||
/* ZSTD_compressSuperBlock() :
|
||||
* Used to compress a super block when targetCBlockSize is being used.
|
||||
* The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
|
||||
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
void* dst, size_t dstCapacity,
|
||||
unsigned lastBlock);
|
||||
|
||||
/* ZSTD_noCompressSuperBlock() :
|
||||
* Used to break a super block into multiple uncompressed sub blocks
|
||||
* when targetCBlockSize is being used.
|
||||
* The given block will be broken into multiple uncompressed sub blocks that are
|
||||
* around targetCBlockSize. */
|
||||
size_t ZSTD_noCompressSuperBlock(void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
size_t targetCBlockSize,
|
||||
unsigned lastBlock);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_ADVANCED_H */
|
@ -96,7 +96,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
dictCParams->hashLog : hBitsL;
|
||||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->chainLog : hBitsS;
|
||||
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
||||
|
||||
@ -271,7 +271,7 @@ _match_stored:
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex2 < prefixLowestIndex ?
|
||||
dictBase - dictIndexDelta + repIndex2 :
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
|
@ -660,7 +660,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
|
@ -490,7 +490,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
||||
/* Size the seq pool tables */
|
||||
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
||||
/* Reset the window */
|
||||
ZSTD_window_clear(&serialState->ldmState.window);
|
||||
ZSTD_window_init(&serialState->ldmState.window);
|
||||
serialState->ldmWindow = serialState->ldmState.window;
|
||||
/* Resize tables and output space if necessary. */
|
||||
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
|
||||
@ -1076,7 +1076,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
||||
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
|
||||
compressionLevel);
|
||||
mtctx->params.compressionLevel = compressionLevel;
|
||||
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
|
||||
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
cParams.windowLog = saved_wlog;
|
||||
mtctx->params.cParams = cParams;
|
||||
}
|
||||
@ -1786,7 +1786,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
|
||||
BYTE const* const bufferStart = (BYTE const*)buffer.start;
|
||||
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
||||
BYTE const* const rangeStart = (BYTE const*)range.start;
|
||||
BYTE const* const rangeEnd = rangeStart + range.size;
|
||||
BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
|
||||
|
||||
if (rangeStart == NULL || bufferStart == NULL)
|
||||
return 0;
|
||||
|
@ -181,17 +181,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
||||
|
||||
/* fill DTable */
|
||||
{ U32 n;
|
||||
for (n=0; n<nbSymbols; n++) {
|
||||
U32 const w = huffWeight[n];
|
||||
U32 const length = (1 << w) >> 1;
|
||||
U32 u;
|
||||
size_t const nEnd = nbSymbols;
|
||||
for (n=0; n<nEnd; n++) {
|
||||
size_t const w = huffWeight[n];
|
||||
size_t const length = (1 << w) >> 1;
|
||||
size_t const uStart = rankVal[w];
|
||||
size_t const uEnd = uStart + length;
|
||||
size_t u;
|
||||
HUF_DEltX1 D;
|
||||
D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
|
||||
for (u = rankVal[w]; u < rankVal[w] + length; u++)
|
||||
dt[u] = D;
|
||||
rankVal[w] += length;
|
||||
} }
|
||||
|
||||
D.byte = (BYTE)n;
|
||||
D.nbBits = (BYTE)(tableLog + 1 - w);
|
||||
rankVal[w] = (U32)uEnd;
|
||||
if (length < 4) {
|
||||
/* Use length in the loop bound so the compiler knows it is short. */
|
||||
for (u = 0; u < length; ++u)
|
||||
dt[uStart + u] = D;
|
||||
} else {
|
||||
/* Unroll the loop 4 times, we know it is a power of 2. */
|
||||
for (u = uStart; u < uEnd; u += 4) {
|
||||
dt[u + 0] = D;
|
||||
dt[u + 1] = D;
|
||||
dt[u + 2] = D;
|
||||
dt[u + 3] = D;
|
||||
} } } }
|
||||
return iSize;
|
||||
}
|
||||
|
||||
@ -282,6 +294,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
||||
{ const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
BYTE* const olimit = oend - 3;
|
||||
const void* const dtPtr = DTable + 1;
|
||||
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
||||
|
||||
@ -306,9 +319,9 @@ HUF_decompress4X1_usingDTable_internal_body(
|
||||
BYTE* op2 = opStart2;
|
||||
BYTE* op3 = opStart3;
|
||||
BYTE* op4 = opStart4;
|
||||
U32 endSignal = BIT_DStream_unfinished;
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
U32 const dtLog = dtd.tableLog;
|
||||
U32 endSignal = 1;
|
||||
|
||||
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
||||
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
||||
@ -317,8 +330,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
||||
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
||||
|
||||
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
||||
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
||||
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
||||
@ -335,10 +347,10 @@ HUF_decompress4X1_usingDTable_internal_body(
|
||||
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
||||
BIT_reloadDStream(&bitD1);
|
||||
BIT_reloadDStream(&bitD2);
|
||||
BIT_reloadDStream(&bitD3);
|
||||
BIT_reloadDStream(&bitD4);
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
||||
}
|
||||
|
||||
/* check corruption */
|
||||
@ -757,7 +769,6 @@ HUF_decompress1X2_usingDTable_internal_body(
|
||||
return dstSize;
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
HUF_decompress4X2_usingDTable_internal_body(
|
||||
void* dst, size_t dstSize,
|
||||
@ -769,6 +780,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
||||
{ const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
BYTE* const olimit = oend - (sizeof(size_t)-1);
|
||||
const void* const dtPtr = DTable+1;
|
||||
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
||||
|
||||
@ -793,7 +805,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
||||
BYTE* op2 = opStart2;
|
||||
BYTE* op3 = opStart3;
|
||||
BYTE* op4 = opStart4;
|
||||
U32 endSignal;
|
||||
U32 endSignal = 1;
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
U32 const dtLog = dtd.tableLog;
|
||||
|
||||
@ -804,8 +816,29 @@ HUF_decompress4X2_usingDTable_internal_body(
|
||||
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
||||
|
||||
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
||||
for ( ; (endSignal) & (op4 < olimit); ) {
|
||||
#ifdef __clang__
|
||||
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
||||
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
||||
#else
|
||||
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
||||
@ -822,8 +855,11 @@ HUF_decompress4X2_usingDTable_internal_body(
|
||||
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
||||
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
||||
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* check corruption */
|
||||
|
@ -618,7 +618,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)(*srcPtr);
|
||||
BYTE* const ostart = (BYTE* const)dst;
|
||||
BYTE* const oend = ostart + dstCapacity;
|
||||
BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
|
||||
BYTE* op = ostart;
|
||||
size_t remainingSrcSize = *srcSizePtr;
|
||||
|
||||
@ -669,7 +669,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
|
||||
if (ZSTD_isError(decodedSize)) return decodedSize;
|
||||
if (dctx->fParams.checksumFlag)
|
||||
XXH64_update(&dctx->xxhState, op, decodedSize);
|
||||
op += decodedSize;
|
||||
if (decodedSize != 0)
|
||||
op += decodedSize;
|
||||
assert(ip != NULL);
|
||||
ip += cBlockSize;
|
||||
remainingSrcSize -= cBlockSize;
|
||||
if (blockProperties.lastBlock) break;
|
||||
@ -776,7 +778,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
"error.");
|
||||
if (ZSTD_isError(res)) return res;
|
||||
assert(res <= dstCapacity);
|
||||
dst = (BYTE*)dst + res;
|
||||
if (res != 0)
|
||||
dst = (BYTE*)dst + res;
|
||||
dstCapacity -= res;
|
||||
}
|
||||
moreThan1Frame = 1;
|
||||
@ -1486,11 +1489,13 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
||||
|
||||
size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
||||
{
|
||||
const char* const istart = (const char*)(input->src) + input->pos;
|
||||
const char* const iend = (const char*)(input->src) + input->size;
|
||||
const char* const src = (const char*)input->src;
|
||||
const char* const istart = input->pos != 0 ? src + input->pos : src;
|
||||
const char* const iend = input->size != 0 ? src + input->size : src;
|
||||
const char* ip = istart;
|
||||
char* const ostart = (char*)(output->dst) + output->pos;
|
||||
char* const oend = (char*)(output->dst) + output->size;
|
||||
char* const dst = (char*)output->dst;
|
||||
char* const ostart = output->pos != 0 ? dst + output->pos : dst;
|
||||
char* const oend = output->size != 0 ? dst + output->size : dst;
|
||||
char* op = ostart;
|
||||
U32 someMoreWork = 1;
|
||||
|
||||
|
@ -715,7 +715,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
|
||||
/* Errors and uncommon cases handled here. */
|
||||
assert(oLitEnd < oMatchEnd);
|
||||
if (iLitEnd > litLimit || oMatchEnd > oend_w)
|
||||
if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w))
|
||||
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
||||
|
||||
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
||||
@ -729,7 +729,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
*/
|
||||
assert(WILDCOPY_OVERLENGTH >= 16);
|
||||
ZSTD_copy16(op, (*litPtr));
|
||||
if (sequence.litLength > 16) {
|
||||
if (UNLIKELY(sequence.litLength > 16)) {
|
||||
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
|
||||
}
|
||||
op = oLitEnd;
|
||||
@ -738,7 +738,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
/* Copy Match */
|
||||
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
||||
/* offset beyond prefix -> go into extDict */
|
||||
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
|
||||
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected);
|
||||
match = dictEnd + (match - prefixStart);
|
||||
if (match + sequence.matchLength <= dictEnd) {
|
||||
memmove(oLitEnd, match, sequence.matchLength);
|
||||
@ -760,7 +760,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
||||
* without overlap checking.
|
||||
*/
|
||||
if (sequence.offset >= WILDCOPY_VECLEN) {
|
||||
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
||||
/* We bet on a full wildcopy for matches, since we expect matches to be
|
||||
* longer than literals (in general). In silesia, ~10% of matches are longer
|
||||
* than 16 bytes.
|
||||
@ -802,6 +802,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
||||
DStatePtr->state = DInfo.nextState + lowBits;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE void
|
||||
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
||||
{
|
||||
U32 const nbBits = DInfo.nbBits;
|
||||
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
||||
DStatePtr->state = DInfo.nextState + lowBits;
|
||||
}
|
||||
|
||||
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
||||
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
||||
* bits before reloading. This value is the maximum number of bytes we read
|
||||
@ -813,25 +821,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
|
||||
: 0)
|
||||
|
||||
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
||||
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
||||
|
||||
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
||||
FORCE_INLINE_TEMPLATE seq_t
|
||||
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
||||
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
|
||||
{
|
||||
seq_t seq;
|
||||
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
|
||||
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
|
||||
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
||||
U32 const totalBits = llBits+mlBits+ofBits;
|
||||
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
||||
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
||||
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
||||
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
|
||||
ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
|
||||
ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
|
||||
U32 const llBase = llDInfo.baseValue;
|
||||
U32 const mlBase = mlDInfo.baseValue;
|
||||
U32 const ofBase = ofDInfo.baseValue;
|
||||
BYTE const llBits = llDInfo.nbAdditionalBits;
|
||||
BYTE const mlBits = mlDInfo.nbAdditionalBits;
|
||||
BYTE const ofBits = ofDInfo.nbAdditionalBits;
|
||||
BYTE const totalBits = llBits+mlBits+ofBits;
|
||||
|
||||
/* sequence */
|
||||
{ size_t offset;
|
||||
if (!ofBits)
|
||||
offset = 0;
|
||||
else {
|
||||
if (ofBits > 1) {
|
||||
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
||||
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
||||
assert(ofBits <= MaxOff);
|
||||
@ -845,53 +854,89 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
||||
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
||||
}
|
||||
}
|
||||
|
||||
if (ofBits <= 1) {
|
||||
offset += (llBase==0);
|
||||
if (offset) {
|
||||
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
||||
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
||||
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset = temp;
|
||||
} else { /* offset == 0 */
|
||||
offset = seqState->prevOffset[0];
|
||||
}
|
||||
} else {
|
||||
seqState->prevOffset[2] = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset;
|
||||
}
|
||||
} else {
|
||||
U32 const ll0 = (llBase == 0);
|
||||
if (LIKELY((ofBits == 0))) {
|
||||
if (LIKELY(!ll0))
|
||||
offset = seqState->prevOffset[0];
|
||||
else {
|
||||
offset = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset;
|
||||
}
|
||||
} else {
|
||||
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
||||
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
||||
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
||||
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset = temp;
|
||||
} } }
|
||||
seq.offset = offset;
|
||||
}
|
||||
|
||||
seq.matchLength = mlBase
|
||||
+ ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
|
||||
seq.matchLength = mlBase;
|
||||
if (mlBits > 0)
|
||||
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
||||
|
||||
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
||||
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
||||
|
||||
seq.litLength = llBase
|
||||
+ ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
|
||||
seq.litLength = llBase;
|
||||
if (llBits > 0)
|
||||
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
||||
|
||||
if (MEM_32bits())
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
|
||||
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
||||
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
||||
|
||||
/* ANS state update */
|
||||
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
||||
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
||||
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
||||
if (prefetch == ZSTD_p_prefetch) {
|
||||
size_t const pos = seqState->pos + seq.litLength;
|
||||
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
||||
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
||||
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
||||
seqState->pos = pos + seq.matchLength;
|
||||
}
|
||||
|
||||
/* ANS state update
|
||||
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
||||
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
||||
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
||||
* better option, so it is the default for other compilers. But, if you
|
||||
* measure that it is worse, please put up a pull request.
|
||||
*/
|
||||
{
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
const int kUseUpdateFseState = 1;
|
||||
#else
|
||||
const int kUseUpdateFseState = 0;
|
||||
#endif
|
||||
if (kUseUpdateFseState) {
|
||||
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
||||
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
||||
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
||||
} else {
|
||||
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
||||
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
||||
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
||||
}
|
||||
}
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
DONT_VECTORIZE
|
||||
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
@ -914,6 +959,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
/* Regen sequences */
|
||||
if (nbSeq) {
|
||||
seqState_t seqState;
|
||||
size_t error = 0;
|
||||
dctx->fseEntropy = 1;
|
||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
||||
RETURN_ERROR_IF(
|
||||
@ -928,17 +974,25 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
||||
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
||||
BIT_DStream_completed < BIT_DStream_overflow);
|
||||
|
||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
|
||||
nbSeq--;
|
||||
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
||||
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
||||
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||
op += oneSeqSize;
|
||||
} }
|
||||
for ( ; ; ) {
|
||||
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
|
||||
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
||||
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
||||
BIT_reloadDStream(&(seqState.DStream));
|
||||
/* gcc and clang both don't like early returns in this loop.
|
||||
* gcc doesn't like early breaks either.
|
||||
* Instead save an error and report it at the end.
|
||||
* When there is an error, don't increment op, so we don't
|
||||
* overwrite.
|
||||
*/
|
||||
if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
|
||||
else op += oneSeqSize;
|
||||
if (UNLIKELY(!--nbSeq)) break;
|
||||
}
|
||||
|
||||
/* check if reached exact end */
|
||||
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
||||
if (ZSTD_isError(error)) return error;
|
||||
RETURN_ERROR_IF(nbSeq, corruption_detected);
|
||||
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
|
||||
/* save reps for next block */
|
||||
@ -965,87 +1019,7 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
||||
}
|
||||
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
||||
|
||||
|
||||
|
||||
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
||||
FORCE_INLINE_TEMPLATE seq_t
|
||||
ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
|
||||
{
|
||||
seq_t seq;
|
||||
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
|
||||
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
|
||||
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
|
||||
U32 const totalBits = llBits+mlBits+ofBits;
|
||||
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
|
||||
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
|
||||
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
|
||||
|
||||
/* sequence */
|
||||
{ size_t offset;
|
||||
if (!ofBits)
|
||||
offset = 0;
|
||||
else {
|
||||
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
||||
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
||||
assert(ofBits <= MaxOff);
|
||||
if (MEM_32bits() && longOffsets) {
|
||||
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
||||
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
||||
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
||||
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
||||
} else {
|
||||
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
||||
}
|
||||
}
|
||||
|
||||
if (ofBits <= 1) {
|
||||
offset += (llBase==0);
|
||||
if (offset) {
|
||||
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
||||
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
|
||||
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset = temp;
|
||||
} else {
|
||||
offset = seqState->prevOffset[0];
|
||||
}
|
||||
} else {
|
||||
seqState->prevOffset[2] = seqState->prevOffset[1];
|
||||
seqState->prevOffset[1] = seqState->prevOffset[0];
|
||||
seqState->prevOffset[0] = offset;
|
||||
}
|
||||
seq.offset = offset;
|
||||
}
|
||||
|
||||
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
||||
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
||||
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
||||
|
||||
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
||||
if (MEM_32bits())
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
|
||||
{ size_t const pos = seqState->pos + seq.litLength;
|
||||
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
|
||||
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
||||
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
|
||||
seqState->pos = pos + seq.matchLength;
|
||||
}
|
||||
|
||||
/* ANS state update */
|
||||
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
||||
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
||||
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
ZSTD_decompressSequencesLong_body(
|
||||
ZSTD_DCtx* dctx,
|
||||
@ -1088,14 +1062,14 @@ ZSTD_decompressSequencesLong_body(
|
||||
|
||||
/* prepare in advance */
|
||||
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
||||
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||
sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
||||
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||
}
|
||||
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
|
||||
|
||||
/* decode and decompress */
|
||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
||||
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
|
||||
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||
|
@ -160,7 +160,7 @@ struct ZSTD_DCtx_s
|
||||
|
||||
/*! ZSTD_loadDEntropy() :
|
||||
* dict : must point at beginning of a valid zstd dictionary.
|
||||
* @return : size of entropy tables read */
|
||||
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
||||
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
const void* const dict, size_t const dictSize);
|
||||
|
||||
|
@ -48,6 +48,7 @@
|
||||
# define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
#include "compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -99,6 +100,29 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
||||
return MEM_readLE32((const char*)dictBuffer + 4);
|
||||
}
|
||||
|
||||
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
||||
{
|
||||
size_t headerSize;
|
||||
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
||||
|
||||
{ unsigned offcodeMaxValue = MaxOff;
|
||||
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
||||
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
||||
short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
|
||||
if (!bs || !wksp || !offcodeNCount) {
|
||||
headerSize = ERROR(memory_allocation);
|
||||
} else {
|
||||
ZSTD_reset_compressedBlockState(bs);
|
||||
headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
free(bs);
|
||||
free(wksp);
|
||||
free(offcodeNCount);
|
||||
}
|
||||
|
||||
return headerSize;
|
||||
}
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
@ -588,12 +612,12 @@ typedef struct
|
||||
|
||||
#define MAXREPOFFSET 1024
|
||||
|
||||
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
||||
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
||||
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
||||
const void* src, size_t srcSize,
|
||||
U32 notificationLevel)
|
||||
{
|
||||
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
||||
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
||||
size_t cSize;
|
||||
|
||||
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
||||
@ -731,7 +755,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
|
||||
/* collect stats on all samples */
|
||||
for (u=0; u<nbFiles; u++) {
|
||||
ZDICT_countEStats(esr, params,
|
||||
ZDICT_countEStats(esr, ¶ms,
|
||||
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
||||
(const char*)srcBuffer + pos, fileSizes[u],
|
||||
notificationLevel);
|
||||
|
@ -64,6 +64,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
||||
|
||||
/*====== Helper functions ======*/
|
||||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
||||
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
||||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
||||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
||||
|
||||
|
@ -763,7 +763,7 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
|
||||
|
||||
/* This function is redundant with the advanced API and equivalent to:
|
||||
*
|
||||
* ZSTD_DCtx_reset(zds);
|
||||
* ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
|
||||
* ZSTD_DCtx_refDDict(zds, NULL);
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
|
||||
|
1
programs/.gitignore
vendored
1
programs/.gitignore
vendored
@ -7,6 +7,7 @@ zstd-decompress
|
||||
zstd-frugal
|
||||
zstd-small
|
||||
zstd-nolegacy
|
||||
zstd-dictBuilder
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
|
@ -158,7 +158,7 @@ default: zstd-release
|
||||
all: zstd
|
||||
|
||||
.PHONY: allVariants
|
||||
allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy
|
||||
allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy zstd-dictBuilder
|
||||
|
||||
$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
|
||||
|
||||
@ -231,6 +231,10 @@ zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn
|
||||
zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c
|
||||
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
|
||||
|
||||
zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS
|
||||
zstd-dictBuilder: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZDICT_FILES) zstdcli.c util.c timefn.c fileio.c dibio.c
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
zstdmt: zstd
|
||||
ln -sf zstd zstdmt
|
||||
|
||||
@ -245,7 +249,7 @@ clean:
|
||||
@$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
|
||||
zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \
|
||||
zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \
|
||||
*.gcda default.profraw have_zlib$(EXT)
|
||||
zstd-dictBuilder$(EXT) *.gcda default.profraw have_zlib$(EXT)
|
||||
@echo Cleaning completed
|
||||
|
||||
MD2ROFF = ronn
|
||||
|
@ -502,7 +502,7 @@ static int FIO_remove(const char* path)
|
||||
#if defined(_WIN32) || defined(WIN32)
|
||||
/* windows doesn't allow remove read-only files,
|
||||
* so try to make it writable first */
|
||||
chmod(path, _S_IWRITE);
|
||||
UTIL_chmod(path, _S_IWRITE);
|
||||
#endif
|
||||
return remove(path);
|
||||
}
|
||||
@ -526,9 +526,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
|
||||
}
|
||||
|
||||
if (!UTIL_isRegularFile(srcFileName)
|
||||
#ifndef _MSC_VER
|
||||
&& !UTIL_isFIFO(srcFileName)
|
||||
#endif /* _MSC_VER */
|
||||
&& !UTIL_isFIFO(srcFileName)
|
||||
) {
|
||||
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
|
||||
srcFileName);
|
||||
@ -609,8 +607,11 @@ FIO_openDstFile(FIO_prefs_t* const prefs,
|
||||
{ FILE* const f = fopen( dstFileName, "wb" );
|
||||
if (f == NULL) {
|
||||
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
|
||||
} else if(srcFileName != NULL && strcmp (srcFileName, stdinmark)) {
|
||||
chmod(dstFileName, 00600);
|
||||
} else if (srcFileName != NULL
|
||||
&& strcmp (srcFileName, stdinmark)
|
||||
&& strcmp(dstFileName, nulmark) ) {
|
||||
/* reduce rights on newly created dst file while compression is ongoing */
|
||||
UTIL_chmod(dstFileName, 00600);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
@ -1393,7 +1394,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
|
||||
assert(ress.srcFile != NULL);
|
||||
if (ress.dstFile == NULL) {
|
||||
closeDstFile = 1;
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
|
||||
ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
|
||||
if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
|
||||
/* Must only be added after FIO_openDstFile() succeeds.
|
||||
@ -1415,6 +1416,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
|
||||
|
||||
clearHandler();
|
||||
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
|
||||
if (fclose(dstFile)) { /* error closing dstFile */
|
||||
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
|
||||
result=1;
|
||||
@ -1427,7 +1429,10 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
|
||||
} else if ( strcmp(dstFileName, stdoutmark)
|
||||
&& strcmp(dstFileName, nulmark)
|
||||
&& transfer_permissions) {
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transfering permissions into dst: %s \n", dstFileName);
|
||||
UTIL_setFileStat(dstFileName, &statbuf);
|
||||
} else {
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1462,6 +1467,7 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
|
||||
int compressionLevel)
|
||||
{
|
||||
int result;
|
||||
DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
|
||||
|
||||
/* ensure src is not a directory */
|
||||
if (UTIL_isDirectory(srcFileName)) {
|
||||
|
@ -44,7 +44,6 @@ extern "C" {
|
||||
exit(1); \
|
||||
} }
|
||||
|
||||
|
||||
/*
|
||||
* A modified version of realloc().
|
||||
* If UTIL_realloc() fails the original block is freed.
|
||||
@ -57,6 +56,10 @@ UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define chmod _chmod
|
||||
#endif
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* Console log
|
||||
@ -64,9 +67,16 @@ UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
|
||||
int g_utilDisplayLevel;
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* Public API
|
||||
******************************************/
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define LIST_SIZE_INCREASE (8*1024)
|
||||
#define MAX_FILE_OF_FILE_NAMES_SIZE (1<<20)*50
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Functions
|
||||
***************************************/
|
||||
|
||||
int UTIL_fileExist(const char* filename)
|
||||
{
|
||||
@ -98,6 +108,13 @@ int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* like chmod, but avoid changing permission of /dev/null */
|
||||
int UTIL_chmod(char const* filename, mode_t permissions)
|
||||
{
|
||||
if (!strcmp(filename, "/dev/null")) return 0; /* pretend success, but don't change anything */
|
||||
return chmod(filename, permissions);
|
||||
}
|
||||
|
||||
int UTIL_setFileStat(const char *filename, stat_t *statbuf)
|
||||
{
|
||||
int res = 0;
|
||||
@ -117,7 +134,7 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf)
|
||||
{
|
||||
/* (atime, mtime) */
|
||||
struct timespec timebuf[2] = { {0, UTIME_NOW} };
|
||||
timebuf[1] = statbuf->st_mtim;
|
||||
timebuf[1].tv_sec = statbuf->st_mtime;
|
||||
res += utimensat(AT_FDCWD, filename, timebuf, 0);
|
||||
}
|
||||
#endif
|
||||
@ -126,21 +143,20 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf)
|
||||
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
|
||||
#endif
|
||||
|
||||
res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */
|
||||
res += UTIL_chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */
|
||||
|
||||
errno = 0;
|
||||
return -res; /* number of errors is returned */
|
||||
}
|
||||
|
||||
U32 UTIL_isDirectory(const char* infilename)
|
||||
int UTIL_isDirectory(const char* infilename)
|
||||
{
|
||||
int r;
|
||||
stat_t statbuf;
|
||||
#if defined(_MSC_VER)
|
||||
r = _stat64(infilename, &statbuf);
|
||||
int const r = _stat64(infilename, &statbuf);
|
||||
if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
|
||||
#else
|
||||
r = stat(infilename, &statbuf);
|
||||
int const r = stat(infilename, &statbuf);
|
||||
if (!r && S_ISDIR(statbuf.st_mode)) return 1;
|
||||
#endif
|
||||
return 0;
|
||||
@ -170,28 +186,25 @@ int UTIL_isSameFile(const char* fName1, const char* fName2)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef _MSC_VER
|
||||
/* Using this to distinguish named pipes */
|
||||
U32 UTIL_isFIFO(const char* infilename)
|
||||
/* UTIL_isFIFO : distinguish named pipes */
|
||||
int UTIL_isFIFO(const char* infilename)
|
||||
{
|
||||
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
|
||||
#if PLATFORM_POSIX_VERSION >= 200112L
|
||||
stat_t statbuf;
|
||||
int r = UTIL_getFileStat(infilename, &statbuf);
|
||||
int const r = UTIL_getFileStat(infilename, &statbuf);
|
||||
if (!r && S_ISFIFO(statbuf.st_mode)) return 1;
|
||||
#endif
|
||||
(void)infilename;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
U32 UTIL_isLink(const char* infilename)
|
||||
int UTIL_isLink(const char* infilename)
|
||||
{
|
||||
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
|
||||
#if PLATFORM_POSIX_VERSION >= 200112L
|
||||
int r;
|
||||
stat_t statbuf;
|
||||
r = lstat(infilename, &statbuf);
|
||||
int const r = lstat(infilename, &statbuf);
|
||||
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
|
||||
#endif
|
||||
(void)infilename;
|
||||
|
@ -30,12 +30,12 @@ extern "C" {
|
||||
# include <io.h> /* _chmod */
|
||||
#else
|
||||
# include <unistd.h> /* chown, stat */
|
||||
#if PLATFORM_POSIX_VERSION < 200809L
|
||||
# include <utime.h> /* utime */
|
||||
#else
|
||||
# include <fcntl.h> /* AT_FDCWD */
|
||||
# include <sys/stat.h> /* utimensat */
|
||||
#endif
|
||||
# if PLATFORM_POSIX_VERSION < 200809L
|
||||
# include <utime.h> /* utime */
|
||||
# else
|
||||
# include <fcntl.h> /* AT_FDCWD */
|
||||
# include <sys/stat.h> /* utimensat */
|
||||
# endif
|
||||
#endif
|
||||
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
|
||||
#include "mem.h" /* U32, U64 */
|
||||
@ -85,12 +85,6 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
#define LIST_SIZE_INCREASE (8*1024)
|
||||
#define MAX_FILE_OF_FILE_NAMES_SIZE (1<<20)*50
|
||||
|
||||
/*-****************************************
|
||||
* Compiler specifics
|
||||
******************************************/
|
||||
@ -120,8 +114,8 @@ extern int g_utilDisplayLevel;
|
||||
* File functions
|
||||
******************************************/
|
||||
#if defined(_MSC_VER)
|
||||
#define chmod _chmod
|
||||
typedef struct __stat64 stat_t;
|
||||
typedef int mode_t;
|
||||
#else
|
||||
typedef struct stat stat_t;
|
||||
#endif
|
||||
@ -129,22 +123,20 @@ extern int g_utilDisplayLevel;
|
||||
|
||||
int UTIL_fileExist(const char* filename);
|
||||
int UTIL_isRegularFile(const char* infilename);
|
||||
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
|
||||
U32 UTIL_isDirectory(const char* infilename);
|
||||
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
|
||||
int UTIL_isDirectory(const char* infilename);
|
||||
int UTIL_isSameFile(const char* file1, const char* file2);
|
||||
int UTIL_compareStr(const void *p1, const void *p2);
|
||||
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
|
||||
const char* UTIL_getFileExtension(const char* infilename);
|
||||
int UTIL_isLink(const char* infilename);
|
||||
int UTIL_isFIFO(const char* infilename);
|
||||
|
||||
#ifndef _MSC_VER
|
||||
U32 UTIL_isFIFO(const char* infilename);
|
||||
#endif
|
||||
U32 UTIL_isLink(const char* infilename);
|
||||
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
|
||||
U64 UTIL_getFileSize(const char* infilename);
|
||||
|
||||
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
|
||||
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
|
||||
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
|
||||
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
|
||||
int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */
|
||||
int UTIL_compareStr(const void *p1, const void *p2);
|
||||
const char* UTIL_getFileExtension(const char* infilename);
|
||||
|
||||
|
||||
/*-****************************************
|
||||
|
@ -405,7 +405,7 @@ The list of available _options_:
|
||||
Bigger hash tables cause less collisions which usually makes compression
|
||||
faster, but requires more memory during compression.
|
||||
|
||||
The minimum _hlog_ is 6 (64 B) and the maximum is 26 (128 MiB).
|
||||
The minimum _hlog_ is 6 (64 B) and the maximum is 30 (1 GiB).
|
||||
|
||||
- `chainLog`=_clog_, `clog`=_clog_:
|
||||
Specify the maximum number of bits for a hash chain or a binary tree.
|
||||
@ -416,7 +416,8 @@ The list of available _options_:
|
||||
compression.
|
||||
This option is ignored for the ZSTD_fast strategy.
|
||||
|
||||
The minimum _clog_ is 6 (64 B) and the maximum is 28 (256 MiB).
|
||||
The minimum _clog_ is 6 (64 B) and the maximum is 29 (524 Mib) on 32-bit platforms
|
||||
and 30 (1 Gib) on 64-bit platforms.
|
||||
|
||||
- `searchLog`=_slog_, `slog`=_slog_:
|
||||
Specify the maximum number of searches in a hash chain or a binary tree
|
||||
@ -425,7 +426,7 @@ The list of available _options_:
|
||||
More searches increases the chance to find a match which usually increases
|
||||
compression ratio but decreases compression speed.
|
||||
|
||||
The minimum _slog_ is 1 and the maximum is 26.
|
||||
The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
|
||||
|
||||
- `minMatch`=_mml_, `mml`=_mml_:
|
||||
Specify the minimum searched length of a match in a hash table.
|
||||
@ -450,7 +451,7 @@ The list of available _options_:
|
||||
|
||||
For all other strategies, this field has no impact.
|
||||
|
||||
The minimum _tlen_ is 0 and the maximum is 999.
|
||||
The minimum _tlen_ is 0 and the maximum is 128 Kib.
|
||||
|
||||
- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
|
||||
Determine `overlapSize`, amount of data reloaded from previous job.
|
||||
@ -473,7 +474,7 @@ The list of available _options_:
|
||||
Bigger hash tables usually improve compression ratio at the expense of more
|
||||
memory during compression and a decrease in compression speed.
|
||||
|
||||
The minimum _lhlog_ is 6 and the maximum is 26 (default: 20).
|
||||
The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
|
||||
|
||||
- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
|
||||
Specify the minimum searched length of a match for long distance matching.
|
||||
@ -493,7 +494,7 @@ The list of available _options_:
|
||||
Larger bucket sizes improve collision resolution but decrease compression
|
||||
speed.
|
||||
|
||||
The minimum _lblog_ is 0 and the maximum is 8 (default: 3).
|
||||
The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
|
||||
|
||||
- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
|
||||
Specify the frequency of inserting entries into the long distance matching
|
||||
|
@ -1005,12 +1005,10 @@ int main(int argCount, const char* argv[])
|
||||
unsigned u, fileNamesNb;
|
||||
unsigned const nbFilenames = (unsigned)filenames->tableSize;
|
||||
for (u=0, fileNamesNb=0; u<nbFilenames; u++) {
|
||||
if (UTIL_isLink(filenames->fileNames[u])
|
||||
#ifndef _MSC_VER
|
||||
&& !UTIL_isFIFO(filenames->fileNames[u])
|
||||
#endif /* _MSC_VER */
|
||||
if ( UTIL_isLink(filenames->fileNames[u])
|
||||
&& !UTIL_isFIFO(filenames->fileNames[u])
|
||||
) {
|
||||
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenames->fileNames[u]);
|
||||
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);
|
||||
} else {
|
||||
filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];
|
||||
} }
|
||||
|
6
tests/fuzz/.gitignore
vendored
6
tests/fuzz/.gitignore
vendored
@ -2,7 +2,13 @@
|
||||
corpora
|
||||
block_decompress
|
||||
block_round_trip
|
||||
dictionary_decompress
|
||||
dictionary_loader
|
||||
dictionary_round_trip
|
||||
simple_compress
|
||||
simple_decompress
|
||||
simple_round_trip
|
||||
stream_decompress
|
||||
stream_round_trip
|
||||
zstd_frame_info
|
||||
fuzz-*.log
|
@ -73,7 +73,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
|
||||
|
||||
size_t const rBufSize = size;
|
||||
void* rBuf = malloc(rBufSize);
|
||||
size_t cBufSize = ZSTD_compressBound(size);
|
||||
size_t cBufSize = ZSTD_compressBound(size) * 2;
|
||||
void *cBuf;
|
||||
/* Half of the time fuzz with a 1 byte smaller output size.
|
||||
* This will still succeed because we force the checksum to be disabled,
|
||||
|
@ -48,7 +48,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
|
||||
{
|
||||
size_t const rBufSize = size;
|
||||
void* rBuf = malloc(rBufSize);
|
||||
size_t cBufSize = ZSTD_compressBound(size);
|
||||
size_t cBufSize = ZSTD_compressBound(size) * 2;
|
||||
void* cBuf;
|
||||
|
||||
/* Give a random portion of src data to the producer, to use for
|
||||
|
@ -96,6 +96,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
|
||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
||||
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
|
||||
}
|
||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
||||
setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
|
||||
}
|
||||
}
|
||||
|
||||
FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
|
||||
|
@ -490,6 +490,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : compress a NULL input with each level : ", testNb++);
|
||||
{ int level = -1;
|
||||
ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||
if (!cctx) goto _output_error;
|
||||
for (level = -1; level <= ZSTD_maxCLevel(); ++level) {
|
||||
CHECK_Z( ZSTD_compress(compressedBuffer, compressedBufferSize, NULL, 0, level) );
|
||||
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level) );
|
||||
CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, NULL, 0) );
|
||||
}
|
||||
ZSTD_freeCCtx(cctx);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++);
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19);
|
||||
@ -884,6 +897,28 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
ZSTDMT_freeCCtx(mtctx);
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(3, "test%3u : compress empty string and decompress with small window log : ", testNb++);
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
char out[32];
|
||||
if (cctx == NULL || dctx == NULL) goto _output_error;
|
||||
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
|
||||
CHECK_VAR(cSize, ZSTD_compress2(cctx, out, sizeof(out), NULL, 0) );
|
||||
DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)cSize);
|
||||
|
||||
CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) );
|
||||
{ char const* outPtr = out;
|
||||
ZSTD_inBuffer inBuffer = { outPtr, cSize, 0 };
|
||||
ZSTD_outBuffer outBuffer = { NULL, 0, 0 };
|
||||
size_t dSize;
|
||||
CHECK_VAR(dSize, ZSTD_decompressStream(dctx, &outBuffer, &inBuffer) );
|
||||
if (dSize != 0) goto _output_error;
|
||||
}
|
||||
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
|
||||
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||
size_t cSize1, cSize2;
|
||||
@ -1137,6 +1172,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
||||
size_t dictSize;
|
||||
U32 dictID;
|
||||
size_t dictHeaderSize;
|
||||
|
||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||
free(dictBuffer);
|
||||
@ -1226,6 +1262,29 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
if (dictID==0) goto _output_error;
|
||||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : check dict header size no error : ", testNb++);
|
||||
dictHeaderSize = ZDICT_getDictHeaderSize(dictBuffer, dictSize);
|
||||
if (dictHeaderSize==0) goto _output_error;
|
||||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
|
||||
{ unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
|
||||
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
|
||||
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
|
||||
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
|
||||
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
|
||||
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
|
||||
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
|
||||
dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
|
||||
if (dictHeaderSize != 115) goto _output_error;
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++);
|
||||
cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
|
||||
CNBuffer, CNBuffSize,
|
||||
@ -1999,14 +2058,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
|
||||
/* long rle test */
|
||||
{ size_t sampleSize = 0;
|
||||
size_t expectedCompressedSize = 39; /* block 1, 2: compressed, block 3: RLE, zstd 1.4.4 */
|
||||
DISPLAYLEVEL(3, "test%3i : Long RLE test : ", testNb++);
|
||||
RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., seed+1);
|
||||
memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
|
||||
sampleSize += 256 KB - 1;
|
||||
RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., seed+2);
|
||||
memset((char*)CNBuffer+sampleSize, 'A', 96 KB);
|
||||
sampleSize += 96 KB;
|
||||
cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
|
||||
if (ZSTD_isError(cSize)) goto _output_error;
|
||||
if (ZSTD_isError(cSize) || cSize > expectedCompressedSize) goto _output_error;
|
||||
{ CHECK_NEWV(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
|
||||
if (regenSize!=sampleSize) goto _output_error; }
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
@ -92,6 +92,11 @@ case "$UNAME" in
|
||||
*) MD5SUM="md5sum" ;;
|
||||
esac
|
||||
|
||||
MTIME="stat -c %Y"
|
||||
case "$UNAME" in
|
||||
Darwin | FreeBSD | OpenBSD) MTIME="stat -f %m" ;;
|
||||
esac
|
||||
|
||||
DIFF="diff"
|
||||
case "$UNAME" in
|
||||
SunOS) DIFF="gdiff" ;;
|
||||
@ -215,20 +220,19 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t
|
||||
$ZSTD -b --fast=1 -i0e1 tmp --compress-literals
|
||||
$ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
|
||||
|
||||
println "test: --exclude-compressed flag"
|
||||
println "\n===> --exclude-compressed flag"
|
||||
rm -rf precompressedFilterTestDir
|
||||
mkdir -p precompressedFilterTestDir
|
||||
./datagen $size > precompressedFilterTestDir/input.5
|
||||
./datagen $size > precompressedFilterTestDir/input.6
|
||||
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
|
||||
sleep 5
|
||||
./datagen $size > precompressedFilterTestDir/input.7
|
||||
./datagen $size > precompressedFilterTestDir/input.8
|
||||
$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
|
||||
test ! -f precompressedFilterTestDir/input.5.zst.zst
|
||||
test ! -f precompressedFilterTestDir/input.6.zst.zst
|
||||
file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
|
||||
file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
|
||||
file1timestamp=`$MTIME precompressedFilterTestDir/input.5.zst`
|
||||
file2timestamp=`$MTIME precompressedFilterTestDir/input.7.zst`
|
||||
if [[ $file2timestamp -ge $file1timestamp ]]; then
|
||||
println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
|
||||
else
|
||||
@ -246,7 +250,7 @@ test -f precompressedFilterTestDir/input.5.zst.zst
|
||||
test -f precompressedFilterTestDir/input.6.zst.zst
|
||||
println "Test completed"
|
||||
|
||||
println "test : file removal"
|
||||
println "\n===> file removal"
|
||||
$ZSTD -f --rm tmp
|
||||
test ! -f tmp # tmp should no longer be present
|
||||
$ZSTD -f -d --rm tmp.zst
|
||||
@ -274,14 +278,17 @@ test -f tmp.zst # destination file should still be present
|
||||
rm -rf tmp* # may also erase tmp* directory from previous failed run
|
||||
|
||||
|
||||
println "\n===> decompression only tests "
|
||||
head -c 1048576 /dev/zero > tmp
|
||||
println "\n===> decompression only tests "
|
||||
# the following test verifies that the decoder is compatible with RLE as first block
|
||||
# older versions of zstd cli are not able to decode such corner case.
|
||||
# As a consequence, the zstd cli do not generate them, to maintain compatibility with older versions.
|
||||
dd bs=1048576 count=1 if=/dev/zero of=tmp
|
||||
$ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
|
||||
$DIFF -s tmp1 tmp
|
||||
rm tmp*
|
||||
|
||||
|
||||
println "test : compress multiple files"
|
||||
println "\n===> compress multiple files"
|
||||
println hello > tmp1
|
||||
println world > tmp2
|
||||
$ZSTD tmp1 tmp2 -o "$INTOVOID" -f
|
||||
@ -328,7 +335,22 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
||||
rm tmp*
|
||||
|
||||
|
||||
println "test : compress multiple files into an output directory, --output-dir-flat"
|
||||
if [ -n "$DEVNULLRIGHTS" ]
|
||||
then
|
||||
# these tests requires sudo rights, which is uncommon.
|
||||
# they are only triggered if DEVNULLRIGHTS macro is defined.
|
||||
println "\n===> checking /dev/null permissions are unaltered "
|
||||
./datagen > tmp
|
||||
sudo $ZSTD tmp -o $INTOVOID # sudo rights could modify /dev/null permissions
|
||||
sudo $ZSTD tmp -c > $INTOVOID
|
||||
$ZSTD tmp -f -o tmp.zst
|
||||
sudo $ZSTD -d tmp.zst -c > $INTOVOID
|
||||
sudo $ZSTD -d tmp.zst -o $INTOVOID
|
||||
ls -las $INTOVOID | grep "rw-rw-rw-"
|
||||
fi
|
||||
|
||||
|
||||
println "\n===> compress multiple files into an output directory, --output-dir-flat"
|
||||
println henlo > tmp1
|
||||
mkdir tmpInputTestDir
|
||||
mkdir tmpInputTestDir/we
|
||||
@ -436,7 +458,6 @@ $ZSTD -dcf tmp1
|
||||
|
||||
|
||||
println "\n===> frame concatenation "
|
||||
|
||||
println "hello " > hello.tmp
|
||||
println "world!" > world.tmp
|
||||
cat hello.tmp world.tmp > helloworld.tmp
|
||||
@ -1241,9 +1262,9 @@ rm -f tmp* dictionary
|
||||
if [ "$isWindows" = false ] ; then
|
||||
|
||||
println "\n===> zstd fifo named pipe test "
|
||||
head -c 10 /dev/zero > tmp_original
|
||||
dd bs=1 count=10 if=/dev/zero of=tmp_original
|
||||
mkfifo named_pipe
|
||||
head -c 10 /dev/zero > named_pipe &
|
||||
dd bs=1 count=10 if=/dev/zero of=named_pipe &
|
||||
$ZSTD named_pipe -o tmp_compressed
|
||||
$ZSTD -d -o tmp_decompressed tmp_compressed
|
||||
$DIFF -s tmp_original tmp_decompressed
|
||||
|
@ -509,6 +509,33 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : NULL buffers : ", testNb++);
|
||||
inBuff.src = NULL;
|
||||
inBuff.size = 0;
|
||||
inBuff.pos = 0;
|
||||
outBuff.dst = NULL;
|
||||
outBuff.size = 0;
|
||||
outBuff.pos = 0;
|
||||
CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) );
|
||||
CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed");
|
||||
CHECK_Z( ZSTD_endStream(zc, &outBuff) );
|
||||
outBuff.dst = (char*)(compressedBuffer);
|
||||
outBuff.size = compressedBufferSize;
|
||||
outBuff.pos = 0;
|
||||
{ size_t const r = ZSTD_endStream(zc, &outBuff);
|
||||
CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r);
|
||||
}
|
||||
inBuff.src = outBuff.dst;
|
||||
inBuff.size = outBuff.pos;
|
||||
inBuff.pos = 0;
|
||||
outBuff.dst = NULL;
|
||||
outBuff.size = 0;
|
||||
outBuff.pos = 0;
|
||||
CHECK_Z( ZSTD_initDStream(zd) );
|
||||
{ size_t const ret = ZSTD_decompressStream(zd, &outBuff, &inBuff);
|
||||
if (ret != 0) goto _output_error;
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK\n");
|
||||
/* _srcSize compression test */
|
||||
DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
|
||||
CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) );
|
||||
|
@ -20,8 +20,8 @@ TEST_FILE = ../doc/zstd_compression_format.md
|
||||
|
||||
CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \
|
||||
-I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
|
||||
STDFLAGS = -std=c90 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
|
||||
-DNO_snprintf -DNO_vsnprintf # strict ISO C90 is missing these prototypes
|
||||
STDFLAGS = -std=c89 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
|
||||
-DNO_snprintf -DNO_vsnprintf # strict ANSI C89 is missing these prototypes
|
||||
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
|
||||
-Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
|
||||
-Wstrict-aliasing=1
|
||||
|
Loading…
x
Reference in New Issue
Block a user