From a8006264cfb4629e5ec23534fac1330609c99ebb Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Aug 2020 15:28:48 -0700 Subject: [PATCH 01/36] small blocks benchmark --- tests/Makefile | 4 + tests/smallbench.c | 835 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 839 insertions(+) create mode 100644 tests/smallbench.c diff --git a/tests/Makefile b/tests/Makefile index d347a948..3ae6d429 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -140,6 +140,10 @@ fullbench fullbench32 : $(ZSTD_FILES) fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) +smallbench: DEBUGFLAGS = -DNDEBUG +smallbench: $(ZSTD_OBJECTS) $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c smallbench.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ fullbench-lib : zstd-staticLib fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c diff --git a/tests/smallbench.c b/tests/smallbench.c new file mode 100644 index 00000000..319ebba5 --- /dev/null +++ b/tests/smallbench.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*_************************************ +* Includes +**************************************/ +#include "util.h" /* Compiler options, UTIL_GetFileSize */ +#include /* malloc */ +#include /* fprintf, fopen, ftello64 */ +#include + +#include "mem.h" /* U32 */ +#include "zstd_internal.h" /* ZSTD_decodeSeqHeaders, ZSTD_blockHeaderSize, ZSTD_getcBlockSize, blockType_e, KB, MB */ +#include "decompress/zstd_decompress_internal.h" /* ZSTD_DCtx internals */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */ +#include "zstd.h" /* ZSTD_versionString */ +#include "util.h" /* time functions */ +#include "timefn.h" /* time functions */ +#include "datagen.h" +#include "benchfn.h" /* CustomBench */ +#include "benchzstd.h" /* MB_UNIT */ + + +/*_************************************ +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "Zstandard small blocks benchmark" +#define AUTHOR "Nick Terrell" +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_versionString(), (int)(sizeof(void*)*8), AUTHOR, __DATE__ + +#define NBLOOPS 6 +#define TIMELOOP_S 2 + +#define MAX_MEM (1984 MB) + +#define DEFAULT_CLEVEL 1 + +#define COMPRESSIBILITY_DEFAULT 0.50 +static const size_t kSampleSizeDefault = 10000000; + +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + + +/*_************************************ +* Macros +**************************************/ +#define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) + +#define CONTROL(c) { if (!(c)) { DISPLAY("%s:%d:%s: CONTROL failed: %s \n", __FILE__, __LINE__, __func__, #c); abort(); } } /* like assert(), but cannot be disabled */ + +/*_************************************ +* Benchmark Parameters +**************************************/ +static unsigned g_nbIterations = NBLOOPS; + + +/*_******************************************************* +* Private functions +*********************************************************/ +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + requiredMem += step; + do { + testmem = malloc ((size_t)requiredMem); + requiredMem -= step; + } while (!testmem); + + free (testmem); + return (size_t) requiredMem; +} + + +/*_******************************************************* +* Benchmark wrappers +*********************************************************/ +typedef struct { + BYTE const* begin; + BYTE const* end; + size_t uncompressedSize; +} block_t; + +typedef struct { + size_t numBlocks; + block_t blocks[]; +} blocks_t; + +static size_t block_getSize(block_t block) { + return (size_t)(block.end - block.begin); +} + +static size_t compressBlockBound(size_t srcSize, size_t blockSize) +{ + size_t const blockBound = ZSTD_compressBound(blockSize); + return blockBound * (srcSize + blockSize - 1) / blockSize; +} + +static blocks_t* compressBlocks(ZSTD_CCtx* cctx, void* dst, size_t dstSize, void const* src, size_t srcSize, size_t blockSize) +{ + uint8_t* op = (uint8_t*)dst; + uint8_t* const oend = op + dstSize; + uint8_t const* ip = (uint8_t const*)src; + uint8_t const* const iend = ip + srcSize; + size_t const numBlocks = (srcSize + blockSize - 1) / blockSize; + blocks_t* const blocks = (blocks_t*)malloc(sizeof(blocks_t) + numBlocks * sizeof(block_t)); + CONTROL(blocks != NULL); + + blocks->numBlocks = numBlocks; + 
for (size_t i = 0; i < numBlocks; ++i) { + size_t const isize = MIN(blockSize, (size_t)(iend - ip)); + size_t const cBlockSize = ZSTD_compress2(cctx, op, (size_t)(oend - op), ip, isize); + CONTROL(!ZSTD_isError(cBlockSize)); + CONTROL(isize > 0); + blocks->blocks[i].begin = op; + blocks->blocks[i].end = op + cBlockSize; + blocks->blocks[i].uncompressedSize = isize; + ip += isize; + op += cBlockSize; + } + CONTROL(ip == iend); + + return blocks; +} + +static void skipToLiterals(blocks_t* blocks) +{ + size_t b; + size_t outBlock = 0; + for (b = 0; b < blocks->numBlocks; ++b) { + block_t block = blocks->blocks[b]; + /* Skip frame header */ + { + size_t const fhSize = ZSTD_frameHeaderSize(block.begin, block_getSize(block)); + CONTROL(!ZSTD_isError(fhSize)); + block.begin += fhSize; + } + /* Truncate to end of first block and skip uncompressed blocks */ + { + blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(block.begin, block_getSize(block), &bp); + CONTROL(!ZSTD_isError(cBlockSize)); + if (bp.blockType != bt_compressed) { + /* Don't write the output block */ + continue; + } + /* End of first block */ + block.end = block.begin + ZSTD_blockHeaderSize + cBlockSize; + } + /* Skip block header */ + block.begin += ZSTD_blockHeaderSize; + /* Write the output block */ + blocks->blocks[outBlock++] = block; + } + CONTROL(outBlock <= blocks->numBlocks); + blocks->numBlocks = outBlock; +} + +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); +static void skipToSequences(blocks_t* blocks, ZSTD_DCtx* dctx) +{ + skipToLiterals(blocks); + size_t b; + for (b = 0; b < blocks->numBlocks; ++b) { + block_t* const block = &blocks->blocks[b]; + CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); + CONTROL(block->begin < block->end); + { + size_t const litSize = ZSTD_decodeLiteralsBlock(dctx, block->begin, block_getSize(*block)); + CONTROL(!ZSTD_isError(litSize)); + block->begin += litSize; + } + CONTROL(block->begin < block->end); + } +} + 
+static size_t totalUncompressedSize(blocks_t const* blocks) +{ + size_t total = 0; + size_t b; + for (b = 0; b < blocks->numBlocks; ++b) { + total += blocks->blocks[b].uncompressedSize; + } + return total; +} + +FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src, size_t srcSize) +{ + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + { + BYTE const* istart = (BYTE const*)src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + if (litEncType == set_compressed) { + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + return HUF_readDTableX1_wksp( + dctx->entropy.hufTable, + istart+lhSize, litCSize, + dctx->workspace, sizeof(dctx->workspace)); + } + } + return 0; +} + +static void benchmark_ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, blocks_t const* blocks) +{ + size_t const numBlocks = blocks->numBlocks; + size_t b; + CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); + for (b = 0; b < numBlocks; ++b) { + block_t const block = blocks->blocks[b]; + size_t const ret = ZSTD_decodeLiteralsHeader(dctx, block.begin, 
block_getSize(block)); + CONTROL(!ZSTD_isError(ret)); + } +} + +static void benchmark_ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, blocks_t const* blocks) +{ + size_t const numBlocks = blocks->numBlocks; + size_t b; + CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); + for (b = 0; b < numBlocks; ++b) { + block_t const block = blocks->blocks[b]; + int nbSeq; + size_t const cSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, block.begin, block_getSize(block)); + CONTROL(!ZSTD_isError(cSize)); + } +} + +#if 0 +static ZSTD_CCtx* g_zcc = NULL; + +static size_t +local_ZSTD_compress(const void* src, size_t srcSize, + void* dst, size_t dstSize, + void* payload) +{ + ZSTD_parameters p; + ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p); + //return ZSTD_compress(dst, dstSize, src, srcSize, cLevel); +} + +static size_t g_cSize = 0; +static size_t local_ZSTD_decompress(const void* src, size_t srcSize, + void* dst, size_t dstSize, + void* buff2) +{ + (void)src; (void)srcSize; + return ZSTD_decompress(dst, dstSize, buff2, g_cSize); +} + +static ZSTD_DCtx* g_zdc = NULL; + +#ifndef ZSTD_DLL_IMPORT +extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); +static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) +{ + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize); +} + +static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) +{ + int nbSeq; + (void)src; (void)srcSize; (void)dst; (void)dstSize; + return ZSTD_decodeSeqHeaders(g_zdc, &nbSeq, buff2, g_cSize); +} +#endif + +static ZSTD_CStream* g_cstream= NULL; +static size_t +local_ZSTD_compressStream(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* 
payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + ZSTD_parameters p; + ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0}; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream(g_cstream, &buffOut, &buffIn); + ZSTD_endStream(g_cstream, &buffOut); + return buffOut.pos; +} + +static size_t +local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + size_t r; + assert(cctx != NULL); + + r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload); + + ZSTD_freeCCtx(cctx); + + return r; +} + +static size_t +local_ZSTD_compress_generic_end(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + (void)payload; + return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); +} + +static size_t +local_ZSTD_compress_generic_continue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)payload; + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end); + return buffOut.pos; +} + +static size_t +local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + (void)payload; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); + return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); +} + +static size_t +local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize, + void* dst, 
size_t dstCapacity, + void* payload) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)payload; + ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = src; + buffIn.size = srcSize; + buffIn.pos = 0; + ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); + while(ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end)) {} + return buffOut.pos; +} + +static ZSTD_DStream* g_dstream= NULL; +static size_t +local_ZSTD_decompressStream(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* buff2) +{ + ZSTD_outBuffer buffOut; + ZSTD_inBuffer buffIn; + (void)src; (void)srcSize; + ZSTD_initDStream(g_dstream); + buffOut.dst = dst; + buffOut.size = dstCapacity; + buffOut.pos = 0; + buffIn.src = buff2; + buffIn.size = g_cSize; + buffIn.pos = 0; + ZSTD_decompressStream(g_dstream, &buffOut, &buffIn); + return buffOut.pos; +} + +#ifndef ZSTD_DLL_IMPORT +static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + ZSTD_parameters p; + ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize); +} + +#define FIRST_BLOCK_SIZE 8 +static size_t +local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* payload) +{ + BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; + + ZSTD_parameters p; + ZSTD_frameParameters const f = { 1, 0, 0 }; + p.fParams = f; + p.cParams = *(ZSTD_compressionParameters*)payload; + ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); + memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE); + + { size_t const compressResult = ZSTD_compressContinue(g_zcc, + dst, dstCapacity, + firstBlockBuf, FIRST_BLOCK_SIZE); + if 
(ZSTD_isError(compressResult)) { + DISPLAY("local_ZSTD_compressContinue_extDict error : %s\n", + ZSTD_getErrorName(compressResult)); + return compressResult; + } + dst = (BYTE*)dst + compressResult; + dstCapacity -= compressResult; + } + return ZSTD_compressEnd(g_zcc, dst, dstCapacity, + (const BYTE*)src + FIRST_BLOCK_SIZE, + srcSize - FIRST_BLOCK_SIZE); +} + +static size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize, + void* dst, size_t dstCapacity, + void* buff2) +{ + size_t regeneratedSize = 0; + const BYTE* ip = (const BYTE*)buff2; + const BYTE* const iend = ip + g_cSize; + BYTE* op = (BYTE*)dst; + size_t remainingCapacity = dstCapacity; + + (void)src; (void)srcSize; /* unused */ + ZSTD_decompressBegin(g_zdc); + while (ip < iend) { + size_t const iSize = ZSTD_nextSrcSizeToDecompress(g_zdc); + size_t const decodedSize = ZSTD_decompressContinue(g_zdc, op, remainingCapacity, ip, iSize); + ip += iSize; + regeneratedSize += decodedSize; + op += decodedSize; + remainingCapacity -= decodedSize; + } + + return regeneratedSize; +} +#endif +#endif + +/*_******************************************************* +* Bench functions +*********************************************************/ +static void benchMem(unsigned benchNb, unsigned nbIters, + const void* src, size_t srcSize, size_t blockSize, + int cLevel, ZSTD_compressionParameters cparams) +{ + size_t const dstSize = compressBlockBound(srcSize, blockSize); + void* const dst = malloc(dstSize); + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + CONTROL(dst != NULL); + CONTROL(cctx != NULL); + CONTROL(dctx != NULL); + + DISPLAY("block size: %u \n", (unsigned)blockSize); + DISPLAY("params: cLevel %d, wlog %d hlog %d clog %d slog %d mml %d tlen %d strat %d \n", + cLevel, cparams.windowLog, cparams.hashLog, cparams.chainLog, cparams.searchLog, + cparams.minMatch, cparams.targetLength, cparams.strategy); + + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, 
ZSTD_c_compressionLevel, cLevel))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)cparams.windowLog))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, (int)cparams.hashLog))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, (int)cparams.chainLog))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, (int)cparams.searchLog))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, (int)cparams.minMatch))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, (int)cparams.targetLength))); + CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy))); + + { + /* Preparation */ + blocks_t* const blocks = compressBlocks(cctx, dst, dstSize, src, srcSize, blockSize); + char const* benchName = ""; + size_t iter; + switch (benchNb) + { + case 1: + benchName = "ZSTD_decodeLiteralsHeaders"; + skipToLiterals(blocks); + break; + case 2: + benchName = "ZSTD_decodeSeqHeaders"; + skipToSequences(blocks, dctx); + break; + default: + break; + } + + /* Benchmark loop */ + { + UTIL_time_t const begin = UTIL_getTime(); + for (iter = 0; iter < nbIters; ++iter) { + switch (benchNb) + { + case 1: + benchmark_ZSTD_decodeLiteralsHeader(dctx, blocks); + break; + case 2: + benchmark_ZSTD_decodeSeqHeaders(dctx, blocks); + break; + default: + break; + } + } + { + UTIL_time_t const end = UTIL_getTime(); + size_t const bytesProcessed = nbIters * totalUncompressedSize(blocks); + size_t const nanos = UTIL_getSpanTimeNano(begin, end); + double const MBps = ((double)bytesProcessed * TIMELOOP_NANOSEC) / (nanos * MB_UNIT); + DISPLAY("%2u#%-29.29s: %8.1f MB/s (%u bytes in %u blocks over %u iters) \n", benchNb, benchName, MBps, (unsigned)bytesProcessed, (unsigned)blocks->numBlocks * nbIters, nbIters); + } + } + free(blocks); + } + + free(dst); + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); +} + + +static int benchSample(U32 benchNb, U32 nbIters, 
size_t blockSize, + size_t benchedSize, double compressibility, + int cLevel, ZSTD_compressionParameters cparams) +{ + /* Allocation */ + void* const origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } + + /* Fill buffer */ + RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0); + + /* bench */ + DISPLAY("\r%70s\r", ""); + DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize); + benchMem(benchNb, nbIters, origBuff, benchedSize, blockSize, cLevel, cparams); + + free(origBuff); + return 0; +} + + +static int benchFiles(U32 benchNb, U32 nbIters, size_t blockSize, + const char** fileNamesTable, const int nbFiles, + int cLevel, ZSTD_compressionParameters cparams) +{ + /* Loop for each file */ + int fileIdx; + for (fileIdx=0; fileIdx inFileSize) + benchedSize = (size_t)inFileSize; + if ((U64)benchedSize < inFileSize) { + DISPLAY("Not enough memory for '%s' full size; testing %u MB only... \n", + inFileName, (unsigned)(benchedSize>>20)); + } } + + /* Alloc */ + { void* const origBuff = malloc(benchedSize); + if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); fclose(inFile); return 12; } + + /* Fill input buffer */ + DISPLAY("Loading %s... \r", inFileName); + { size_t const readSize = fread(origBuff, 1, benchedSize, inFile); + fclose(inFile); + if (readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! 
\n", inFileName); + free(origBuff); + return 13; + } } + + /* bench */ + DISPLAY("\r%70s\r", ""); /* blank line */ + DISPLAY(" %s : \n", inFileName); + benchMem(benchNb, nbIters, origBuff, benchedSize, blockSize, cLevel, cparams); + + free(origBuff); + } } + + return 0; +} + + + +/*_******************************************************* +* Argument Parsing +*********************************************************/ + +#define ERROR_OUT(msg) { DISPLAY("%s \n", msg); exit(1); } + +static unsigned readU32FromChar(const char** stringPtr) +{ + const char errorMsg[] = "error: numeric value too large"; + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + if (result > max) ERROR_OUT(errorMsg); + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) ERROR_OUT(errorMsg); + result <<= 10; + if (**stringPtr=='M') { + if (result > maxK) ERROR_OUT(errorMsg); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +/*_******************************************************* +* Command line +*********************************************************/ + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file1 file2 ... 
fileX\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(const char* exename) +{ + usage(exename); + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -b# : test only function # \n"); + DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL); + DISPLAY( "--zstd= : custom parameter selection. Format same as zstdcli \n"); + DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); + DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault); + DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 1; +} + +int main(int argc, const char** argv) +{ + int argNb, filenamesStart=0, result; + const char* const exename = argv[0]; + const char* input_filename = NULL; + U32 benchNb = 0, main_pause = 0; + int cLevel = DEFAULT_CLEVEL; + ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0); + size_t sampleSize = kSampleSizeDefault; + double compressibility = COMPRESSIBILITY_DEFAULT; + + DISPLAY(WELCOME_MESSAGE); + if (argc<1) return badusage(exename); + + for (argNb=1; argNb Date: Fri, 14 Aug 2020 15:28:59 -0700 Subject: [PATCH 02/36] speed up small blocks --- lib/common/entropy_common.c | 84 ++++++++++++++++------- lib/compress/fse_compress.c | 21 ++---- lib/decompress/zstd_decompress.c | 9 ++- lib/decompress/zstd_decompress_block.c | 84 +++++++++++++++++++---- lib/decompress/zstd_decompress_block.h | 2 +- lib/decompress/zstd_decompress_internal.h | 2 + 6 files changed, 143 insertions(+), 59 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 6b825afe..0a13d9d9 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -50,6 +50,7 @@ size_t FSE_readNCount (short* normalizedCounter, 
unsigned* maxSVPtr, unsigned* t U32 bitStream; int bitCount; unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; int previous0 = 0; if (hbSize < 4) { @@ -76,27 +77,39 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t threshold = 1<1) & (charnum<=*maxSVPtr)) { + for (;;) { if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; + // TODO: Generalize to FSE_countTrailingZeros() or something + int repeats = __builtin_ctz(~bitStream) >> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (ip < iend-6) { + ip += 3; bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; + bitStream >>= 24; + bitCount += 24; + } + repeats = __builtin_ctz(~bitStream) >> 1; } - n0 += bitStream & 3; + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + assert(bitCount < 30 && (bitStream & 3) != 3); + charnum += bitStream & 3; bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. 
+ */ + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; @@ -104,8 +117,10 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t bitStream = MEM_readLE32(ip) >> bitCount; } else { bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; + } + } + { + int const max = (2*threshold-1) - remaining; int count; if ((bitStream & (threshold-1)) < (U32)max) { @@ -118,15 +133,31 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } normalizedCounter[charnum++] = (short)count; previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))) { ip += bitCount>>3; bitCount &= 7; } else { @@ -134,8 +165,10 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t ip = iend - 4; } bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + } } if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. 
*/ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); if (bitCount > 32) return ERROR(corruption_detected); *maxSVPtr = charnum-1; @@ -143,7 +176,6 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t return ip-istart; } - /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). `huffWeight` is destination buffer. diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 5290a918..1187e3e6 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -341,6 +341,8 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); } +// TODO: Emit -1 based on # of symbols +#define LOW_PROB 0 /* Secondary normalization method. To be used when primary method fails. */ @@ -361,7 +363,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, norm[s]=0; continue; } - if (count[s] <= lowThreshold) { + if (LOW_PROB && count[s] <= lowThreshold) { norm[s] = -1; distributed++; total -= count[s]; @@ -431,7 +433,6 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, return 0; } - size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, unsigned maxSymbolValue) @@ -455,7 +456,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, for (s=0; s<=maxSymbolValue; s++) { if (count[s] == total) return 0; /* rle special case */ if (count[s] == 0) { normalizedCounter[s]=0; continue; } - if (count[s] <= lowThreshold) { + if (LOW_PROB && count[s] <= lowThreshold) { normalizedCounter[s] = -1; stillToDistribute--; } else { @@ -476,20 +477,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, else normalizedCounter[largest] += (short)stillToDistribute; } -#if 0 - { /* Print Table (debug) */ - U32 s; - U32 nTotal = 0; - for (s=0; s<=maxSymbolValue; s++) 
- RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); - for (s=0; s<=maxSymbolValue; s++) - nTotal += abs(normalizedCounter[s]); - if (nTotal != (1U<OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, - offcodeLog); + offcodeLog, + entropy->workspace, sizeof(entropy->workspace)); dictPtr += offcodeHeaderSize; } @@ -1104,7 +1105,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, - matchlengthLog); + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace)); dictPtr += matchlengthHeaderSize; } @@ -1117,7 +1119,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, - litlengthLog); + litlengthLog, + entropy->workspace, sizeof(entropy->workspace)); dictPtr += litlengthHeaderSize; } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index e93d6feb..95afcaa3 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -368,19 +368,18 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) + unsigned tableLog, U32* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; U16 symbolNext[MaxSeq+1]; U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - + U32 highThreshold = tableSize - 1; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; @@ -400,12 +399,68 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 
count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + BYTE* spread = (BYTE*)wksp; + assert(wkspSize >= (1u << MaxFSELog) + sizeof(U64)); + (void)wkspSize; + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ @@ -414,7 +469,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Build Decoding table */ - { U32 u; + { + U32 u; for (u=0; u maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); *DTablePtr = DTableSpace; return headerSize; } @@ -520,7 +577,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace)); RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -530,7 +588,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace)); RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -540,7 +599,8 @@ size_t 
ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace)); RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index bf39b735..201d6a9f 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -53,7 +53,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); + unsigned tableLog, U32* wksp, size_t wkspSize); #endif /* ZSTD_DEC_BLOCK_H */ diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 9ad96c55..1a5c7ee6 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -72,6 +72,7 @@ static const U32 ML_base[MaxML+1] = { } ZSTD_seqSymbol; #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + #define ZSTD_FSE_WKSP_SIZE_U32 130 typedef struct { ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ @@ -79,6 +80,7 @@ typedef struct { ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_FSE_WKSP_SIZE_U32]; } ZSTD_entropyDTables_t; typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, From ba1fd17a9f90e87c827e470ca7a2656bc3def669 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sun, 16 Aug 2020 22:22:33 -0700 Subject: [PATCH 03/36] speed up literal header decoding --- lib/common/entropy_common.c 
| 13 +- lib/common/fse.h | 13 +- lib/common/fse_decompress.c | 75 ++++++++- lib/common/huf.h | 13 ++ lib/compress/fse_compress.c | 4 +- lib/decompress/huf_decompress.c | 205 ++++++++++++++++++++----- lib/decompress/zstd_decompress_block.c | 12 +- lib/decompress/zstd_decompress_block.h | 4 +- 8 files changed, 274 insertions(+), 65 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 0a13d9d9..3969d2b3 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -96,7 +96,6 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t bitStream >>= 2 * repeats; bitCount += 2 * repeats; - assert(bitCount < 30 && (bitStream & 3) != 3); charnum += bitStream & 3; bitCount += 2; @@ -186,6 +185,15 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp)); +} + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) { U32 weightTotal; const BYTE* ip = (const BYTE*) src; @@ -208,9 +216,8 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, huffWeight[n+1] = ip[n/2] & 15; } } } else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, 
wkspSize); /* max (hwSize-1) values decoded, as last one is implied */ if (FSE_isError(oSize)) return oSize; } diff --git a/lib/common/fse.h b/lib/common/fse.h index 55dd8f3a..8d15b340 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -311,7 +311,7 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. */ -#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); @@ -326,14 +326,21 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); */ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1 << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */ + size_t FSE_buildDTable_raw 
(FSE_DTable* dt, unsigned nbBits); /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); -/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 54dab255..119ee7b6 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -68,17 +68,24 @@ void FSE_freeDTable (FSE_DTable* dt) free(dt); } -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned 
tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; U32 highThreshold = tableSize-1; /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); @@ -100,7 +107,53 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s wkspSize) return ERROR(tableLog_tooLarge); + workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= FSE_DTABLE_SIZE(tableLog); - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ + CHECK_F( FSE_buildDTable_wksp(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, dtable); /* always return, even if it is an error code */ } @@ -278,8 +336,9 @@ typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) { - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); } diff --git a/lib/common/huf.h b/lib/common/huf.h index ef432685..90e613a1 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Dependencies *** */ #include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" /* *** Constants *** */ @@ -226,6 +228,17 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize); +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. 
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize); + /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 1187e3e6..48b654d0 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -630,7 +630,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); /* init conditions */ - if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); + if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); if (srcSize <= 1) return 0; /* Not compressible */ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; @@ -674,7 +674,7 @@ typedef struct { size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) { fseWkspMax_t scratchBuffer; - DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); return FSE_compress_wksp(dst, dstCapacity, src, 
srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 68293a13..fbe4127a 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -115,6 +115,70 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) /*-***************************/ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +#if 0 +// TODO: Remove this +/* BMI2 version that uses _pdep_u64() for weight 1 and 2 symbols. + * This doesn't provide much gains, so not worth the complexity. + * Leaving in for now but will remove before I commit. 
+ */ +#include + +static U64 HUF_DEltX1_pack4(BYTE const* symbols, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + U64 const nbBits4 = nbBits * 0x0100010001000100ULL; + U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0x00FF00FF00FF00FFULL); + D4 = symbols4 | nbBits4; + } else { + U64 const nbBits4 = nbBits * 0x0001000100010001ULL; + U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0xFF00FF00FF00FF00ULL); + D4 = symbols4 | nbBits4; + } + return D4; +} + +static U64 HUF_DEltX1_pack2(BYTE const* symbols, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + U64 const nbBits4 = nbBits * 0x0100010001000100ULL; + U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x000000FF000000FFULL); + symbols4 = symbols4 * 0x00010001ULL; + D4 = symbols4 | nbBits4; + } else { + U64 const nbBits4 = nbBits * 0x0001000100010001ULL; + U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x0000FF000000FF00ULL); + symbols4 *= 0x00010001ULL; + D4 = symbols4 | nbBits4; + } + return D4; +} +#endif + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +// TODO: Template based on BMI2 (5% boost) size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { U32 tableLog = 0; @@ -122,22 +186,15 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize size_t iSize; void* const dtPtr = DTable + 1; HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, 
sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp)); if (HUF_isError(iSize)) return iSize; /* Table header */ @@ -148,39 +205,103 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize memcpy(DTable, &dtd, sizeof(dtd)); } - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; nrankVal[n]; + wksp->rankStart[n] = current; + } + // TODO: This loop is now the bottleneck: Can this be made faster? + for (n=0; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = n; + } + } - /* fill DTable */ - { U32 n; - size_t const nEnd = nbSymbols; - for (n=0; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. */ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outer loop. 
+ * We can switch based on the length to a different inner loop which is + * optimized for that particular case. + */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = tableLog + 1 - w; + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } return iSize; } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 95afcaa3..51ef977f 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -368,14 +368,17 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog, U32* wksp, size_t wkspSize) + unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + + assert(wkspSize >= 
ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); @@ -414,9 +417,6 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, * all symbols have counts <= 8. We ensure we have 8 bytes at the end of * our buffer to handle the over-write. */ - BYTE* spread = (BYTE*)wksp; - assert(wkspSize >= (1u << MaxFSELog) + sizeof(U64)); - (void)wkspSize; { U64 const add = 0x0101010101010101ull; size_t pos = 0; diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index 201d6a9f..03afdde4 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -48,12 +48,14 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes. * Internal use only. 
*/ +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog, U32* wksp, size_t wkspSize); + unsigned tableLog, void* wksp, size_t wkspSize); #endif /* ZSTD_DEC_BLOCK_H */ From 612e947c5e7bff2d2124b3932489d8683fb0972c Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 17 Aug 2020 13:44:49 -0700 Subject: [PATCH 04/36] wire up bmi2 support --- lib/common/entropy_common.c | 84 +++++++++++++++++++++-- lib/common/fse.h | 10 +++ lib/common/fse_decompress.c | 53 ++++++++++++-- lib/common/huf.h | 7 +- lib/decompress/huf_decompress.c | 24 +++++-- lib/decompress/zstd_decompress.c | 9 ++- lib/decompress/zstd_decompress_block.c | 55 +++++++++++++-- lib/decompress/zstd_decompress_block.h | 7 +- lib/decompress/zstd_decompress_internal.h | 6 +- tests/smallbench.c | 5 +- 10 files changed, 226 insertions(+), 34 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 3969d2b3..052ec45f 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -38,8 +38,9 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) { const BYTE* const istart = (const BYTE*) headerBuffer; const BYTE* const iend = istart + hbSize; @@ -175,6 +176,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t return ip-istart; } +static 
size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). `huffWeight` is destination buffer. 
@@ -187,13 +225,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, const void* src, size_t srcSize) { U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; - return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp)); + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); } -size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, +FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) + void* workSpace, size_t wkspSize, + int bmi2) { U32 weightTotal; const BYTE* ip = (const BYTE*) src; @@ -217,7 +256,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, } } } else { /* header compressed with FSE (normal case) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize); /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); /* max (hwSize-1) values decoded, as last one is implied */ if (FSE_isError(oSize)) return oSize; } @@ -252,3 +291,36 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, *nbSymbolsPtr = (U32)(oSize+1); return iSize+1; } + +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* 
nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} diff --git a/lib/common/fse.h b/lib/common/fse.h index 8d15b340..12309ac8 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -228,6 +228,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. 
It's just a way to be more restrictive than void* */ @@ -342,6 +349,9 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ FSE_repeat_check, /**< Can use the previous table but it must be checked */ diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 119ee7b6..64693024 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -73,7 +73,7 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); } -size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); @@ -178,6 +178,11 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi return 0; } +size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + return 
FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize); +} + #ifndef FSE_COMMONDEFS_ONLY @@ -306,6 +311,15 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) { const BYTE* const istart = (const BYTE*)cSrc; const BYTE* ip = istart; @@ -315,7 +329,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size FSE_DTable* const dtable = (FSE_DTable*)workSpace; /* normal FSE decoding mode */ - size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); if (FSE_isError(NCountLength)) return NCountLength; if (tableLog > maxLog) return ERROR(tableLog_tooLarge); assert(NCountLength <= cSrcSize); @@ -326,9 +340,40 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); wkspSize -= FSE_DTABLE_SIZE(tableLog); - CHECK_F( FSE_buildDTable_wksp(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, dtable); /* always return, even if it is an error code */ + { + const void* ptr = dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if 
(fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); } diff --git a/lib/common/huf.h b/lib/common/huf.h index 90e613a1..45264b97 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -231,13 +231,15 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, /*! HUF_readStats_wksp() : * Same as HUF_readStats() but takes an external workspace which must be * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
*/ #define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) #define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, - void* workspace, size_t wkspSize); + void* workspace, size_t wkspSize, + int bmi2); /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ @@ -345,6 +347,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS #endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index fbe4127a..7ea49424 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -180,6 +180,11 @@ typedef struct { // TODO: Template based on BMI2 (5% boost) size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) { U32 tableLog = 0; U32 nbSymbols = 0; @@ -194,7 +199,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is 
not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp)); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); if (HUF_isError(iSize)) return iSize; /* Table header */ @@ -220,13 +225,21 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize { int n; int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; for (n=0; n<(int)tableLog+1; n++) { U32 const current = nextRankStart; nextRankStart += wksp->rankVal[n]; wksp->rankStart[n] = current; } - // TODO: This loop is now the bottleneck: Can this be made faster? - for (n=0; n < (int)nbSymbols; ++n) { + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = n+u; + } + } + for (; n < (int)nbSymbols; ++n) { size_t const w = wksp->huffWeight[n]; wksp->symbols[wksp->rankStart[w]++] = n; } @@ -540,8 +553,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; @@ -1320,7 +1332,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if 
(HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 7d321b28..4cfacc20 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1092,7 +1092,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, offcodeLog, - entropy->workspace, sizeof(entropy->workspace)); + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); dictPtr += offcodeHeaderSize; } @@ -1106,7 +1107,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, matchlengthLog, - entropy->workspace, sizeof(entropy->workspace)); + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += matchlengthHeaderSize; } @@ -1120,7 +1122,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, litlengthLog, - entropy->workspace, sizeof(entropy->workspace)); + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += litlengthHeaderSize; } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 51ef977f..c045e3b7 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -364,8 +364,8 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB * generate FSE decoding table for one symbol (ll, ml or off) * cannot fail if input is valid => * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) @@ -378,6 +378,7 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, BYTE* spread = 
(BYTE*)(symbolNext + MaxSeq + 1); assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); @@ -483,6 +484,42 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } } +static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + return ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + return ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); +} +#endif + +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); + } +#endif + (void)bmi2; + return ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); +} + /*! 
ZSTD_buildSeqTable() : * @return : nb bytes read from src, @@ -492,7 +529,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb const void* src, size_t srcSize, const U32* baseValue, const U32* nbAdditionalBits, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, - int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize) + int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, + int bmi2) { switch(type) { @@ -524,7 +562,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); *DTablePtr = DTableSpace; return headerSize; } @@ -578,7 +616,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, - dctx->workspace, sizeof(dctx->workspace)); + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -589,7 +628,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, - dctx->workspace, sizeof(dctx->workspace)); + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -600,7 +640,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, - dctx->workspace, sizeof(dctx->workspace)); + dctx->workspace, 
sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index 03afdde4..4a274c3d 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -48,14 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. - * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. * Internal use only. */ -#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog, void* wksp, size_t wkspSize); + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); #endif /* ZSTD_DEC_BLOCK_H */ diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 1a5c7ee6..8a1ca348 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -72,7 +72,9 @@ static const U32 ML_base[MaxML+1] = { } ZSTD_seqSymbol; #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) - #define ZSTD_FSE_WKSP_SIZE_U32 130 + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) typedef struct { ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for 
FSE Tables */ @@ -80,7 +82,7 @@ typedef struct { ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; - U32 workspace[ZSTD_FSE_WKSP_SIZE_U32]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE]; } ZSTD_entropyDTables_t; typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, diff --git a/tests/smallbench.c b/tests/smallbench.c index 319ebba5..24ccc45a 100644 --- a/tests/smallbench.c +++ b/tests/smallbench.c @@ -232,10 +232,11 @@ FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src } RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - return HUF_readDTableX1_wksp( + return HUF_readDTableX1_wksp_bmi2( dctx->entropy.hufTable, istart+lhSize, litCSize, - dctx->workspace, sizeof(dctx->workspace)); + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); } } return 0; From 575731b6dbe052abca8b2f7b02e4f1b8e9a639a6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 18 Aug 2020 15:26:54 -0700 Subject: [PATCH 05/36] Use ncount=1 when < 4096 symbols --- lib/common/fse.h | 4 +++- lib/compress/fse_compress.c | 20 +++++++++----------- lib/compress/huf_compress.c | 2 +- lib/compress/zstd_compress_sequences.c | 17 +++++++++++++++-- lib/decompress/zstd_decompress_block.c | 6 +++--- lib/dictBuilder/zdict.c | 6 +++--- tests/fuzzer.c | 6 +++--- 7 files changed, 37 insertions(+), 24 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 12309ac8..2c0d9ae1 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -137,10 +137,12 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize /*! 
FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a bool param which is set to 1 to use count=-1 or set to 0 to + use count=1 instead, which speeds up FSE_readNCount() and FSE_buildDTable(). @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 48b654d0..2900091e 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -341,13 +341,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); } -// TODO: Emit -1 based on # of symbols -#define LOW_PROB 0 - /* Secondary normalization method. To be used when primary method fails. 
*/ -static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) { short const NOT_YET_ASSIGNED = -2; U32 s; @@ -363,8 +360,8 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, norm[s]=0; continue; } - if (LOW_PROB && count[s] <= lowThreshold) { - norm[s] = -1; + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; distributed++; total -= count[s]; continue; @@ -435,7 +432,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, - unsigned maxSymbolValue) + unsigned maxSymbolValue, unsigned useLowProbCount) { /* Sanity checks */ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; @@ -444,6 +441,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ U64 const vStep = 1ULL<<(scale-20); @@ -456,8 +454,8 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, for (s=0; s<=maxSymbolValue; s++) { if (count[s] == total) return 0; /* rle special case */ if (count[s] == 0) { normalizedCounter[s]=0; continue; } - if (LOW_PROB && count[s] <= lowThreshold) { - normalizedCounter[s] = -1; + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; stillToDistribute--; } else { short proba = (short)((count[s]*step) >> scale); @@ -471,7 +469,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, } } if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); if (FSE_isError(errorCode)) return errorCode; } else normalizedCounter[largest] += (short)stillToDistribute; @@ -643,7 +641,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src } tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) ); /* Write table description header */ { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 54687986..8739df36 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -85,7 +85,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight } tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); + CHECK_F( FSE_normalizeCount(norm, 
tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); /* Write table description header */ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index f9f8097c..87adb711 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { return maxSymbolValue; } +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * compressibility. + */ + return nbSeq >= 2048; +} + /** * Returns the cost in bytes of encoding the normalized count header. * Returns an error if any of the helper functions return an error.
@@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, BYTE wksp[FSE_NCOUNTBOUND]; S16 norm[MaxSeq + 1]; const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); } @@ -253,7 +266,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, nbSeq_1--; } assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index c045e3b7..4777a267 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -376,14 +376,14 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, U16* symbolNext = (U16*)wksp; BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; - assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); - (void)wkspSize; /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - U32 highThreshold = tableSize - 1; + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 98c6c413..6bb66347 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -786,7 +786,7 @@ static size_t 
ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* note : the result of this phase should be used to better appreciate the impact on statistics */ total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; - errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); + errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); @@ -795,7 +795,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, Offlog = (U32)errorCode; total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; - errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); + errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); @@ -804,7 +804,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, mlLog = (U32)errorCode; total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; - errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); + errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 8ac2864f..cd7dab45 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1573,11 +1573,11 @@ static int basicUnitTests(U32 const seed, double compressibility) const void* const contentStart = (const char*)dict + flatdictSize; size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, 3770, 3770, 3770, 3750, 3750, - 3740, 3670, 3670, 3660, 3660, + 3742, 3670, 3670, 3660, 3660, 3660, 3660, 3660, 
3660, 3660, 3660, 3660, 3660 }; size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940, - 2950, 2950, 2920, 2900, 2890, + 2950, 2950, 2921, 2900, 2891, 2910, 2910, 2910, 2770, 2760, 2750, 2750, 2750, 2750, 2750, 2750, 2750, 2750 }; @@ -2744,7 +2744,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Calling FSE_normalizeCount() on a uniform distribution should not * cause a division by zero. */ - FSE_normalizeCount(norm, tableLog, count, nbSeq, maxSymbolValue); + FSE_normalizeCount(norm, tableLog, count, nbSeq, maxSymbolValue, /* useLowProbCount */ 1); } DISPLAYLEVEL(3, "OK \n"); #ifdef ZSTD_MULTITHREAD From 8f8bd2d1ac555ac7b6f6bcfd67ba0a426f5a8309 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 18 Aug 2020 16:57:35 -0700 Subject: [PATCH 06/36] [regression] Update results.csv --- lib/common/entropy_common.c | 33 +- lib/decompress/huf_decompress.c | 46 +- lib/decompress/zstd_decompress_internal.h | 2 +- tests/decodecorpus.c | 6 +- tests/regression/results.csv | 1008 ++++++++++----------- 5 files changed, 538 insertions(+), 557 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 052ec45f..2cc2b4dc 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -38,6 +38,28 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? 
(unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + FORCE_INLINE_TEMPLATE size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) @@ -54,9 +76,9 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne unsigned const maxSV1 = *maxSVPtr + 1; int previous0 = 0; - if (hbSize < 4) { + if (hbSize < 8) { /* This function only works when hbSize >= 4 */ - char buffer[4] = {0}; + char buffer[8] = {0}; memcpy(buffer, headerBuffer, hbSize); { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer)); @@ -80,18 +102,17 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne for (;;) { if (previous0) { - // TODO: Generalize to FSE_countTrailingZeros() or something - int repeats = __builtin_ctz(~bitStream) >> 1; + int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; while (repeats >= 12) { charnum += 3 * 12; - if (ip < iend-6) { + if (ip <= iend-7) { ip += 3; bitStream = MEM_readLE32(ip) >> bitCount; } else { bitStream >>= 24; bitCount += 24; } - repeats = __builtin_ctz(~bitStream) >> 1; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; } charnum += 3 * repeats; bitStream >>= 2 * repeats; diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 7ea49424..eb7dffb8 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -130,45 +130,6 @@ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { return D4; } -#if 0 -// TODO: Remove this -/* BMI2 version that uses _pdep_u64() for weight 1 and 2 symbols. - * This doesn't provide much gains, so not worth the complexity. 
- * Leaving in for now but will remove before I commit. - */ -#include - -static U64 HUF_DEltX1_pack4(BYTE const* symbols, BYTE nbBits) { - U64 D4; - if (MEM_isLittleEndian()) { - U64 const nbBits4 = nbBits * 0x0100010001000100ULL; - U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0x00FF00FF00FF00FFULL); - D4 = symbols4 | nbBits4; - } else { - U64 const nbBits4 = nbBits * 0x0001000100010001ULL; - U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0xFF00FF00FF00FF00ULL); - D4 = symbols4 | nbBits4; - } - return D4; -} - -static U64 HUF_DEltX1_pack2(BYTE const* symbols, BYTE nbBits) { - U64 D4; - if (MEM_isLittleEndian()) { - U64 const nbBits4 = nbBits * 0x0100010001000100ULL; - U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x000000FF000000FFULL); - symbols4 = symbols4 * 0x00010001ULL; - D4 = symbols4 | nbBits4; - } else { - U64 const nbBits4 = nbBits * 0x0001000100010001ULL; - U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x0000FF000000FF00ULL); - symbols4 *= 0x00010001ULL; - D4 = symbols4 | nbBits4; - } - return D4; -} -#endif - typedef struct { U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; @@ -178,7 +139,6 @@ typedef struct { } HUF_ReadDTableX1_Workspace; -// TODO: Template based on BMI2 (5% boost) size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); @@ -236,12 +196,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr int u; for (u=0; u < unroll; ++u) { size_t const w = wksp->huffWeight[n+u]; - wksp->symbols[wksp->rankStart[w]++] = n+u; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); } } for (; n < (int)nbSymbols; ++n) { size_t const w = wksp->huffWeight[n]; - wksp->symbols[wksp->rankStart[w]++] = n; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; } } @@ -259,7 +219,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, 
const void* src, size_t sr int const symbolCount = wksp->rankVal[w]; int const length = (1 << w) >> 1; int uStart = rankStart; - BYTE const nbBits = tableLog + 1 - w; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); int s; int u; switch (length) { diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 8a1ca348..b2558d1b 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -82,7 +82,7 @@ typedef struct { ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; - U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; } ZSTD_entropyDTables_t; typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index a46fc24d..76a78989 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -859,7 +859,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } @@ -887,7 +887,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + 
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } @@ -917,7 +917,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 4db42a48..39de0728 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -1,611 +1,611 @@ Data, Config, Method, Total compressed size -silesia.tar, level -5, compress simple, 6738558 -silesia.tar, level -3, compress simple, 6446362 -silesia.tar, level -1, compress simple, 6186038 -silesia.tar, level 0, compress simple, 4861374 -silesia.tar, level 1, compress simple, 5334825 -silesia.tar, level 3, compress simple, 4861374 -silesia.tar, level 4, compress simple, 4799583 -silesia.tar, level 5, compress simple, 4722271 -silesia.tar, level 6, compress simple, 4672231 -silesia.tar, level 7, compress simple, 4606657 -silesia.tar, level 9, compress simple, 4554099 -silesia.tar, level 13, compress simple, 4491706 -silesia.tar, level 16, compress simple, 4381265 -silesia.tar, level 19, compress simple, 4281551 -silesia.tar, uncompressed literals, compress simple, 4861374 -silesia.tar, uncompressed literals optimal, compress simple, 4281551 -silesia.tar, huffman literals, compress simple, 6186038 -silesia, level -5, compress cctx, 6737567 -silesia, level -3, 
compress cctx, 6444663 -silesia, level -1, compress cctx, 6178442 -silesia, level 0, compress cctx, 4849491 -silesia, level 1, compress cctx, 5313144 -silesia, level 3, compress cctx, 4849491 -silesia, level 4, compress cctx, 4786913 -silesia, level 5, compress cctx, 4710178 -silesia, level 6, compress cctx, 4659996 -silesia, level 7, compress cctx, 4596234 -silesia, level 9, compress cctx, 4543862 -silesia, level 13, compress cctx, 4482073 -silesia, level 16, compress cctx, 4377389 -silesia, level 19, compress cctx, 4293262 -silesia, long distance mode, compress cctx, 4849491 -silesia, multithreaded, compress cctx, 4849491 -silesia, multithreaded long distance mode, compress cctx, 4849491 -silesia, small window log, compress cctx, 7078156 -silesia, small hash log, compress cctx, 6554898 -silesia, small chain log, compress cctx, 4931093 -silesia, explicit params, compress cctx, 4794609 -silesia, uncompressed literals, compress cctx, 4849491 -silesia, uncompressed literals optimal, compress cctx, 4293262 -silesia, huffman literals, compress cctx, 6178442 -silesia, multithreaded with advanced params, compress cctx, 4849491 +silesia.tar, level -5, compress simple, 6738593 +silesia.tar, level -3, compress simple, 6446372 +silesia.tar, level -1, compress simple, 6186042 +silesia.tar, level 0, compress simple, 4861425 +silesia.tar, level 1, compress simple, 5334885 +silesia.tar, level 3, compress simple, 4861425 +silesia.tar, level 4, compress simple, 4799630 +silesia.tar, level 5, compress simple, 4722324 +silesia.tar, level 6, compress simple, 4672279 +silesia.tar, level 7, compress simple, 4606715 +silesia.tar, level 9, compress simple, 4554147 +silesia.tar, level 13, compress simple, 4491764 +silesia.tar, level 16, compress simple, 4381332 +silesia.tar, level 19, compress simple, 4281605 +silesia.tar, uncompressed literals, compress simple, 4861425 +silesia.tar, uncompressed literals optimal, compress simple, 4281605 +silesia.tar, huffman literals, compress simple, 
6186042 +silesia, level -5, compress cctx, 6737607 +silesia, level -3, compress cctx, 6444677 +silesia, level -1, compress cctx, 6178460 +silesia, level 0, compress cctx, 4849552 +silesia, level 1, compress cctx, 5313204 +silesia, level 3, compress cctx, 4849552 +silesia, level 4, compress cctx, 4786970 +silesia, level 5, compress cctx, 4710237 +silesia, level 6, compress cctx, 4660057 +silesia, level 7, compress cctx, 4596295 +silesia, level 9, compress cctx, 4543924 +silesia, level 13, compress cctx, 4482135 +silesia, level 16, compress cctx, 4377465 +silesia, level 19, compress cctx, 4293330 +silesia, long distance mode, compress cctx, 4849552 +silesia, multithreaded, compress cctx, 4849552 +silesia, multithreaded long distance mode, compress cctx, 4849552 +silesia, small window log, compress cctx, 7084179 +silesia, small hash log, compress cctx, 6555021 +silesia, small chain log, compress cctx, 4931148 +silesia, explicit params, compress cctx, 4794666 +silesia, uncompressed literals, compress cctx, 4849552 +silesia, uncompressed literals optimal, compress cctx, 4293330 +silesia, huffman literals, compress cctx, 6178460 +silesia, multithreaded with advanced params, compress cctx, 4849552 github, level -5, compress cctx, 205285 github, level -5 with dict, compress cctx, 47294 github, level -3, compress cctx, 190643 github, level -3 with dict, compress cctx, 48047 github, level -1, compress cctx, 175568 github, level -1 with dict, compress cctx, 43527 -github, level 0, compress cctx, 136311 +github, level 0, compress cctx, 136335 github, level 0 with dict, compress cctx, 41534 -github, level 1, compress cctx, 142450 +github, level 1, compress cctx, 142465 github, level 1 with dict, compress cctx, 42157 -github, level 3, compress cctx, 136311 +github, level 3, compress cctx, 136335 github, level 3 with dict, compress cctx, 41534 -github, level 4, compress cctx, 136144 +github, level 4, compress cctx, 136199 github, level 4 with dict, compress cctx, 41725 -github, 
level 5, compress cctx, 135106 +github, level 5, compress cctx, 135121 github, level 5 with dict, compress cctx, 38934 -github, level 6, compress cctx, 135108 +github, level 6, compress cctx, 135122 github, level 6 with dict, compress cctx, 38628 -github, level 7, compress cctx, 135108 -github, level 7 with dict, compress cctx, 38741 -github, level 9, compress cctx, 135108 -github, level 9 with dict, compress cctx, 39335 -github, level 13, compress cctx, 133717 -github, level 13 with dict, compress cctx, 39923 -github, level 16, compress cctx, 133717 +github, level 7, compress cctx, 135122 +github, level 7 with dict, compress cctx, 38745 +github, level 9, compress cctx, 135122 +github, level 9 with dict, compress cctx, 39341 +github, level 13, compress cctx, 134064 +github, level 13 with dict, compress cctx, 39948 +github, level 16, compress cctx, 134064 github, level 16 with dict, compress cctx, 37568 -github, level 19, compress cctx, 133717 +github, level 19, compress cctx, 134064 github, level 19 with dict, compress cctx, 37567 -github, long distance mode, compress cctx, 141101 -github, multithreaded, compress cctx, 141101 -github, multithreaded long distance mode, compress cctx, 141101 -github, small window log, compress cctx, 141101 -github, small hash log, compress cctx, 138943 -github, small chain log, compress cctx, 139239 -github, explicit params, compress cctx, 140924 -github, uncompressed literals, compress cctx, 136311 -github, uncompressed literals optimal, compress cctx, 133717 +github, long distance mode, compress cctx, 141102 +github, multithreaded, compress cctx, 141102 +github, multithreaded long distance mode, compress cctx, 141102 +github, small window log, compress cctx, 141102 +github, small hash log, compress cctx, 138949 +github, small chain log, compress cctx, 139242 +github, explicit params, compress cctx, 140932 +github, uncompressed literals, compress cctx, 136335 +github, uncompressed literals optimal, compress cctx, 134064 github, 
huffman literals, compress cctx, 175568 -github, multithreaded with advanced params, compress cctx, 141101 -silesia, level -5, zstdcli, 6882514 -silesia, level -3, zstdcli, 6568406 -silesia, level -1, zstdcli, 6183433 -silesia, level 0, zstdcli, 4849539 -silesia, level 1, zstdcli, 5314157 -silesia, level 3, zstdcli, 4849539 -silesia, level 4, zstdcli, 4786961 -silesia, level 5, zstdcli, 4710226 -silesia, level 6, zstdcli, 4660044 -silesia, level 7, zstdcli, 4596282 -silesia, level 9, zstdcli, 4543910 -silesia, level 13, zstdcli, 4482121 -silesia, level 16, zstdcli, 4377437 -silesia, level 19, zstdcli, 4293310 -silesia, long distance mode, zstdcli, 4839698 -silesia, multithreaded, zstdcli, 4849539 -silesia, multithreaded long distance mode, zstdcli, 4839698 -silesia, small window log, zstdcli, 7104616 -silesia, small hash log, zstdcli, 6554946 -silesia, small chain log, zstdcli, 4931141 -silesia, explicit params, zstdcli, 4797048 -silesia, uncompressed literals, zstdcli, 5128008 -silesia, uncompressed literals optimal, zstdcli, 4325482 -silesia, huffman literals, zstdcli, 5331158 -silesia, multithreaded with advanced params, zstdcli, 5128008 -silesia.tar, level -5, zstdcli, 6738906 -silesia.tar, level -3, zstdcli, 6448409 -silesia.tar, level -1, zstdcli, 6186908 -silesia.tar, level 0, zstdcli, 4861462 -silesia.tar, level 1, zstdcli, 5336255 -silesia.tar, level 3, zstdcli, 4861462 -silesia.tar, level 4, zstdcli, 4800482 -silesia.tar, level 5, zstdcli, 4723312 -silesia.tar, level 6, zstdcli, 4673616 -silesia.tar, level 7, zstdcli, 4608346 -silesia.tar, level 9, zstdcli, 4554702 -silesia.tar, level 13, zstdcli, 4491710 -silesia.tar, level 16, zstdcli, 4381269 -silesia.tar, level 19, zstdcli, 4281555 -silesia.tar, no source size, zstdcli, 4861458 -silesia.tar, long distance mode, zstdcli, 4853140 -silesia.tar, multithreaded, zstdcli, 4861462 -silesia.tar, multithreaded long distance mode, zstdcli, 4853140 -silesia.tar, small window log, zstdcli, 7095284 -silesia.tar, 
small hash log, zstdcli, 6587841 -silesia.tar, small chain log, zstdcli, 4943269 -silesia.tar, explicit params, zstdcli, 4822318 -silesia.tar, uncompressed literals, zstdcli, 5129548 -silesia.tar, uncompressed literals optimal, zstdcli, 4320914 -silesia.tar, huffman literals, zstdcli, 5347560 -silesia.tar, multithreaded with advanced params, zstdcli, 5129548 +github, multithreaded with advanced params, compress cctx, 141102 +silesia, level -5, zstdcli, 6882553 +silesia, level -3, zstdcli, 6568424 +silesia, level -1, zstdcli, 6183451 +silesia, level 0, zstdcli, 4849600 +silesia, level 1, zstdcli, 5314210 +silesia, level 3, zstdcli, 4849600 +silesia, level 4, zstdcli, 4787018 +silesia, level 5, zstdcli, 4710285 +silesia, level 6, zstdcli, 4660105 +silesia, level 7, zstdcli, 4596343 +silesia, level 9, zstdcli, 4543972 +silesia, level 13, zstdcli, 4482183 +silesia, level 16, zstdcli, 4377513 +silesia, level 19, zstdcli, 4293378 +silesia, long distance mode, zstdcli, 4839756 +silesia, multithreaded, zstdcli, 4849600 +silesia, multithreaded long distance mode, zstdcli, 4839756 +silesia, small window log, zstdcli, 7111012 +silesia, small hash log, zstdcli, 6555069 +silesia, small chain log, zstdcli, 4931196 +silesia, explicit params, zstdcli, 4797100 +silesia, uncompressed literals, zstdcli, 5128030 +silesia, uncompressed literals optimal, zstdcli, 4325520 +silesia, huffman literals, zstdcli, 5331216 +silesia, multithreaded with advanced params, zstdcli, 5128030 +silesia.tar, level -5, zstdcli, 6738934 +silesia.tar, level -3, zstdcli, 6448419 +silesia.tar, level -1, zstdcli, 6186912 +silesia.tar, level 0, zstdcli, 4861512 +silesia.tar, level 1, zstdcli, 5336318 +silesia.tar, level 3, zstdcli, 4861512 +silesia.tar, level 4, zstdcli, 4800529 +silesia.tar, level 5, zstdcli, 4723364 +silesia.tar, level 6, zstdcli, 4673663 +silesia.tar, level 7, zstdcli, 4608403 +silesia.tar, level 9, zstdcli, 4554751 +silesia.tar, level 13, zstdcli, 4491768 +silesia.tar, level 16, zstdcli, 
4381336 +silesia.tar, level 19, zstdcli, 4281609 +silesia.tar, no source size, zstdcli, 4861508 +silesia.tar, long distance mode, zstdcli, 4853190 +silesia.tar, multithreaded, zstdcli, 4861512 +silesia.tar, multithreaded long distance mode, zstdcli, 4853190 +silesia.tar, small window log, zstdcli, 7101576 +silesia.tar, small hash log, zstdcli, 6587959 +silesia.tar, small chain log, zstdcli, 4943310 +silesia.tar, explicit params, zstdcli, 4822354 +silesia.tar, uncompressed literals, zstdcli, 5129559 +silesia.tar, uncompressed literals optimal, zstdcli, 4320931 +silesia.tar, huffman literals, zstdcli, 5347610 +silesia.tar, multithreaded with advanced params, zstdcli, 5129559 github, level -5, zstdcli, 207285 github, level -5 with dict, zstdcli, 48718 github, level -3, zstdcli, 192643 github, level -3 with dict, zstdcli, 47395 github, level -1, zstdcli, 177568 github, level -1 with dict, zstdcli, 45170 -github, level 0, zstdcli, 138311 +github, level 0, zstdcli, 138335 github, level 0 with dict, zstdcli, 43148 -github, level 1, zstdcli, 144450 +github, level 1, zstdcli, 144465 github, level 1 with dict, zstdcli, 43682 -github, level 3, zstdcli, 138311 +github, level 3, zstdcli, 138335 github, level 3 with dict, zstdcli, 43148 -github, level 4, zstdcli, 138144 +github, level 4, zstdcli, 138199 github, level 4 with dict, zstdcli, 43251 -github, level 5, zstdcli, 137106 +github, level 5, zstdcli, 137121 github, level 5 with dict, zstdcli, 40938 -github, level 6, zstdcli, 137108 +github, level 6, zstdcli, 137122 github, level 6 with dict, zstdcli, 40632 -github, level 7, zstdcli, 137108 -github, level 7 with dict, zstdcli, 40766 -github, level 9, zstdcli, 137108 -github, level 9 with dict, zstdcli, 41326 -github, level 13, zstdcli, 135717 -github, level 13 with dict, zstdcli, 41716 -github, level 16, zstdcli, 135717 +github, level 7, zstdcli, 137122 +github, level 7 with dict, zstdcli, 40771 +github, level 9, zstdcli, 137122 +github, level 9 with dict, zstdcli, 41332 
+github, level 13, zstdcli, 136064 +github, level 13 with dict, zstdcli, 41743 +github, level 16, zstdcli, 136064 github, level 16 with dict, zstdcli, 39577 -github, level 19, zstdcli, 135717 +github, level 19, zstdcli, 136064 github, level 19 with dict, zstdcli, 39576 -github, long distance mode, zstdcli, 138311 -github, multithreaded, zstdcli, 138311 -github, multithreaded long distance mode, zstdcli, 138311 -github, small window log, zstdcli, 138311 -github, small hash log, zstdcli, 137467 -github, small chain log, zstdcli, 138314 -github, explicit params, zstdcli, 136140 +github, long distance mode, zstdcli, 138335 +github, multithreaded, zstdcli, 138335 +github, multithreaded long distance mode, zstdcli, 138335 +github, small window log, zstdcli, 138335 +github, small hash log, zstdcli, 137590 +github, small chain log, zstdcli, 138341 +github, explicit params, zstdcli, 136197 github, uncompressed literals, zstdcli, 167915 -github, uncompressed literals optimal, zstdcli, 158824 -github, huffman literals, zstdcli, 144450 +github, uncompressed literals optimal, zstdcli, 159227 +github, huffman literals, zstdcli, 144465 github, multithreaded with advanced params, zstdcli, 167915 -silesia, level -5, advanced one pass, 6737567 -silesia, level -3, advanced one pass, 6444663 -silesia, level -1, advanced one pass, 6178442 -silesia, level 0, advanced one pass, 4849491 -silesia, level 1, advanced one pass, 5313144 -silesia, level 3, advanced one pass, 4849491 -silesia, level 4, advanced one pass, 4786913 -silesia, level 5, advanced one pass, 4710178 -silesia, level 6, advanced one pass, 4659996 -silesia, level 7, advanced one pass, 4596234 -silesia, level 9, advanced one pass, 4543862 -silesia, level 13, advanced one pass, 4482073 -silesia, level 16, advanced one pass, 4377389 -silesia, level 19, advanced one pass, 4293262 -silesia, no source size, advanced one pass, 4849491 -silesia, long distance mode, advanced one pass, 4839650 -silesia, multithreaded, advanced one 
pass, 4849491 -silesia, multithreaded long distance mode, advanced one pass, 4839650 -silesia, small window log, advanced one pass, 7089646 -silesia, small hash log, advanced one pass, 6554898 -silesia, small chain log, advanced one pass, 4931093 -silesia, explicit params, advanced one pass, 4797035 -silesia, uncompressed literals, advanced one pass, 5127960 -silesia, uncompressed literals optimal, advanced one pass, 4325434 -silesia, huffman literals, advanced one pass, 5326210 -silesia, multithreaded with advanced params, advanced one pass, 5127960 -silesia.tar, level -5, advanced one pass, 6738558 -silesia.tar, level -3, advanced one pass, 6446362 -silesia.tar, level -1, advanced one pass, 6186038 -silesia.tar, level 0, advanced one pass, 4861374 -silesia.tar, level 1, advanced one pass, 5334825 -silesia.tar, level 3, advanced one pass, 4861374 -silesia.tar, level 4, advanced one pass, 4799583 -silesia.tar, level 5, advanced one pass, 4722271 -silesia.tar, level 6, advanced one pass, 4672231 -silesia.tar, level 7, advanced one pass, 4606657 -silesia.tar, level 9, advanced one pass, 4554099 -silesia.tar, level 13, advanced one pass, 4491706 -silesia.tar, level 16, advanced one pass, 4381265 -silesia.tar, level 19, advanced one pass, 4281551 -silesia.tar, no source size, advanced one pass, 4861374 -silesia.tar, long distance mode, advanced one pass, 4848046 -silesia.tar, multithreaded, advanced one pass, 4860726 -silesia.tar, multithreaded long distance mode, advanced one pass, 4847343 -silesia.tar, small window log, advanced one pass, 7095237 -silesia.tar, small hash log, advanced one pass, 6587833 -silesia.tar, small chain log, advanced one pass, 4943266 -silesia.tar, explicit params, advanced one pass, 4808543 -silesia.tar, uncompressed literals, advanced one pass, 5129447 -silesia.tar, uncompressed literals optimal, advanced one pass, 4320910 -silesia.tar, huffman literals, advanced one pass, 5347283 -silesia.tar, multithreaded with advanced params, advanced 
one pass, 5129766 +silesia, level -5, advanced one pass, 6737607 +silesia, level -3, advanced one pass, 6444677 +silesia, level -1, advanced one pass, 6178460 +silesia, level 0, advanced one pass, 4849552 +silesia, level 1, advanced one pass, 5313204 +silesia, level 3, advanced one pass, 4849552 +silesia, level 4, advanced one pass, 4786970 +silesia, level 5, advanced one pass, 4710237 +silesia, level 6, advanced one pass, 4660057 +silesia, level 7, advanced one pass, 4596295 +silesia, level 9, advanced one pass, 4543924 +silesia, level 13, advanced one pass, 4482135 +silesia, level 16, advanced one pass, 4377465 +silesia, level 19, advanced one pass, 4293330 +silesia, no source size, advanced one pass, 4849552 +silesia, long distance mode, advanced one pass, 4839708 +silesia, multithreaded, advanced one pass, 4849552 +silesia, multithreaded long distance mode, advanced one pass, 4839708 +silesia, small window log, advanced one pass, 7095919 +silesia, small hash log, advanced one pass, 6555021 +silesia, small chain log, advanced one pass, 4931148 +silesia, explicit params, advanced one pass, 4797086 +silesia, uncompressed literals, advanced one pass, 5127982 +silesia, uncompressed literals optimal, advanced one pass, 4325472 +silesia, huffman literals, advanced one pass, 5326268 +silesia, multithreaded with advanced params, advanced one pass, 5127982 +silesia.tar, level -5, advanced one pass, 6738593 +silesia.tar, level -3, advanced one pass, 6446372 +silesia.tar, level -1, advanced one pass, 6186042 +silesia.tar, level 0, advanced one pass, 4861425 +silesia.tar, level 1, advanced one pass, 5334885 +silesia.tar, level 3, advanced one pass, 4861425 +silesia.tar, level 4, advanced one pass, 4799630 +silesia.tar, level 5, advanced one pass, 4722324 +silesia.tar, level 6, advanced one pass, 4672279 +silesia.tar, level 7, advanced one pass, 4606715 +silesia.tar, level 9, advanced one pass, 4554147 +silesia.tar, level 13, advanced one pass, 4491764 +silesia.tar, level 
16, advanced one pass, 4381332 +silesia.tar, level 19, advanced one pass, 4281605 +silesia.tar, no source size, advanced one pass, 4861425 +silesia.tar, long distance mode, advanced one pass, 4848098 +silesia.tar, multithreaded, advanced one pass, 4860781 +silesia.tar, multithreaded long distance mode, advanced one pass, 4847398 +silesia.tar, small window log, advanced one pass, 7101530 +silesia.tar, small hash log, advanced one pass, 6587951 +silesia.tar, small chain log, advanced one pass, 4943307 +silesia.tar, explicit params, advanced one pass, 4808581 +silesia.tar, uncompressed literals, advanced one pass, 5129458 +silesia.tar, uncompressed literals optimal, advanced one pass, 4320927 +silesia.tar, huffman literals, advanced one pass, 5347335 +silesia.tar, multithreaded with advanced params, advanced one pass, 5129777 github, level -5, advanced one pass, 205285 github, level -5 with dict, advanced one pass, 46718 github, level -3, advanced one pass, 190643 github, level -3 with dict, advanced one pass, 45395 github, level -1, advanced one pass, 175568 github, level -1 with dict, advanced one pass, 43170 -github, level 0, advanced one pass, 136311 +github, level 0, advanced one pass, 136335 github, level 0 with dict, advanced one pass, 41148 -github, level 1, advanced one pass, 142450 +github, level 1, advanced one pass, 142465 github, level 1 with dict, advanced one pass, 41682 -github, level 3, advanced one pass, 136311 +github, level 3, advanced one pass, 136335 github, level 3 with dict, advanced one pass, 41148 -github, level 4, advanced one pass, 136144 +github, level 4, advanced one pass, 136199 github, level 4 with dict, advanced one pass, 41251 -github, level 5, advanced one pass, 135106 +github, level 5, advanced one pass, 135121 github, level 5 with dict, advanced one pass, 38938 -github, level 6, advanced one pass, 135108 +github, level 6, advanced one pass, 135122 github, level 6 with dict, advanced one pass, 38632 -github, level 7, advanced one 
pass, 135108 -github, level 7 with dict, advanced one pass, 38766 -github, level 9, advanced one pass, 135108 -github, level 9 with dict, advanced one pass, 39326 -github, level 13, advanced one pass, 133717 -github, level 13 with dict, advanced one pass, 39716 -github, level 16, advanced one pass, 133717 +github, level 7, advanced one pass, 135122 +github, level 7 with dict, advanced one pass, 38771 +github, level 9, advanced one pass, 135122 +github, level 9 with dict, advanced one pass, 39332 +github, level 13, advanced one pass, 134064 +github, level 13 with dict, advanced one pass, 39743 +github, level 16, advanced one pass, 134064 github, level 16 with dict, advanced one pass, 37577 -github, level 19, advanced one pass, 133717 +github, level 19, advanced one pass, 134064 github, level 19 with dict, advanced one pass, 37576 -github, no source size, advanced one pass, 136311 -github, long distance mode, advanced one pass, 136311 -github, multithreaded, advanced one pass, 136311 -github, multithreaded long distance mode, advanced one pass, 136311 -github, small window log, advanced one pass, 136311 -github, small hash log, advanced one pass, 135467 -github, small chain log, advanced one pass, 136314 -github, explicit params, advanced one pass, 137670 +github, no source size, advanced one pass, 136335 +github, long distance mode, advanced one pass, 136335 +github, multithreaded, advanced one pass, 136335 +github, multithreaded long distance mode, advanced one pass, 136335 +github, small window log, advanced one pass, 136335 +github, small hash log, advanced one pass, 135590 +github, small chain log, advanced one pass, 136341 +github, explicit params, advanced one pass, 137727 github, uncompressed literals, advanced one pass, 165915 -github, uncompressed literals optimal, advanced one pass, 156824 -github, huffman literals, advanced one pass, 142450 +github, uncompressed literals optimal, advanced one pass, 157227 +github, huffman literals, advanced one pass, 
142465 github, multithreaded with advanced params, advanced one pass, 165915 -silesia, level -5, advanced one pass small out, 6737567 -silesia, level -3, advanced one pass small out, 6444663 -silesia, level -1, advanced one pass small out, 6178442 -silesia, level 0, advanced one pass small out, 4849491 -silesia, level 1, advanced one pass small out, 5313144 -silesia, level 3, advanced one pass small out, 4849491 -silesia, level 4, advanced one pass small out, 4786913 -silesia, level 5, advanced one pass small out, 4710178 -silesia, level 6, advanced one pass small out, 4659996 -silesia, level 7, advanced one pass small out, 4596234 -silesia, level 9, advanced one pass small out, 4543862 -silesia, level 13, advanced one pass small out, 4482073 -silesia, level 16, advanced one pass small out, 4377389 -silesia, level 19, advanced one pass small out, 4293262 -silesia, no source size, advanced one pass small out, 4849491 -silesia, long distance mode, advanced one pass small out, 4839650 -silesia, multithreaded, advanced one pass small out, 4849491 -silesia, multithreaded long distance mode, advanced one pass small out, 4839650 -silesia, small window log, advanced one pass small out, 7089646 -silesia, small hash log, advanced one pass small out, 6554898 -silesia, small chain log, advanced one pass small out, 4931093 -silesia, explicit params, advanced one pass small out, 4797035 -silesia, uncompressed literals, advanced one pass small out, 5127960 -silesia, uncompressed literals optimal, advanced one pass small out, 4325434 -silesia, huffman literals, advanced one pass small out, 5326210 -silesia, multithreaded with advanced params, advanced one pass small out, 5127960 -silesia.tar, level -5, advanced one pass small out, 6738558 -silesia.tar, level -3, advanced one pass small out, 6446362 -silesia.tar, level -1, advanced one pass small out, 6186038 -silesia.tar, level 0, advanced one pass small out, 4861374 -silesia.tar, level 1, advanced one pass small out, 5334825 
-silesia.tar, level 3, advanced one pass small out, 4861374 -silesia.tar, level 4, advanced one pass small out, 4799583 -silesia.tar, level 5, advanced one pass small out, 4722271 -silesia.tar, level 6, advanced one pass small out, 4672231 -silesia.tar, level 7, advanced one pass small out, 4606657 -silesia.tar, level 9, advanced one pass small out, 4554099 -silesia.tar, level 13, advanced one pass small out, 4491706 -silesia.tar, level 16, advanced one pass small out, 4381265 -silesia.tar, level 19, advanced one pass small out, 4281551 -silesia.tar, no source size, advanced one pass small out, 4861374 -silesia.tar, long distance mode, advanced one pass small out, 4848046 -silesia.tar, multithreaded, advanced one pass small out, 4860726 -silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847343 -silesia.tar, small window log, advanced one pass small out, 7095237 -silesia.tar, small hash log, advanced one pass small out, 6587833 -silesia.tar, small chain log, advanced one pass small out, 4943266 -silesia.tar, explicit params, advanced one pass small out, 4808543 -silesia.tar, uncompressed literals, advanced one pass small out, 5129447 -silesia.tar, uncompressed literals optimal, advanced one pass small out, 4320910 -silesia.tar, huffman literals, advanced one pass small out, 5347283 -silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129766 +silesia, level -5, advanced one pass small out, 6737607 +silesia, level -3, advanced one pass small out, 6444677 +silesia, level -1, advanced one pass small out, 6178460 +silesia, level 0, advanced one pass small out, 4849552 +silesia, level 1, advanced one pass small out, 5313204 +silesia, level 3, advanced one pass small out, 4849552 +silesia, level 4, advanced one pass small out, 4786970 +silesia, level 5, advanced one pass small out, 4710237 +silesia, level 6, advanced one pass small out, 4660057 +silesia, level 7, advanced one pass small out, 4596295 +silesia, level 9, 
advanced one pass small out, 4543924 +silesia, level 13, advanced one pass small out, 4482135 +silesia, level 16, advanced one pass small out, 4377465 +silesia, level 19, advanced one pass small out, 4293330 +silesia, no source size, advanced one pass small out, 4849552 +silesia, long distance mode, advanced one pass small out, 4839708 +silesia, multithreaded, advanced one pass small out, 4849552 +silesia, multithreaded long distance mode, advanced one pass small out, 4839708 +silesia, small window log, advanced one pass small out, 7095919 +silesia, small hash log, advanced one pass small out, 6555021 +silesia, small chain log, advanced one pass small out, 4931148 +silesia, explicit params, advanced one pass small out, 4797086 +silesia, uncompressed literals, advanced one pass small out, 5127982 +silesia, uncompressed literals optimal, advanced one pass small out, 4325472 +silesia, huffman literals, advanced one pass small out, 5326268 +silesia, multithreaded with advanced params, advanced one pass small out, 5127982 +silesia.tar, level -5, advanced one pass small out, 6738593 +silesia.tar, level -3, advanced one pass small out, 6446372 +silesia.tar, level -1, advanced one pass small out, 6186042 +silesia.tar, level 0, advanced one pass small out, 4861425 +silesia.tar, level 1, advanced one pass small out, 5334885 +silesia.tar, level 3, advanced one pass small out, 4861425 +silesia.tar, level 4, advanced one pass small out, 4799630 +silesia.tar, level 5, advanced one pass small out, 4722324 +silesia.tar, level 6, advanced one pass small out, 4672279 +silesia.tar, level 7, advanced one pass small out, 4606715 +silesia.tar, level 9, advanced one pass small out, 4554147 +silesia.tar, level 13, advanced one pass small out, 4491764 +silesia.tar, level 16, advanced one pass small out, 4381332 +silesia.tar, level 19, advanced one pass small out, 4281605 +silesia.tar, no source size, advanced one pass small out, 4861425 +silesia.tar, long distance mode, advanced one pass 
small out, 4848098 +silesia.tar, multithreaded, advanced one pass small out, 4860781 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847398 +silesia.tar, small window log, advanced one pass small out, 7101530 +silesia.tar, small hash log, advanced one pass small out, 6587951 +silesia.tar, small chain log, advanced one pass small out, 4943307 +silesia.tar, explicit params, advanced one pass small out, 4808581 +silesia.tar, uncompressed literals, advanced one pass small out, 5129458 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4320927 +silesia.tar, huffman literals, advanced one pass small out, 5347335 +silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129777 github, level -5, advanced one pass small out, 205285 github, level -5 with dict, advanced one pass small out, 46718 github, level -3, advanced one pass small out, 190643 github, level -3 with dict, advanced one pass small out, 45395 github, level -1, advanced one pass small out, 175568 github, level -1 with dict, advanced one pass small out, 43170 -github, level 0, advanced one pass small out, 136311 +github, level 0, advanced one pass small out, 136335 github, level 0 with dict, advanced one pass small out, 41148 -github, level 1, advanced one pass small out, 142450 +github, level 1, advanced one pass small out, 142465 github, level 1 with dict, advanced one pass small out, 41682 -github, level 3, advanced one pass small out, 136311 +github, level 3, advanced one pass small out, 136335 github, level 3 with dict, advanced one pass small out, 41148 -github, level 4, advanced one pass small out, 136144 +github, level 4, advanced one pass small out, 136199 github, level 4 with dict, advanced one pass small out, 41251 -github, level 5, advanced one pass small out, 135106 +github, level 5, advanced one pass small out, 135121 github, level 5 with dict, advanced one pass small out, 38938 -github, level 6, advanced one pass small 
out, 135108 +github, level 6, advanced one pass small out, 135122 github, level 6 with dict, advanced one pass small out, 38632 -github, level 7, advanced one pass small out, 135108 -github, level 7 with dict, advanced one pass small out, 38766 -github, level 9, advanced one pass small out, 135108 -github, level 9 with dict, advanced one pass small out, 39326 -github, level 13, advanced one pass small out, 133717 -github, level 13 with dict, advanced one pass small out, 39716 -github, level 16, advanced one pass small out, 133717 +github, level 7, advanced one pass small out, 135122 +github, level 7 with dict, advanced one pass small out, 38771 +github, level 9, advanced one pass small out, 135122 +github, level 9 with dict, advanced one pass small out, 39332 +github, level 13, advanced one pass small out, 134064 +github, level 13 with dict, advanced one pass small out, 39743 +github, level 16, advanced one pass small out, 134064 github, level 16 with dict, advanced one pass small out, 37577 -github, level 19, advanced one pass small out, 133717 +github, level 19, advanced one pass small out, 134064 github, level 19 with dict, advanced one pass small out, 37576 -github, no source size, advanced one pass small out, 136311 -github, long distance mode, advanced one pass small out, 136311 -github, multithreaded, advanced one pass small out, 136311 -github, multithreaded long distance mode, advanced one pass small out, 136311 -github, small window log, advanced one pass small out, 136311 -github, small hash log, advanced one pass small out, 135467 -github, small chain log, advanced one pass small out, 136314 -github, explicit params, advanced one pass small out, 137670 +github, no source size, advanced one pass small out, 136335 +github, long distance mode, advanced one pass small out, 136335 +github, multithreaded, advanced one pass small out, 136335 +github, multithreaded long distance mode, advanced one pass small out, 136335 +github, small window log, advanced one 
pass small out, 136335 +github, small hash log, advanced one pass small out, 135590 +github, small chain log, advanced one pass small out, 136341 +github, explicit params, advanced one pass small out, 137727 github, uncompressed literals, advanced one pass small out, 165915 -github, uncompressed literals optimal, advanced one pass small out, 156824 -github, huffman literals, advanced one pass small out, 142450 +github, uncompressed literals optimal, advanced one pass small out, 157227 +github, huffman literals, advanced one pass small out, 142465 github, multithreaded with advanced params, advanced one pass small out, 165915 -silesia, level -5, advanced streaming, 6882466 -silesia, level -3, advanced streaming, 6568358 -silesia, level -1, advanced streaming, 6183385 -silesia, level 0, advanced streaming, 4849491 -silesia, level 1, advanced streaming, 5314109 -silesia, level 3, advanced streaming, 4849491 -silesia, level 4, advanced streaming, 4786913 -silesia, level 5, advanced streaming, 4710178 -silesia, level 6, advanced streaming, 4659996 -silesia, level 7, advanced streaming, 4596234 -silesia, level 9, advanced streaming, 4543862 -silesia, level 13, advanced streaming, 4482073 -silesia, level 16, advanced streaming, 4377389 -silesia, level 19, advanced streaming, 4293262 -silesia, no source size, advanced streaming, 4849455 -silesia, long distance mode, advanced streaming, 4839650 -silesia, multithreaded, advanced streaming, 4849491 -silesia, multithreaded long distance mode, advanced streaming, 4839650 -silesia, small window log, advanced streaming, 7105714 -silesia, small hash log, advanced streaming, 6554898 -silesia, small chain log, advanced streaming, 4931093 -silesia, explicit params, advanced streaming, 4797048 -silesia, uncompressed literals, advanced streaming, 5127960 -silesia, uncompressed literals optimal, advanced streaming, 4325434 -silesia, huffman literals, advanced streaming, 5331110 -silesia, multithreaded with advanced params, advanced 
streaming, 5127960 -silesia.tar, level -5, advanced streaming, 6982738 -silesia.tar, level -3, advanced streaming, 6641264 -silesia.tar, level -1, advanced streaming, 6190789 -silesia.tar, level 0, advanced streaming, 4861376 -silesia.tar, level 1, advanced streaming, 5336879 -silesia.tar, level 3, advanced streaming, 4861376 -silesia.tar, level 4, advanced streaming, 4799583 -silesia.tar, level 5, advanced streaming, 4722276 -silesia.tar, level 6, advanced streaming, 4672240 -silesia.tar, level 7, advanced streaming, 4606657 -silesia.tar, level 9, advanced streaming, 4554106 -silesia.tar, level 13, advanced streaming, 4491707 -silesia.tar, level 16, advanced streaming, 4381284 -silesia.tar, level 19, advanced streaming, 4281511 -silesia.tar, no source size, advanced streaming, 4861372 -silesia.tar, long distance mode, advanced streaming, 4848046 -silesia.tar, multithreaded, advanced streaming, 4861458 -silesia.tar, multithreaded long distance mode, advanced streaming, 4853136 -silesia.tar, small window log, advanced streaming, 7112148 -silesia.tar, small hash log, advanced streaming, 6587834 -silesia.tar, small chain log, advanced streaming, 4943271 -silesia.tar, explicit params, advanced streaming, 4808570 -silesia.tar, uncompressed literals, advanced streaming, 5129450 -silesia.tar, uncompressed literals optimal, advanced streaming, 4320841 -silesia.tar, huffman literals, advanced streaming, 5352306 -silesia.tar, multithreaded with advanced params, advanced streaming, 5129544 +silesia, level -5, advanced streaming, 6882505 +silesia, level -3, advanced streaming, 6568376 +silesia, level -1, advanced streaming, 6183403 +silesia, level 0, advanced streaming, 4849552 +silesia, level 1, advanced streaming, 5314162 +silesia, level 3, advanced streaming, 4849552 +silesia, level 4, advanced streaming, 4786970 +silesia, level 5, advanced streaming, 4710237 +silesia, level 6, advanced streaming, 4660057 +silesia, level 7, advanced streaming, 4596295 +silesia, level 9, 
advanced streaming, 4543924 +silesia, level 13, advanced streaming, 4482135 +silesia, level 16, advanced streaming, 4377465 +silesia, level 19, advanced streaming, 4293330 +silesia, no source size, advanced streaming, 4849516 +silesia, long distance mode, advanced streaming, 4839708 +silesia, multithreaded, advanced streaming, 4849552 +silesia, multithreaded long distance mode, advanced streaming, 4839708 +silesia, small window log, advanced streaming, 7112062 +silesia, small hash log, advanced streaming, 6555021 +silesia, small chain log, advanced streaming, 4931148 +silesia, explicit params, advanced streaming, 4797100 +silesia, uncompressed literals, advanced streaming, 5127982 +silesia, uncompressed literals optimal, advanced streaming, 4325472 +silesia, huffman literals, advanced streaming, 5331168 +silesia, multithreaded with advanced params, advanced streaming, 5127982 +silesia.tar, level -5, advanced streaming, 6982759 +silesia.tar, level -3, advanced streaming, 6641283 +silesia.tar, level -1, advanced streaming, 6190795 +silesia.tar, level 0, advanced streaming, 4861427 +silesia.tar, level 1, advanced streaming, 5336939 +silesia.tar, level 3, advanced streaming, 4861427 +silesia.tar, level 4, advanced streaming, 4799630 +silesia.tar, level 5, advanced streaming, 4722329 +silesia.tar, level 6, advanced streaming, 4672288 +silesia.tar, level 7, advanced streaming, 4606715 +silesia.tar, level 9, advanced streaming, 4554154 +silesia.tar, level 13, advanced streaming, 4491765 +silesia.tar, level 16, advanced streaming, 4381350 +silesia.tar, level 19, advanced streaming, 4281562 +silesia.tar, no source size, advanced streaming, 4861423 +silesia.tar, long distance mode, advanced streaming, 4848098 +silesia.tar, multithreaded, advanced streaming, 4861508 +silesia.tar, multithreaded long distance mode, advanced streaming, 4853186 +silesia.tar, small window log, advanced streaming, 7118769 +silesia.tar, small hash log, advanced streaming, 6587952 +silesia.tar, small 
chain log, advanced streaming, 4943312 +silesia.tar, explicit params, advanced streaming, 4808608 +silesia.tar, uncompressed literals, advanced streaming, 5129461 +silesia.tar, uncompressed literals optimal, advanced streaming, 4320858 +silesia.tar, huffman literals, advanced streaming, 5352360 +silesia.tar, multithreaded with advanced params, advanced streaming, 5129555 github, level -5, advanced streaming, 205285 github, level -5 with dict, advanced streaming, 46718 github, level -3, advanced streaming, 190643 github, level -3 with dict, advanced streaming, 45395 github, level -1, advanced streaming, 175568 github, level -1 with dict, advanced streaming, 43170 -github, level 0, advanced streaming, 136311 +github, level 0, advanced streaming, 136335 github, level 0 with dict, advanced streaming, 41148 -github, level 1, advanced streaming, 142450 +github, level 1, advanced streaming, 142465 github, level 1 with dict, advanced streaming, 41682 -github, level 3, advanced streaming, 136311 +github, level 3, advanced streaming, 136335 github, level 3 with dict, advanced streaming, 41148 -github, level 4, advanced streaming, 136144 +github, level 4, advanced streaming, 136199 github, level 4 with dict, advanced streaming, 41251 -github, level 5, advanced streaming, 135106 +github, level 5, advanced streaming, 135121 github, level 5 with dict, advanced streaming, 38938 -github, level 6, advanced streaming, 135108 +github, level 6, advanced streaming, 135122 github, level 6 with dict, advanced streaming, 38632 -github, level 7, advanced streaming, 135108 -github, level 7 with dict, advanced streaming, 38766 -github, level 9, advanced streaming, 135108 -github, level 9 with dict, advanced streaming, 39326 -github, level 13, advanced streaming, 133717 -github, level 13 with dict, advanced streaming, 39716 -github, level 16, advanced streaming, 133717 +github, level 7, advanced streaming, 135122 +github, level 7 with dict, advanced streaming, 38771 +github, level 9, advanced 
streaming, 135122 +github, level 9 with dict, advanced streaming, 39332 +github, level 13, advanced streaming, 134064 +github, level 13 with dict, advanced streaming, 39743 +github, level 16, advanced streaming, 134064 github, level 16 with dict, advanced streaming, 37577 -github, level 19, advanced streaming, 133717 +github, level 19, advanced streaming, 134064 github, level 19 with dict, advanced streaming, 37576 -github, no source size, advanced streaming, 136311 -github, long distance mode, advanced streaming, 136311 -github, multithreaded, advanced streaming, 136311 -github, multithreaded long distance mode, advanced streaming, 136311 -github, small window log, advanced streaming, 136311 -github, small hash log, advanced streaming, 135467 -github, small chain log, advanced streaming, 136314 -github, explicit params, advanced streaming, 137670 +github, no source size, advanced streaming, 136335 +github, long distance mode, advanced streaming, 136335 +github, multithreaded, advanced streaming, 136335 +github, multithreaded long distance mode, advanced streaming, 136335 +github, small window log, advanced streaming, 136335 +github, small hash log, advanced streaming, 135590 +github, small chain log, advanced streaming, 136341 +github, explicit params, advanced streaming, 137727 github, uncompressed literals, advanced streaming, 165915 -github, uncompressed literals optimal, advanced streaming, 156824 -github, huffman literals, advanced streaming, 142450 +github, uncompressed literals optimal, advanced streaming, 157227 +github, huffman literals, advanced streaming, 142465 github, multithreaded with advanced params, advanced streaming, 165915 -silesia, level -5, old streaming, 6882466 -silesia, level -3, old streaming, 6568358 -silesia, level -1, old streaming, 6183385 -silesia, level 0, old streaming, 4849491 -silesia, level 1, old streaming, 5314109 -silesia, level 3, old streaming, 4849491 -silesia, level 4, old streaming, 4786913 -silesia, level 5, old 
streaming, 4710178 -silesia, level 6, old streaming, 4659996 -silesia, level 7, old streaming, 4596234 -silesia, level 9, old streaming, 4543862 -silesia, level 13, old streaming, 4482073 -silesia, level 16, old streaming, 4377389 -silesia, level 19, old streaming, 4293262 -silesia, no source size, old streaming, 4849455 -silesia, uncompressed literals, old streaming, 4849491 -silesia, uncompressed literals optimal, old streaming, 4293262 -silesia, huffman literals, old streaming, 6183385 -silesia.tar, level -5, old streaming, 6982738 -silesia.tar, level -3, old streaming, 6641264 -silesia.tar, level -1, old streaming, 6190789 -silesia.tar, level 0, old streaming, 4861376 -silesia.tar, level 1, old streaming, 5336879 -silesia.tar, level 3, old streaming, 4861376 -silesia.tar, level 4, old streaming, 4799583 -silesia.tar, level 5, old streaming, 4722276 -silesia.tar, level 6, old streaming, 4672240 -silesia.tar, level 7, old streaming, 4606657 -silesia.tar, level 9, old streaming, 4554106 -silesia.tar, level 13, old streaming, 4491707 -silesia.tar, level 16, old streaming, 4381284 -silesia.tar, level 19, old streaming, 4281511 -silesia.tar, no source size, old streaming, 4861372 -silesia.tar, uncompressed literals, old streaming, 4861376 -silesia.tar, uncompressed literals optimal, old streaming, 4281511 -silesia.tar, huffman literals, old streaming, 6190789 +silesia, level -5, old streaming, 6882505 +silesia, level -3, old streaming, 6568376 +silesia, level -1, old streaming, 6183403 +silesia, level 0, old streaming, 4849552 +silesia, level 1, old streaming, 5314162 +silesia, level 3, old streaming, 4849552 +silesia, level 4, old streaming, 4786970 +silesia, level 5, old streaming, 4710237 +silesia, level 6, old streaming, 4660057 +silesia, level 7, old streaming, 4596295 +silesia, level 9, old streaming, 4543924 +silesia, level 13, old streaming, 4482135 +silesia, level 16, old streaming, 4377465 +silesia, level 19, old streaming, 4293330 +silesia, no source size, 
old streaming, 4849516 +silesia, uncompressed literals, old streaming, 4849552 +silesia, uncompressed literals optimal, old streaming, 4293330 +silesia, huffman literals, old streaming, 6183403 +silesia.tar, level -5, old streaming, 6982759 +silesia.tar, level -3, old streaming, 6641283 +silesia.tar, level -1, old streaming, 6190795 +silesia.tar, level 0, old streaming, 4861427 +silesia.tar, level 1, old streaming, 5336939 +silesia.tar, level 3, old streaming, 4861427 +silesia.tar, level 4, old streaming, 4799630 +silesia.tar, level 5, old streaming, 4722329 +silesia.tar, level 6, old streaming, 4672288 +silesia.tar, level 7, old streaming, 4606715 +silesia.tar, level 9, old streaming, 4554154 +silesia.tar, level 13, old streaming, 4491765 +silesia.tar, level 16, old streaming, 4381350 +silesia.tar, level 19, old streaming, 4281562 +silesia.tar, no source size, old streaming, 4861423 +silesia.tar, uncompressed literals, old streaming, 4861427 +silesia.tar, uncompressed literals optimal, old streaming, 4281562 +silesia.tar, huffman literals, old streaming, 6190795 github, level -5, old streaming, 205285 github, level -5 with dict, old streaming, 46718 github, level -3, old streaming, 190643 github, level -3 with dict, old streaming, 45395 github, level -1, old streaming, 175568 github, level -1 with dict, old streaming, 43170 -github, level 0, old streaming, 136311 +github, level 0, old streaming, 136335 github, level 0 with dict, old streaming, 41148 -github, level 1, old streaming, 142450 +github, level 1, old streaming, 142465 github, level 1 with dict, old streaming, 41682 -github, level 3, old streaming, 136311 +github, level 3, old streaming, 136335 github, level 3 with dict, old streaming, 41148 -github, level 4, old streaming, 136144 +github, level 4, old streaming, 136199 github, level 4 with dict, old streaming, 41251 -github, level 5, old streaming, 135106 +github, level 5, old streaming, 135121 github, level 5 with dict, old streaming, 38938 -github, 
level 6, old streaming, 135108 +github, level 6, old streaming, 135122 github, level 6 with dict, old streaming, 38632 -github, level 7, old streaming, 135108 -github, level 7 with dict, old streaming, 38766 -github, level 9, old streaming, 135108 -github, level 9 with dict, old streaming, 39326 -github, level 13, old streaming, 133717 -github, level 13 with dict, old streaming, 39716 -github, level 16, old streaming, 133717 +github, level 7, old streaming, 135122 +github, level 7 with dict, old streaming, 38771 +github, level 9, old streaming, 135122 +github, level 9 with dict, old streaming, 39332 +github, level 13, old streaming, 134064 +github, level 13 with dict, old streaming, 39743 +github, level 16, old streaming, 134064 github, level 16 with dict, old streaming, 37577 -github, level 19, old streaming, 133717 +github, level 19, old streaming, 134064 github, level 19 with dict, old streaming, 37576 -github, no source size, old streaming, 140631 -github, uncompressed literals, old streaming, 136311 -github, uncompressed literals optimal, old streaming, 133717 +github, no source size, old streaming, 140632 +github, uncompressed literals, old streaming, 136335 +github, uncompressed literals optimal, old streaming, 134064 github, huffman literals, old streaming, 175568 -silesia, level -5, old streaming advanced, 6882466 -silesia, level -3, old streaming advanced, 6568358 -silesia, level -1, old streaming advanced, 6183385 -silesia, level 0, old streaming advanced, 4849491 -silesia, level 1, old streaming advanced, 5314109 -silesia, level 3, old streaming advanced, 4849491 -silesia, level 4, old streaming advanced, 4786913 -silesia, level 5, old streaming advanced, 4710178 -silesia, level 6, old streaming advanced, 4659996 -silesia, level 7, old streaming advanced, 4596234 -silesia, level 9, old streaming advanced, 4543862 -silesia, level 13, old streaming advanced, 4482073 -silesia, level 16, old streaming advanced, 4377389 -silesia, level 19, old streaming 
advanced, 4293262 -silesia, no source size, old streaming advanced, 4849455 -silesia, long distance mode, old streaming advanced, 4849491 -silesia, multithreaded, old streaming advanced, 4849491 -silesia, multithreaded long distance mode, old streaming advanced, 4849491 -silesia, small window log, old streaming advanced, 7105714 -silesia, small hash log, old streaming advanced, 6554898 -silesia, small chain log, old streaming advanced, 4931093 -silesia, explicit params, old streaming advanced, 4797048 -silesia, uncompressed literals, old streaming advanced, 4849491 -silesia, uncompressed literals optimal, old streaming advanced, 4293262 -silesia, huffman literals, old streaming advanced, 6183385 -silesia, multithreaded with advanced params, old streaming advanced, 4849491 -silesia.tar, level -5, old streaming advanced, 6982738 -silesia.tar, level -3, old streaming advanced, 6641264 -silesia.tar, level -1, old streaming advanced, 6190789 -silesia.tar, level 0, old streaming advanced, 4861376 -silesia.tar, level 1, old streaming advanced, 5336879 -silesia.tar, level 3, old streaming advanced, 4861376 -silesia.tar, level 4, old streaming advanced, 4799583 -silesia.tar, level 5, old streaming advanced, 4722276 -silesia.tar, level 6, old streaming advanced, 4672240 -silesia.tar, level 7, old streaming advanced, 4606657 -silesia.tar, level 9, old streaming advanced, 4554106 -silesia.tar, level 13, old streaming advanced, 4491707 -silesia.tar, level 16, old streaming advanced, 4381284 -silesia.tar, level 19, old streaming advanced, 4281511 -silesia.tar, no source size, old streaming advanced, 4861372 -silesia.tar, long distance mode, old streaming advanced, 4861376 -silesia.tar, multithreaded, old streaming advanced, 4861376 -silesia.tar, multithreaded long distance mode, old streaming advanced, 4861376 -silesia.tar, small window log, old streaming advanced, 7112151 -silesia.tar, small hash log, old streaming advanced, 6587834 -silesia.tar, small chain log, old streaming 
advanced, 4943271 -silesia.tar, explicit params, old streaming advanced, 4808570 -silesia.tar, uncompressed literals, old streaming advanced, 4861376 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4281511 -silesia.tar, huffman literals, old streaming advanced, 6190789 -silesia.tar, multithreaded with advanced params, old streaming advanced, 4861376 +silesia, level -5, old streaming advanced, 6882505 +silesia, level -3, old streaming advanced, 6568376 +silesia, level -1, old streaming advanced, 6183403 +silesia, level 0, old streaming advanced, 4849552 +silesia, level 1, old streaming advanced, 5314162 +silesia, level 3, old streaming advanced, 4849552 +silesia, level 4, old streaming advanced, 4786970 +silesia, level 5, old streaming advanced, 4710237 +silesia, level 6, old streaming advanced, 4660057 +silesia, level 7, old streaming advanced, 4596295 +silesia, level 9, old streaming advanced, 4543924 +silesia, level 13, old streaming advanced, 4482135 +silesia, level 16, old streaming advanced, 4377465 +silesia, level 19, old streaming advanced, 4293330 +silesia, no source size, old streaming advanced, 4849516 +silesia, long distance mode, old streaming advanced, 4849552 +silesia, multithreaded, old streaming advanced, 4849552 +silesia, multithreaded long distance mode, old streaming advanced, 4849552 +silesia, small window log, old streaming advanced, 7112062 +silesia, small hash log, old streaming advanced, 6555021 +silesia, small chain log, old streaming advanced, 4931148 +silesia, explicit params, old streaming advanced, 4797100 +silesia, uncompressed literals, old streaming advanced, 4849552 +silesia, uncompressed literals optimal, old streaming advanced, 4293330 +silesia, huffman literals, old streaming advanced, 6183403 +silesia, multithreaded with advanced params, old streaming advanced, 4849552 +silesia.tar, level -5, old streaming advanced, 6982759 +silesia.tar, level -3, old streaming advanced, 6641283 +silesia.tar, level -1, old 
streaming advanced, 6190795 +silesia.tar, level 0, old streaming advanced, 4861427 +silesia.tar, level 1, old streaming advanced, 5336939 +silesia.tar, level 3, old streaming advanced, 4861427 +silesia.tar, level 4, old streaming advanced, 4799630 +silesia.tar, level 5, old streaming advanced, 4722329 +silesia.tar, level 6, old streaming advanced, 4672288 +silesia.tar, level 7, old streaming advanced, 4606715 +silesia.tar, level 9, old streaming advanced, 4554154 +silesia.tar, level 13, old streaming advanced, 4491765 +silesia.tar, level 16, old streaming advanced, 4381350 +silesia.tar, level 19, old streaming advanced, 4281562 +silesia.tar, no source size, old streaming advanced, 4861423 +silesia.tar, long distance mode, old streaming advanced, 4861427 +silesia.tar, multithreaded, old streaming advanced, 4861427 +silesia.tar, multithreaded long distance mode, old streaming advanced, 4861427 +silesia.tar, small window log, old streaming advanced, 7118772 +silesia.tar, small hash log, old streaming advanced, 6587952 +silesia.tar, small chain log, old streaming advanced, 4943312 +silesia.tar, explicit params, old streaming advanced, 4808608 +silesia.tar, uncompressed literals, old streaming advanced, 4861427 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4281562 +silesia.tar, huffman literals, old streaming advanced, 6190795 +silesia.tar, multithreaded with advanced params, old streaming advanced, 4861427 github, level -5, old streaming advanced, 216734 github, level -5 with dict, old streaming advanced, 49562 github, level -3, old streaming advanced, 192160 github, level -3 with dict, old streaming advanced, 44956 github, level -1, old streaming advanced, 181108 github, level -1 with dict, old streaming advanced, 42383 -github, level 0, old streaming advanced, 141090 +github, level 0, old streaming advanced, 141104 github, level 0 with dict, old streaming advanced, 41113 -github, level 1, old streaming advanced, 143682 +github, level 1, old 
streaming advanced, 143692 github, level 1 with dict, old streaming advanced, 42430 -github, level 3, old streaming advanced, 141090 +github, level 3, old streaming advanced, 141104 github, level 3 with dict, old streaming advanced, 41113 -github, level 4, old streaming advanced, 141090 +github, level 4, old streaming advanced, 141104 github, level 4 with dict, old streaming advanced, 41084 -github, level 5, old streaming advanced, 139391 +github, level 5, old streaming advanced, 139399 github, level 5 with dict, old streaming advanced, 39159 -github, level 6, old streaming advanced, 139394 +github, level 6, old streaming advanced, 139402 github, level 6 with dict, old streaming advanced, 38749 -github, level 7, old streaming advanced, 138675 +github, level 7, old streaming advanced, 138676 github, level 7 with dict, old streaming advanced, 38746 -github, level 9, old streaming advanced, 138675 -github, level 9 with dict, old streaming advanced, 38987 -github, level 13, old streaming advanced, 138675 -github, level 13 with dict, old streaming advanced, 39724 -github, level 16, old streaming advanced, 138675 -github, level 16 with dict, old streaming advanced, 40771 -github, level 19, old streaming advanced, 133717 +github, level 9, old streaming advanced, 138676 +github, level 9 with dict, old streaming advanced, 38993 +github, level 13, old streaming advanced, 138676 +github, level 13 with dict, old streaming advanced, 39731 +github, level 16, old streaming advanced, 138676 +github, level 16 with dict, old streaming advanced, 40789 +github, level 19, old streaming advanced, 134064 github, level 19 with dict, old streaming advanced, 37576 -github, no source size, old streaming advanced, 140631 -github, long distance mode, old streaming advanced, 141090 -github, multithreaded, old streaming advanced, 141090 -github, multithreaded long distance mode, old streaming advanced, 141090 -github, small window log, old streaming advanced, 141090 -github, small hash log, old 
streaming advanced, 141578 -github, small chain log, old streaming advanced, 139258 -github, explicit params, old streaming advanced, 140930 -github, uncompressed literals, old streaming advanced, 141090 -github, uncompressed literals optimal, old streaming advanced, 133717 +github, no source size, old streaming advanced, 140632 +github, long distance mode, old streaming advanced, 141104 +github, multithreaded, old streaming advanced, 141104 +github, multithreaded long distance mode, old streaming advanced, 141104 +github, small window log, old streaming advanced, 141104 +github, small hash log, old streaming advanced, 141597 +github, small chain log, old streaming advanced, 139275 +github, explicit params, old streaming advanced, 140937 +github, uncompressed literals, old streaming advanced, 141104 +github, uncompressed literals optimal, old streaming advanced, 134064 github, huffman literals, old streaming advanced, 181108 -github, multithreaded with advanced params, old streaming advanced, 141090 +github, multithreaded with advanced params, old streaming advanced, 141104 github, level -5 with dict, old streaming cdcit, 46718 github, level -3 with dict, old streaming cdcit, 45395 github, level -1 with dict, old streaming cdcit, 43170 @@ -615,9 +615,9 @@ github, level 3 with dict, old stre github, level 4 with dict, old streaming cdcit, 41251 github, level 5 with dict, old streaming cdcit, 38938 github, level 6 with dict, old streaming cdcit, 38632 -github, level 7 with dict, old streaming cdcit, 38766 -github, level 9 with dict, old streaming cdcit, 39326 -github, level 13 with dict, old streaming cdcit, 39716 +github, level 7 with dict, old streaming cdcit, 38771 +github, level 9 with dict, old streaming cdcit, 39332 +github, level 13 with dict, old streaming cdcit, 39743 github, level 16 with dict, old streaming cdcit, 37577 github, level 19 with dict, old streaming cdcit, 37576 github, level -5 with dict, old streaming advanced cdict, 49562 @@ -630,7 +630,7 @@ 
github, level 4 with dict, old stre github, level 5 with dict, old streaming advanced cdict, 39158 github, level 6 with dict, old streaming advanced cdict, 38748 github, level 7 with dict, old streaming advanced cdict, 38744 -github, level 9 with dict, old streaming advanced cdict, 38986 -github, level 13 with dict, old streaming advanced cdict, 39724 -github, level 16 with dict, old streaming advanced cdict, 40771 +github, level 9 with dict, old streaming advanced cdict, 38992 +github, level 13 with dict, old streaming advanced cdict, 39731 +github, level 16 with dict, old streaming advanced cdict, 40789 github, level 19 with dict, old streaming advanced cdict, 37576 From b5cddda073ccdfadbee022d452b34d3eaf7d30ed Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 15:59:03 -0400 Subject: [PATCH 07/36] Add new definition of ZSTD_d_forceSkipChecksum in experimental section --- lib/zstd.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index 2cb82d7a..76afd183 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -528,11 +528,13 @@ typedef enum { * At the time of this writing, they include : * ZSTD_d_format * ZSTD_d_stableOutBuffer + * ZSTD_d_forceSkipChecksum * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001 + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002 } ZSTD_dParameter; @@ -1690,6 +1692,16 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS */ #define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 +/* ZSTD_d_forceSkipChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless. + * of whether or not checksumming was specified during decompression. 
This offers some + * slight performance benefits, and may be useful for debugging. + */ +#define ZSTD_d_forceSkipChecksum ZSTD_d_experimentalParam3 + /*! ZSTD_DCtx_setFormat() : * Instruct the decoder context about what kind of data to decode next. * This instruction is mandatory to decode data without a fully-formed header, From 2f391243425132001d30615d64bfb953cabb9631 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 16:23:39 -0400 Subject: [PATCH 08/36] Rename to ZSTD_d_forceIgnoreChecksum, add to DCtx, add function to set the advanced param --- lib/decompress/zstd_decompress.c | 15 +++++++++++++-- lib/decompress/zstd_decompress_internal.h | 6 ++++++ lib/zstd.h | 6 +++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index be5c7cfc..82c24f32 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1397,6 +1397,11 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } +size_t ZSTD_DCtx_setForceSkipChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceSkipChecksum, format); +} + ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) { ZSTD_bounds bounds = { 0, 0, 0 }; @@ -1414,6 +1419,9 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.lowerBound = (int)ZSTD_obm_buffered; bounds.upperBound = (int)ZSTD_obm_stable; return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; default:; } bounds.error = ERROR(parameter_unsupported); @@ -1453,6 +1461,9 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); dctx->outBufferMode = (ZSTD_outBufferMode_e)value; return 0; + case ZSTD_d_forceIgnoreChecksum: + 
CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_ignoreChecksumMode_e)value; default:; } RETURN_ERROR(parameter_unsupported, ""); @@ -1524,7 +1535,7 @@ static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const ne { if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) zds->oversizedDuration++; - else + else zds->oversizedDuration = 0; } @@ -1731,7 +1742,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); - + if (tooSmall || tooLarge) { size_t const bufferSize = neededInBuffSize + neededOutBuffSize; DEBUGLOG(4, "inBuff : from %u to %u", diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 9ad96c55..0481ce64 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -100,6 +100,11 @@ typedef enum { ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ } ZSTD_outBufferMode_e; +typedef enum { + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_ignoreChecksumMode_e; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -122,6 +127,7 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; ZSTD_format_e format; + ZSTD_ignoreChecksumMode_e forceIgnoreChecksum; /* if enabled, will ignore checksums in compressed frame */ const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; diff --git a/lib/zstd.h b/lib/zstd.h index 76afd183..d37c5996 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -528,7 +528,7 @@ typedef enum { * At the time of this writing, they include : * ZSTD_d_format * ZSTD_d_stableOutBuffer - * ZSTD_d_forceSkipChecksum + * ZSTD_d_forceIgnoreChecksum * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. 
note : never ever use experimentalParam? names directly */ @@ -1692,7 +1692,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS */ #define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 -/* ZSTD_d_forceSkipChecksum +/* ZSTD_d_forceIgnoreChecksum * Experimental parameter. * Default is 0 == disabled. Set to 1 to enable * @@ -1700,7 +1700,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * of whether or not checksumming was specified during decompression. This offers some * slight performance benefits, and may be useful for debugging. */ -#define ZSTD_d_forceSkipChecksum ZSTD_d_experimentalParam3 +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 /*! ZSTD_DCtx_setFormat() : * Instruct the decoder context about what kind of data to decode next. From 6a8dbdcd1f0e68782d244e95b0e14ba6fe307fb4 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 16:46:46 -0400 Subject: [PATCH 09/36] Modify decompression loop to ignore checksums if flag is enabled --- lib/decompress/zstd_decompress.c | 9 +++++---- lib/decompress/zstd_decompress_internal.h | 4 ++-- tests/fuzzer.c | 3 +++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 82c24f32..73ed7e1a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -114,6 +114,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); dctx->outBufferMode = ZSTD_obm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -661,7 +662,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) + if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum)
XXH64_update(&dctx->xxhState, op, decodedSize); if (decodedSize != 0) op += decodedSize; @@ -675,7 +676,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, corruption_detected, ""); } - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) { /* Frame content checksum verification */ U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); U32 checkRead; RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); @@ -1399,7 +1400,7 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) size_t ZSTD_DCtx_setForceSkipChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceSkipChecksum, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, format); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -1463,7 +1464,7 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value return 0; case ZSTD_d_forceIgnoreChecksum: CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); - dctx->forceIgnoreChecksum = (ZSTD_ignoreChecksumMode_e)value; + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; default:; } RETURN_ERROR(parameter_unsupported, ""); diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 0481ce64..6f1dda20 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -103,7 +103,7 @@ typedef enum { typedef enum { ZSTD_d_validateChecksum = 0, ZSTD_d_ignoreChecksum = 1 -} ZSTD_ignoreChecksumMode_e; +} ZSTD_forceIgnoreChecksum_e; struct ZSTD_DCtx_s { @@ -127,7 +127,7 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; ZSTD_format_e format; - ZSTD_ignoreChecksumMode_e forceIgnoreChecksum; /* if enabled, will ignore checksums in compressed frame */ + ZSTD_forceIgnoreChecksum_e 
forceIgnoreChecksum; /* if == 1, will ignore checksums in compressed frame */ const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 8ac2864f..cb68ca5d 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2354,6 +2354,9 @@ static int basicUnitTests(U32 const seed, double compressibility) { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (ZSTD_format_e)888); if (!ZSTD_isError(sr)) goto _output_error; } + { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, (ZSTD_forceIgnoreChecksum_e)888); + if (!ZSTD_isError(sr)) goto _output_error; + } DISPLAYLEVEL(3, "OK \n"); ZSTD_freeDCtx(dctx); From 08d3567ba8666873b29a4b8adfea24cfd68cac1e Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 16:51:43 -0400 Subject: [PATCH 10/36] Add function prototype --- lib/decompress/zstd_decompress.c | 2 +- lib/zstd.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 73ed7e1a..de2314db 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1398,7 +1398,7 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } -size_t ZSTD_DCtx_setForceSkipChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format) +size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format) { return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, format); } diff --git a/lib/zstd.h b/lib/zstd.h index d37c5996..c3a34387 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1709,6 +1709,11 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +/*! 
ZSTD_DCtx_setForceIgnoreChecksum() : + * Instruct the decoder context to ignore checksums in compressed frame. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format); + /*! ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), * but using only integral types as arguments. From 1b34b15e6bd1a2148a2338535c7ad764ebb39d7c Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 17:49:30 -0400 Subject: [PATCH 11/36] Adding CLI capability to invoke decompression with no checksum --- programs/fileio.c | 4 ++++ tests/fuzzer.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index d5b8a7d1..491740c9 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1754,6 +1754,10 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi if (ress.dctx==NULL) EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); + if (!prefs->checksumFlag) { + DISPLAY("Ignoring checksums..."); + CHECK( ZSTD_DCtx_setForceIgnoreChecksum(ress.dctx, 1 /* ZSTD_d_ignoreChecksum */)); + } ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index cb68ca5d..8ac2864f 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2354,9 +2354,6 @@ static int basicUnitTests(U32 const seed, double compressibility) { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (ZSTD_format_e)888); if (!ZSTD_isError(sr)) goto _output_error; } - { size_t const sr = ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, (ZSTD_forceIgnoreChecksum_e)888); - if (!ZSTD_isError(sr)) goto _output_error; - } DISPLAYLEVEL(3, "OK \n"); ZSTD_freeDCtx(dctx); From 47685ac8560b0700a144a3ba7a7bc7a52d6859f9 
Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 21 Aug 2020 18:18:53 -0400 Subject: [PATCH 12/36] Move enum into zstd.h, and fix pesky switch() logic --- lib/decompress/zstd_decompress.c | 6 ++++-- lib/decompress/zstd_decompress_internal.h | 5 ----- lib/zstd.h | 7 ++++++- programs/fileio.c | 4 +--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index de2314db..0177f1d4 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1398,9 +1398,9 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } -size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format) +size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e shouldIgnore) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, shouldIgnore); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -1423,6 +1423,7 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) case ZSTD_d_forceIgnoreChecksum: bounds.lowerBound = (int)ZSTD_d_validateChecksum; bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -1465,6 +1466,7 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value case ZSTD_d_forceIgnoreChecksum: CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; default:; } RETURN_ERROR(parameter_unsupported, ""); diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 6f1dda20..17802380 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -100,11 +100,6 @@ typedef enum { ZSTD_obm_stable = 1 /* ZSTD_outBuffer is 
stable */ } ZSTD_outBufferMode_e; -typedef enum { - ZSTD_d_validateChecksum = 0, - ZSTD_d_ignoreChecksum = 1 -} ZSTD_forceIgnoreChecksum_e; - struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; diff --git a/lib/zstd.h b/lib/zstd.h index c3a34387..c5dd2e94 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1162,6 +1162,11 @@ typedef enum { * Decoder cannot recognise automatically this format, requiring this instruction. */ } ZSTD_format_e; +typedef enum { + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + typedef enum { /* Note: this enum and the behavior it controls are effectively internal * implementation details of the compressor. They are expected to continue @@ -1712,7 +1717,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_DCtx_setForceIgnoreChecksum() : * Instruct the decoder context to ignore checksums in compressed frame. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e format); /*! 
ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), diff --git a/programs/fileio.c b/programs/fileio.c index 491740c9..5349bc6b 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1754,10 +1754,8 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi if (ress.dctx==NULL) EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); - if (!prefs->checksumFlag) { - DISPLAY("Ignoring checksums..."); + if (!prefs->checksumFlag) CHECK( ZSTD_DCtx_setForceIgnoreChecksum(ress.dctx, 1 /* ZSTD_d_ignoreChecksum */)); - } ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); From 20eb0958822af10440cfd689925e689fd041caeb Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Sat, 22 Aug 2020 13:26:33 -0400 Subject: [PATCH 13/36] Added unit test to fuzzer.c, changed definition param name --- lib/decompress/zstd_decompress.c | 20 ++++++++++--------- lib/zstd.h | 2 +- tests/fuzzer.c | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 0177f1d4..21e724d7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -676,12 +676,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, corruption_detected, ""); } - if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + if (!dctx->forceIgnoreChecksum) { + U32 const 
checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } ip += 4; remainingSrcSize -= 4; } @@ -978,7 +980,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; /* Stay on the same stage until we are finished streaming the block. */ @@ -1398,9 +1400,9 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } -size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e shouldIgnore) +size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e value) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, shouldIgnore); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, value); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) diff --git a/lib/zstd.h b/lib/zstd.h index c5dd2e94..9f9f991f 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1717,7 +1717,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_DCtx_setForceIgnoreChecksum() : * Instruct the decoder context to ignore checksums in compressed frame. * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
*/ -ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e format); +ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e value); /*! ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 8ac2864f..4f0676ab 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -544,6 +544,40 @@ static int basicUnitTests(U32 const seed, double compressibility) if (ZSTD_getErrorCode(r) != ZSTD_error_dstSize_tooSmall) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress with corrupted checksum : ", testNb++); + { /* create compressed buffer with checksumming enabled */ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); + CHECK_VAR(cSize, ZSTD_compress2(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize) ); + ZSTD_freeCCtx(cctx); + } + { /* copy the compressed buffer and corrupt the checksum */ + char* corruptedChecksumCompressedBuffer = (char*)malloc(cSize); + if (!corruptedChecksumCompressedBuffer) { + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } + + memcpy(corruptedChecksumCompressedBuffer, compressedBuffer, cSize); + corruptedChecksumCompressedBuffer[cSize-1] += 1; + size_t r = ZSTD_decompress(decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_checksum_wrong) goto _output_error; + + ZSTD_DCtx* dctx = ZSTD_createDCtx(); assert(dctx != NULL); + CHECK_Z(ZSTD_DCtx_setForceIgnoreChecksum(dctx, ZSTD_d_ignoreChecksum)); + r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); + if (ZSTD_isError(r)) goto _output_error; + + ZSTD_freeDCtx(dctx); + free(corruptedChecksumCompressedBuffer); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : 
ZSTD_decompressBound test with content size missing : ", testNb++); { /* create compressed buffer with content size missing */ ZSTD_CCtx* const cctx = ZSTD_createCCtx(); From e3f5f9658a46efd6619e50cae857fa3903c32059 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Sat, 22 Aug 2020 16:05:40 -0400 Subject: [PATCH 14/36] Added CLI tests for --no-check, fixed ignore checksum logic --- lib/decompress/zstd_decompress.c | 13 ++++++++----- programs/fileio.c | 2 +- tests/playTests.sh | 5 +++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 21e724d7..6999966a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -447,7 +447,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) XXH64_reset(&dctx->xxhState, 0); return 0; } @@ -1010,10 +1010,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + { + if (!dctx->forceIgnoreChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; diff --git a/programs/fileio.c 
b/programs/fileio.c index 5349bc6b..38f59a73 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1755,7 +1755,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); if (!prefs->checksumFlag) - CHECK( ZSTD_DCtx_setForceIgnoreChecksum(ress.dctx, 1 /* ZSTD_d_ignoreChecksum */)); + CHECK( ZSTD_DCtx_setForceIgnoreChecksum(ress.dctx, ZSTD_d_ignoreChecksum)); ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/tests/playTests.sh b/tests/playTests.sh index b7bfa76c..3d099efb 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -260,6 +260,11 @@ zstd tmp -c --compress-literals --fast=1 | zstd -t zstd tmp -c --compress-literals -19 | zstd -t zstd -b --fast=1 -i0e1 tmp --compress-literals zstd -b --fast=1 -i0e1 tmp --no-compress-literals +println "test: --no-check for decompression" +zstd -f tmp --check +zstd -f tmp -o tmp1.zst --no-check +zstd -d -f tmp.zst --no-check +zstd -d -f tmp1.zst --no-check println "\n===> zstdgrep tests" ln -sf "$ZSTD_BIN" zstdcat From ffaa0df76d509c5243103b99b049e57ad85d48c2 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Sat, 22 Aug 2020 16:58:41 -0400 Subject: [PATCH 15/36] Document change in CLI for --no-check during decompression in --help menu --- lib/decompress/zstd_decompress.c | 4 ++-- lib/zstd.h | 4 ++-- programs/zstdcli.c | 1 + tests/fuzzer.c | 9 +++++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6999966a..d0263810 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1543,7 +1543,7 @@ static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const ne { if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) 
zds->oversizedDuration++; - else + else zds->oversizedDuration = 0; } @@ -1750,7 +1750,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); - + if (tooSmall || tooLarge) { size_t const bufferSize = neededInBuffSize + neededOutBuffSize; DEBUGLOG(4, "inBuff : from %u to %u", diff --git a/lib/zstd.h b/lib/zstd.h index 9f9f991f..572f5063 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1701,8 +1701,8 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * Experimental parameter. * Default is 0 == disabled. Set to 1 to enable * - * Tells the decompressor to skip checksum validation during decompression, regardless. - * of whether or not checksumming was specified during decompression. This offers some + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some * slight performance benefits, and may be useful for debugging. 
*/ #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 1aea63dc..b7e7d1fd 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -196,6 +196,7 @@ static void usage_advanced(const char* programName) DISPLAYOUT( " -l : print information about zstd compressed files \n"); DISPLAYOUT( "--test : test compressed file integrity \n"); DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n"); + DISPLAYOUT( "--no-check : disable validation of checksums in compressed frame \n"); # if ZSTD_SPARSE_DEFAULT DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n"); # else diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 4f0676ab..7f8d9f65 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -554,8 +554,10 @@ static int basicUnitTests(U32 const seed, double compressibility) ZSTD_freeCCtx(cctx); } { /* copy the compressed buffer and corrupt the checksum */ - char* corruptedChecksumCompressedBuffer = (char*)malloc(cSize); - if (!corruptedChecksumCompressedBuffer) { + size_t r; + char* const corruptedChecksumCompressedBuffer = (char*)malloc(cSize); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (!corruptedChecksumCompressedBuffer || !dctx) { DISPLAY("Not enough memory, aborting\n"); testResult = 1; goto _end; @@ -563,11 +565,10 @@ static int basicUnitTests(U32 const seed, double compressibility) memcpy(corruptedChecksumCompressedBuffer, compressedBuffer, cSize); corruptedChecksumCompressedBuffer[cSize-1] += 1; - size_t r = ZSTD_decompress(decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); + r = ZSTD_decompress(decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); if (!ZSTD_isError(r)) goto _output_error; if (ZSTD_getErrorCode(r) != ZSTD_error_checksum_wrong) goto _output_error; - ZSTD_DCtx* dctx = ZSTD_createDCtx(); assert(dctx != NULL); CHECK_Z(ZSTD_DCtx_setForceIgnoreChecksum(dctx, ZSTD_d_ignoreChecksum)); r = 
ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); if (ZSTD_isError(r)) goto _output_error; From 8def0e5fd373109c2f2a441fd1c0b90d27ce5e4f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 24 Aug 2020 12:24:45 -0700 Subject: [PATCH 16/36] Fix up code after reading through --- lib/common/entropy_common.c | 10 +- lib/common/fse.h | 4 +- lib/compress/fse_compress.c | 14 + tests/Makefile | 4 - tests/fullbench.c | 58 +++ tests/smallbench.c | 836 ------------------------------------ 6 files changed, 82 insertions(+), 844 deletions(-) delete mode 100644 tests/smallbench.c diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 2cc2b4dc..e2ec694f 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -77,7 +77,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne int previous0 = 0; if (hbSize < 8) { - /* This function only works when hbSize >= 4 */ + /* This function only works when hbSize >= 8 */ char buffer[8] = {0}; memcpy(buffer, headerBuffer, hbSize); { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, @@ -86,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne if (countSize > hbSize) return ERROR(corruption_detected); return countSize; } } - assert(hbSize >= 4); + assert(hbSize >= 8); /* init */ memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ @@ -102,6 +102,11 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne for (;;) { if (previous0) { + /* Count the number of repeats. Each time the + * 2-bit repeat code is 0b11 there is another + * repeat. + * Avoid UB by setting the high bit to 1. 
+ */ int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; while (repeats >= 12) { charnum += 3 * 12; @@ -118,6 +123,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne bitStream >>= 2 * repeats; bitCount += 2 * repeats; + /* Add the final repeat which isn't 0b11. */ charnum += bitStream & 3; bitCount += 2; diff --git a/lib/common/fse.h b/lib/common/fse.h index 2c0d9ae1..e9a071c4 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -318,7 +318,7 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. */ #define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? 
(1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); @@ -335,7 +335,7 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); */ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); -#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1 << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 2900091e..d02677fc 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -475,6 +475,20 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, else normalizedCounter[largest] += (short)stillToDistribute; } +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { + size_t lhSize, litSize, litCSize; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: 
/* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + return HUF_readDTableX1_wksp_bmi2( + dctx->entropy.hufTable, + istart+lhSize, litCSize, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + } + } + } + return 0; +} + +static size_t local_ZSTD_decodeLiteralsHeader(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) +{ + (void)dst, (void)dstSize, (void)src, (void)srcSize; + return ZSTD_decodeLiteralsHeader(g_zdc, buff2, g_cSize); +} #endif static ZSTD_CStream* g_cstream= NULL; @@ -358,6 +411,9 @@ static int benchMem(unsigned benchNb, case 13: benchFunction = local_ZSTD_decompressContinue; benchName = "decompressContinue"; break; + case 30: + benchFunction = local_ZSTD_decodeLiteralsHeader; benchName = "decodeLiteralsHeader"; + break; case 31: benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "decodeLiteralsBlock"; break; @@ -446,6 +502,8 @@ static int benchMem(unsigned benchNb, case 13 : g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel); break; + case 30: /* ZSTD_decodeLiteralsHeader */ + /* fall-through */ case 31: /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */ { size_t frameHeaderSize; g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel); diff --git a/tests/smallbench.c b/tests/smallbench.c deleted file mode 100644 index 24ccc45a..00000000 --- a/tests/smallbench.c +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright (c) 2015-2020, Yann Collet, 
Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/*_************************************ -* Includes -**************************************/ -#include "util.h" /* Compiler options, UTIL_GetFileSize */ -#include /* malloc */ -#include /* fprintf, fopen, ftello64 */ -#include - -#include "mem.h" /* U32 */ -#include "zstd_internal.h" /* ZSTD_decodeSeqHeaders, ZSTD_blockHeaderSize, ZSTD_getcBlockSize, blockType_e, KB, MB */ -#include "decompress/zstd_decompress_internal.h" /* ZSTD_DCtx internals */ -#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */ -#include "zstd.h" /* ZSTD_versionString */ -#include "util.h" /* time functions */ -#include "timefn.h" /* time functions */ -#include "datagen.h" -#include "benchfn.h" /* CustomBench */ -#include "benchzstd.h" /* MB_UNIT */ - - -/*_************************************ -* Constants -**************************************/ -#define PROGRAM_DESCRIPTION "Zstandard small blocks benchmark" -#define AUTHOR "Nick Terrell" -#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_versionString(), (int)(sizeof(void*)*8), AUTHOR, __DATE__ - -#define NBLOOPS 6 -#define TIMELOOP_S 2 - -#define MAX_MEM (1984 MB) - -#define DEFAULT_CLEVEL 1 - -#define COMPRESSIBILITY_DEFAULT 0.50 -static const size_t kSampleSizeDefault = 10000000; - -#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ - - -/*_************************************ -* Macros -**************************************/ -#define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) - -#define CONTROL(c) { if (!(c)) { DISPLAY("%s:%d:%s: CONTROL failed: %s \n", __FILE__, __LINE__, __func__, #c); abort(); } } /* like assert(), but cannot be disabled */ - -/*_************************************ -* Benchmark Parameters -**************************************/ -static unsigned g_nbIterations = NBLOOPS; - - -/*_******************************************************* -* Private functions -*********************************************************/ -static size_t BMK_findMaxMem(U64 requiredMem) -{ - size_t const step = 64 MB; - void* testmem = NULL; - - requiredMem = (((requiredMem >> 26) + 1) << 26); - if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; - - requiredMem += step; - do { - testmem = malloc ((size_t)requiredMem); - requiredMem -= step; - } while (!testmem); - - free (testmem); - return (size_t) requiredMem; -} - - -/*_******************************************************* -* Benchmark wrappers -*********************************************************/ -typedef struct { - BYTE const* begin; - BYTE const* end; - size_t uncompressedSize; -} block_t; - -typedef struct { - size_t numBlocks; - block_t blocks[]; -} blocks_t; - -static size_t block_getSize(block_t block) { - return (size_t)(block.end - block.begin); -} - -static size_t compressBlockBound(size_t srcSize, size_t blockSize) -{ - size_t const blockBound = ZSTD_compressBound(blockSize); - return blockBound * (srcSize + blockSize - 1) / blockSize; -} - -static blocks_t* compressBlocks(ZSTD_CCtx* cctx, void* dst, size_t dstSize, void const* src, size_t srcSize, size_t blockSize) -{ - uint8_t* op = (uint8_t*)dst; - uint8_t* const oend = op + dstSize; - uint8_t const* ip = (uint8_t const*)src; - uint8_t const* const iend = ip + srcSize; - size_t const numBlocks = (srcSize + blockSize - 1) / blockSize; - blocks_t* const blocks = (blocks_t*)malloc(sizeof(blocks_t) + numBlocks * sizeof(block_t)); - CONTROL(blocks != NULL); - - blocks->numBlocks = numBlocks; - 
for (size_t i = 0; i < numBlocks; ++i) { - size_t const isize = MIN(blockSize, (size_t)(iend - ip)); - size_t const cBlockSize = ZSTD_compress2(cctx, op, (size_t)(oend - op), ip, isize); - CONTROL(!ZSTD_isError(cBlockSize)); - CONTROL(isize > 0); - blocks->blocks[i].begin = op; - blocks->blocks[i].end = op + cBlockSize; - blocks->blocks[i].uncompressedSize = isize; - ip += isize; - op += cBlockSize; - } - CONTROL(ip == iend); - - return blocks; -} - -static void skipToLiterals(blocks_t* blocks) -{ - size_t b; - size_t outBlock = 0; - for (b = 0; b < blocks->numBlocks; ++b) { - block_t block = blocks->blocks[b]; - /* Skip frame header */ - { - size_t const fhSize = ZSTD_frameHeaderSize(block.begin, block_getSize(block)); - CONTROL(!ZSTD_isError(fhSize)); - block.begin += fhSize; - } - /* Truncate to end of first block and skip uncompressed blocks */ - { - blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(block.begin, block_getSize(block), &bp); - CONTROL(!ZSTD_isError(cBlockSize)); - if (bp.blockType != bt_compressed) { - /* Don't write the output block */ - continue; - } - /* End of first block */ - block.end = block.begin + ZSTD_blockHeaderSize + cBlockSize; - } - /* Skip block header */ - block.begin += ZSTD_blockHeaderSize; - /* Write the output block */ - blocks->blocks[outBlock++] = block; - } - CONTROL(outBlock <= blocks->numBlocks); - blocks->numBlocks = outBlock; -} - -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); -static void skipToSequences(blocks_t* blocks, ZSTD_DCtx* dctx) -{ - skipToLiterals(blocks); - size_t b; - for (b = 0; b < blocks->numBlocks; ++b) { - block_t* const block = &blocks->blocks[b]; - CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); - CONTROL(block->begin < block->end); - { - size_t const litSize = ZSTD_decodeLiteralsBlock(dctx, block->begin, block_getSize(*block)); - CONTROL(!ZSTD_isError(litSize)); - block->begin += litSize; - } - CONTROL(block->begin < block->end); - } -} - 
-static size_t totalUncompressedSize(blocks_t const* blocks) -{ - size_t total = 0; - size_t b; - for (b = 0; b < blocks->numBlocks; ++b) { - total += blocks->blocks[b].uncompressedSize; - } - return total; -} - -FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src, size_t srcSize) -{ - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); - { - BYTE const* istart = (BYTE const*)src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - if (litEncType == set_compressed) { - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); - size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - return HUF_readDTableX1_wksp_bmi2( - dctx->entropy.hufTable, - istart+lhSize, litCSize, - dctx->workspace, sizeof(dctx->workspace), - dctx->bmi2); - } - } - return 0; -} - -static void benchmark_ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, blocks_t const* blocks) -{ - size_t const numBlocks = blocks->numBlocks; - size_t b; - CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); - for (b = 0; b < numBlocks; ++b) { - block_t const block = blocks->blocks[b]; - size_t const ret = ZSTD_decodeLiteralsHeader(dctx, block.begin, 
block_getSize(block)); - CONTROL(!ZSTD_isError(ret)); - } -} - -static void benchmark_ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, blocks_t const* blocks) -{ - size_t const numBlocks = blocks->numBlocks; - size_t b; - CONTROL(!ZSTD_isError(ZSTD_decompressBegin(dctx))); - for (b = 0; b < numBlocks; ++b) { - block_t const block = blocks->blocks[b]; - int nbSeq; - size_t const cSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, block.begin, block_getSize(block)); - CONTROL(!ZSTD_isError(cSize)); - } -} - -#if 0 -static ZSTD_CCtx* g_zcc = NULL; - -static size_t -local_ZSTD_compress(const void* src, size_t srcSize, - void* dst, size_t dstSize, - void* payload) -{ - ZSTD_parameters p; - ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; - p.fParams = f; - p.cParams = *(ZSTD_compressionParameters*)payload; - return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p); - //return ZSTD_compress(dst, dstSize, src, srcSize, cLevel); -} - -static size_t g_cSize = 0; -static size_t local_ZSTD_decompress(const void* src, size_t srcSize, - void* dst, size_t dstSize, - void* buff2) -{ - (void)src; (void)srcSize; - return ZSTD_decompress(dst, dstSize, buff2, g_cSize); -} - -static ZSTD_DCtx* g_zdc = NULL; - -#ifndef ZSTD_DLL_IMPORT -extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); -static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) -{ - (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize); -} - -static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) -{ - int nbSeq; - (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeSeqHeaders(g_zdc, &nbSeq, buff2, g_cSize); -} -#endif - -static ZSTD_CStream* g_cstream= NULL; -static size_t -local_ZSTD_compressStream(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* 
payload) -{ - ZSTD_outBuffer buffOut; - ZSTD_inBuffer buffIn; - ZSTD_parameters p; - ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0}; - p.fParams = f; - p.cParams = *(ZSTD_compressionParameters*)payload; - ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN); - buffOut.dst = dst; - buffOut.size = dstCapacity; - buffOut.pos = 0; - buffIn.src = src; - buffIn.size = srcSize; - buffIn.pos = 0; - ZSTD_compressStream(g_cstream, &buffOut, &buffIn); - ZSTD_endStream(g_cstream, &buffOut); - return buffOut.pos; -} - -static size_t -local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - size_t r; - assert(cctx != NULL); - - r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload); - - ZSTD_freeCCtx(cctx); - - return r; -} - -static size_t -local_ZSTD_compress_generic_end(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - (void)payload; - return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); -} - -static size_t -local_ZSTD_compress_generic_continue(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - ZSTD_outBuffer buffOut; - ZSTD_inBuffer buffIn; - (void)payload; - buffOut.dst = dst; - buffOut.size = dstCapacity; - buffOut.pos = 0; - buffIn.src = src; - buffIn.size = srcSize; - buffIn.pos = 0; - ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); - ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end); - return buffOut.pos; -} - -static size_t -local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - (void)payload; - ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); - return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize); -} - -static size_t -local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize, - void* dst, 
size_t dstCapacity, - void* payload) -{ - ZSTD_outBuffer buffOut; - ZSTD_inBuffer buffIn; - (void)payload; - ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2); - buffOut.dst = dst; - buffOut.size = dstCapacity; - buffOut.pos = 0; - buffIn.src = src; - buffIn.size = srcSize; - buffIn.pos = 0; - ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_continue); - while(ZSTD_compressStream2(g_cstream, &buffOut, &buffIn, ZSTD_e_end)) {} - return buffOut.pos; -} - -static ZSTD_DStream* g_dstream= NULL; -static size_t -local_ZSTD_decompressStream(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* buff2) -{ - ZSTD_outBuffer buffOut; - ZSTD_inBuffer buffIn; - (void)src; (void)srcSize; - ZSTD_initDStream(g_dstream); - buffOut.dst = dst; - buffOut.size = dstCapacity; - buffOut.pos = 0; - buffIn.src = buff2; - buffIn.size = g_cSize; - buffIn.pos = 0; - ZSTD_decompressStream(g_dstream, &buffOut, &buffIn); - return buffOut.pos; -} - -#ifndef ZSTD_DLL_IMPORT -static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - ZSTD_parameters p; - ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 }; - p.fParams = f; - p.cParams = *(ZSTD_compressionParameters*)payload; - ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); - return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize); -} - -#define FIRST_BLOCK_SIZE 8 -static size_t -local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* payload) -{ - BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; - - ZSTD_parameters p; - ZSTD_frameParameters const f = { 1, 0, 0 }; - p.fParams = f; - p.cParams = *(ZSTD_compressionParameters*)payload; - ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize); - memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE); - - { size_t const compressResult = ZSTD_compressContinue(g_zcc, - dst, dstCapacity, - firstBlockBuf, FIRST_BLOCK_SIZE); - if 
(ZSTD_isError(compressResult)) { - DISPLAY("local_ZSTD_compressContinue_extDict error : %s\n", - ZSTD_getErrorName(compressResult)); - return compressResult; - } - dst = (BYTE*)dst + compressResult; - dstCapacity -= compressResult; - } - return ZSTD_compressEnd(g_zcc, dst, dstCapacity, - (const BYTE*)src + FIRST_BLOCK_SIZE, - srcSize - FIRST_BLOCK_SIZE); -} - -static size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize, - void* dst, size_t dstCapacity, - void* buff2) -{ - size_t regeneratedSize = 0; - const BYTE* ip = (const BYTE*)buff2; - const BYTE* const iend = ip + g_cSize; - BYTE* op = (BYTE*)dst; - size_t remainingCapacity = dstCapacity; - - (void)src; (void)srcSize; /* unused */ - ZSTD_decompressBegin(g_zdc); - while (ip < iend) { - size_t const iSize = ZSTD_nextSrcSizeToDecompress(g_zdc); - size_t const decodedSize = ZSTD_decompressContinue(g_zdc, op, remainingCapacity, ip, iSize); - ip += iSize; - regeneratedSize += decodedSize; - op += decodedSize; - remainingCapacity -= decodedSize; - } - - return regeneratedSize; -} -#endif -#endif - -/*_******************************************************* -* Bench functions -*********************************************************/ -static void benchMem(unsigned benchNb, unsigned nbIters, - const void* src, size_t srcSize, size_t blockSize, - int cLevel, ZSTD_compressionParameters cparams) -{ - size_t const dstSize = compressBlockBound(srcSize, blockSize); - void* const dst = malloc(dstSize); - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - CONTROL(dst != NULL); - CONTROL(cctx != NULL); - CONTROL(dctx != NULL); - - DISPLAY("block size: %u \n", (unsigned)blockSize); - DISPLAY("params: cLevel %d, wlog %d hlog %d clog %d slog %d mml %d tlen %d strat %d \n", - cLevel, cparams.windowLog, cparams.hashLog, cparams.chainLog, cparams.searchLog, - cparams.minMatch, cparams.targetLength, cparams.strategy); - - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, 
ZSTD_c_compressionLevel, cLevel))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)cparams.windowLog))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, (int)cparams.hashLog))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, (int)cparams.chainLog))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, (int)cparams.searchLog))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, (int)cparams.minMatch))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, (int)cparams.targetLength))); - CONTROL(!ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy))); - - { - /* Preparation */ - blocks_t* const blocks = compressBlocks(cctx, dst, dstSize, src, srcSize, blockSize); - char const* benchName = ""; - size_t iter; - switch (benchNb) - { - case 1: - benchName = "ZSTD_decodeLiteralsHeaders"; - skipToLiterals(blocks); - break; - case 2: - benchName = "ZSTD_decodeSeqHeaders"; - skipToSequences(blocks, dctx); - break; - default: - break; - } - - /* Benchmark loop */ - { - UTIL_time_t const begin = UTIL_getTime(); - for (iter = 0; iter < nbIters; ++iter) { - switch (benchNb) - { - case 1: - benchmark_ZSTD_decodeLiteralsHeader(dctx, blocks); - break; - case 2: - benchmark_ZSTD_decodeSeqHeaders(dctx, blocks); - break; - default: - break; - } - } - { - UTIL_time_t const end = UTIL_getTime(); - size_t const bytesProcessed = nbIters * totalUncompressedSize(blocks); - size_t const nanos = UTIL_getSpanTimeNano(begin, end); - double const MBps = ((double)bytesProcessed * TIMELOOP_NANOSEC) / (nanos * MB_UNIT); - DISPLAY("%2u#%-29.29s: %8.1f MB/s (%u bytes in %u blocks over %u iters) \n", benchNb, benchName, MBps, (unsigned)bytesProcessed, (unsigned)blocks->numBlocks * nbIters, nbIters); - } - } - free(blocks); - } - - free(dst); - ZSTD_freeCCtx(cctx); - ZSTD_freeDCtx(dctx); -} - - -static int benchSample(U32 benchNb, U32 nbIters, 
size_t blockSize, - size_t benchedSize, double compressibility, - int cLevel, ZSTD_compressionParameters cparams) -{ - /* Allocation */ - void* const origBuff = malloc(benchedSize); - if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } - - /* Fill buffer */ - RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0); - - /* bench */ - DISPLAY("\r%70s\r", ""); - DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize); - benchMem(benchNb, nbIters, origBuff, benchedSize, blockSize, cLevel, cparams); - - free(origBuff); - return 0; -} - - -static int benchFiles(U32 benchNb, U32 nbIters, size_t blockSize, - const char** fileNamesTable, const int nbFiles, - int cLevel, ZSTD_compressionParameters cparams) -{ - /* Loop for each file */ - int fileIdx; - for (fileIdx=0; fileIdx inFileSize) - benchedSize = (size_t)inFileSize; - if ((U64)benchedSize < inFileSize) { - DISPLAY("Not enough memory for '%s' full size; testing %u MB only... \n", - inFileName, (unsigned)(benchedSize>>20)); - } } - - /* Alloc */ - { void* const origBuff = malloc(benchedSize); - if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); fclose(inFile); return 12; } - - /* Fill input buffer */ - DISPLAY("Loading %s... \r", inFileName); - { size_t const readSize = fread(origBuff, 1, benchedSize, inFile); - fclose(inFile); - if (readSize != benchedSize) { - DISPLAY("\nError: problem reading file '%s' !! 
\n", inFileName); - free(origBuff); - return 13; - } } - - /* bench */ - DISPLAY("\r%70s\r", ""); /* blank line */ - DISPLAY(" %s : \n", inFileName); - benchMem(benchNb, nbIters, origBuff, benchedSize, blockSize, cLevel, cparams); - - free(origBuff); - } } - - return 0; -} - - - -/*_******************************************************* -* Argument Parsing -*********************************************************/ - -#define ERROR_OUT(msg) { DISPLAY("%s \n", msg); exit(1); } - -static unsigned readU32FromChar(const char** stringPtr) -{ - const char errorMsg[] = "error: numeric value too large"; - unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) { - unsigned const max = (((unsigned)(-1)) / 10) - 1; - if (result > max) ERROR_OUT(errorMsg); - result *= 10; - result += (unsigned)(**stringPtr - '0'); - (*stringPtr)++ ; - } - if ((**stringPtr=='K') || (**stringPtr=='M')) { - unsigned const maxK = ((unsigned)(-1)) >> 10; - if (result > maxK) ERROR_OUT(errorMsg); - result <<= 10; - if (**stringPtr=='M') { - if (result > maxK) ERROR_OUT(errorMsg); - result <<= 10; - } - (*stringPtr)++; /* skip `K` or `M` */ - if (**stringPtr=='i') (*stringPtr)++; - if (**stringPtr=='B') (*stringPtr)++; - } - return result; -} - -static int longCommandWArg(const char** stringPtr, const char* longCommand) -{ - size_t const comSize = strlen(longCommand); - int const result = !strncmp(*stringPtr, longCommand, comSize); - if (result) *stringPtr += comSize; - return result; -} - - -/*_******************************************************* -* Command line -*********************************************************/ - -static int usage(const char* exename) -{ - DISPLAY( "Usage :\n"); - DISPLAY( " %s [arg] file1 file2 ... 
fileX\n", exename); - DISPLAY( "Arguments :\n"); - DISPLAY( " -H/-h : Help (this text + advanced options)\n"); - return 0; -} - -static int usage_advanced(const char* exename) -{ - usage(exename); - DISPLAY( "\nAdvanced options :\n"); - DISPLAY( " -b# : test only function # \n"); - DISPLAY( " -l# : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL); - DISPLAY( "--zstd= : custom parameter selection. Format same as zstdcli \n"); - DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100); - DISPLAY( " -B# : sample size (default : %u)\n", (unsigned)kSampleSizeDefault); - DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); - return 0; -} - -static int badusage(const char* exename) -{ - DISPLAY("Wrong parameters\n"); - usage(exename); - return 1; -} - -int main(int argc, const char** argv) -{ - int argNb, filenamesStart=0, result; - const char* const exename = argv[0]; - const char* input_filename = NULL; - U32 benchNb = 0, main_pause = 0; - int cLevel = DEFAULT_CLEVEL; - ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0); - size_t sampleSize = kSampleSizeDefault; - double compressibility = COMPRESSIBILITY_DEFAULT; - - DISPLAY(WELCOME_MESSAGE); - if (argc<1) return badusage(exename); - - for (argNb=1; argNb Date: Mon, 24 Aug 2020 16:14:19 -0400 Subject: [PATCH 17/36] Addressing comments: more comments, cleanup, remove extra function, checksum logic --- lib/decompress/zstd_decompress.c | 7 +------ lib/zstd.h | 7 ++----- programs/fileio.c | 8 ++++++-- programs/zstdcli.c | 16 ++++++++++++++-- tests/fuzzer.c | 18 +++++++++++------- tests/playTests.sh | 3 ++- 6 files changed, 36 insertions(+), 23 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index d0263810..f26e5ff6 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -677,10 +677,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, 
corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); if (!dctx->forceIgnoreChecksum) { U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); checkRead = MEM_readLE32(ip); RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); } @@ -1403,11 +1403,6 @@ size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); } -size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e value) -{ - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, value); -} - ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) { ZSTD_bounds bounds = { 0, 0, 0 }; diff --git a/lib/zstd.h b/lib/zstd.h index 572f5063..742fab44 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1163,6 +1163,7 @@ typedef enum { } ZSTD_format_e; typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ ZSTD_d_validateChecksum = 0, ZSTD_d_ignoreChecksum = 1 } ZSTD_forceIgnoreChecksum_e; @@ -1704,6 +1705,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * Tells the decompressor to skip checksum validation during decompression, regardless * of whether checksumming was specified during compression. This offers some * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e */ #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 @@ -1714,11 +1716,6 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); -/*! ZSTD_DCtx_setForceIgnoreChecksum() : - * Instruct the decoder context to ignore checksums in compressed frame. 
- * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setForceIgnoreChecksum(ZSTD_DCtx* dctx, ZSTD_forceIgnoreChecksum_e value); - /*! ZSTD_decompressStream_simpleArgs() : * Same as ZSTD_decompressStream(), * but using only integral types as arguments. diff --git a/programs/fileio.c b/programs/fileio.c index 38f59a73..f2b8447a 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1754,8 +1754,12 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi if (ress.dctx==NULL) EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); - if (!prefs->checksumFlag) - CHECK( ZSTD_DCtx_setForceIgnoreChecksum(ress.dctx, ZSTD_d_ignoreChecksum)); + if (!prefs->checksumFlag) { + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_ignoreChecksum)); + } else { + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_validateChecksum)); + } + ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b7e7d1fd..fce88d8c 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -154,6 +154,20 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n"); #endif + +#ifndef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)"); +#ifndef ZSTD_NODECOMPRESS + DISPLAYOUT( ". 
If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)."); +#endif +#endif /* ZSTD_NOCOMPRESS */ +#ifndef ZSTD_NODECOMPRESS +#ifdef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); +#endif +#endif /* ZSTD_NODECOMPRESS */ + DISPLAYOUT( "\n"); + DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); #ifndef ZSTD_NOCOMPRESS @@ -174,7 +188,6 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n"); DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n"); - DISPLAYOUT( "--[no-]check : add XXH64 integrity checksum to frame (default: enabled) \n"); DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n"); DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n"); @@ -196,7 +209,6 @@ static void usage_advanced(const char* programName) DISPLAYOUT( " -l : print information about zstd compressed files \n"); DISPLAYOUT( "--test : test compressed file integrity \n"); DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n"); - DISPLAYOUT( "--no-check : disable validation of checksums in compressed frame \n"); # if ZSTD_SPARSE_DEFAULT DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n"); # else diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 7f8d9f65..d85ccef5 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -547,6 +547,11 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : decompress with corrupted checksum : ", testNb++); { /* create compressed buffer with checksumming enabled */ ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (!cctx) 
{ + DISPLAY("Not enough memory, aborting\n"); + testResult = 1; + goto _end; + } CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); CHECK_VAR(cSize, ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, @@ -555,26 +560,25 @@ static int basicUnitTests(U32 const seed, double compressibility) } { /* copy the compressed buffer and corrupt the checksum */ size_t r; - char* const corruptedChecksumCompressedBuffer = (char*)malloc(cSize); ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - if (!corruptedChecksumCompressedBuffer || !dctx) { + if (!dctx) { DISPLAY("Not enough memory, aborting\n"); testResult = 1; goto _end; } - memcpy(corruptedChecksumCompressedBuffer, compressedBuffer, cSize); - corruptedChecksumCompressedBuffer[cSize-1] += 1; - r = ZSTD_decompress(decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); + ((char*)compressedBuffer)[cSize-1] += 1; + r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); if (!ZSTD_isError(r)) goto _output_error; if (ZSTD_getErrorCode(r) != ZSTD_error_checksum_wrong) goto _output_error; CHECK_Z(ZSTD_DCtx_setForceIgnoreChecksum(dctx, ZSTD_d_ignoreChecksum)); - r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, corruptedChecksumCompressedBuffer, cSize); + r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize-1); + if (!ZSTD_isError(r)) goto _output_error; /* wrong checksum size should still throw error */ + r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); if (ZSTD_isError(r)) goto _output_error; ZSTD_freeDCtx(dctx); - free(corruptedChecksumCompressedBuffer); } DISPLAYLEVEL(3, "OK \n"); diff --git a/tests/playTests.sh b/tests/playTests.sh index 3d099efb..affd64eb 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -261,8 +261,9 @@ zstd tmp -c --compress-literals -19 | zstd -t zstd -b --fast=1 -i0e1 tmp --compress-literals zstd -b --fast=1 -i0e1 tmp --no-compress-literals println "test: --no-check for 
decompression" -zstd -f tmp --check +zstd -f tmp -o tmp.zst --check zstd -f tmp -o tmp1.zst --no-check +printf '\xDE\xAD\xBE\xEF' | dd of=tmp.zst bs=1 seek=$(($(wc -c <"tmp.zst") - 4)) count=4 conv=notrunc # corrupt checksum in tmp zstd -d -f tmp.zst --no-check zstd -d -f tmp1.zst --no-check From 1302f8d67691356b2f0aec8c62c1a7af2886a7cc Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sat, 15 Aug 2020 12:32:57 -0700 Subject: [PATCH 18/36] [fix] Always return dstSize_tooSmall when it is the case --- lib/decompress/zstd_decompress.c | 4 +- lib/decompress/zstd_decompress_block.c | 12 ++-- tests/fuzz/Makefile | 6 +- tests/fuzz/decompress_dstSize_tooSmall.c | 70 ++++++++++++++++++++++++ tests/fuzz/fuzz.py | 1 + tests/fuzz/simple_compress.c | 6 +- tests/fuzzer.c | 4 +- 7 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 tests/fuzz/decompress_dstSize_tooSmall.c diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index be5c7cfc..4b04218f 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -579,11 +579,11 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); memcpy(dst, src, srcSize); return srcSize; } @@ -592,11 +592,11 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (regenSize == 0) return 0; RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); memset(dst, b, regenSize); return regenSize; } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index e93d6feb..ca0de662 100644 --- 
a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1084,14 +1084,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, #endif DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. - * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. + * Instead break and check for an error at the end of the loop. */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } if (UNLIKELY(!--nbSeq)) break; } diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 1af3dc73..42988c34 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -95,7 +95,8 @@ FUZZ_TARGETS := \ simple_compress \ dictionary_loader \ raw_dictionary_round_trip \ - dictionary_stream_round_trip + dictionary_stream_round_trip \ + decompress_dstSize_tooSmall all: $(FUZZ_TARGETS) @@ -180,6 +181,9 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_zstd_frame_info.o dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loader.o $(LIB_FUZZING_ENGINE) -o $@ +decompress_dstSize_tooSmall: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o diff --git a/tests/fuzz/decompress_dstSize_tooSmall.c b/tests/fuzz/decompress_dstSize_tooSmall.c new file mode 100644 index 00000000..e47b3d04 --- /dev/null +++ 
b/tests/fuzz/decompress_dstSize_tooSmall.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target attempts to decompress a valid compressed frame into + * an output buffer that is too small to ensure we always get + * ZSTD_error_dstSize_tooSmall. + */ + +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd.h" +#include "zstd_errors.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size_t rBufSize = FUZZ_dataProducer_uint32Range(producer, 0, size); + size = FUZZ_dataProducer_remainingBytes(producer); + /* Ensure the round-trip buffer is too small. */ + if (rBufSize >= size) { + rBufSize = size > 0 ? 
size - 1 : 0; + } + size_t const cBufSize = ZSTD_compressBound(size); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + void *cBuf = FUZZ_malloc(cBufSize); + void *rBuf = FUZZ_malloc(rBufSize); + size_t const cSize = ZSTD_compressCCtx(cctx, cBuf, cBufSize, src, size, 1); + FUZZ_ZASSERT(cSize); + size_t const rSize = ZSTD_decompressDCtx(dctx, rBuf, rBufSize, cBuf, cSize); + if (size == 0) { + FUZZ_ASSERT(rSize == 0); + } else { + FUZZ_ASSERT(ZSTD_isError(rSize)); + FUZZ_ASSERT(ZSTD_getErrorCode(rSize) == ZSTD_error_dstSize_tooSmall); + } + free(cBuf); + free(rBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 6875d1d6..6332eeb9 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -59,6 +59,7 @@ TARGET_INFO = { 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA), 'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA), 'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA), + 'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c index b64f373e..620177fb 100644 --- a/tests/fuzz/simple_compress.c +++ b/tests/fuzz/simple_compress.c @@ -19,6 +19,7 @@ #include #include "fuzz_helpers.h" #include "zstd.h" +#include "zstd_errors.h" #include "zstd_helpers.h" #include "fuzz_data_producer.h" @@ -42,7 +43,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } void *rBuf = FUZZ_malloc(bufSize); - ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel); + size_t const ret = ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel); + if (ZSTD_isError(ret)) { + FUZZ_ASSERT(ZSTD_getErrorCode(ret) == ZSTD_error_dstSize_tooSmall); + } 
free(rBuf); FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 8ac2864f..abb4e861 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -3053,7 +3053,7 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const DISPLAYLEVEL(5, "fuzzer t%u: compress into too small buffer of size %u (missing %u bytes) \n", testNb, (unsigned)tooSmallSize, (unsigned)missing); { size_t const errorCode = ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); - CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (unsigned)tooSmallSize, (unsigned)cSize); } + CHECK(ZSTD_getErrorCode(errorCode) != ZSTD_error_dstSize_tooSmall, "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (unsigned)tooSmallSize, (unsigned)cSize); } { unsigned endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, sizeof(endCheck)); CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow (check.%08X != %08X.mark)", endCheck, endMark); } } } @@ -3100,7 +3100,7 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const static const BYTE token = 0xA9; dstBuffer[tooSmallSize] = token; { size_t const errorCode = ZSTD_decompress(dstBuffer, tooSmallSize, cBuffer, cSize); - CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (unsigned)errorCode, (unsigned)tooSmallSize); } + CHECK(ZSTD_getErrorCode(errorCode) != ZSTD_error_dstSize_tooSmall, "ZSTD_decompress should have failed : %u > %u (dst buffer too small)", (unsigned)errorCode, (unsigned)tooSmallSize); } CHECK(dstBuffer[tooSmallSize] != token, "ZSTD_decompress : dst buffer overflow"); } From cebe0b5c0b15954e7ceaea82ba8e918049475b6c Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 24 Aug 2020 13:58:34 -0700 Subject: [PATCH 19/36] Improve FSE_normalizeCount() docs --- lib/common/fse.h | 8 ++++++-- 1 file changed, 
6 insertions(+), 2 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index e9a071c4..7abd34c8 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -137,8 +137,12 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - useLowProbCount is a bool param which is set to 1 to use count=-1 or set to 0 to - use count=1 instead, which speeds up FSE_readNCount() and FSE_buildDTable(). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. 
@return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, From a030560d6274ee2b4d9a262f40e27e920bee9aa9 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 24 Aug 2020 17:28:00 -0400 Subject: [PATCH 20/36] Add new DCtx param: validateChecksum and update unit tests --- lib/decompress/zstd_decompress.c | 10 ++++++---- lib/decompress/zstd_decompress_internal.h | 3 ++- programs/fileio.c | 6 +----- tests/fuzzer.c | 4 ++-- tests/playTests.sh | 9 +++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index f26e5ff6..c8cfa716 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -115,6 +115,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); dctx->outBufferMode = ZSTD_obm_buffered; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->validateChecksum = 1; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -447,7 +448,8 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 
1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); return 0; } @@ -662,7 +664,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); if (decodedSize != 0) op += decodedSize; @@ -980,7 +982,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; /* Stay on the same stage until we are finished streaming the block. 
*/ @@ -1011,7 +1013,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ { - if (!dctx->forceIgnoreChecksum) { + if (dctx->validateChecksum) { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); U32 const check32 = MEM_readLE32(src); DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 17802380..891d1bed 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -122,7 +122,8 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; ZSTD_format_e format; - ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* if == 1, will ignore checksums in compressed frame */ + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). 
*/ const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; diff --git a/programs/fileio.c b/programs/fileio.c index f2b8447a..1970f6cb 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1754,11 +1754,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi if (ress.dctx==NULL) EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); - if (!prefs->checksumFlag) { - CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_ignoreChecksum)); - } else { - CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_validateChecksum)); - } + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index d85ccef5..e5c3e6e3 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -571,8 +571,8 @@ static int basicUnitTests(U32 const seed, double compressibility) r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); if (!ZSTD_isError(r)) goto _output_error; if (ZSTD_getErrorCode(r) != ZSTD_error_checksum_wrong) goto _output_error; - - CHECK_Z(ZSTD_DCtx_setForceIgnoreChecksum(dctx, ZSTD_d_ignoreChecksum)); + + CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_forceIgnoreChecksum, ZSTD_d_ignoreChecksum)); r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize-1); if (!ZSTD_isError(r)) goto _output_error; /* wrong checksum size should still throw error */ r = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); diff --git a/tests/playTests.sh b/tests/playTests.sh index affd64eb..18f6f3fb 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -261,11 +261,12 @@ zstd tmp -c --compress-literals -19 | zstd -t zstd -b --fast=1 -i0e1 tmp --compress-literals zstd -b --fast=1 -i0e1 tmp 
--no-compress-literals println "test: --no-check for decompression" -zstd -f tmp -o tmp.zst --check -zstd -f tmp -o tmp1.zst --no-check -printf '\xDE\xAD\xBE\xEF' | dd of=tmp.zst bs=1 seek=$(($(wc -c <"tmp.zst") - 4)) count=4 conv=notrunc # corrupt checksum in tmp +zstd -f tmp -o tmp_corrupt.zst --check +zstd -f tmp -o tmp.zst --no-check +printf '\xDE\xAD\xBE\xEF' | dd of=tmp_corrupt.zst bs=1 seek=$(($(wc -c < "tmp_corrupt.zst") - 4)) count=4 conv=notrunc # corrupt checksum in tmp +zstd -d -f tmp_corrupt.zst --no-check +zstd -d -f tmp_corrupt.zst --check --no-check # final flag overrides zstd -d -f tmp.zst --no-check -zstd -d -f tmp1.zst --no-check println "\n===> zstdgrep tests" ln -sf "$ZSTD_BIN" zstdcat From e1e41addb3b5d318c1490f20b66a9e49d85a2ab6 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 24 Aug 2020 17:36:36 -0400 Subject: [PATCH 21/36] Adjust #ifndef logic --- programs/zstdcli.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index fce88d8c..4dda2a5c 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -160,12 +160,11 @@ static void usage_advanced(const char* programName) #ifndef ZSTD_NODECOMPRESS DISPLAYOUT( ". 
If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)."); #endif -#endif /* ZSTD_NOCOMPRESS */ -#ifndef ZSTD_NODECOMPRESS +#else #ifdef ZSTD_NOCOMPRESS DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); #endif -#endif /* ZSTD_NODECOMPRESS */ +#endif /* ZSTD_NOCOMPRESS */ DISPLAYOUT( "\n"); DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); From 6d2f750b37fef547e86f8648da0532afd420591d Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 24 Aug 2020 14:44:33 -0700 Subject: [PATCH 22/36] Document the BMI2 default() functions --- lib/common/entropy_common.c | 2 ++ lib/common/fse_decompress.c | 1 + lib/decompress/zstd_decompress_block.c | 1 + 3 files changed, 4 insertions(+) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index e2ec694f..0d27265a 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -203,6 +203,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne return ip-istart; } +/* Avoids the FORCE_INLINE of the _body() function. */ static size_t FSE_readNCount_body_default( short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) @@ -319,6 +320,7 @@ FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, return iSize+1; } +/* Avoids the FORCE_INLINE of the _body() function. */ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 64693024..53b248e3 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -353,6 +353,7 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( } } +/* Avoids the FORCE_INLINE of the _body() function. 
*/ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) { return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 4777a267..2a8d4775 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -484,6 +484,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, } } +/* Avoids the FORCE_INLINE of the _body() function. */ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, From 52f33a1da5da497124928e9fd84c0814e9d3ea98 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 24 Aug 2020 16:09:45 -0700 Subject: [PATCH 23/36] Fix compiler warnings --- lib/decompress/zstd_decompress_block.c | 9 +++++---- tests/fullbench.c | 7 +++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 2a8d4775..cabba180 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -490,7 +490,7 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, const U32* baseValue, const U32* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { - return ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); } @@ -500,7 +500,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol const U32* baseValue, const U32* nbAdditionalBits, unsigned tableLog, void* wksp, size_t wkspSize) { - return ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + ZSTD_buildFSETable_body(dt, 
normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); } #endif @@ -512,12 +512,13 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, { #if DYNAMIC_BMI2 if (bmi2) { - return ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue, + ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); + return; } #endif (void)bmi2; - return ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue, + ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); } diff --git a/tests/fullbench.c b/tests/fullbench.c index 30906481..2207239e 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -171,11 +171,18 @@ FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src } RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); +#ifndef HUF_FORCE_DECOMPRESS_X2 return HUF_readDTableX1_wksp_bmi2( dctx->entropy.hufTable, istart+lhSize, litCSize, dctx->workspace, sizeof(dctx->workspace), dctx->bmi2); +#else + return HUF_readDTableX2_wksp( + dctx->entropy.hufTable, + istart+lhSize, litCSize, + dctx->workspace, sizeof(dctx->workspace)); +#endif } } } From da38891a87dc1be9796a65ce2c8214d16fe49379 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 25 Aug 2020 16:46:47 -0400 Subject: [PATCH 24/36] Add initial live update with displayLevel = 2, add new field to FIO_prefs_t --- programs/fileio.c | 48 ++++++++++++++++++++++++++++++---------------- programs/fileio.h | 1 + programs/zstdcli.c | 15 +++++++-------- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index d5b8a7d1..079b3bc4 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -319,6 +319,7 @@ struct FIO_prefs_s { int excludeCompressedFiles; int patchFromMode; int contentSize; + int 
nbFiles; }; @@ -360,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->testMode = 0; ret->literalCompressionMode = ZSTD_lcm_auto; ret->excludeCompressedFiles = 0; + ret->nbFiles = 1; return ret; } @@ -495,6 +497,11 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value) prefs->contentSize = value != 0; } +void FIO_setNbFiles(FIO_prefs_t* const prefs, int value) +{ + prefs->nbFiles = value; +} + /*-************************************* * Functions ***************************************/ @@ -1254,17 +1261,17 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, /* display progress notifications */ if (g_display_prefs.displayLevel >= 3) { - DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ", + DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%%\033 ", compressionLevel, (unsigned)((zfp.ingested - zfp.consumed) >> 20), (unsigned)(zfp.consumed >> 20), (unsigned)(zfp.produced >> 20), cShare ); } else { /* summarized notifications if == 2; */ - DISPLAYLEVEL(2, "\rRead : %u ", (unsigned)(zfp.consumed >> 20)); + DISPLAYLEVEL(2, "\033[s Read : %u ", (unsigned)(zfp.consumed >> 20)); if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20)); - DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare); + DISPLAYLEVEL(2, "MB ==> %2.f%%\033[u", cShare); DELAY_NEXT_UPDATE(); } @@ -1427,18 +1434,21 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, } /* Status */ - DISPLAYLEVEL(2, "\r%79s\r", ""); - if (readsize == 0) { - DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", - srcFileName, - (unsigned long long)readsize, (unsigned long long) compressedfilesize, - dstFileName); - } else { - DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", - srcFileName, - (double)compressedfilesize / readsize * 100, - (unsigned long long)readsize, (unsigned long long) compressedfilesize, - dstFileName); + + if (prefs->nbFiles == 1 && !((!strcmp(srcFileName, 
stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark)))) { + DISPLAYLEVEL(2, "\r%79s\r", ""); + if (readsize == 0) { + DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", + srcFileName, + (unsigned long long)readsize, (unsigned long long) compressedfilesize, + dstFileName); + } else { + DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", + srcFileName, + (double)compressedfilesize / readsize * 100, + (unsigned long long)readsize, (unsigned long long) compressedfilesize, + dstFileName); + } } /* Elapsed Time and CPU Load */ @@ -1682,8 +1692,10 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, error = 1; } else { unsigned u; - for (u=0; u 1) + DISPLAYUPDATE(2, "\rCompressing %u/%u files. Current source: %s |", u+1, nbFiles, srcFileName); error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); } diff --git a/programs/fileio.h b/programs/fileio.h index ef3c9e72..5913c8a1 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -96,6 +96,7 @@ void FIO_setNotificationLevel(int level); void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles); void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value); void FIO_setContentSize(FIO_prefs_t* const prefs, int value); +void FIO_setNbFiles(FIO_prefs_t* const prefs, int value); /*-************************************* * Single File functions diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 1aea63dc..534a848e 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1244,10 +1244,6 @@ int main(int const argCount, const char* argv[]) CLEAN_RETURN(1); } - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1; - if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1; - /* IO Stream/File */ FIO_setNotificationLevel(g_displayLevel); 
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL); @@ -1306,10 +1302,12 @@ int main(int const argCount, const char* argv[]) } } - if ((filenames->tableSize==1) && outFileName) - operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); - else - operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); + if ((filenames->tableSize==1) && outFileName) { + operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); + } else { + FIO_setNbFiles(prefs, (int)filenames->tableSize); + operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); + } #else (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); @@ -1319,6 +1317,7 @@ int main(int const argCount, const char* argv[]) if (filenames->tableSize == 1 && outFileName) { operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName); } else { + FIO_setNbFiles(prefs, (int)filenames->tableSize); operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, dictFileName); } #else From a3401ca7d0deae37f21bd2d6fa1262320b5675da Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 25 Aug 2020 17:23:47 -0400 Subject: [PATCH 25/36] Make compatible with displaylevel = 2 and decompression --- programs/fileio.c | 16 +++++++++++----- 1 
file changed, 11 insertions(+), 5 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 079b3bc4..c81b640e 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1435,8 +1435,9 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, /* Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ if (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark)))) { - DISPLAYLEVEL(2, "\r%79s\r", ""); if (readsize == 0) { DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", srcFileName, @@ -1693,7 +1694,8 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, } else { unsigned u; for (u=0; u 1) + DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s | ", u+1, nbFiles, inFileNamesTable[u]); error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel); } if (fclose(ress.dstFile)) @@ -1726,7 +1728,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, /* No status message in pipe mode (stdin - stdout) or multi-files mode */ // if (!strcmp(inFileNamesTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_display_prefs.displayLevel==2)) g_displayLevel=1; if (nbFiles > 1) - DISPLAYUPDATE(2, "\rCompressing %u/%u files. Current source: %s |", u+1, nbFiles, srcFileName); + DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName); error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); } @@ -2001,7 +2003,7 @@ FIO_decompressZstdFrame(dRess_t* ress, FILE* finput, /* Write block */ storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips); frameSize += outBuff.pos; - DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", + DISPLAYUPDATE(2, "\033[s%-20.20s : %u MB... 
\033[u", srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); if (inBuff.pos > 0) { @@ -2584,8 +2586,10 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); } - for (u=0; utestMode) && (fclose(ress.dstFile))) EXM_THROW(72, "Write error : %s : cannot properly close output file", strerror(errno)); @@ -2609,6 +2613,8 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, dstFileName = FIO_determineDstName(srcFileName, outDirName); } if (dstFileName == NULL) { error=1; continue; } + if (nbFiles > 1) + DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName[u]); error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); } if (outDirName) From acee48d4c1e73a0a8a67f7c9fdb8be15e939a197 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 25 Aug 2020 17:34:00 -0400 Subject: [PATCH 26/36] Small cleanups --- programs/fileio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index c81b640e..25d98098 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1261,7 +1261,7 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, /* display progress notifications */ if (g_display_prefs.displayLevel >= 3) { - DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%%\033 ", + DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ", compressionLevel, (unsigned)((zfp.ingested - zfp.consumed) >> 20), (unsigned)(zfp.consumed >> 20), @@ -1725,8 +1725,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ } - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - // if (!strcmp(inFileNamesTable[0], stdinmark) && outFileName && 
!strcmp(outFileName,stdoutmark) && (g_display_prefs.displayLevel==2)) g_displayLevel=1; if (nbFiles > 1) DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName); error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); @@ -2614,7 +2612,7 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, } if (dstFileName == NULL) { error=1; continue; } if (nbFiles > 1) - DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName[u]); + DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName); error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); } if (outDirName) From 47a9ad2ae3b21680f24ff8b78b8834eeee46ffc1 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 25 Aug 2020 17:54:00 -0400 Subject: [PATCH 27/36] Remove decompression status update --- programs/fileio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 25d98098..4be31696 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1437,7 +1437,7 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, DISPLAYLEVEL(2, "\r%79s\r", ""); /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark)))) { + if (g_display_prefs.displayLevel > 2 || (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark))))) { if (readsize == 0) { DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", srcFileName, @@ -2332,7 +2332,9 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ + if (g_display_prefs.displayLevel 
> 2 || (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark))))) + DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); return 0; } @@ -2585,7 +2587,7 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); } for (u=0; utestMode) && (fclose(ress.dstFile))) @@ -2612,7 +2614,7 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, } if (dstFileName == NULL) { error=1; continue; } if (nbFiles > 1) - DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName); + DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: ", u+1, nbFiles); error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); } if (outDirName) From 96222cd195193715204292ea26a545582554f4b6 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 25 Aug 2020 18:03:08 -0400 Subject: [PATCH 28/36] Small formatting changes --- programs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 4be31696..20166b30 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1695,7 +1695,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, unsigned u; for (u=0; u 1) - DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s | ", u+1, nbFiles, inFileNamesTable[u]); + DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s ", u+1, nbFiles, inFileNamesTable[u]); error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel); } if (fclose(ress.dstFile)) @@ -1726,7 +1726,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, } if (nbFiles > 1) - DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s | ", u+1, nbFiles, srcFileName); + DISPLAYLEVEL(2, "\rCompressing %u/%u files. 
Current source: %s ", u+1, nbFiles, srcFileName); error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); } From 4193638996eaae030fc8993cd15159ab81fe166f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 25 Aug 2020 11:37:41 -0700 Subject: [PATCH 29/36] [bug] Fix FSE_readNCount() * Fix bug introduced in PR #2271 * Fix long-standing bug that is impossible to trigger inside of zstd * Add a fuzzer that makes sure the normalized count always round trips correctly --- lib/common/entropy_common.c | 23 ++++---- tests/fuzz/Makefile | 6 ++- tests/fuzz/fse_read_ncount.c | 100 +++++++++++++++++++++++++++++++++++ tests/fuzz/fuzz.py | 1 + 4 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 tests/fuzz/fse_read_ncount.c diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 0d27265a..38e18b16 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -110,13 +110,14 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; while (repeats >= 12) { charnum += 3 * 12; - if (ip <= iend-7) { + if (LIKELY(ip <= iend-7)) { ip += 3; - bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 24; - bitCount += 24; + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; } + bitStream = MEM_readLE32(ip) >> bitCount; repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; } charnum += 3 * repeats; @@ -124,6 +125,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne bitCount += 2 * repeats; /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); charnum += bitStream & 3; bitCount += 2; @@ -137,14 +139,16 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne * because we already memset the whole buffer to 0. 
*/ - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 2; + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; } + bitStream = MEM_readLE32(ip) >> bitCount; } { int const max = (2*threshold-1) - remaining; @@ -184,14 +188,15 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne } if (charnum >= maxSV1) break; - if (LIKELY((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))) { + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { ip += bitCount>>3; bitCount &= 7; } else { bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; ip = iend - 4; } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); + bitStream = MEM_readLE32(ip) >> bitCount; } } if (remaining != 1) return ERROR(corruption_detected); /* Only possible when there are too many zeros. 
*/ diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 42988c34..d88fae9c 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -96,7 +96,8 @@ FUZZ_TARGETS := \ dictionary_loader \ raw_dictionary_round_trip \ dictionary_stream_round_trip \ - decompress_dstSize_tooSmall + decompress_dstSize_tooSmall \ + fse_read_ncount all: $(FUZZ_TARGETS) @@ -184,6 +185,9 @@ dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loa decompress_dstSize_tooSmall: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o $(LIB_FUZZING_ENGINE) -o $@ +fse_read_ncount: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o diff --git a/tests/fuzz/fse_read_ncount.c b/tests/fuzz/fse_read_ncount.c new file mode 100644 index 00000000..e20a9382 --- /dev/null +++ b/tests/fuzz/fse_read_ncount.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target round trips the FSE normalized count with FSE_writeNCount() + * and FSE_readNCount() to ensure that it can always round trip correctly. 
+ */ + +#define FSE_STATIC_LINKING_ONLY +#define ZSTD_STATIC_LINKING_ONLY + +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" +#include "fse.h" + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + + /* Pick a random tableLog and maxSymbolValue */ + unsigned const tableLog = FUZZ_dataProducer_uint32Range(producer, FSE_MIN_TABLELOG, FSE_MAX_TABLELOG); + unsigned const maxSymbolValue = FUZZ_dataProducer_uint32Range(producer, 0, 255); + + unsigned remainingWeight = (1u << tableLog) - 1; + size_t dataSize; + BYTE data[512]; + short ncount[256]; + + /* Randomly fill the normalized count */ + memset(ncount, 0, sizeof(ncount)); + { + unsigned s; + for (s = 0; s < maxSymbolValue && remainingWeight > 0; ++s) { + short n = (short)FUZZ_dataProducer_int32Range(producer, -1, remainingWeight); + ncount[s] = n; + if (n < 0) { + remainingWeight -= 1; + } else { + assert((unsigned)n <= remainingWeight); + remainingWeight -= n; + } + } + /* Ensure ncount[maxSymbolValue] != 0 and the sum is (1<= FSE_NCountWriteBound(maxSymbolValue, tableLog)); + dataSize = FSE_writeNCount(data, sizeof(data), ncount, maxSymbolValue, tableLog); + FUZZ_ZASSERT(dataSize); + } + /* Read & validate the normalized count */ + { + short rtNcount[256]; + unsigned rtMaxSymbolValue = 255; + unsigned rtTableLog; + /* Copy into a buffer with a random amount of random data at the end */ + size_t const buffSize = (size_t)FUZZ_dataProducer_uint32Range(producer, dataSize, sizeof(data)); + BYTE* const buff = FUZZ_malloc(buffSize); + size_t rtDataSize; + memcpy(buff, data, dataSize); + { + size_t b; + for (b = dataSize; b < buffSize; ++b) { + buff[b] = (BYTE)FUZZ_dataProducer_uint32Range(producer, 0, 255); + } + } + + rtDataSize = FSE_readNCount(rtNcount, &rtMaxSymbolValue, &rtTableLog, buff, buffSize); + FUZZ_ZASSERT(rtDataSize); + FUZZ_ASSERT(rtDataSize == 
dataSize); + FUZZ_ASSERT(rtMaxSymbolValue == maxSymbolValue); + FUZZ_ASSERT(rtTableLog == tableLog); + { + unsigned s; + for (s = 0; s <= maxSymbolValue; ++s) { + FUZZ_ASSERT(ncount[s] == rtNcount[s]); + } + } + free(buff); + } + + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 6332eeb9..24430a22 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -60,6 +60,7 @@ TARGET_INFO = { 'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA), 'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA), 'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA), + 'fse_read_ncount': TargetInfo(InputType.RAW_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] From 51dd7e69f999006207d14f45e1865fc403d887e6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 25 Aug 2020 16:28:41 -0700 Subject: [PATCH 30/36] [github actions] Disable armbuild and armfuzz tests --- .github/workflows/generic-dev.yml | 34 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/.github/workflows/generic-dev.yml b/.github/workflows/generic-dev.yml index 42aa367d..64902f23 100644 --- a/.github/workflows/generic-dev.yml +++ b/.github/workflows/generic-dev.yml @@ -155,23 +155,25 @@ jobs: sudo apt-get install gcc-mingw-w64 CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd - armbuild: - runs-on: ubuntu-16.04 # doesn't work on latest - steps: - - uses: actions/checkout@v2 - - name: ARM Build Test - run: | - make arminstall - make armbuild +# TODO: Broken test - fix and uncomment +# armbuild: +# runs-on: ubuntu-16.04 # doesn't work on latest +# steps: +# - uses: actions/checkout@v2 +# - name: ARM Build Test +# run: | +# make arminstall +# make armbuild - armfuzz: - runs-on: ubuntu-16.04 # doesn't work on latest - steps: - - uses: actions/checkout@v2 - - name: Qemu ARM emulation + Fuzz Test - run: | - make arminstall - make armfuzz +# 
TODO: Broken test - fix and uncomment +# armfuzz: +# runs-on: ubuntu-16.04 # doesn't work on latest +# steps: +# - uses: actions/checkout@v2 +# - name: Qemu ARM emulation + Fuzz Test +# run: | +# make arminstall +# make armfuzz bourne-shell: runs-on: ubuntu-latest From 49eeb2d1fca9072dbe4f48166f78bebd6801c3b1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 25 Aug 2020 16:32:00 -0700 Subject: [PATCH 31/36] [fuzz] Disable superblock expansion test --- tests/fuzz/simple_round_trip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 2f008d06..6e58fb1c 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -47,8 +47,12 @@ static size_t roundTripTest(void *result, size_t resultCapacity, FUZZ_ZASSERT(cSize); dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); FUZZ_ZASSERT(dSize); - /* When superblock is enabled make sure we don't expand the block more than expected. */ - if (targetCBlockSize != 0) { + /* When superblock is enabled make sure we don't expand the block more than expected. + * NOTE: This test is currently disabled because superblock mode can arbitrarily + * expand the block in the worst case. Once superblock mode has been improved we can + * re-enable this test. 
+ */ + if (0 && targetCBlockSize != 0) { size_t normalCSize; FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 0)); normalCSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); From ae163015b159ef6f6e7c758a85101e28928e1d08 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 25 Aug 2020 17:10:04 -0700 Subject: [PATCH 32/36] [fuzz] Fix stream_decompress timeouts --- tests/fuzz/fuzz_data_producer.c | 4 +++ tests/fuzz/fuzz_data_producer.h | 3 +++ tests/fuzz/stream_decompress.c | 47 ++++++++++++++++++--------------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c index 6518af30..f2d5a1b5 100644 --- a/tests/fuzz/fuzz_data_producer.c +++ b/tests/fuzz/fuzz_data_producer.c @@ -66,6 +66,10 @@ size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ return producer->size; } +int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) { + return producer->size == 0; +} + size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize) { newSize = newSize > producer->size ? producer->size : newSize; diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h index 41e0b52d..25cc937f 100644 --- a/tests/fuzz/fuzz_data_producer.h +++ b/tests/fuzz/fuzz_data_producer.h @@ -49,6 +49,9 @@ int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, /* Returns the size of the remaining bytes of data in the producer */ size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); +/* Returns true if the data producer is out of bytes */ +int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer); + /* Restricts the producer to only the last newSize bytes of data. If newSize > current data size, nothing happens. Returns the number of bytes the producer won't use anymore, after contracting. 
*/ diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 25901b1e..5d2bb2aa 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -22,18 +22,19 @@ #include "zstd.h" #include "fuzz_data_producer.h" -static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX; - static ZSTD_DStream *dstream = NULL; -static void* buf = NULL; uint32_t seed; -static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer, uint32_t min) +static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer, void* buf, size_t bufSize) { ZSTD_outBuffer buffer = { buf, 0, 0 }; - buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, kBufSize)); - FUZZ_ASSERT(buffer.size <= kBufSize); + if (FUZZ_dataProducer_empty(producer)) { + buffer.size = bufSize; + } else { + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 0, bufSize)); + } + FUZZ_ASSERT(buffer.size <= bufSize); if (buffer.size == 0) { buffer.dst = NULL; @@ -43,13 +44,16 @@ static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer, uint32_t min) } static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, - FUZZ_dataProducer_t *producer, - uint32_t min) + FUZZ_dataProducer_t *producer) { ZSTD_inBuffer buffer = { *src, 0, 0 }; FUZZ_ASSERT(*size > 0); - buffer.size = (FUZZ_dataProducer_uint32Range(producer, min, *size)); + if (FUZZ_dataProducer_empty(producer)) { + buffer.size = *size; + } else { + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 0, *size)); + } FUZZ_ASSERT(buffer.size <= *size); *src += buffer.size; *size -= buffer.size; @@ -66,18 +70,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) /* Give a random portion of src data to the producer, to use for parameter generation. The rest will be used for (de)compression */ FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); - /* Guarantee forward progress by refusing to generate 2 zero sized - * buffers in a row. 
*/ - int prevInWasZero = 0; - int prevOutWasZero = 0; int stableOutBuffer; ZSTD_outBuffer out; + void* buf; + size_t bufSize; size = FUZZ_dataProducer_reserveDataPrefix(producer); + bufSize = MAX(10 * size, ZSTD_BLOCKSIZE_MAX); /* Allocate all buffers and contexts if not already allocated */ - if (!buf) { - buf = FUZZ_malloc(kBufSize); - } + buf = FUZZ_malloc(bufSize); if (!dstream) { dstream = ZSTD_createDStream(); @@ -90,18 +91,19 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) if (stableOutBuffer) { FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dstream, ZSTD_d_stableOutBuffer, 1)); out.dst = buf; - out.size = kBufSize; + out.size = bufSize; out.pos = 0; + } else { + out = makeOutBuffer(producer, buf, bufSize); } while (size > 0) { - ZSTD_inBuffer in = makeInBuffer(&src, &size, producer, prevInWasZero ? 1 : 0); - prevInWasZero = in.size == 0; + ZSTD_inBuffer in = makeInBuffer(&src, &size, producer); while (in.pos != in.size) { - if (!stableOutBuffer || prevOutWasZero || FUZZ_dataProducer_uint32Range(producer, 0, 100) == 55) { - out = makeOutBuffer(producer, prevOutWasZero ? 
1 : 0); + if (out.pos == out.size) { + if (stableOutBuffer) goto error; + out = makeOutBuffer(producer, buf, bufSize); } - prevOutWasZero = out.size == 0; size_t const rc = ZSTD_decompressStream(dstream, &out, &in); if (ZSTD_isError(rc)) goto error; } @@ -112,5 +114,6 @@ error: ZSTD_freeDStream(dstream); dstream = NULL; #endif FUZZ_dataProducer_free(producer); + free(buf); return 0; } From b6abbc3ce510c9b62d1d762d5fc010b4eca58e8e Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 26 Aug 2020 11:35:07 -0400 Subject: [PATCH 33/36] Fix single file decompression summary, remove escape code dependency, add currFileIdx field to FIO_prefs, general cleanups/bugfixes --- programs/fileio.c | 75 ++++++++++++++++++++++++---------------------- programs/fileio.h | 5 ++-- programs/zstdcli.c | 14 +++++---- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 20166b30..29b499b4 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -319,6 +319,7 @@ struct FIO_prefs_s { int excludeCompressedFiles; int patchFromMode; int contentSize; + int currFileIdx; int nbFiles; }; @@ -362,6 +363,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->literalCompressionMode = ZSTD_lcm_auto; ret->excludeCompressedFiles = 0; ret->nbFiles = 1; + ret->currFileIdx = 0; return ret; } @@ -502,6 +504,11 @@ void FIO_setNbFiles(FIO_prefs_t* const prefs, int value) prefs->nbFiles = value; } +void FIO_setCurrFileIdx(FIO_prefs_t* const prefs, int value) +{ + prefs->currFileIdx = value; +} + /*-************************************* * Functions ***************************************/ @@ -1268,10 +1275,15 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, (unsigned)(zfp.produced >> 20), cShare ); } else { /* summarized notifications if == 2; */ - DISPLAYLEVEL(2, "\033[s Read : %u ", (unsigned)(zfp.consumed >> 20)); + if (prefs->nbFiles > 1) { + DISPLAYLEVEL(2, "\rCompressing %u/%u files. 
Current source: %s ", prefs->currFileIdx+1, prefs->nbFiles, srcFileName); + } else { + DISPLAYLEVEL(2, "\r"); + } + DISPLAYLEVEL(2, "Read : %u ", (unsigned)(zfp.consumed >> 20)); if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20)); - DISPLAYLEVEL(2, "MB ==> %2.f%%\033[u", cShare); + DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare); DELAY_NEXT_UPDATE(); } @@ -1436,8 +1448,7 @@ FIO_compressFilename_internal(FIO_prefs_t* const prefs, /* Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if (g_display_prefs.displayLevel > 2 || (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark))))) { + if (g_display_prefs.displayLevel > 2 || (g_display_prefs.displayLevel == 2 && prefs->nbFiles <= 1)) { if (readsize == 0) { DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", srcFileName, @@ -1673,7 +1684,7 @@ static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsig * or into a destination folder (specified with -O) */ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, - const char** inFileNamesTable, unsigned nbFiles, + const char** inFileNamesTable, const char* outMirroredRootDirName, const char* outDirName, const char* outFileName, const char* suffix, @@ -1682,7 +1693,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, { int error = 0; cRess_t ress = FIO_createCResources(prefs, dictFileName, - FIO_getLargestFileSize(inFileNamesTable, nbFiles), + FIO_getLargestFileSize(inFileNamesTable, prefs->nbFiles), compressionLevel, comprParams); /* init */ @@ -1692,11 +1703,8 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, if (ress.dstFile == NULL) { /* could not open outFileName */ error = 1; } else { - unsigned u; - for (u=0; u 1) - DISPLAYLEVEL(2, "\rCompressing %u/%u files. 
Current source: %s ", u+1, nbFiles, inFileNamesTable[u]); - error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel); + for (; prefs->currFileIdx < prefs->nbFiles; ++prefs->currFileIdx) { + error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[prefs->currFileIdx], compressionLevel); } if (fclose(ress.dstFile)) EXM_THROW(29, "Write error (%s) : cannot properly close %s", @@ -1704,12 +1712,11 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, ress.dstFile = NULL; } } else { - unsigned int u=0; if (outMirroredRootDirName) - UTIL_mirrorSourceFilesDirectories(inFileNamesTable, nbFiles, outMirroredRootDirName); + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, prefs->nbFiles, outMirroredRootDirName); - for (u=0; ucurrFileIdx < prefs->nbFiles; ++prefs->currFileIdx) { + const char* const srcFileName = inFileNamesTable[prefs->currFileIdx]; const char* dstFileName = NULL; if (outMirroredRootDirName) { char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); @@ -1724,14 +1731,11 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, } else { dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ } - - if (nbFiles > 1) - DISPLAYLEVEL(2, "\rCompressing %u/%u files. Current source: %s ", u+1, nbFiles, srcFileName); error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); } if (outDirName) - FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles); + FIO_checkFilenameCollisions(inFileNamesTable , prefs->nbFiles); } FIO_freeCResources(ress); @@ -2001,8 +2005,13 @@ FIO_decompressZstdFrame(dRess_t* ress, FILE* finput, /* Write block */ storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips); frameSize += outBuff.pos; - DISPLAYUPDATE(2, "\033[s%-20.20s : %u MB... 
\033[u", - srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + if (prefs->nbFiles > 1) { + DISPLAYUPDATE(2, "\rDecompressing %u/%u files. Current source: %-20.20s : %u MB... ", + prefs->currFileIdx+1, prefs->nbFiles, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + } else { + DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", + srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); + } if (inBuff.pos > 0) { memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); @@ -2333,8 +2342,9 @@ static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if (g_display_prefs.displayLevel > 2 || (prefs->nbFiles == 1 && !((!strcmp(srcFileName, stdinmark) && dstFileName && !strcmp(dstFileName,stdoutmark))))) - DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + if (g_display_prefs.displayLevel > 2 || (g_display_prefs.displayLevel == 2 && prefs->nbFiles <= 1)) { + DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); + } return 0; } @@ -2572,7 +2582,7 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, - const char** srcNamesTable, unsigned nbFiles, + const char** srcNamesTable, const char* outMirroredRootDirName, const char* outDirName, const char* outFileName, const char* dictFileName) @@ -2581,25 +2591,22 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, dRess_t ress = FIO_createDResources(prefs, dictFileName); if (outFileName) { - unsigned u; if (!prefs->testMode) { ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); } - for (u=0; ucurrFileIdx < prefs->nbFiles; prefs->currFileIdx++) { + error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[prefs->currFileIdx]); } if ((!prefs->testMode) && 
(fclose(ress.dstFile))) EXM_THROW(72, "Write error : %s : cannot properly close output file", strerror(errno)); } else { - unsigned int u = 0; if (outMirroredRootDirName) - UTIL_mirrorSourceFilesDirectories(srcNamesTable, nbFiles, outMirroredRootDirName); + UTIL_mirrorSourceFilesDirectories(srcNamesTable, prefs->nbFiles, outMirroredRootDirName); - for (u=0; ucurrFileIdx < prefs->nbFiles; prefs->currFileIdx++) { /* create dstFileName */ + const char* const srcFileName = srcNamesTable[prefs->currFileIdx]; const char* dstFileName = NULL; if (outMirroredRootDirName) { char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); @@ -2613,12 +2620,10 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, dstFileName = FIO_determineDstName(srcFileName, outDirName); } if (dstFileName == NULL) { error=1; continue; } - if (nbFiles > 1) - DISPLAYLEVEL(2, "\rDecompressing %u/%u files. Current source: ", u+1, nbFiles); error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); } if (outDirName) - FIO_checkFilenameCollisions(srcNamesTable ,nbFiles); + FIO_checkFilenameCollisions(srcNamesTable , prefs->nbFiles); } FIO_freeDResources(ress); diff --git a/programs/fileio.h b/programs/fileio.h index 5913c8a1..f86ae417 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -97,6 +97,7 @@ void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompresse void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value); void FIO_setContentSize(FIO_prefs_t* const prefs, int value); void FIO_setNbFiles(FIO_prefs_t* const prefs, int value); +void FIO_setCurrFileIdx(FIO_prefs_t* const prefs, int value); /*-************************************* * Single File functions @@ -122,7 +123,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis /** FIO_compressMultipleFilenames() : * @return : nb of missing files */ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, - const char** 
inFileNamesTable, unsigned nbFiles, + const char** inFileNamesTable, const char* outMirroredDirName, const char* outDirName, const char* outFileName, const char* suffix, @@ -132,7 +133,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, /** FIO_decompressMultipleFilenames() : * @return : nb of missing or skipped files */ int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, - const char** srcNamesTable, unsigned nbFiles, + const char** srcNamesTable, const char* outMirroredDirName, const char* outDirName, const char* outFileName, diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 534a848e..9d4eee5d 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1243,8 +1243,12 @@ int main(int const argCount, const char* argv[]) DISPLAY("error : can't use --patch-from=# on multiple files \n"); CLEAN_RETURN(1); } + + /* No status message in pipe mode (stdin - stdout) */ + if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1; /* IO Stream/File */ + FIO_setNbFiles(prefs, (int)filenames->tableSize); FIO_setNotificationLevel(g_displayLevel); FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL); if (memLimit == 0) { @@ -1302,12 +1306,10 @@ int main(int const argCount, const char* argv[]) } } - if ((filenames->tableSize==1) && outFileName) { + if ((filenames->tableSize==1) && outFileName) operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); - } else { - FIO_setNbFiles(prefs, (int)filenames->tableSize); - operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); - } + else + operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); 
#else (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); @@ -1318,7 +1320,7 @@ int main(int const argCount, const char* argv[]) operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName); } else { FIO_setNbFiles(prefs, (int)filenames->tableSize); - operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, dictFileName); + operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName); } #else DISPLAY("Decompression not supported \n"); From a73e131f100bce734f3d9472a6f1606699f95574 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 26 Aug 2020 11:40:05 -0400 Subject: [PATCH 34/36] Adjust playTests.sh refuse overwrite test to include -q --- tests/playTests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index b7bfa76c..73b93280 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -361,7 +361,7 @@ zstd tmp1.zst tmp2.zst -o "$INTOVOID" -f zstd -d tmp1.zst tmp2.zst -o tmp touch tmpexists zstd tmp1 tmp2 -f -o tmpexists -zstd tmp1 tmp2 -o tmpexists && die "should have refused to overwrite" +zstd tmp1 tmp2 -q -o tmpexists && die "should have refused to overwrite" # Bug: PR #972 if [ "$?" 
-eq 139 ]; then die "should not have segfaulted" From fed7e7850bcc3267f5fbda167c0281d48b91758f Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 26 Aug 2020 11:57:38 -0400 Subject: [PATCH 35/36] Fix bug in user prompt where line was not flushed on negative response --- programs/fileio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/fileio.c b/programs/fileio.c index 29b499b4..e4c95848 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -624,9 +624,10 @@ FIO_openDstFile(FIO_prefs_t* const prefs, { int ch = getchar(); if ((ch!='Y') && (ch!='y')) { DISPLAY(" not overwritten \n"); + /* flush rest of input line */ + while ((ch!=EOF) && (ch!='\n')) ch = getchar(); return NULL; } - /* flush rest of input line */ while ((ch!=EOF) && (ch!='\n')) ch = getchar(); } } /* need to unlink */ From ead2387a7e77f6b3ee38a098556fb9143c7db5e4 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 26 Aug 2020 12:05:04 -0400 Subject: [PATCH 36/36] Remove extraneous FIO_setNbFiles() --- programs/zstdcli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 9d4eee5d..79113ee4 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1319,7 +1319,6 @@ int main(int const argCount, const char* argv[]) if (filenames->tableSize == 1 && outFileName) { operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName); } else { - FIO_setNbFiles(prefs, (int)filenames->tableSize); operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName); } #else