diff --git a/NEWS b/NEWS index 3ee7d0c3..c659e1f4 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ v1.3.2 license : changed /examples license to BSD + GPLv2 license : fix a few header files to reflect new license (#825) +fix : 32-bits build can now decode large offsets (levels 21+) fix : a rare compression bug when compression generates very large distances (only possible at --ultra -22) build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818) diff --git a/lib/common/threading.c b/lib/common/threading.c index 4e47b6b9..a82c975b 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -14,12 +14,8 @@ * This file will hold wrapper for systems, which do not support pthreads */ -/* When ZSTD_MULTITHREAD is not defined, this file would become an empty translation unit. - * Include some ISO C header code to prevent this and portably avoid related warnings. - * (Visual C++: C4206 / GCC: -Wpedantic / Clang: -Wempty-translation-unit) - */ -#include - +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useles_symbol; #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 8e82d85a..ada773b9 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1088,7 +1088,10 @@ static size_t ZSTD_decompressSequences( } -FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets) +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +HINT_INLINE +seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) { seq_t seq; @@ -1180,19 +1183,12 @@ FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState return seq; } -static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) { - if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { - 
return ZSTD_decodeSequenceLong_generic(seqState, 1); - } else { - return ZSTD_decodeSequenceLong_generic(seqState, 0); - } -} HINT_INLINE size_t ZSTD_execSequenceLong(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -1202,11 +1198,9 @@ size_t ZSTD_execSequenceLong(BYTE* op, const BYTE* match = sequence.match; /* check */ -#if 1 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); -#endif /* copy Literals */ ZSTD_copy8(op, *litPtr); @@ -1216,7 +1210,6 @@ size_t ZSTD_execSequenceLong(BYTE* op, *litPtr = iLitEnd; /* update for next sequence */ /* copy Match */ -#if 1 if (sequence.offset > (size_t)(oLitEnd - base)) { /* offset beyond prefix */ if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); @@ -1236,8 +1229,8 @@ size_t ZSTD_execSequenceLong(BYTE* op, return sequenceLength; } } } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ -#endif + assert(op <= oend_w); + assert(sequence.matchLength >= MINMATCH); /* match within prefix */ if (sequence.offset < 8) { @@ -1285,9 +1278,12 @@ static size_t ZSTD_decompressSequencesLong( const BYTE* const base = (const BYTE*) (dctx->base); const BYTE* const vBase = (const BYTE*) (dctx->vBase); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - unsigned const windowSize32 = 
(unsigned)dctx->fParams.windowSize; int nbSeq; + unsigned long long const regularWindowSizeMax = 1ULL << STREAM_ACCUMULATOR_MIN; + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (dctx->fParams.windowSize >= regularWindowSizeMax)); + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + /* Build Decoding Tables */ { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); if (ZSTD_isError(seqHSize)) return seqHSize; @@ -1315,13 +1311,13 @@ static size_t ZSTD_decompressSequencesLong( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ - /* likely because of register pressure */ - /* if that's the correct cause, then 32-bits ARM should be affected differently */ - /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ - if (dctx->fParams.windowSize > (1<<23)) - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); + if (dctx->fParams.windowSize > (1<<23)) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); } diff --git a/programs/bench.c b/programs/bench.c index a4321246..d5c04c69 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -373,10 +373,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cPtr, blockTable[blockNb].cSize, ddict); if (ZSTD_isError(regenSize)) { - DISPLAY("ZSTD_decompress_usingDDict() failed on block %u of size %u : %s \n", + EXM_THROW(2, "ZSTD_decompress_usingDDict() failed on block %u of size %u : %s \n", blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize)); - clockLoop = 0; /* force immediate test end */ - break; } blockTable[blockNb].resSize = regenSize; }