From d3ec23313d4b9f9856608c18d7d6d729615904f9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 10 Oct 2018 15:48:43 -0700 Subject: [PATCH 1/4] improved decompression speed while reviewing #1364, I found a decompression speed improvement. On my laptop, the new code decompresses +5-6% faster on clang and +2-3% faster on gcc. not bad for an accidental optimization... --- lib/common/bitstream.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 2f91460c..00b22049 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -339,17 +339,8 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { -#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ -# if defined(__x86_64__) - if (sizeof(bitContainer)==8) - return _bextr_u64(bitContainer, start, nbBits); - else -# endif - return _bextr_u32(bitContainer, start, nbBits); -#else assert(nbBits < BIT_MASK_SIZE); return (bitContainer >> start) & BIT_mask[nbBits]; -#endif } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) @@ -366,9 +357,11 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ +#if 1 + assert(bitD->bitsConsumed + nbBits <= sizeof(bitD->bitContainer)*8); return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); #else + /* previous code path, seems slower */ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); #endif From 7791f192ee8ee6e80352fd9d906d015aaa7eb2e9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 10 Oct 2018 16:36:11 -0700 Subject: [PATCH 2/4] removed one assert() which can be triggered when input is corrupted. --- lib/common/bitstream.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 00b22049..faa0c216 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -340,6 +340,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { assert(nbBits < BIT_MASK_SIZE); + assert(start < sizeof(bitContainer)*8); return (bitContainer >> start) & BIT_mask[nbBits]; } @@ -357,11 +358,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { + /* arbitrate between double-shift and shift+mask */ #if 1 - assert(bitD->bitsConsumed + nbBits <= sizeof(bitD->bitContainer)*8); + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); #else - /* previous code path, seems slower */ + /* this code path is slower on my os-x laptop */ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); #endif From c012e9540ae4d9d040928f418d2eef11ecab56a1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 10 Oct 2018 17:33:04 -0700 Subject: [PATCH 3/4] removed one assert() that can be triggered by a corrupted bitstream. --- lib/common/bitstream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index faa0c216..f8ec7efc 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -340,7 +340,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { assert(nbBits < BIT_MASK_SIZE); - assert(start < sizeof(bitContainer)*8); + /* if start > bitMask, bitstream is corrupted, and result is undefined */ return (bitContainer >> start) & BIT_mask[nbBits]; } From 6ed3b526e4937900b2c8fc85800137784ae963a9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 10 Oct 2018 18:26:44 -0700 Subject: [PATCH 4/4] restored bitMask for shift values since corrupted bitstreams can generate too large values. This slightly reduces the benefits from clang on my laptop. gcc results and code generation are not affected. --- lib/common/bitstream.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index f8ec7efc..ef89b987 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -339,9 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); - /* if start > bitMask, bitstream is corrupted, and result is undefined */ - return (bitContainer >> start) & BIT_mask[nbBits]; + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)