Merge pull request #1365 from facebook/bitspeed

Improved decompression speed
This commit is contained in:
Yann Collet 2018-10-10 19:16:35 -07:00 committed by GitHub
commit d28b80e363
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -339,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{ {
#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ U32 const regMask = sizeof(bitContainer)*8 - 1;
# if defined(__x86_64__) /* if start > regMask, bitstream is corrupted, and result is undefined */
if (sizeof(bitContainer)==8)
return _bextr_u64(bitContainer, start, nbBits);
else
# endif
return _bextr_u32(bitContainer, start, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE); assert(nbBits < BIT_MASK_SIZE);
return (bitContainer >> start) & BIT_mask[nbBits]; return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
} }
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@ -366,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* @return : value extracted */ * @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{ {
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ /* arbitrate between double-shift and shift+mask */
#if 1
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
* bitstream is likely corrupted, and result is undefined */
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
#else #else
/* this code path is slower on my os-x laptop */
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
#endif #endif