Merge pull request #1365 from facebook/bitspeed
Improved decompression speed
This commit is contained in:
commit
d28b80e363
@ -339,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
|||||||
|
|
||||||
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
||||||
{
|
{
|
||||||
#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
|
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
||||||
# if defined(__x86_64__)
|
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
||||||
if (sizeof(bitContainer)==8)
|
|
||||||
return _bextr_u64(bitContainer, start, nbBits);
|
|
||||||
else
|
|
||||||
# endif
|
|
||||||
return _bextr_u32(bitContainer, start, nbBits);
|
|
||||||
#else
|
|
||||||
assert(nbBits < BIT_MASK_SIZE);
|
assert(nbBits < BIT_MASK_SIZE);
|
||||||
return (bitContainer >> start) & BIT_mask[nbBits];
|
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||||
@ -366,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|||||||
* @return : value extracted */
|
* @return : value extracted */
|
||||||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||||
{
|
{
|
||||||
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
|
/* arbitrate between double-shift and shift+mask */
|
||||||
|
#if 1
|
||||||
|
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
|
||||||
|
* bitstream is likely corrupted, and result is undefined */
|
||||||
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
||||||
#else
|
#else
|
||||||
|
/* this code path is slower on my os-x laptop */
|
||||||
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
||||||
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user