Use helper function for bit manipulations.

We already have BIT_getLowerBits, so use it. Benefits are 2fold:
1) Somewhat cleaner code
2) We are now using bzhi instructions, when available. Performance
delta is too small for microbenchmarks, but avoiding load still helps
larger applications, by reducing data cache pressure.
This commit is contained in:
Ilya Tokar 2022-02-23 17:59:56 -05:00
parent 621d798988
commit 0178c12dd9

View File

@ -162,6 +162,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0;
}
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_addBits() :
* can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */
@ -171,7 +181,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
bitC->bitPos += nbBits;
}
@ -309,16 +319,6 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
#endif
}
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
#endif
}
/*! BIT_lookBits() :
* Provides next n bits from local register.
* local register is not modified.