Use helper function for bit manipulations.

We already have BIT_getLowerBits, so use it. Benefits are 2fold: 1) Somewhat cleaner code 2) We are now using bzhi instructions, when available. Performance delta is too small for microbenchmarks, but avoiding load still helps larger applications, by reducing data cache pressure.
2022-02-23 17:59:56 -05:00 · 2022-02-23 17:59:56 -05:00 · 0178c12dd9
commit 0178c12dd9
parent 621d798988
1 changed files with 11 additions and 11 deletions
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@ -162,6 +162,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
    return 0;
 }

+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
+	return  _bzhi_u64(bitContainer, nbBits);
+#else
+    assert(nbBits < BIT_MASK_SIZE);
+    return bitContainer & BIT_mask[nbBits];
+#endif
+}
+
 /*! BIT_addBits() :
 *  can add up to 31 bits into `bitC`.
 *  Note : does not check for register overflow ! */
@ -171,7 +181,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
    assert(nbBits < BIT_MASK_SIZE);
    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+    bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
    bitC->bitPos += nbBits;
 }

@ -309,16 +319,6 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
 #endif
 }

-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-{
-#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
-	return  _bzhi_u64(bitContainer, nbBits);
-#else
-    assert(nbBits < BIT_MASK_SIZE);
-    return bitContainer & BIT_mask[nbBits];
-#endif
-}
-
 /*! BIT_lookBits() :
 *  Provides next n bits from local register.
 *  local register is not modified.