Move bitwise builtins into bits.h

dev
Elliot Gorokhovsky 2022-01-21 11:29:14 -07:00
parent 970460f67d
commit db2f4a6532
16 changed files with 250 additions and 400 deletions

212
lib/common/bits.h Normal file
View File

@ -0,0 +1,212 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H
#include "mem.h"
/* ZSTD_highbit32() :
 * Returns the zero-based index of the most significant set bit of `val`
 * (0 for val==1, 31 for any value with the top bit set).
 * `val` must be non-zero (asserted).
 * A compiler intrinsic is used when one is available; otherwise the value is
 * resolved with a De Bruijn multiplication and a 32-entry lookup table.
 */
MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    assert(val != 0);
#if defined(_MSC_VER)   /* Visual */
#  if STATIC_BMI2 == 1
    /* lzcnt counts leading zeros; xor with 31 turns that into a bit index */
    return _lzcnt_u32(val) ^ 31;
#  else
    if (val == 0) {
        /* Should not reach this code path */
        __assume(0);
    }
    {   unsigned long msbIndex;
        _BitScanReverse(&msbIndex, val);
        return (unsigned)msbIndex;
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
    return (unsigned)__builtin_clz(val) ^ 31;
#elif defined(__ICCARM__)   /* IAR Intrinsic */
    return 31 - __CLZ(val);
#else   /* Software version */
    {   static const U32 msbLookup[32] = {  0,  9,  1, 10, 13, 21,  2, 29,
                                           11, 14, 16, 18, 22, 25,  3, 30,
                                            8, 12, 20, 28, 15, 17, 24,  7,
                                           19, 27, 23,  6, 26,  5,  4, 31 };
        U32 smeared = val;
        unsigned shift;
        /* propagate the highest set bit into every lower position */
        for (shift = 1; shift <= 16; shift *= 2) {
            smeared |= smeared >> shift;
        }
        return msbLookup[(smeared * 0x07C4ACDDU) >> 27];
    }
#endif
}
/* ZSTD_countTrailingZeros32() :
 * Returns the number of consecutive zero bits at the least-significant end of
 * `val`, i.e. the index of its lowest set bit (0 for odd values, 31 for
 * 0x80000000).
 * `val` must be non-zero (asserted).
 * A compiler intrinsic is used when one is available; otherwise the lowest
 * set bit is isolated and resolved through a De Bruijn lookup table.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)
    if (val != 0) {
        unsigned long r;
        _BitScanForward(&r, val);
        return (unsigned)r;
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#elif defined(__GNUC__) && (__GNUC__ >= 3)
    return (unsigned)__builtin_ctz(val);
#elif defined(__ICCARM__)   /* IAR Intrinsic */
    return __CTZ(val);
#else
    {   /* nested block : keeps declarations at the start of a scope (after the assert) */
        static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
                                                 30, 22, 20, 15, 25, 17,  4,  8,
                                                 31, 27, 13, 23, 21, 19, 16,  7,
                                                 26, 12, 18,  6, 11,  5, 10,  9 };
        /* Isolate the lowest set bit using unsigned arithmetic only :
         * negating (S32)val is a signed overflow when val == 0x80000000. */
        U32 const lowestBit = val & (0U - val);
        return (unsigned)DeBruijnBytePos[(U32)(lowestBit * 0x077CB531U) >> 27];
    }
#endif
}
/* ZSTD_countTrailingZeros64() :
 * Returns the number of consecutive zero bits at the least-significant end of
 * the 64-bit `val`, i.e. the index of its lowest set bit (0..63).
 * `val` must be non-zero (asserted).
 * With GCC-compatible compilers in 32-bit mode, the value is split into two
 * 32-bit words and the 32-bit builtin is applied to the first non-zero one.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2
    /* explicit cast : the intrinsic returns a 64-bit count */
    return (unsigned)_tzcnt_u64(val);
#  else
    if (val != 0) {
        unsigned long r;
        _BitScanForward64(&r, val);
        return (unsigned)r;
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
    if (MEM_32bits()) {
        U32 const mostSignificantWord = (U32)(val >> 32);
        U32 const leastSignificantWord = (U32)val;
        if (leastSignificantWord == 0) {
            /* low word is empty, so the lowest set bit lives in the high word */
            return 32 + (unsigned)__builtin_ctz(mostSignificantWord);
        } else {
            return (unsigned)__builtin_ctz(leastSignificantWord);
        }
    } else {
        return (unsigned)__builtin_ctzll(val);
    }
#else
    {   /* nested block : keeps declarations at the start of a scope (after the assert) */
        static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
                                                  4, 25, 14, 28,  9, 34, 20, 56,
                                                  5, 17, 26, 54, 15, 41, 29, 43,
                                                 10, 31, 38, 35, 21, 45, 49, 57,
                                                 63,  6, 12, 18, 24, 27, 33, 55,
                                                 16, 53, 40, 42, 30, 37, 44, 48,
                                                 62, 11, 23, 32, 52, 39, 36, 47,
                                                 61, 22, 51, 46, 60, 50, 59, 58 };
        /* Isolate the lowest set bit using unsigned arithmetic only :
         * negating (long long)val is a signed overflow when only bit 63 is set. */
        U64 const lowestBit = val & (0ULL - val);
        return (unsigned)DeBruijnBytePos[(U64)(lowestBit * 0x0218A392CDABBD3FULL) >> 58];
    }
#endif
}
/* ZSTD_NbCommonBytes() :
 * `val` is a native-endian word (typically the XOR of two words read from
 * memory). Returns how many of its bytes, taken in memory order starting from
 * the lowest address, are zero before the first non-zero byte :
 *  - little-endian : number of trailing zero bits / 8
 *  - big-endian    : number of leading zero bits / 8
 * `val` must be non-zero (asserted).
 */
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
    assert(val != 0);
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2
            /* explicit cast : the intrinsic returns a 64-bit count */
            return (unsigned)(_tzcnt_u64(val) >> 3);
#  else
            if (val != 0) {
                unsigned long r;
                _BitScanForward64(&r, (U64)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (unsigned)(__builtin_ctzll((U64)val) >> 3);
#else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            /* lowest set bit via unsigned negation : negating a signed copy
             * overflows when only the top bit is set */
            U64 const v64 = (U64)val;
            return (unsigned)DeBruijnBytePos[(U64)((v64 & (0ULL - v64)) * 0x0218A392CDABBD3FULL) >> 58];
#endif
        } else { /* 32 bits */
#if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanForward(&r, (U32)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (unsigned)(__builtin_ctz((U32)val) >> 3);
#else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            /* same unsigned lowest-set-bit isolation as the 64-bit variant */
            U32 const v32 = (U32)val;
            return (unsigned)DeBruijnBytePos[(U32)((v32 & (0U - v32)) * 0x077CB531U) >> 27];
#endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#if defined(_MSC_VER) && defined(_WIN64)
#  if STATIC_BMI2
            /* explicit cast : the intrinsic returns a 64-bit count */
            return (unsigned)(_lzcnt_u64(val) >> 3);
#  else
            if (val != 0) {
                unsigned long r;
                _BitScanReverse64(&r, (U64)val);
                /* r is the index of the highest set bit;
                 * the number of leading zero bits is 63 - r */
                return (unsigned)((63 - r) >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#  endif
#elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (unsigned)(__builtin_clzll((U64)val) >> 3);
#else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            /* binary search for the first non-zero byte, from the top */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#endif
        } else { /* 32 bits */
#if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanReverse(&r, (unsigned long)val);
                /* r is the index of the highest set bit;
                 * the number of leading zero bits is 31 - r */
                return (unsigned)((31 - r) >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (unsigned)(__builtin_clz((U32)val) >> 3);
#else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#endif
        }
    }
}
#endif /* ZSTD_BITS_H */

View File

@ -30,6 +30,7 @@ extern "C" {
#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
#include "bits.h" /* ZSTD_highbit32 */
/*=========================================
@ -132,48 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */
/*-**************************************************************
* Internal functions
****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
# if STATIC_BMI2 == 1
return _lzcnt_u32(val) ^ 31;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*===== Local Constants =====*/
static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F,
@ -291,7 +250,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else {
bitD->ptr = bitD->start;
@ -319,7 +278,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
default: break;
}
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
}
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;

View File

@ -21,6 +21,7 @@
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h"
#include "bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
/*=== Version ===*/
@ -38,34 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
static U32 FSE_ctz(U32 val)
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
if (val != 0) {
unsigned long r;
_BitScanForward(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# elif defined(__ICCARM__) /* IAR Intrinsic */
return __CTZ(val);
# else /* Software version */
U32 count = 0;
while ((val & 1) == 0) {
val >>= 1;
++count;
}
return count;
# endif
}
}
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
@ -113,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* repeat.
* Avoid UB by setting the high bit to 1.
*/
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) {
@ -124,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
}
charnum += 3 * repeats;
bitStream >>= 2 * repeats;
@ -189,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* know that threshold > 1.
*/
if (remaining <= 1) break;
nbBits = BIT_highbit32(remaining) + 1;
nbBits = ZSTD_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1);
}
if (charnum >= maxSV1) break;
@ -312,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (weightTotal == 0) return ERROR(corruption_detected);
/* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
{ U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
U32 const verif = 1 << ZSTD_highbit32(rest);
U32 const lastWeight = ZSTD_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;

View File

@ -24,6 +24,7 @@
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
#include "bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -166,7 +167,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} }

View File

@ -360,98 +360,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
{
# if defined(_MSC_VER) /* Visual */
# if STATIC_BMI2 == 1
return _lzcnt_u32(val)^31;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return (U32)__builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
# endif
}
}
/**
* Counts the number of trailing zeros of a `size_t`.
* Most compilers should support CTZ as a builtin. A backup
* implementation is provided if the builtin isn't supported, but
* it may not be terribly efficient.
*/
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)__builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)r;
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)__builtin_ctz((U32)val);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;

View File

@ -26,6 +26,7 @@
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -191,7 +192,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
break;
default :
assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
{ U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
@ -355,8 +356,8 @@ void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits;
@ -364,7 +365,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */

View File

@ -32,6 +32,7 @@
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* **************************************************************
@ -407,7 +408,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits
/* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank.
*/
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1];
@ -505,7 +506,7 @@ typedef struct {
*/
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
/* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@ -513,7 +514,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
/* Helper swap function for HUF_quickSortPartition() */
@ -870,7 +871,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1
{
size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);

View File

@ -27,6 +27,7 @@
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* ***************************************************************
* Tuning parameters

View File

@ -23,6 +23,7 @@
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
#if defined (__cplusplus)
extern "C" {
@ -699,103 +700,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
/*-*************************************
* Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val) >> 3;
# else
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _lzcnt_u64(val) >> 3;
# else
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, (U64)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, (unsigned long)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
} }
}
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
const BYTE* const pStart = pIn;

View File

@ -10,6 +10,7 @@
#include "zstd_compress_internal.h"
#include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
/*-*************************************
@ -765,44 +766,6 @@ size_t ZSTD_HcFindBestMatch(
typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */
/* ZSTD_VecMask_next():
* Starting from the LSB, returns the idx of the next non-zero bit.
* Basically counting the nb of trailing zeroes.
*/
static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
assert(val != 0);
# if defined(_MSC_VER) && defined(_WIN64)
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, val);
return (U32)(r);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
if (sizeof(size_t) == 4) {
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + (U32)__builtin_ctz(mostSignificantWord);
} else {
return (U32)__builtin_ctz(leastSignificantWord);
}
} else {
return (U32)__builtin_ctzll(val);
}
# else
/* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
* and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
*/
val = ~val & (val - 1ULL); /* Lowest set bit mask */
val = val - ((val >> 1) & 0x5555555555555555);
val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
# endif
}
/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
@ -1202,7 +1165,7 @@ size_t ZSTD_RowFindBestMatch(
/* Cycle through the matches and prefetch */
for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask;
U32 const matchIndex = row[matchPos];
assert(numMatches < rowEntries);
if (matchIndex < lowLimit)
@ -1270,7 +1233,7 @@ size_t ZSTD_RowFindBestMatch(
ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
U32 const matchPos = (head + ZSTD_countTrailingZeros64(matches)) & rowMask;
U32 const matchIndex = dmsRow[matchPos];
if (matchIndex < dmsLowestIndex)
break;

View File

@ -23,6 +23,7 @@
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/zstd_internal.h"
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */
/* **************************************************************
* Constants
@ -142,7 +143,7 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
static size_t HUF_initDStream(BYTE const* ip) {
BYTE const lastByte = ip[7];
size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
size_t const value = MEM_readLEST(ip) | 1;
assert(bitsConsumed <= 8);
return value << bitsConsumed;
@ -263,7 +264,7 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
/* Construct the BIT_DStream_t. */
bit->bitContainer = MEM_readLE64(args->ip[stream]);
bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
bit->start = (const char*)args->iend[0];
bit->limitPtr = bit->start + sizeof(size_t);
bit->ptr = (const char*)args->ip[stream];

View File

@ -66,6 +66,7 @@
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#include "../common/bits.h" /* ZSTD_highbit32 */
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "../legacy/zstd_legacy.h"

View File

@ -26,6 +26,7 @@
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/*_*******************************************************
* Macros
@ -551,7 +552,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (u=0; u<tableSize; u++) {
U32 const symbol = tableDecode[u].baseValue;
U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
assert(nbAdditionalBits[symbol] < 255);
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];

View File

@ -34,6 +34,7 @@
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../common/bits.h" /* ZSTD_highbit32 */
#include "../zdict.h"
#include "cover.h"

View File

@ -54,6 +54,7 @@
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
#include "../zdict.h"
#include "divsufsort.h"
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
/*-*************************************
@ -130,85 +131,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
/*-********************************************************
* Dictionary training functions
**********************************************************/
static unsigned ZDICT_NbCommonBytes (size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
if (val != 0) {
unsigned long r;
_BitScanForward64(&r, (U64)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanForward(&r, (U32)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
if (val != 0) {
unsigned long r;
_BitScanReverse64(&r, val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
if (val != 0) {
unsigned long r;
_BitScanReverse(&r, (unsigned long)val);
return (unsigned)(r >> 3);
} else {
/* Should not reach this code path */
__assume(0);
}
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (unsigned)(__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
} }
}
/*! ZDICT_count() :
Count the nb of common bytes between 2 pointers.
Note : this function presumes end of buffer followed by noisy guard band.
@ -223,7 +145,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
pMatch = (const char*)pMatch+sizeof(size_t);
continue;
}
pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
return (size_t)((const char*)pIn - pStart);
}
}

View File

@ -24,11 +24,12 @@
#include "common/huf.h"
#include "fuzz_helpers.h"
#include "fuzz_data_producer.h"
#include "common/bits.h"
static size_t adjustTableLog(size_t tableLog, size_t maxSymbol)
{
size_t const alphabetSize = maxSymbol + 1;
size_t minTableLog = BIT_highbit32(alphabetSize) + 1;
size_t minTableLog = ZSTD_highbit32(alphabetSize) + 1;
if ((alphabetSize & (alphabetSize - 1)) != 0) {
++minTableLog;
}