[regression] Update results.csv
parent
575731b6db
commit
8f8bd2d1ac
|
@ -38,6 +38,28 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
|
|||
/*-**************************************************************
|
||||
* FSE NCount encoding-decoding
|
||||
****************************************************************/
|
||||
/* FSE_ctz() :
 * Counts the trailing zero bits of `val`, i.e. returns the index of its
 * lowest set bit.
 * `val` must be non-zero (checked by assert()).
 * Selects a compiler intrinsic at compile time (MSVC, GCC-compatible, IAR)
 * and falls back to a portable shift loop when none is available.
 * @return : number of consecutive zero bits starting from bit 0 of `val`.
 */
static U32 FSE_ctz(U32 val)
|
||||
{
|
||||
/* val == 0 has no set bit : the software loop below would never terminate on it */
assert(val != 0);
|
||||
{
|
||||
# if defined(_MSC_VER) /* Visual */
|
||||
unsigned long r=0;
|
||||
/* _BitScanForward() stores the bit index in r; fall back to 0 if it reports failure */
return _BitScanForward(&r, val) ? (unsigned)r : 0;
|
||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||
return __builtin_ctz(val);
|
||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||
return __CTZ(val);
|
||||
# else /* Software version */
|
||||
U32 count = 0;
|
||||
/* shift right one bit at a time until the lowest bit is set */
while ((val & 1) == 0) {
|
||||
val >>= 1;
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
|
@ -54,9 +76,9 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
|
|||
unsigned const maxSV1 = *maxSVPtr + 1;
|
||||
int previous0 = 0;
|
||||
|
||||
if (hbSize < 4) {
|
||||
if (hbSize < 8) {
|
||||
/* This function only works when hbSize >= 8 */
|
||||
char buffer[4] = {0};
|
||||
char buffer[8] = {0};
|
||||
memcpy(buffer, headerBuffer, hbSize);
|
||||
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
|
||||
buffer, sizeof(buffer));
|
||||
|
@ -80,18 +102,17 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
|
|||
|
||||
for (;;) {
|
||||
if (previous0) {
|
||||
// TODO: Generalize to FSE_countTrailingZeros() or something
|
||||
int repeats = __builtin_ctz(~bitStream) >> 1;
|
||||
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
while (repeats >= 12) {
|
||||
charnum += 3 * 12;
|
||||
if (ip < iend-6) {
|
||||
if (ip <= iend-7) {
|
||||
ip += 3;
|
||||
bitStream = MEM_readLE32(ip) >> bitCount;
|
||||
} else {
|
||||
bitStream >>= 24;
|
||||
bitCount += 24;
|
||||
}
|
||||
repeats = __builtin_ctz(~bitStream) >> 1;
|
||||
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
|
||||
}
|
||||
charnum += 3 * repeats;
|
||||
bitStream >>= 2 * repeats;
|
||||
|
|
|
@ -130,45 +130,6 @@ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
|
|||
return D4;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// TODO: Remove this
|
||||
/* BMI2 version that uses _pdep_u64() for weight 1 and 2 symbols.
|
||||
* This doesn't provide much gains, so not worth the complexity.
|
||||
* Leaving in for now but will remove before I commit.
|
||||
*/
|
||||
#include <immintrin.h>
|
||||
|
||||
static U64 HUF_DEltX1_pack4(BYTE const* symbols, BYTE nbBits) {
|
||||
U64 D4;
|
||||
if (MEM_isLittleEndian()) {
|
||||
U64 const nbBits4 = nbBits * 0x0100010001000100ULL;
|
||||
U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0x00FF00FF00FF00FFULL);
|
||||
D4 = symbols4 | nbBits4;
|
||||
} else {
|
||||
U64 const nbBits4 = nbBits * 0x0001000100010001ULL;
|
||||
U64 const symbols4 = _pdep_u64(MEM_read32(symbols), 0xFF00FF00FF00FF00ULL);
|
||||
D4 = symbols4 | nbBits4;
|
||||
}
|
||||
return D4;
|
||||
}
|
||||
|
||||
static U64 HUF_DEltX1_pack2(BYTE const* symbols, BYTE nbBits) {
|
||||
U64 D4;
|
||||
if (MEM_isLittleEndian()) {
|
||||
U64 const nbBits4 = nbBits * 0x0100010001000100ULL;
|
||||
U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x000000FF000000FFULL);
|
||||
symbols4 = symbols4 * 0x00010001ULL;
|
||||
D4 = symbols4 | nbBits4;
|
||||
} else {
|
||||
U64 const nbBits4 = nbBits * 0x0001000100010001ULL;
|
||||
U64 symbols4 = _pdep_u64(MEM_read16(symbols), 0x0000FF000000FF00ULL);
|
||||
symbols4 *= 0x00010001ULL;
|
||||
D4 = symbols4 | nbBits4;
|
||||
}
|
||||
return D4;
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
||||
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
|
||||
|
@ -178,7 +139,6 @@ typedef struct {
|
|||
} HUF_ReadDTableX1_Workspace;
|
||||
|
||||
|
||||
// TODO: Template based on BMI2 (5% boost)
|
||||
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
||||
|
@ -236,12 +196,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|||
int u;
|
||||
for (u=0; u < unroll; ++u) {
|
||||
size_t const w = wksp->huffWeight[n+u];
|
||||
wksp->symbols[wksp->rankStart[w]++] = n+u;
|
||||
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
|
||||
}
|
||||
}
|
||||
for (; n < (int)nbSymbols; ++n) {
|
||||
size_t const w = wksp->huffWeight[n];
|
||||
wksp->symbols[wksp->rankStart[w]++] = n;
|
||||
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -259,7 +219,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
|
|||
int const symbolCount = wksp->rankVal[w];
|
||||
int const length = (1 << w) >> 1;
|
||||
int uStart = rankStart;
|
||||
BYTE const nbBits = tableLog + 1 - w;
|
||||
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
|
||||
int s;
|
||||
int u;
|
||||
switch (length) {
|
||||
|
|
|
@ -82,7 +82,7 @@ typedef struct {
|
|||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
||||
U32 rep[ZSTD_REP_NUM];
|
||||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE];
|
||||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
||||
} ZSTD_entropyDTables_t;
|
||||
|
||||
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
|
|
|
@ -859,7 +859,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
|||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
|
||||
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize; }
|
||||
|
@ -887,7 +887,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
|||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
|
||||
if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize; }
|
||||
|
@ -917,7 +917,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
|||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
|
||||
if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize; }
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue