[regression] Update results.csv

dev
Nick Terrell 2020-08-18 16:57:35 -07:00
parent 575731b6db
commit 8f8bd2d1ac
5 changed files with 538 additions and 557 deletions

View File

@ -38,6 +38,28 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
/* FSE_ctz() :
 * Returns the number of trailing zero bits of `val`,
 * i.e. the position of its lowest set bit.
 * `val` must be non-zero (enforced with assert()).
 * Relies on a compiler intrinsic when one is detected,
 * and falls back to shifting one bit at a time otherwise. */
static U32 FSE_ctz(U32 val)
{
    assert(val != 0);
#if defined(_MSC_VER)   /* Visual */
    {   unsigned long lowestBit = 0;
        if (_BitScanForward(&lowestBit, val)) {
            return (unsigned)lowestBit;
        }
        return 0;   /* intrinsic returned 0 */
    }
#elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
    return __builtin_ctz(val);
#elif defined(__ICCARM__)   /* IAR Intrinsic */
    return __CTZ(val);
#else   /* Software version */
    {   U32 nbZeros;
        /* drop low bits until the lowest one is set */
        for (nbZeros = 0; (val & 1) == 0; ++nbZeros) {
            val >>= 1;
        }
        return nbZeros;
    }
#endif
}
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
@ -54,9 +76,9 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
unsigned const maxSV1 = *maxSVPtr + 1;
int previous0 = 0;
if (hbSize < 4) {
if (hbSize < 8) {
/* This function only works when hbSize >= 8 */
char buffer[4] = {0};
char buffer[8] = {0};
memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
@ -80,18 +102,17 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
for (;;) {
if (previous0) {
// TODO: Generalize to FSE_countTrailingZeros() or something
int repeats = __builtin_ctz(~bitStream) >> 1;
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (ip < iend-6) {
if (ip <= iend-7) {
ip += 3;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 24;
bitCount += 24;
}
repeats = __builtin_ctz(~bitStream) >> 1;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
}
charnum += 3 * repeats;
bitStream >>= 2 * repeats;

View File

@ -130,45 +130,6 @@ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
return D4;
}
#if 0
// TODO: Remove this
/* BMI2 version that uses _pdep_u64() for weight 1 and 2 symbols.
* This doesn't provide much gains, so not worth the complexity.
* Leaving in for now but will remove before I commit.
*/
#include <immintrin.h>
/* HUF_DEltX1_pack4() :
 * Builds a 64-bit word made of four 16-bit lanes.
 * The 32-bit value read from `symbols` is spread with _pdep_u64() so that
 * each of its bytes lands in one byte of a distinct lane, and `nbBits` is
 * replicated into the other byte of every lane.
 * Little-endian : symbol in the low byte, nbBits in the high byte of each lane.
 * Big-endian    : symbol in the high byte, nbBits in the low byte of each lane.
 * NOTE(review): presumably each lane mirrors one HUF_DEltX1 cell - confirm against the struct. */
static U64 HUF_DEltX1_pack4(BYTE const* symbols, BYTE nbBits) {
    if (MEM_isLittleEndian()) {
        U64 const lanesOfNbBits = nbBits * 0x0100010001000100ULL;
        return _pdep_u64(MEM_read32(symbols), 0x00FF00FF00FF00FFULL) | lanesOfNbBits;
    }
    {   U64 const lanesOfNbBits = nbBits * 0x0001000100010001ULL;
        return _pdep_u64(MEM_read32(symbols), 0xFF00FF00FF00FF00ULL) | lanesOfNbBits;
    }
}
/* HUF_DEltX1_pack2() :
 * Builds a 64-bit word made of four 16-bit lanes from two symbols.
 * The 16-bit value read from `symbols` is spread with _pdep_u64() so that
 * each of its bytes lands in one 32-bit half of the word; multiplying by
 * 0x00010001 then copies each deposited byte into the lane 16 bits above it,
 * so every symbol occupies two consecutive lanes.
 * `nbBits` is replicated into the remaining byte of every lane
 * (high byte on little-endian, low byte on big-endian). */
static U64 HUF_DEltX1_pack2(BYTE const* symbols, BYTE nbBits) {
    if (MEM_isLittleEndian()) {
        U64 const lanesOfNbBits = nbBits * 0x0100010001000100ULL;
        U64 const spread = _pdep_u64(MEM_read16(symbols), 0x000000FF000000FFULL);
        U64 const doubled = spread * 0x00010001ULL;
        return doubled | lanesOfNbBits;
    }
    {   U64 const lanesOfNbBits = nbBits * 0x0001000100010001ULL;
        U64 const spread = _pdep_u64(MEM_read16(symbols), 0x0000FF000000FF00ULL);
        U64 const doubled = spread * 0x00010001ULL;
        return doubled | lanesOfNbBits;
    }
}
#endif
typedef struct {
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
@ -178,7 +139,6 @@ typedef struct {
} HUF_ReadDTableX1_Workspace;
// TODO: Template based on BMI2 (5% boost)
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
@ -236,12 +196,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
int u;
for (u=0; u < unroll; ++u) {
size_t const w = wksp->huffWeight[n+u];
wksp->symbols[wksp->rankStart[w]++] = n+u;
wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
}
}
for (; n < (int)nbSymbols; ++n) {
size_t const w = wksp->huffWeight[n];
wksp->symbols[wksp->rankStart[w]++] = n;
wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
}
}
@ -259,7 +219,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
int const symbolCount = wksp->rankVal[w];
int const length = (1 << w) >> 1;
int uStart = rankStart;
BYTE const nbBits = tableLog + 1 - w;
BYTE const nbBits = (BYTE)(tableLog + 1 - w);
int s;
int u;
switch (length) {

View File

@ -82,7 +82,7 @@ typedef struct {
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t;
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,

View File

@ -859,7 +859,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
@ -887,7 +887,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
@ -917,7 +917,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }

File diff suppressed because it is too large Load Diff