fixed Huff0 quad-symbols decoder (#173)
parent
ddb8ebd5b3
commit
6d1d25299a
|
@ -76,7 +76,7 @@ extern "C" {
|
|||
******************************************/
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
|
||||
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder */
|
||||
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
|
||||
|
||||
|
||||
/* ****************************************
|
||||
|
@ -122,7 +122,7 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
|
|||
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbol decoder */
|
||||
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
|
||||
|
||||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
|
||||
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
|
||||
|
@ -157,7 +157,6 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 weightTotal;
|
||||
U32 tableLog;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
size_t iSize = ip[0];
|
||||
size_t oSize;
|
||||
|
@ -191,31 +190,31 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|||
/* collect weight stats */
|
||||
memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
|
||||
weightTotal = 0;
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
|
||||
rankStats[huffWeight[n]]++;
|
||||
weightTotal += (1 << huffWeight[n]) >> 1;
|
||||
}}
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
|
||||
rankStats[huffWeight[n]]++;
|
||||
weightTotal += (1 << huffWeight[n]) >> 1;
|
||||
} }
|
||||
|
||||
/* get last non-null symbol weight (implied, total must be 2^n) */
|
||||
tableLog = BIT_highbit32(weightTotal) + 1;
|
||||
if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
|
||||
/* determine last weight */
|
||||
{ U32 const total = 1 << tableLog;
|
||||
U32 const rest = total - weightTotal;
|
||||
U32 const verif = 1 << BIT_highbit32(rest);
|
||||
U32 const lastWeight = BIT_highbit32(rest) + 1;
|
||||
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
|
||||
huffWeight[oSize] = (BYTE)lastWeight;
|
||||
rankStats[lastWeight]++;
|
||||
}
|
||||
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
|
||||
if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
|
||||
*tableLogPtr = tableLog;
|
||||
/* determine last weight */
|
||||
{ U32 const total = 1 << tableLog;
|
||||
U32 const rest = total - weightTotal;
|
||||
U32 const verif = 1 << BIT_highbit32(rest);
|
||||
U32 const lastWeight = BIT_highbit32(rest) + 1;
|
||||
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
|
||||
huffWeight[oSize] = (BYTE)lastWeight;
|
||||
rankStats[lastWeight]++;
|
||||
} }
|
||||
|
||||
/* check tree construction validity */
|
||||
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
|
||||
|
||||
/* results */
|
||||
*nbSymbolsPtr = (U32)(oSize+1);
|
||||
*tableLogPtr = tableLog;
|
||||
return iSize+1;
|
||||
}
|
||||
|
||||
|
|
|
@ -847,17 +847,17 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
|
|||
|
||||
static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
|
||||
{
|
||||
const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
memcpy(op, ds+val, sizeof(HUF_DSeqX6));
|
||||
BIT_skipBits(DStream, dd[val].nbBits);
|
||||
return dd[val].nbBytes;
|
||||
}
|
||||
|
||||
static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
|
||||
static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStream,
|
||||
const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
|
||||
{
|
||||
const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
U32 length = dd[val].nbBytes;
|
||||
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
||||
U32 const length = dd[val].nbBytes;
|
||||
if (length <= maxL) {
|
||||
memcpy(op, ds+val, length);
|
||||
BIT_skipBits(DStream, dd[val].nbBits);
|
||||
|
@ -910,7 +910,6 @@ static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* c
|
|||
return p-pStart;
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_decompress1X6_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
|
@ -919,17 +918,15 @@ size_t HUF_decompress1X6_usingDTable(
|
|||
const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
|
||||
const U32 dtLog = DTable[0];
|
||||
size_t errorCode;
|
||||
BIT_DStream_t bitD;
|
||||
|
||||
/* Init */
|
||||
BIT_DStream_t bitD;
|
||||
errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
|
||||
if (HUF_isError(errorCode)) return errorCode;
|
||||
{ size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
|
||||
if (HUF_isError(errorCode)) return errorCode; }
|
||||
|
||||
/* finish bitStreams one by one */
|
||||
HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog);
|
||||
{ U32 const dtLog = DTable[0];
|
||||
HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog); }
|
||||
|
||||
/* check */
|
||||
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
||||
|
@ -943,7 +940,7 @@ size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|||
HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
|
||||
const BYTE* ip = (const BYTE*) cSrc;
|
||||
|
||||
size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
|
||||
size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
|
||||
if (HUF_isError(hSize)) return hSize;
|
||||
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
||||
ip += hSize;
|
||||
|
@ -953,6 +950,24 @@ size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|||
}
|
||||
|
||||
|
||||
#define HUF_DECODE_ROUNDX6 \
|
||||
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
|
||||
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
|
||||
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
|
||||
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
|
||||
HUF_DECODE_SYMBOLX6_1(op1, &bitD1); \
|
||||
HUF_DECODE_SYMBOLX6_1(op2, &bitD2); \
|
||||
HUF_DECODE_SYMBOLX6_1(op3, &bitD3); \
|
||||
HUF_DECODE_SYMBOLX6_1(op4, &bitD4); \
|
||||
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
|
||||
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
|
||||
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
|
||||
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
|
||||
HUF_DECODE_SYMBOLX6_0(op1, &bitD1); \
|
||||
HUF_DECODE_SYMBOLX6_0(op2, &bitD2); \
|
||||
HUF_DECODE_SYMBOLX6_0(op3, &bitD3); \
|
||||
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
|
||||
|
||||
size_t HUF_decompress4X6_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
|
@ -960,6 +975,7 @@ size_t HUF_decompress4X6_usingDTable(
|
|||
{
|
||||
/* Check */
|
||||
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
||||
if (dstSize < 64) return ERROR(dstSize_tooSmall); /* only work for dstSize >= 64 */
|
||||
|
||||
{ const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
|
@ -970,7 +986,6 @@ size_t HUF_decompress4X6_usingDTable(
|
|||
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
|
||||
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
|
||||
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
|
||||
size_t errorCode;
|
||||
|
||||
/* Init */
|
||||
BIT_DStream_t bitD1;
|
||||
|
@ -997,43 +1012,41 @@ size_t HUF_decompress4X6_usingDTable(
|
|||
|
||||
length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
||||
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
||||
errorCode = BIT_initDStream(&bitD1, istart1, length1);
|
||||
if (HUF_isError(errorCode)) return errorCode;
|
||||
errorCode = BIT_initDStream(&bitD2, istart2, length2);
|
||||
if (HUF_isError(errorCode)) return errorCode;
|
||||
errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
||||
if (HUF_isError(errorCode)) return errorCode;
|
||||
errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
||||
if (HUF_isError(errorCode)) return errorCode;
|
||||
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
|
||||
if (HUF_isError(errorCode)) return errorCode; }
|
||||
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
|
||||
if (HUF_isError(errorCode)) return errorCode; }
|
||||
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
||||
if (HUF_isError(errorCode)) return errorCode; }
|
||||
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
||||
if (HUF_isError(errorCode)) return errorCode; }
|
||||
|
||||
/* 16-64 symbols per loop (4-16 symbols per stream) */
|
||||
/* 4-64 symbols per loop (1-16 symbols per stream) */
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
|
||||
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
|
||||
HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
|
||||
HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
|
||||
HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
|
||||
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
|
||||
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
}
|
||||
if (endSignal==BIT_DStream_unfinished) {
|
||||
HUF_DECODE_ROUNDX6;
|
||||
if (sizeof(bitD1.bitContainer)==4) { /* need to decode at least 4 bytes per stream */
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
HUF_DECODE_ROUNDX6;
|
||||
}
|
||||
{ U32 const saved2 = MEM_read32(opStart2); /* saved from overwrite */
|
||||
U32 const saved3 = MEM_read32(opStart3);
|
||||
U32 const saved4 = MEM_read32(opStart4);
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
|
||||
HUF_DECODE_ROUNDX6;
|
||||
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
||||
}
|
||||
MEM_write32(opStart2, saved2);
|
||||
MEM_write32(opStart3, saved3);
|
||||
MEM_write32(opStart4, saved4);
|
||||
} }
|
||||
|
||||
/* check corruption */
|
||||
if (op1 > opStart2) return ERROR(corruption_detected);
|
||||
if (op2 > opStart3) return ERROR(corruption_detected);
|
||||
if (op3 > opStart4) return ERROR(corruption_detected);
|
||||
/* note : op4 supposed already verified within main loop */
|
||||
/* note : op4 already verified within main loop */
|
||||
|
||||
/* finish bitStreams one by one */
|
||||
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
|
||||
|
@ -1097,12 +1110,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,
|
|||
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
|
||||
/* estimate decompression time */
|
||||
U32 Q;
|
||||
const U32 D256 = (U32)(dstSize >> 8);
|
||||
U32 Dtime[3];
|
||||
U32 algoNb = 0;
|
||||
int n;
|
||||
U32 Dtime[3]; /* decompression time estimation */
|
||||
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
|
@ -1111,16 +1119,19 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
|
|||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
/* decoder timing evaluation */
|
||||
Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
|
||||
for (n=0; n<3; n++)
|
||||
Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
|
||||
{ U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
|
||||
U32 const D256 = (U32)(dstSize >> 8);
|
||||
U32 n; for (n=0; n<3; n++)
|
||||
Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
|
||||
}
|
||||
|
||||
Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
|
||||
|
||||
if (Dtime[1] < Dtime[0]) algoNb = 1;
|
||||
//if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
|
||||
|
||||
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
||||
{ U32 algoNb = 0;
|
||||
if (Dtime[1] < Dtime[0]) algoNb = 1;
|
||||
if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
|
||||
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
//return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
|
||||
//return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
|
||||
|
|
Loading…
Reference in New Issue