fixed Huff0 quad-symbols decoder (#173)

dev
Yann Collet 2016-05-05 12:41:36 +02:00
parent ddb8ebd5b3
commit 6d1d25299a
2 changed files with 87 additions and 77 deletions

View File

@ -76,7 +76,7 @@ extern "C" {
******************************************/
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder */
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
/* ****************************************
@ -122,7 +122,7 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbol decoder */
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
@ -157,7 +157,6 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
const void* src, size_t srcSize)
{
U32 weightTotal;
U32 tableLog;
const BYTE* ip = (const BYTE*) src;
size_t iSize = ip[0];
size_t oSize;
@ -191,31 +190,31 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
/* collect weight stats */
memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
}}
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }
/* get last non-null symbol weight (implied, total must be 2^n) */
tableLog = BIT_highbit32(weightTotal) + 1;
if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
}
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1;
if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
*tableLogPtr = tableLog;
/* determine last weight */
{ U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++;
} }
/* check tree construction validity */
if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
/* results */
*nbSymbolsPtr = (U32)(oSize+1);
*tableLogPtr = tableLog;
return iSize+1;
}

View File

@ -847,17 +847,17 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, ds+val, sizeof(HUF_DSeqX6));
BIT_skipBits(DStream, dd[val].nbBits);
return dd[val].nbBytes;
}
static U32 HUF_decodeLastSymbolsX6(void* op, const U32 maxL, BIT_DStream_t* DStream,
static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStream,
const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
U32 length = dd[val].nbBytes;
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
U32 const length = dd[val].nbBytes;
if (length <= maxL) {
memcpy(op, ds+val, length);
BIT_skipBits(DStream, dd[val].nbBits);
@ -910,7 +910,6 @@ static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* c
return p-pStart;
}
size_t HUF_decompress1X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@ -919,17 +918,15 @@ size_t HUF_decompress1X6_usingDTable(
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const U32 dtLog = DTable[0];
size_t errorCode;
BIT_DStream_t bitD;
/* Init */
BIT_DStream_t bitD;
errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
if (HUF_isError(errorCode)) return errorCode;
{ size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; }
/* finish bitStreams one by one */
HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog);
{ U32 const dtLog = DTable[0];
HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog); }
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
@ -943,7 +940,7 @@ size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG);
const BYTE* ip = (const BYTE*) cSrc;
size_t hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize;
@ -953,6 +950,24 @@ size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS
}
#define HUF_DECODE_ROUNDX6 \
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_1(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_1(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_1(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_1(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_0(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_0(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_0(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
size_t HUF_decompress4X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@ -960,6 +975,7 @@ size_t HUF_decompress4X6_usingDTable(
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
if (dstSize < 64) return ERROR(dstSize_tooSmall); /* only work for dstSize >= 64 */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
@ -970,7 +986,6 @@ size_t HUF_decompress4X6_usingDTable(
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
size_t errorCode;
/* Init */
BIT_DStream_t bitD1;
@ -997,43 +1012,41 @@ size_t HUF_decompress4X6_usingDTable(
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode;
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 16-64 symbols per loop (4-16 symbols per stream) */
/* 4-64 symbols per loop (1-16 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
if (endSignal==BIT_DStream_unfinished) {
HUF_DECODE_ROUNDX6;
if (sizeof(bitD1.bitContainer)==4) { /* need to decode at least 4 bytes per stream */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
HUF_DECODE_ROUNDX6;
}
{ U32 const saved2 = MEM_read32(opStart2); /* saved from overwrite */
U32 const saved3 = MEM_read32(opStart3);
U32 const saved4 = MEM_read32(opStart4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
HUF_DECODE_ROUNDX6;
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
MEM_write32(opStart2, saved2);
MEM_write32(opStart3, saved3);
MEM_write32(opStart4, saved4);
} }
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* note : op4 already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
@ -1097,12 +1110,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 };
/* estimate decompression time */
U32 Q;
const U32 D256 = (U32)(dstSize >> 8);
U32 Dtime[3];
U32 algoNb = 0;
int n;
U32 Dtime[3]; /* decompression time estimation */
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
@ -1111,16 +1119,19 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
/* decoder timing evaluation */
Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
for (n=0; n<3; n++)
Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
{ U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
U32 const D256 = (U32)(dstSize >> 8);
U32 n; for (n=0; n<3; n++)
Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
}
Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
if (Dtime[1] < Dtime[0]) algoNb = 1;
//if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
{ U32 algoNb = 0;
if (Dtime[1] < Dtime[0]) algoNb = 1;
if (Dtime[2] < Dtime[algoNb]) algoNb = 2;
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
}
//return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
//return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */