updated huff0 - now generates a common HUF_DTable type for all decoding tables

This commit is contained in:
Yann Collet 2016-06-08 11:11:02 +02:00
parent 302fb53a76
commit 662a541431
4 changed files with 162 additions and 524 deletions

1
NEWS
View File

@ -1,5 +1,6 @@
v0.7.0 v0.7.0
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
New : Visual build scripts, by Christophe Chevalier
New : Support for Sparse File-systems (do not use space for zero-filled sectors) New : Support for Sparse File-systems (do not use space for zero-filled sectors)
New : Frame checksum support New : Frame checksum support
New : Support pass-through mode (when using `-df`) New : Support pass-through mode (when using `-df`)

View File

@ -31,8 +31,8 @@
You can contact the author at : You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
****************************************************************** */ ****************************************************************** */
#ifndef HUF_H #ifndef HUF_H_298734234
#define HUF_H #define HUF_H_298734234
#if defined (__cplusplus) #if defined (__cplusplus)
extern "C" { extern "C" {
@ -53,8 +53,9 @@ size_t HUF_decompress(void* dst, size_t dstSize,
/* /*
HUF_compress() : HUF_compress() :
Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
'dst' buffer must be already allocated. Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). 'dst' buffer must be already allocated.
Note : `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
`srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB
@return : size of compressed data (<= `dstCapacity`) @return : size of compressed data (<= `dstCapacity`)
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
if return == 1, srcData is a single repeated byte symbol (RLE compression). if return == 1, srcData is a single repeated byte symbol (RLE compression).
@ -63,7 +64,7 @@ HUF_compress() :
HUF_decompress() : HUF_decompress() :
Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
into already allocated buffer 'dst', of minimum size 'dstSize'. into already allocated buffer 'dst', of minimum size 'dstSize'.
`dstSize` : must be the **exact** size of original (uncompressed) data. `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
Note : in contrast with FSE, HUF_decompress can regenerate Note : in contrast with FSE, HUF_decompress can regenerate
RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
because it knows size to regenerate. because it knows size to regenerate.
@ -121,13 +122,12 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
/* static allocation of HUF's DTable */ /* static allocation of HUF's DTable */
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<maxTableLog)) typedef U16 HUF_DTable;
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog } HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((maxTableLog)*0x101) }
#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog } HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)+1)] = { (((maxTableLog)+1)*0x101) }
#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
/* **************************************** /* ****************************************
@ -135,7 +135,6 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
******************************************/ ******************************************/
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbols decoder */
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
/* **************************************** /* ****************************************
@ -161,35 +160,6 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, un
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/*!
HUF_decompress() does the following:
1. select the decompression algorithm (X2, X4, X6) based on pre-computed heuristics
2. build Huffman table from save, using HUF_readDTableXn()
3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
*/
size_t HUF_readDTableX2 (unsigned short* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX6 (unsigned* DTable, const void* src, size_t srcSize);
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
size_t HUF_decompress4X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
/* single stream variants */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* quad-symbols decoder, only works for dstSize >= 64 */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned short* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
size_t HUF_decompress1X6_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const unsigned* DTable);
/*! HUF_readStats() : /*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable(). Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer. `huffWeight` is destination buffer.
@ -204,6 +174,39 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize); size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
/*
HUF_decompress() does the following:
1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
2. build Huffman decoding table from its saved (compact) description, using HUF_readDTableXn()
3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
*/
/** HUF_selectDecoder() :
* Tells which decoder is likely to decode faster,
* based on a set of pre-determined metrics.
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
* Assumption : 0 < cSrcSize < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* single stream variants */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif /* HUF_STATIC_LINKING_ONLY */ #endif /* HUF_STATIC_LINKING_ONLY */
@ -211,4 +214,4 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* sr
} }
#endif #endif
#endif /* HUF_H */ #endif /* HUF_H_298734234 */

View File

@ -60,37 +60,26 @@
* Includes * Includes
****************************************************************/ ****************************************************************/
#include <string.h> /* memcpy, memset */ #include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h" #include "bitstream.h"
#include "fse.h" /* header compression */ #include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY #define HUF_STATIC_LINKING_ONLY
#include "huf.h" #include "huf.h"
/* ************************************************************** /* **************************************************************
* Error Management * Error Management
****************************************************************/ ****************************************************************/
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
/* *******************************************************
* HUF : Huffman block decompression
*********************************************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/*-***************************/ /*-***************************/
/* single-symbol decoding */ /* single-symbol decoding */
/*-***************************/ /*-***************************/
typedef struct { BYTE maxTableLog; BYTE currentTableLog; } DTableDesc;
size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
{ {
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@ -101,16 +90,19 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
U32 nextRankStart; U32 nextRankStart;
void* const dtPtr = DTable + 1; void* const dtPtr = DTable + 1;
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
DTableDesc dtd;
HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */
memcpy(&dtd, DTable, sizeof(dtd));
//memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
if (HUF_isError(iSize)) return iSize; if (HUF_isError(iSize)) return iSize;
/* check result */ /* check result */
if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */ if (tableLog > dtd.maxTableLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
DTable[0] = (U16)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */ dtd.currentTableLog = (BYTE)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
memcpy(DTable, &dtd, sizeof(dtd));
/* Prepare ranks */ /* Prepare ranks */
nextRankStart = 0; nextRankStart = 0;
@ -181,14 +173,18 @@ static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, B
size_t HUF_decompress1X2_usingDTable( size_t HUF_decompress1X2_usingDTable(
void* dst, size_t dstSize, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize, const void* cSrc, size_t cSrcSize,
const U16* DTable) const HUF_DTable* DTable)
{ {
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
BYTE* const oend = op + dstSize; BYTE* const oend = op + dstSize;
const U32 dtLog = DTable[0];
const void* dtPtr = DTable; const void* dtPtr = DTable;
const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1; const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1;
BIT_DStream_t bitD; BIT_DStream_t bitD;
DTableDesc dtd;
U32 dtLog;
memcpy(&dtd, DTable, sizeof(dtd));
dtLog = dtd.currentTableLog;
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; } if (HUF_isError(errorCode)) return errorCode; }
@ -219,7 +215,7 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
size_t HUF_decompress4X2_usingDTable( size_t HUF_decompress4X2_usingDTable(
void* dst, size_t dstSize, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize, const void* cSrc, size_t cSrcSize,
const U16* DTable) const HUF_DTable* DTable)
{ {
/* Check */ /* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
@ -229,18 +225,16 @@ size_t HUF_decompress4X2_usingDTable(
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable; const void* const dtPtr = DTable;
const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1; const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */ /* Init */
BIT_DStream_t bitD1; BIT_DStream_t bitD1;
BIT_DStream_t bitD2; BIT_DStream_t bitD2;
BIT_DStream_t bitD3; BIT_DStream_t bitD3;
BIT_DStream_t bitD4; BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart); size_t const length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2); size_t const length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4); size_t const length3 = MEM_readLE16(istart+4);
size_t length4; size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */ const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1; const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2; const BYTE* const istart3 = istart2 + length2;
@ -254,17 +248,21 @@ size_t HUF_decompress4X2_usingDTable(
BYTE* op3 = opStart3; BYTE* op3 = opStart3;
BYTE* op4 = opStart4; BYTE* op4 = opStart4;
U32 endSignal; U32 endSignal;
DTableDesc dtd;
U32 dtLog;
memcpy(&dtd, DTable, sizeof(dtd));
dtLog = dtd.currentTableLog;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1); { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD2, istart2, length2); { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD3, istart3, length3); { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD4, istart4, length4); { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */ /* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
@ -315,11 +313,11 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
const BYTE* ip = (const BYTE*) cSrc; const BYTE* ip = (const BYTE*) cSrc;
size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); size_t const hSize = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(hSize)) return hSize;
if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += errorCode; ip += hSize;
cSrcSize -= errorCode; cSrcSize -= hSize;
return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
} }
@ -328,6 +326,9 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
/* *************************/ /* *************************/
/* double-symbols decoding */ /* double-symbols decoding */
/* *************************/ /* *************************/
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
const U32* rankValOrigin, const int minWeight, const U32* rankValOrigin, const int minWeight,
@ -413,7 +414,7 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
} }
} }
size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
{ {
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
@ -422,20 +423,23 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
U32* const rankStart = rankStart0+1; U32* const rankStart = rankStart0+1;
rankVal_t rankVal; rankVal_t rankVal;
U32 tableLog, maxW, sizeOfSort, nbSymbols; U32 tableLog, maxW, sizeOfSort, nbSymbols;
const U32 memLog = DTable[0]; DTableDesc dtd;
U32 maxTableLog;
size_t iSize; size_t iSize;
void* dtPtr = DTable; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1; HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); memcpy(&dtd, DTable, sizeof(dtd));
maxTableLog = dtd.maxTableLog-1;
if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
//memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
if (HUF_isError(iSize)) return iSize; if (HUF_isError(iSize)) return iSize;
/* check result */ /* check result */
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
/* find maxWeight */ /* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
@ -464,7 +468,7 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
/* Build rankVal */ /* Build rankVal */
{ U32* const rankVal0 = rankVal[0]; { U32* const rankVal0 = rankVal[0];
{ int const rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
U32 nextRankVal = 0; U32 nextRankVal = 0;
U32 w; U32 w;
for (w=1; w<maxW+1; w++) { for (w=1; w<maxW+1; w++) {
@ -474,18 +478,20 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
} } } }
{ U32 const minBits = tableLog+1 - maxW; { U32 const minBits = tableLog+1 - maxW;
U32 consumed; U32 consumed;
for (consumed = minBits; consumed < memLog - minBits + 1; consumed++) { for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
U32* const rankValPtr = rankVal[consumed]; U32* const rankValPtr = rankVal[consumed];
U32 w; U32 w;
for (w = 1; w < maxW+1; w++) { for (w = 1; w < maxW+1; w++) {
rankValPtr[w] = rankVal0[w] >> consumed; rankValPtr[w] = rankVal0[w] >> consumed;
} } } } } } } }
HUF_fillDTableX4(dt, memLog, HUF_fillDTableX4(dt, maxTableLog,
sortedSymbol, sizeOfSort, sortedSymbol, sizeOfSort,
rankStart0, rankVal, maxW, rankStart0, rankVal, maxW,
tableLog+1); tableLog+1);
dtd.currentTableLog = (BYTE)maxTableLog;
memcpy(DTable, &dtd, sizeof(dtd));
return iSize; return iSize;
} }
@ -536,7 +542,7 @@ static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* c
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
} }
/* closer to the end */ /* closer to end : up to 2 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
@ -553,23 +559,24 @@ static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* c
size_t HUF_decompress1X4_usingDTable( size_t HUF_decompress1X4_usingDTable(
void* dst, size_t dstSize, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize, const void* cSrc, size_t cSrcSize,
const U32* DTable) const HUF_DTable* DTable)
{ {
const BYTE* const istart = (const BYTE*) cSrc; BIT_DStream_t bitD;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const U32 dtLog = DTable[0];
const void* const dtPtr = DTable;
const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
/* Init */ /* Init */
BIT_DStream_t bitD; { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
{ size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize); if (HUF_isError(errorCode)) return errorCode;
if (HUF_isError(errorCode)) return errorCode; } }
/* decode */ /* decode */
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtLog); { BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
DTableDesc dtd;
memcpy(&dtd, DTable, sizeof(dtd));
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.currentTableLog);
}
/* check */ /* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
@ -595,32 +602,30 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
size_t HUF_decompress4X4_usingDTable( size_t HUF_decompress4X4_usingDTable(
void* dst, size_t dstSize, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize, const void* cSrc, size_t cSrcSize,
const U32* DTable) const HUF_DTable* DTable)
{ {
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{ const BYTE* const istart = (const BYTE*) cSrc; { const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst; BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable; const void* const dtPtr = DTable+1;
const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1; const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */ /* Init */
BIT_DStream_t bitD1; BIT_DStream_t bitD1;
BIT_DStream_t bitD2; BIT_DStream_t bitD2;
BIT_DStream_t bitD3; BIT_DStream_t bitD3;
BIT_DStream_t bitD4; BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart); size_t const length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2); size_t const length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4); size_t const length3 = MEM_readLE16(istart+4);
size_t length4; size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */ const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1; const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2; const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3; const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4; size_t const segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize; BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize; BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize; BYTE* const opStart4 = opStart3 + segmentSize;
@ -629,17 +634,21 @@ size_t HUF_decompress4X4_usingDTable(
BYTE* op3 = opStart3; BYTE* op3 = opStart3;
BYTE* op4 = opStart4; BYTE* op4 = opStart4;
U32 endSignal; U32 endSignal;
DTableDesc dtd;
U32 dtLog;
memcpy(&dtd, DTable, sizeof(dtd));
dtLog = dtd.currentTableLog;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1); { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD2, istart2, length2); { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD3, istart3, length3); { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
errorCode = BIT_initDStream(&bitD4, istart4, length4); { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */ /* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
@ -677,8 +686,8 @@ size_t HUF_decompress4X4_usingDTable(
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
/* check */ /* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected); if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */ /* decoded size */
return dstSize; return dstSize;
@ -701,386 +710,6 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
} }
/* ********************************/
/* quad-symbol decoding */
/* ********************************/
/* Descriptor of one quad-symbol table entry :
 * nbBits  : receives the total number of bits consumed to reach the entry (consumed + bits of last symbol),
 * nbBytes : number of symbols already stored in the matching HUF_DSeqX6 (also used as the write index for the next one). */
typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6;
/* Sequence of up to 4 decoded symbols, addressable byte by byte or as a single 32-bit value. */
typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6;
/* HUF_fillDTableX6LevelN() :
 * Fills the parallel tables `DDescription` (bits / symbol count) and `DSequence` (symbols)
 * for one nesting level of the quad-symbol decoder.
 * `baseSeq` and `DDesc` carry the symbols and bit count accumulated by the caller;
 * `DDesc.nbBytes` is the current level, i.e. the index in `baseSeq.byte[]` written at this level.
 * `consumed` selects which row of `rankValOrigin` provides the start positions for this level.
 * For each sorted symbol starting at `rankStart[minWeight]`, either recurses into the
 * sub-range starting at that symbol's position (when level < 3 and enough bits remain),
 * or writes the finished descriptor/sequence into every slot of that symbol's range.
 * recursive, up to level 3; may benefit from <template>-like strategy to nest each level inline */
static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSequence, int sizeLog,
const rankVal_t rankValOrigin, const U32 consumed, const int minWeight, const U32 maxWeight,
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, const U32* rankStart,
const U32 nbBitsBaseline, HUF_DSeqX6 baseSeq, HUF_DDescX6 DDesc)
{
const int scaleLog = nbBitsBaseline - sizeLog; /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */
const int minBits = nbBitsBaseline - maxWeight; /* bit length of the heaviest-weight (shortest) symbol */
const U32 level = DDesc.nbBytes; /* slot of baseSeq.byte[] filled at this level */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
U32 symbolStartPos, s;
/* local rankVal, will be modified */
memcpy(rankVal, rankValOrigin[consumed], sizeof(rankVal));
/* fill skipped values : the first rankVal[minWeight] entries keep the caller's sequence and descriptor unchanged */
if (minWeight>1) {
U32 i;
const U32 skipSize = rankVal[minWeight];
for (i = 0; i < skipSize; i++) {
DSequence[i] = baseSeq;
DDescription[i] = DDesc;
} }
/* fill DTable */
DDesc.nbBytes++; /* every entry written below holds one more symbol than the caller's sequence */
symbolStartPos = rankStart[minWeight];
for (s=symbolStartPos; s<sortedListSize; s++) {
const BYTE symbol = sortedSymbols[s].symbol;
const U32 weight = sortedSymbols[s].weight; /* >= 1 (sorted) */
const int nbBits = nbBitsBaseline - weight; /* >= 1 (by construction) */
const int totalBits = consumed+nbBits; /* bits used by the whole sequence including this symbol */
const U32 start = rankVal[weight]; /* next free position for this weight */
const U32 length = 1 << (sizeLog-nbBits); /* number of table slots covered by this symbol */
baseSeq.byte[level] = symbol;
DDesc.nbBits = (BYTE)totalBits;
if ((level<3) && (sizeLog-totalBits >= minBits)) { /* enough room for another symbol */
int nextMinWeight = totalBits + scaleLog;
if (nextMinWeight < 1) nextMinWeight = 1;
HUF_fillDTableX6LevelN(DDescription+start, DSequence+start, sizeLog-nbBits,
rankValOrigin, totalBits, nextMinWeight, maxWeight,
sortedSymbols, sortedListSize, rankStart,
nbBitsBaseline, baseSeq, DDesc); /* recursive (max : level 3) */
} else {
/* no further symbol fits : replicate the finished entry over the symbol's whole range */
U32 i;
const U32 end = start + length;
for (i = start; i < end; i++) {
DDescription[i] = DDesc;
DSequence[i] = baseSeq;
} }
rankVal[weight] += length; /* advance the start position for the next symbol of the same weight */
}
}
/* HUF_readDTableX6() :
 * Reads a Huffman table description from `src` and builds the quad-symbol decoding table into `DTable`.
 * `DTable[0]` must hold, on entry, the log2 capacity of the table (`memLog`).
 * @return : the value reported by HUF_readStats() (stored in `iSize`),
 *           or an error code : tableLog_tooLarge when `memLog` exceeds HUF_TABLELOG_ABSOLUTEMAX
 *           or when the table described in `src` needs more than `memLog` bits,
 *           or any error forwarded from HUF_readStats().
 * note : same preparation as X4 */
size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
{
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
/* rankStart[w] aliases rankStart0[w+1] */
U32* const rankStart = rankStart0+1;
U32 tableLog, maxW, sizeOfSort, nbSymbols;
rankVal_t rankVal;
const U32 memLog = DTable[0];
size_t iSize;
if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
//memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
if (HUF_isError(iSize)) return iSize;
/* check result */
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
/* find maxWeight */
for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
/* Get start index of each weight */
{ U32 w, nextRankStart = 0;
for (w=1; w<maxW+1; w++) {
U32 current = nextRankStart;
nextRankStart += rankStats[w];
rankStart[w] = current;
}
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
sizeOfSort = nextRankStart;
}
/* sort symbols by weight */
/* each insertion advances rankStart[w] : once done, rankStart[w] points just past the symbols of weight w */
{ U32 s;
for (s=0; s<nbSymbols; s++) {
U32 w = weightList[s];
U32 r = rankStart[w]++;
sortedSymbol[r].symbol = (BYTE)s;
sortedSymbol[r].weight = (BYTE)w;
}
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
}
/* Build rankVal */
/* rankVal[0] holds the start position of each weight within the full table;
 * rankVal[consumed] holds the same positions, shifted right by `consumed` bits */
{ const U32 minBits = tableLog+1 - maxW;
U32 nextRankVal = 0;
U32 w, consumed;
const int rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
U32* rankVal0 = rankVal[0];
for (w=1; w<maxW+1; w++) {
U32 current = nextRankVal;
nextRankVal += rankStats[w] << (w+rescale);
rankVal0[w] = current;
}
for (consumed = minBits; consumed <= memLog - minBits; consumed++) {
U32* rankValPtr = rankVal[consumed];
for (w = 1; w < maxW+1; w++) {
rankValPtr[w] = rankVal0[w] >> consumed;
} } }
/* fill tables */
/* layout : descriptors start at DTable+1, sequences start (1<<(memLog-1)) U32 further */
{ void* ddPtr = DTable+1;
HUF_DDescX6* DDescription = (HUF_DDescX6*)ddPtr;
void* dsPtr = DTable + 1 + ((size_t)1<<(memLog-1));
HUF_DSeqX6* DSequence = (HUF_DSeqX6*)dsPtr;
HUF_DSeqX6 DSeq;
HUF_DDescX6 DDesc;
/* start from an empty sequence : no byte stored, no bit consumed */
DSeq.sequence = 0;
DDesc.nbBits = 0;
DDesc.nbBytes = 0;
/* note : rankStart0 (not rankStart) is handed over, hence indexes are shifted by one on the receiving side */
HUF_fillDTableX6LevelN(DDescription, DSequence, memLog,
(const U32 (*)[HUF_TABLELOG_ABSOLUTEMAX + 1])rankVal, 0, 1, maxW,
sortedSymbol, sizeOfSort, rankStart0,
tableLog+1, DSeq, DDesc);
}
return iSize;
}
/* HUF_decodeSymbolX6() :
 * Peeks `dtLog` bits from `DStream` to select a table cell,
 * copies the cell's full sequence (sizeof(HUF_DSeqX6) bytes, whatever its useful length) into `op`,
 * then skips the number of bits recorded for that cell.
 * @return : number of useful bytes within the copied sequence (the cell's nbBytes) */
static U32 HUF_decodeSymbolX6(void* op, BIT_DStream_t* DStream, const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
    size_t const cell = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    const HUF_DDescX6* const desc = dd + cell;

    memcpy(op, &ds[cell], sizeof(ds[cell]));
    BIT_skipBits(DStream, desc->nbBits);

    return desc->nbBytes;
}
/* HUF_decodeLastSymbolsX6() :
 * Same lookup as HUF_decodeSymbolX6(), but never writes more than `maxL` bytes into `op`.
 * When the selected sequence is longer than `maxL`, only its first `maxL` bytes are copied,
 * and the bitstream position is capped to the size of the bit container.
 * @return : number of bytes written into `op` (<= maxL) */
static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStream,
                                   const HUF_DDescX6* dd, const HUF_DSeqX6* ds, const U32 dtLog)
{
    size_t const cell = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    U32 const seqLength = dd[cell].nbBytes;

    if (seqLength > maxL) {
        /* truncated sequence : keep only what fits */
        memcpy(op, ds+cell, maxL);
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dd[cell].nbBits);
            /* ugly hack; works only because it's the last symbol.
             * Note : can't easily extract nbBits from just this symbol */
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
        }
        return maxL;
    }

    /* whole sequence fits */
    memcpy(op, ds+cell, seqLength);
    BIT_skipBits(DStream, dd[cell].nbBits);
    return seqLength;
}
/* HUF_DECODE_SYMBOLX6_x() :
 * Decoding steps built on HUF_decodeSymbolX6().
 * They expand to code referencing `dd`, `ds` and `dtLog` by name :
 * these 3 identifiers must exist in the scope where the macros are used.
 * `ptr` is advanced by the value returned by HUF_decodeSymbolX6(). */
/* _0 : unconditional decoding step */
#define HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) \
ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog)
/* _1 : decoding step performed only when MEM_64bits() is true, or when HUF_TABLELOG_MAX <= 12.
 * note : expands to a brace-less `if` statement */
#define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
/* _2 : decoding step performed only when MEM_64bits() is true.
 * note : expands to a brace-less `if` statement */
#define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr)
/* HUF_decodeStreamX6() :
 * Decodes one bitstream into [p, pEnd), using the quad-symbol table stored in `DTable`.
 * Proceeds in 3 phases : rounds of up to 4 lookups while at least 16 bytes remain,
 * then single lookups while at least 4 bytes remain,
 * then length-limited lookups until `pEnd` is reached or the bitstream is exhausted.
 * @return : number of bytes written */
static inline size_t HUF_decodeStreamX6(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const U32* DTable, const U32 dtLog)
{
    /* note : `dd`, `ds` and `dtLog` are referenced by name from the HUF_DECODE_SYMBOLX6_x macros */
    const void* const descArea = DTable+1;
    const void* const seqArea = DTable + 1 + ((size_t)1<<(dtLog-1));
    const HUF_DDescX6* dd = (const HUF_DDescX6*)descArea;
    const HUF_DSeqX6* ds = (const HUF_DSeqX6*)seqArea;
    BYTE* const origin = p;

    /* phase 1 : up to 4 lookups per round; the stream is reloaded before each round */
    for (;;) {
        if (BIT_reloadDStream(bitDPtr) != BIT_DStream_unfinished) break;
        if (p > pEnd-16) break;
        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX6_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX6_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
    }

    /* phase 2 : closer to the end, one lookup at a time */
    for (;;) {
        if (BIT_reloadDStream(bitDPtr) != BIT_DStream_unfinished) break;
        if (p > pEnd-4) break;
        HUF_DECODE_SYMBOLX6_0(p, bitDPtr);
    }

    /* phase 3 : last bytes, each write limited to the remaining room */
    for (;;) {
        if (BIT_reloadDStream(bitDPtr) > BIT_DStream_endOfBuffer) break;
        if (p >= pEnd) break;
        p += HUF_decodeLastSymbolsX6(p, (U32)(pEnd-p), bitDPtr, dd, ds, dtLog);
    }

    return p-origin;
}
size_t HUF_decompress1X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const U32* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
BIT_DStream_t bitD;
/* Init */
{ size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; }
/* finish bitStreams one by one */
{ U32 const dtLog = DTable[0];
HUF_decodeStreamX6(ostart, &bitD, oend, DTable, dtLog); }
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX);
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize;
cSrcSize -= hSize;
return HUF_decompress1X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
}
/* HUF_DECODE_ROUNDX6 :
 * One decoding round over 4 interleaved streams : 16 decoding steps, 4 per stream,
 * in the order _2, _1, _2, _0 for each stream.
 * Only the last group of 4 steps (_0) is unconditional, see HUF_DECODE_SYMBOLX6_x.
 * Expands to code referencing `op1`..`op4` and `bitD1`..`bitD4` by name,
 * plus `dd`, `ds` and `dtLog` through the HUF_DECODE_SYMBOLX6_x macros :
 * all of them must exist in the scope where the macro is used.
 * note : the expansion already ends with a `;` */
#define HUF_DECODE_ROUNDX6 \
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_1(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_1(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_1(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_1(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_2(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_2(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_2(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_2(op4, &bitD4); \
HUF_DECODE_SYMBOLX6_0(op1, &bitD1); \
HUF_DECODE_SYMBOLX6_0(op2, &bitD2); \
HUF_DECODE_SYMBOLX6_0(op3, &bitD3); \
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
/* HUF_decompress4X6_usingDTable() :
 * Decodes 4 bitstreams stored in `cSrc` into `dst`, using an already built quad-symbol `DTable`.
 * Input layout : a 6-byte jump table (3 little-endian 16-bit lengths), followed by the 4 streams;
 * the length of the 4th stream is whatever remains of `cSrcSize`.
 * Output layout : `dst` is cut into 4 segments of (dstSize+3)/4 bytes, the last one ending at `dst+dstSize`;
 * stream N is decoded into segment N.
 * @return : `dstSize` on success,
 *           ERROR(corruption_detected) when input is too short (< 10 bytes), when stream lengths are inconsistent,
 *           when a stream writes beyond its segment, or when a stream is not fully consumed at the end,
 *           ERROR(dstSize_tooSmall) when `dstSize` < 64,
 *           or the error reported by BIT_initDStream() */
size_t HUF_decompress4X6_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const U32* DTable)
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
if (dstSize < 64) return ERROR(dstSize_tooSmall); /* only work for dstSize >= 64 */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const U32 dtLog = DTable[0];
/* `dd`, `ds` and `dtLog` are referenced by name from the decoding macros */
const void* const ddPtr = DTable+1;
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* if the 3 declared lengths + jump table exceed cSrcSize, the unsigned subtraction wraps to a huge value */
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 4-64 symbols per loop (1-16 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
if (endSignal==BIT_DStream_unfinished) {
HUF_DECODE_ROUNDX6;
if (sizeof(bitD1.bitContainer)==4) { /* need to decode at least 4 bytes per stream */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
HUF_DECODE_ROUNDX6;
}
/* The first 4 bytes of segments 2, 3 and 4 are memorized here, and written back after the main loop.
 * NOTE(review): presumably needed because each decoding step copies a full 4-byte sequence,
 * so a stream reaching the end of its segment can spill into the beginning of the next one - confirm */
{ U32 const saved2 = MEM_read32(opStart2); /* saved from overwrite */
U32 const saved3 = MEM_read32(opStart3);
U32 const saved4 = MEM_read32(opStart4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
/* main loop : stops as soon as any stream is no longer `unfinished`, stream 3 went past its segment,
 * or stream 4 has less than 16 bytes of room left */
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
HUF_DECODE_ROUNDX6;
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
MEM_write32(opStart2, saved2);
MEM_write32(opStart3, saved3);
MEM_write32(opStart4, saved4);
} }
/* check corruption */
/* a stream which went beyond the start of the next segment produced too much data */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog);
/* check */
/* all 4 streams must be entirely consumed */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
}
size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX);
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize;
cSrcSize -= hSize;
return HUF_decompress4X6_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
}
/* ********************************/ /* ********************************/
/* Generic decompression selector */ /* Generic decompression selector */
/* ********************************/ /* ********************************/
@ -1107,12 +736,29 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
{{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
}; };
/** HUF_selectDecoder() :
 *  Tells which decoder is likely to decode faster,
 *  based on a set of pre-determined metrics.
 *  @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
 *  Expected usage : 0 < cSrcSize < dstSize <= 128 KB.
 *  Since this function is public, arguments outside of that range are tolerated :
 *  when cSrcSize >= dstSize (which includes dstSize==0), the last quantization row (Q==15) is used,
 *  instead of dividing by zero or reading beyond the 16 rows of algoTime[]. */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
    /* decoder timing evaluation */
    /* Q : compressed/decompressed ratio, quantized on 16 levels.
     * cSrcSize < dstSize guarantees both dstSize > 0 and Q < 16; anything else is clamped to 15 */
    U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);
    U32 const D256 = (U32)(dstSize >> 8);   /* nb of 256-byte units to decode */
    U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
    U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
    DTime1 += DTime1 >> 3;   /* advantage to algorithm using less memory, for cache eviction */

    return DTime1 < DTime0;
}
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{ {
static const decompressionAlgo decompress[3] = { HUF_decompress4X2, HUF_decompress4X4, HUF_decompress4X6 }; static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
U32 Dtime[3]; /* decompression time estimation */
/* validation checks */ /* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall); if (dstSize == 0) return ERROR(dstSize_tooSmall);
@ -1120,22 +766,10 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
/* decoder timing evaluation */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
{ U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
U32 const D256 = (U32)(dstSize >> 8);
U32 n; for (n=0; n<3; n++)
Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
}
Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
{ U32 algoNb = 0;
if (Dtime[1] < Dtime[0]) algoNb = 1;
// if (Dtime[2] < Dtime[algoNb]) algoNb = 2; /* current speed of HUF_decompress4X6 is not good */
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
} }
//return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */ //return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
//return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */ //return HUF_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
//return HUF_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
} }

View File

@ -112,7 +112,7 @@ struct ZSTD_DCtx_s
FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)]; HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog+1)];
const void* previousDstEnd; const void* previousDstEnd;
const void* base; const void* base;
const void* vBase; const void* vBase;
@ -143,7 +143,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->base = NULL; dctx->base = NULL;
dctx->vBase = NULL; dctx->vBase = NULL;
dctx->dictEnd = NULL; dctx->dictEnd = NULL;
dctx->hufTableX4[0] = HufLog; dctx->hufTable[0] = (HUF_DTable)((HufLog+1)*0x101);
dctx->flagRepeatTable = 0; dctx->flagRepeatTable = 0;
dctx->dictID = 0; dctx->dictID = 0;
return 0; return 0;
@ -508,7 +508,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
litCSize = ((istart[1] & 3) << 8) + istart[2]; litCSize = ((istart[1] & 3) << 8) + istart[2];
{ size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4); { size_t const errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable);
if (HUF_isError(errorCode)) return ERROR(corruption_detected); if (HUF_isError(errorCode)) return ERROR(corruption_detected);
} }
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;
@ -1193,7 +1193,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const d
{ {
size_t dictSize = dictSizeStart; size_t dictSize = dictSizeStart;
{ size_t const hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize); { size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + hSize; dict = (const char*)dict + hSize;
dictSize -= hSize; dictSize -= hSize;