introduced HUF_buildCTable_wksp(), to reduce stack memory usage

This commit is contained in:
Yann Collet 2016-12-01 17:47:30 -08:00
parent 850c76d045
commit a0d742b1e4
3 changed files with 49 additions and 30 deletions

View File

@ -73,9 +73,7 @@ size_t HUF_decompress(void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize); const void* cSrc, size_t cSrcSize);
/* **************************************** /* *** Tool functions *** */
* Tool functions
******************************************/
#define HUF_BLOCKSIZE_MAX (128 * 1024) #define HUF_BLOCKSIZE_MAX (128 * 1024)
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
@ -84,15 +82,15 @@ unsigned HUF_isError(size_t code); /**< tells if a return value is an
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
/* *** Advanced function *** */ /* *** Advanced function *** */
/** HUF_compress2() : /** HUF_compress2() :
* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */ * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() : /** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of <= 1024 unsigned */ * Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace); size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
@ -146,10 +144,6 @@ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, con
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
/* **************************************** /* ****************************************
* HUF detailed API * HUF detailed API
@ -174,6 +168,12 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, un
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
*/
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
/*! HUF_readStats() : /*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable(). Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer. `huffWeight` is destination buffer.
@ -213,17 +213,20 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
/* single stream variants */ /* single stream variants */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace); /**< `workSpace` must be a table of at least 1024 unsigned */ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif /* HUF_STATIC_LINKING_ONLY */ #endif /* HUF_STATIC_LINKING_ONLY */

View File

@ -325,23 +325,26 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
} }
/** HUF_buildCTable() : /** HUF_buildCTable_wksp() :
* Note : count is used before tree is written, so they can safely overlap * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
*/ */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{ {
nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1]; nodeElt* const huffNode0 = (nodeElt*)workSpace;
nodeElt* huffNode = huffNode0 + 1; nodeElt* const huffNode = huffNode0+1;
U32 n, nonNullRank; U32 n, nonNullRank;
int lowS, lowN; int lowS, lowN;
U16 nodeNb = STARTNODE; U16 nodeNb = STARTNODE;
U32 nodeRoot; U32 nodeRoot;
/* safety checks */ /* safety checks */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
memset(huffNode0, 0, sizeof(huffNode0)); memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */ /* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue); HUF_sort(huffNode, count, maxSymbolValue);
@ -354,7 +357,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
nodeNb++; lowS-=2; nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31); huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */ /* create parents */
while (nodeNb <= nodeRoot) { while (nodeNb <= nodeRoot) {
@ -397,6 +400,15 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
return maxNbBits; return maxNbBits;
} }
/** HUF_buildCTable() :
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
{
huffNodeTable nodeTable;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{ {
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@ -503,7 +515,8 @@ static size_t HUF_compress_internal (
void* dst, size_t dstSize, void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream, unsigned* workSpace) unsigned singleStream,
void* workSpace, size_t wkspSize)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
@ -515,6 +528,7 @@ static size_t HUF_compress_internal (
} table; /* `count` can overlap with `CTable`; saves 1 KB */ } table; /* `count` can overlap with `CTable`; saves 1 KB */
/* checks & inits */ /* checks & inits */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
if (!dstSize) return 0; /* cannot fit within dst budget */ if (!dstSize) return 0; /* cannot fit within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@ -523,14 +537,14 @@ static size_t HUF_compress_internal (
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Scan input and build symbol stats */ /* Scan input and build symbol stats */
{ CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace) ); { CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
} }
/* Build Huffman Tree */ /* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ CHECK_V_F(maxBits, HUF_buildCTable (table.CTable, table.count, maxSymbolValue, huffLog) ); { CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
huffLog = (U32)maxBits; huffLog = (U32)maxBits;
} }
@ -559,9 +573,10 @@ static size_t HUF_compress_internal (
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace) unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{ {
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
} }
size_t HUF_compress1X (void* dst, size_t dstSize, size_t HUF_compress1X (void* dst, size_t dstSize,
@ -569,14 +584,15 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog) unsigned maxSymbolValue, unsigned huffLog)
{ {
unsigned workSpace[1024]; unsigned workSpace[1024];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace); return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
} }
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace) unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{ {
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace); return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
} }
size_t HUF_compress2 (void* dst, size_t dstSize, size_t HUF_compress2 (void* dst, size_t dstSize,
@ -584,7 +600,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog) unsigned maxSymbolValue, unsigned huffLog)
{ {
unsigned workSpace[1024]; unsigned workSpace[1024];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace); return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
} }
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)

View File

@ -471,8 +471,8 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
singleStream = 1; singleStream = 1;
cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
} else { } else {
cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters) cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
: HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters); : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
} }
if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) if ((cLitSize==0) | (cLitSize >= srcSize - minGain))