introduced HUF_buildCTable_wksp(), to reduce stack memory usage

This commit is contained in:
Yann Collet 2016-12-01 17:47:30 -08:00
parent 850c76d045
commit a0d742b1e4
3 changed files with 49 additions and 30 deletions

View File

@ -73,9 +73,7 @@ size_t HUF_decompress(void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize);
/* ****************************************
* Tool functions
******************************************/
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024)
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
@ -91,8 +89,8 @@ const char* HUF_getErrorName(size_t code); /**< provides error code string (us
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of <= 1024 unsigned */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace);
* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
@ -146,10 +144,6 @@ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, con
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
/* ****************************************
* HUF detailed API
@ -174,6 +168,12 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, un
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
*/
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer.
@ -213,17 +213,20 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
/* single stream variants */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace); /**< `workSpace` must be a table of at least 1024 unsigned */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif /* HUF_STATIC_LINKING_ONLY */

View File

@ -325,23 +325,26 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
}
/** HUF_buildCTable() :
* Note : count is used before tree is written, so they can safely overlap
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
nodeElt* huffNode = huffNode0 + 1;
nodeElt* const huffNode0 = (nodeElt*)workSpace;
nodeElt* const huffNode = huffNode0+1;
U32 n, nonNullRank;
int lowS, lowN;
U16 nodeNb = STARTNODE;
U32 nodeRoot;
/* safety checks */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
memset(huffNode0, 0, sizeof(huffNode0));
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue);
@ -354,7 +357,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31);
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */
while (nodeNb <= nodeRoot) {
@ -397,6 +400,15 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
return maxNbBits;
}
/** HUF_buildCTable() :
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
{
huffNodeTable nodeTable;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@ -503,7 +515,8 @@ static size_t HUF_compress_internal (
void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream, unsigned* workSpace)
unsigned singleStream,
void* workSpace, size_t wkspSize)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
@ -515,6 +528,7 @@ static size_t HUF_compress_internal (
} table; /* `count` can overlap with `CTable`; saves 1 KB */
/* checks & inits */
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
if (!dstSize) return 0; /* cannot fit within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@ -523,14 +537,14 @@ static size_t HUF_compress_internal (
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace) );
{ CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ CHECK_V_F(maxBits, HUF_buildCTable (table.CTable, table.count, maxSymbolValue, huffLog) );
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
huffLog = (U32)maxBits;
}
@ -559,9 +573,10 @@ static size_t HUF_compress_internal (
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
@ -569,14 +584,15 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[1024];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace);
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
}
size_t HUF_compress2 (void* dst, size_t dstSize,
@ -584,7 +600,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[1024];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)

View File

@ -471,8 +471,8 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
singleStream = 1;
cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
} else {
cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters)
: HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters);
cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
: HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain))