introduced HUF_buildCTable_wksp(), to reduce stack memory usage
This commit is contained in:
parent
850c76d045
commit
a0d742b1e4
@ -73,9 +73,7 @@ size_t HUF_decompress(void* dst, size_t dstSize,
|
|||||||
const void* cSrc, size_t cSrcSize);
|
const void* cSrc, size_t cSrcSize);
|
||||||
|
|
||||||
|
|
||||||
/* ****************************************
|
/* *** Tool functions *** */
|
||||||
* Tool functions
|
|
||||||
******************************************/
|
|
||||||
#define HUF_BLOCKSIZE_MAX (128 * 1024)
|
#define HUF_BLOCKSIZE_MAX (128 * 1024)
|
||||||
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
|
||||||
|
|
||||||
@ -84,15 +82,15 @@ unsigned HUF_isError(size_t code); /**< tells if a return value is an
|
|||||||
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
|
||||||
|
|
||||||
|
|
||||||
/* *** Advanced function *** */
|
/* *** Advanced function *** */
|
||||||
|
|
||||||
/** HUF_compress2() :
|
/** HUF_compress2() :
|
||||||
* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
|
* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
|
||||||
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||||
|
|
||||||
/** HUF_compress4X_wksp() :
|
/** HUF_compress4X_wksp() :
|
||||||
* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of <= 1024 unsigned */
|
* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
|
||||||
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace);
|
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -146,10 +144,6 @@ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, con
|
|||||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||||
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||||
|
|
||||||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
|
||||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
|
||||||
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
|
||||||
|
|
||||||
|
|
||||||
/* ****************************************
|
/* ****************************************
|
||||||
* HUF detailed API
|
* HUF detailed API
|
||||||
@ -174,6 +168,12 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, un
|
|||||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||||
|
|
||||||
|
|
||||||
|
/** HUF_buildCTable_wksp() :
|
||||||
|
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||||
|
* `workSpace` must be aligned on 4-byte boundaries, and be at least as large as a table of 1024 unsigned.
|
||||||
|
*/
|
||||||
|
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
|
||||||
|
|
||||||
/*! HUF_readStats() :
|
/*! HUF_readStats() :
|
||||||
Read compact Huffman tree, saved by HUF_writeCTable().
|
Read compact Huffman tree, saved by HUF_writeCTable().
|
||||||
`huffWeight` is destination buffer.
|
`huffWeight` is destination buffer.
|
||||||
@ -213,17 +213,20 @@ size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* c
|
|||||||
/* single stream variants */
|
/* single stream variants */
|
||||||
|
|
||||||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, unsigned* workSpace); /**< `workSpace` must be a table of at least 1024 unsigned */
|
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
|
||||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||||
|
|
||||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||||
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||||
|
|
||||||
|
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||||
|
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||||
|
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||||
|
|
||||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
|
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
|
||||||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||||
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||||
|
|
||||||
|
|
||||||
#endif /* HUF_STATIC_LINKING_ONLY */
|
#endif /* HUF_STATIC_LINKING_ONLY */
|
||||||
|
|
||||||
|
|
||||||
|
@ -325,23 +325,26 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** HUF_buildCTable() :
|
/** HUF_buildCTable_wksp() :
|
||||||
* Note : count is used before tree is written, so they can safely overlap
|
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||||
|
* `workSpace` must be aligned on 4-byte boundaries, and be at least as large as a table of 1024 unsigned.
|
||||||
*/
|
*/
|
||||||
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
||||||
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
|
typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
|
||||||
|
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||||
{
|
{
|
||||||
nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
|
nodeElt* const huffNode0 = (nodeElt*)workSpace;
|
||||||
nodeElt* huffNode = huffNode0 + 1;
|
nodeElt* const huffNode = huffNode0+1;
|
||||||
U32 n, nonNullRank;
|
U32 n, nonNullRank;
|
||||||
int lowS, lowN;
|
int lowS, lowN;
|
||||||
U16 nodeNb = STARTNODE;
|
U16 nodeNb = STARTNODE;
|
||||||
U32 nodeRoot;
|
U32 nodeRoot;
|
||||||
|
|
||||||
/* safety checks */
|
/* safety checks */
|
||||||
|
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
|
||||||
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
||||||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
|
||||||
memset(huffNode0, 0, sizeof(huffNode0));
|
memset(huffNode0, 0, sizeof(huffNodeTable));
|
||||||
|
|
||||||
/* sort, decreasing order */
|
/* sort, decreasing order */
|
||||||
HUF_sort(huffNode, count, maxSymbolValue);
|
HUF_sort(huffNode, count, maxSymbolValue);
|
||||||
@ -354,7 +357,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
|
|||||||
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
|
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
|
||||||
nodeNb++; lowS-=2;
|
nodeNb++; lowS-=2;
|
||||||
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
|
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
|
||||||
huffNode0[0].count = (U32)(1U<<31);
|
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
|
||||||
|
|
||||||
/* create parents */
|
/* create parents */
|
||||||
while (nodeNb <= nodeRoot) {
|
while (nodeNb <= nodeRoot) {
|
||||||
@ -397,6 +400,15 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
|
|||||||
return maxNbBits;
|
return maxNbBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** HUF_buildCTable() :
|
||||||
|
* Note : count is used before tree is written, so they can safely overlap
|
||||||
|
*/
|
||||||
|
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
|
||||||
|
{
|
||||||
|
huffNodeTable nodeTable;
|
||||||
|
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
|
||||||
|
}
|
||||||
|
|
||||||
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
|
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
|
||||||
{
|
{
|
||||||
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
|
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
|
||||||
@ -503,7 +515,8 @@ static size_t HUF_compress_internal (
|
|||||||
void* dst, size_t dstSize,
|
void* dst, size_t dstSize,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
unsigned maxSymbolValue, unsigned huffLog,
|
unsigned maxSymbolValue, unsigned huffLog,
|
||||||
unsigned singleStream, unsigned* workSpace)
|
unsigned singleStream,
|
||||||
|
void* workSpace, size_t wkspSize)
|
||||||
{
|
{
|
||||||
BYTE* const ostart = (BYTE*)dst;
|
BYTE* const ostart = (BYTE*)dst;
|
||||||
BYTE* const oend = ostart + dstSize;
|
BYTE* const oend = ostart + dstSize;
|
||||||
@ -515,6 +528,7 @@ static size_t HUF_compress_internal (
|
|||||||
} table; /* `count` can overlap with `CTable`; saves 1 KB */
|
} table; /* `count` can overlap with `CTable`; saves 1 KB */
|
||||||
|
|
||||||
/* checks & inits */
|
/* checks & inits */
|
||||||
|
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
|
||||||
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
|
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
|
||||||
if (!dstSize) return 0; /* cannot fit within dst budget */
|
if (!dstSize) return 0; /* cannot fit within dst budget */
|
||||||
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
||||||
@ -523,14 +537,14 @@ static size_t HUF_compress_internal (
|
|||||||
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
||||||
|
|
||||||
/* Scan input and build symbol stats */
|
/* Scan input and build symbol stats */
|
||||||
{ CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace) );
|
{ CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
|
||||||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
||||||
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
|
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Build Huffman Tree */
|
/* Build Huffman Tree */
|
||||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||||
{ CHECK_V_F(maxBits, HUF_buildCTable (table.CTable, table.count, maxSymbolValue, huffLog) );
|
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
|
||||||
huffLog = (U32)maxBits;
|
huffLog = (U32)maxBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -559,9 +573,10 @@ static size_t HUF_compress_internal (
|
|||||||
|
|
||||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
|
unsigned maxSymbolValue, unsigned huffLog,
|
||||||
|
void* workSpace, size_t wkspSize)
|
||||||
{
|
{
|
||||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace);
|
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t HUF_compress1X (void* dst, size_t dstSize,
|
size_t HUF_compress1X (void* dst, size_t dstSize,
|
||||||
@ -569,14 +584,15 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
|
|||||||
unsigned maxSymbolValue, unsigned huffLog)
|
unsigned maxSymbolValue, unsigned huffLog)
|
||||||
{
|
{
|
||||||
unsigned workSpace[1024];
|
unsigned workSpace[1024];
|
||||||
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
|
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
unsigned maxSymbolValue, unsigned huffLog, unsigned* workSpace)
|
unsigned maxSymbolValue, unsigned huffLog,
|
||||||
|
void* workSpace, size_t wkspSize)
|
||||||
{
|
{
|
||||||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace);
|
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t HUF_compress2 (void* dst, size_t dstSize,
|
size_t HUF_compress2 (void* dst, size_t dstSize,
|
||||||
@ -584,7 +600,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
|
|||||||
unsigned maxSymbolValue, unsigned huffLog)
|
unsigned maxSymbolValue, unsigned huffLog)
|
||||||
{
|
{
|
||||||
unsigned workSpace[1024];
|
unsigned workSpace[1024];
|
||||||
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace);
|
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
||||||
|
@ -471,8 +471,8 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
|
|||||||
singleStream = 1;
|
singleStream = 1;
|
||||||
cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
|
cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
|
||||||
} else {
|
} else {
|
||||||
cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters)
|
cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
|
||||||
: HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters);
|
: HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
|
if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user