Introduced FSE_compress_wksp() and FSE_buildCTable_wksp() to reduce stack memory usage

dev
Yann Collet 2016-11-30 15:52:20 -08:00
parent 4ab3a4228f
commit d79a9a00d9
4 changed files with 90 additions and 55 deletions

View File

@ -300,12 +300,25 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* s
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `workSpace` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* wkspSize should be sized to handle worst case situation, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)`.
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements.
*/
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */

View File

@ -100,7 +100,13 @@ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VA
/* Function templates */
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
/* FSE_buildCTable_wksp() :
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
*/
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
U32 const tableSize = 1 << tableLog;
U32 const tableMask = tableSize - 1;
@ -111,10 +117,11 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
U32 const step = FSE_TABLESTEP(tableSize);
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
U32 highThreshold = tableSize-1;
/* CTable header */
if ((1<<tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
@ -181,6 +188,13 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
}
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#ifndef FSE_COMMONDEFS_ONLY
@ -189,7 +203,7 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
@ -486,7 +500,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
U32 ToDistribute;
/* Init */
U32 lowThreshold = (U32)(total >> tableLog);
U32 const lowThreshold = (U32)(total >> tableLog);
U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
for (s=0; s<=maxSymbolValue; s++) {
@ -534,17 +548,16 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
return 0;
}
{
U64 const vStepLog = 62 - tableLog;
{ U64 const vStepLog = 62 - tableLog;
U64 const mid = (1ULL << (vStepLog-1)) - 1;
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
U64 tmpTotal = mid;
for (s=0; s<=maxSymbolValue; s++) {
if (norm[s]==-2) {
U64 end = tmpTotal + (count[s] * rStep);
U32 sStart = (U32)(tmpTotal >> vStepLog);
U32 sEnd = (U32)(end >> vStepLog);
U32 weight = sEnd - sStart;
U64 const end = tmpTotal + (count[s] * rStep);
U32 const sStart = (U32)(tmpTotal >> vStepLog);
U32 const sEnd = (U32)(end >> vStepLog);
U32 const weight = sEnd - sStart;
if (weight < 1)
return ERROR(GENERIC);
norm[s] = (short)weight;
@ -566,7 +579,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);
@ -594,7 +606,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
} }
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
/* corner case, need another normalization method */
size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
if (FSE_isError(errorCode)) return errorCode;
}
else normalizedCounter[largest] += (short)stillToDistribute;
@ -643,17 +655,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle (100% always same symbol) input */
/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
void* ptr = ct;
@ -685,14 +695,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
const BYTE* const iend = istart + srcSize;
const BYTE* ip=iend;
BIT_CStream_t bitC;
FSE_CState_t CState1, CState2;
/* init */
if (srcSize <= 2) return 0;
{ size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
if (FSE_isError(errorCode)) return 0; }
{ size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
@ -715,7 +724,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
}
/* 2 or 4 encoding per loop */
for ( ; ip>istart ; ) {
while ( ip>istart ) {
FSE_encodeSymbol(&bitC, &CState2, *--ip);
@ -741,7 +750,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
const void* src, size_t srcSize,
const FSE_CTable* ct)
{
const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
if (fast)
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
@ -752,7 +761,14 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
#define CHECK_E_F(e, f) size_t const e = f; if (FSE_isError(e)) return f
#define CHECK_F(f) { CHECK_E_F(_var_err__, f); }
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `workSpace` size must be `(1<<tableLog)`.
*/
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace)
{
const BYTE* const istart = (const BYTE*) src;
const BYTE* ip = istart;
@ -764,46 +780,50 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
U32 count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
CTable_max_t ct;
size_t errorCode;
/* init conditions */
if (srcSize <= 1) return 0; /* Uncompressible */
if (srcSize <= 1) return 0; /* Not compressible */
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
if (FSE_isError(errorCode)) return errorCode;
if (errorCode == srcSize) return 1;
if (errorCode == 1) return 0; /* each symbol only present once */
if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
{ CHECK_E_F(maxCount, FSE_count(count, &maxSymbolValue, ip, srcSize) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
}
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
if (FSE_isError(errorCode)) return errorCode;
CHECK_F( FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue) );
/* Write table description header */
errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
if (FSE_isError(errorCode)) return errorCode;
op += errorCode;
{ CHECK_E_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
op += nc_err;
}
/* Compress */
errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
if (errorCode == 0) return 0; /* not enough space for compressed data */
op += errorCode;
CHECK_F( FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, workSpace, 1<<tableLog) );
{ CHECK_E_F(cSize, FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-1 )
return 0;
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
return op-ostart;
}
size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, scratchBuffer);
}
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}

View File

@ -88,13 +88,15 @@ typedef struct nodeElt_s {
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
{
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
BYTE* op = (BYTE*)dst;
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
U32 n;
/* check conditions */
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
/* convert to weight */
bitsToWeight[0] = 0;
@ -103,23 +105,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
for (n=0; n<maxSymbolValue; n++)
huffWeight[n] = bitsToWeight[CTable[n].nbBits];
{ size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);
/* attempt weights compression by FSE */
{ size_t const size = FSE_compress_wksp(op+1, maxDstSize-1, huffWeight, maxSymbolValue, HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER, scratchBuffer);
if (FSE_isError(size)) return size;
if ((size>1) & (size < maxSymbolValue/2)) { /* FSE compressed */
op[0] = (BYTE)size;
return size+1;
}
}
} }
/* raw values */
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen */
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */
for (n=0; n<maxSymbolValue; n+=2)
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
return ((maxSymbolValue+1)/2) + 1;
}

View File

@ -566,6 +566,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
BYTE* op = ostart;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* seqHead;
BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
@ -601,7 +602,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
LLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
LLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
@ -611,7 +612,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
LLtype = set_compressed;
} }
@ -625,7 +626,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Offtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
@ -635,7 +636,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_compressed;
} }
@ -649,7 +650,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
MLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
MLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
@ -659,7 +660,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
MLtype = set_compressed;
} }