fix alignment condition in FSE_buildCTable

2-bytes alignment is enough for 16-bit fields
This commit is contained in:
Yann Collet 2021-08-29 19:05:04 -07:00
parent f21977c5e6
commit 74b4171fb8
2 changed files with 23 additions and 31 deletions

View File

@ -78,12 +78,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
U32 const maxSV1 = maxSymbolValue+1; U32 const maxSV1 = maxSymbolValue+1;
U16* cumul = (U16*)workSpace; /* size = maxSV1 */ U16* cumul = (U16*)workSpace; /* size = maxSV1 */
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
BYTE* spread = tableSymbol + tableSize; /* size = tableSize */
U32 highThreshold = tableSize-1; U32 highThreshold = tableSize-1;
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* CTable header */ /* CTable header */
tableU16[-2] = (U16) tableLog; tableU16[-2] = (U16) tableLog;
@ -105,7 +104,9 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
cumul[u] = cumul[u-1] + 1; cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
} else { } else {
cumul[u] = cumul[u-1] + normalizedCounter[u-1]; assert(normalizedCounter[u-1] >= 0);
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
assert(cumul[u] >= cumul[u-1]); /* no overflow */
} } } }
cumul[maxSV1] = (U16)(tableSize+1); cumul[maxSV1] = (U16)(tableSize+1);
} }
@ -115,8 +116,8 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
/* Case for no low prob count symbols. Lay down 8 bytes at a time /* Case for no low prob count symbols. Lay down 8 bytes at a time
* to reduce branch misses since we are operating on a small block * to reduce branch misses since we are operating on a small block
*/ */
{ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize */
U64 const add = 0x0101010101010101ull; { U64 const add = 0x0101010101010101ull;
size_t pos = 0; size_t pos = 0;
U64 sv = 0; U64 sv = 0;
U32 s; U32 s;
@ -127,15 +128,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
for (i = 8; i < n; i += 8) { for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv); MEM_write64(spread + pos + i, sv);
} }
pos += n; assert(n>=0);
pos += (size_t)n;
} }
} }
/* Spread symbols across the table. Lack of lowprob symbols means that /* Spread symbols across the table. Lack of lowprob symbols means that
* we don't need variable sized inner loop, so we can unroll the loop and * we don't need variable sized inner loop, so we can unroll the loop and
* reduce branch misses. * reduce branch misses.
*/ */
{ { size_t position = 0;
size_t position = 0;
size_t s; size_t s;
size_t const unroll = 2; /* Experimentally determined optimal unroll */ size_t const unroll = 2; /* Experimentally determined optimal unroll */
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
@ -147,7 +148,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
} }
position = (position + (unroll * step)) & tableMask; position = (position + (unroll * step)) & tableMask;
} }
assert(position == 0); assert(position == 0); /* Must have initialized all positions */
} }
} else { } else {
U32 position = 0; U32 position = 0;
@ -161,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
while (position > highThreshold) while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */ position = (position + step) & tableMask; /* Low proba area */
} } } }
assert(position==0); /* Must have initialized all positions */ assert(position==0); /* Must have initialized all positions */
} }
@ -185,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
case -1: case -1:
case 1: case 1:
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
symbolTT[s].deltaFindState = total - 1; assert(total <= INT_MAX);
symbolTT[s].deltaFindState = (int)(total - 1);
total ++; total ++;
break; break;
default : default :
{ assert(normalizedCounter[s] > 1);
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = total - normalizedCounter[s]; symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
total += normalizedCounter[s]; total += (unsigned)normalizedCounter[s];
} } } } } } } }
#if 0 /* debug : symbol costs */ #if 0 /* debug : symbol costs */
@ -205,26 +206,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
symbol, normalizedCounter[symbol], symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol), FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
} } }
}
#endif #endif
return 0; return 0;
} }
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#endif
#ifndef FSE_COMMONDEFS_ONLY #ifndef FSE_COMMONDEFS_ONLY
/*-************************************************************** /*-**************************************************************
* FSE NCount encoding * FSE NCount encoding
****************************************************************/ ****************************************************************/

View File

@ -275,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
assert(nbSeq_1 > 1); assert(nbSeq_1 > 1);
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
(void)entropyWorkspaceSize; (void)entropyWorkspaceSize;
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ assert(oend >= op);
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
return NCountSize; return NCountSize;
} }
} }