Allow Templating ZSTD_fast Matchfinders on Acceleration (Lvl < -1)

This commit is contained in:
W. Felix Handte 2021-12-13 14:46:57 -05:00
parent ace6a7e746
commit b8434cb754

View File

@ -93,7 +93,7 @@ FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_noDict_generic( ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, void const* src, size_t srcSize,
U32 const mls) U32 const mls, U32 const hasStep)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
@ -313,18 +313,23 @@ _match: /* Requires: ip0, match0, offcode */
goto _start; goto _start;
} }
#define ZSTD_GEN_FAST_FN(dictMode, mls) \ #define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls( \ static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \ void const* src, size_t srcSize) \
{ \ { \
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
} }
ZSTD_GEN_FAST_FN(noDict, 4) ZSTD_GEN_FAST_FN(noDict, 4, 1)
ZSTD_GEN_FAST_FN(noDict, 5) ZSTD_GEN_FAST_FN(noDict, 5, 1)
ZSTD_GEN_FAST_FN(noDict, 6) ZSTD_GEN_FAST_FN(noDict, 6, 1)
ZSTD_GEN_FAST_FN(noDict, 7) ZSTD_GEN_FAST_FN(noDict, 7, 1)
ZSTD_GEN_FAST_FN(noDict, 4, 0)
ZSTD_GEN_FAST_FN(noDict, 5, 0)
ZSTD_GEN_FAST_FN(noDict, 6, 0)
ZSTD_GEN_FAST_FN(noDict, 7, 0)
size_t ZSTD_compressBlock_fast( size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -332,24 +337,40 @@ size_t ZSTD_compressBlock_fast(
{ {
U32 const mls = ms->cParams.minMatch; U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL); assert(ms->dictMatchState == NULL);
if (ms->cParams.targetLength > 1) {
switch(mls) switch(mls)
{ {
default: /* includes case 3 */ default: /* includes case 3 */
case 4 : case 4 :
return ZSTD_compressBlock_fast_noDict_4(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
case 5 : case 5 :
return ZSTD_compressBlock_fast_noDict_5(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
case 6 : case 6 :
return ZSTD_compressBlock_fast_noDict_6(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
case 7 : case 7 :
return ZSTD_compressBlock_fast_noDict_7(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
}
} else {
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
}
} }
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_dictMatchState_generic( size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls) void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
@ -385,6 +406,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
assert(endIndex - prefixStartIndex <= maxDistance); assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
(void)hasStep; /* not currently specialized on whether it's accelerated */
/* ensure there will be no underflow /* ensure there will be no underflow
* when translating a dict index into a local index */ * when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
@ -495,10 +518,10 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} }
ZSTD_GEN_FAST_FN(dictMatchState, 4) ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 5) ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 6) ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 7) ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
size_t ZSTD_compressBlock_fast_dictMatchState( size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -510,20 +533,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
{ {
default: /* includes case 3 */ default: /* includes case 3 */
case 4 : case 4 :
return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
case 5 : case 5 :
return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
case 6 : case 6 :
return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
case 7 : case 7 :
return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
} }
} }
static size_t ZSTD_compressBlock_fast_extDict_generic( static size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls) void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
@ -547,6 +570,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const ilimit = iend - 8; const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
(void)hasStep; /* not currently specialized on whether it's accelerated */
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
/* switch to "regular" variant if extDict is invalidated due to maxDistance */ /* switch to "regular" variant if extDict is invalidated due to maxDistance */
@ -624,10 +649,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }
ZSTD_GEN_FAST_FN(extDict, 4) ZSTD_GEN_FAST_FN(extDict, 4, 0)
ZSTD_GEN_FAST_FN(extDict, 5) ZSTD_GEN_FAST_FN(extDict, 5, 0)
ZSTD_GEN_FAST_FN(extDict, 6) ZSTD_GEN_FAST_FN(extDict, 6, 0)
ZSTD_GEN_FAST_FN(extDict, 7) ZSTD_GEN_FAST_FN(extDict, 7, 0)
size_t ZSTD_compressBlock_fast_extDict( size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -638,12 +663,12 @@ size_t ZSTD_compressBlock_fast_extDict(
{ {
default: /* includes case 3 */ default: /* includes case 3 */
case 4 : case 4 :
return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
case 5 : case 5 :
return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
case 6 : case 6 :
return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
case 7 : case 7 :
return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
} }
} }