Merge pull request #2863 from terrelln/fast-dfast-size

Reduce function size in fast & dfast
This commit is contained in:
Nick Terrell 2021-11-15 20:15:22 -08:00 committed by GitHub
commit 8dba88c3f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 71 additions and 27 deletions

View File

@ -468,6 +468,24 @@ _match_stored:
return (size_t)(iend - anchor);
}
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
}
ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -478,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}
@ -498,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}
@ -540,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_noDict_generic(ms, seqStore, rep, src, srcSize, mls);
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@ -653,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
return (size_t)(iend - anchor);
}
ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -663,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}

View File

@ -90,7 +90,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
* This is also the work we do at the beginning to enter the loop initially.
*/
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_generic(
ZSTD_compressBlock_fast_noDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const mls)
@ -310,6 +310,18 @@ _match: /* Requires: ip0, match0, offcode */
goto _start;
}
#define ZSTD_GEN_FAST_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
}
ZSTD_GEN_FAST_FN(noDict, 4)
ZSTD_GEN_FAST_FN(noDict, 5)
ZSTD_GEN_FAST_FN(noDict, 6)
ZSTD_GEN_FAST_FN(noDict, 7)
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -321,13 +333,13 @@ size_t ZSTD_compressBlock_fast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}
@ -479,6 +491,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
return (size_t)(iend - anchor);
}
ZSTD_GEN_FAST_FN(dictMatchState, 4)
ZSTD_GEN_FAST_FN(dictMatchState, 5)
ZSTD_GEN_FAST_FN(dictMatchState, 6)
ZSTD_GEN_FAST_FN(dictMatchState, 7)
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
@ -489,13 +507,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}
@ -530,7 +548,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@ -603,6 +621,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
return (size_t)(iend - anchor);
}
ZSTD_GEN_FAST_FN(extDict, 4)
ZSTD_GEN_FAST_FN(extDict, 5)
ZSTD_GEN_FAST_FN(extDict, 6)
ZSTD_GEN_FAST_FN(extDict, 7)
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -613,12 +635,12 @@ size_t ZSTD_compressBlock_fast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}