minor speed improvements 2

bench.c: block size has to be bigger than 32 bytes
zstdcli.c: support for e.g. -B16k -B16m
dev
inikep 2016-04-21 12:18:47 +02:00
parent ef51941822
commit 38654988f3
3 changed files with 29 additions and 30 deletions

View File

@ -260,7 +260,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
U32 nbCompares, const U32 mls,
U32 extDict, ZSTD_match_t* matches)
U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
{
const BYTE* const base = zc->base;
const U32 current = (U32)(ip-base);
@ -285,7 +285,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
U32 mnum = 0;
const U32 minMatch = (mls == 3) ? 3 : 4;
size_t bestLength = minMatch-1;
size_t bestLength = minMatchLen-1;
if (minMatch == 3) { /* HC3 match finder */
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
@ -385,26 +385,26 @@ update:
static U32 ZSTD_BtGetAllMatches (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
}
static U32 ZSTD_BtGetAllMatches_selectMLS (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{
switch(matchLengthSearch)
{
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default :
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
@ -412,26 +412,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS (
static U32 ZSTD_BtGetAllMatches_extDict (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit,
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches)
const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
{
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches);
return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
}
static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit,
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches)
const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
{
switch(matchLengthSearch)
{
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches);
case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
default :
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches);
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches);
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches);
case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
}
}
@ -499,7 +499,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch);
} }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
if (!last_pos && !match_num) { ip++; continue; }
@ -604,8 +604,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch);
} }
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches);
match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
@ -799,7 +798,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
if (!last_pos && !match_num) { ip++; continue; }
@ -913,7 +912,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
} while (mlen >= minMatch);
} } }
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches);
match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
if (match_num > 0 && matches[match_num-1].len > sufficient_len) {

View File

@ -248,7 +248,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
const size_t* fileSizes, U32 nbFiles,
const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
{
size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
@ -488,9 +488,9 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
dictBuffer, dictBufferSize, &result);
if (g_displayLevel == 1) {
if (g_additionalParam)
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam);
else
DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName);
total.cSize += result.cSize;
total.cSpeed += result.cSpeed;
total.dSpeed += result.dSpeed;
@ -501,7 +501,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
total.cSpeed /= 1+cLevelLast-cLevel;
total.dSpeed /= 1+cLevelLast-cLevel;
total.ratio /= 1+cLevelLast-cLevel;
DISPLAY("avg%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
DISPLAY("avg%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName);
}
}

View File

@ -41,6 +41,7 @@
#include <stdio.h> /* fprintf, getchar */
#include <stdlib.h> /* exit, calloc, free */
#include <string.h> /* strcmp, strlen */
#include <ctype.h> /* toupper */
#include "fileio.h"
#ifndef ZSTD_NOBENCH
# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */
@ -304,9 +305,9 @@ int main(int argCount, const char** argv)
argument++;
while ((*argument >='0') && (*argument <='9'))
bSize *= 10, bSize += *argument++ - '0';
if (*argument=='K') bSize<<=10, argument++; /* allows using KB notation */
if (*argument=='M') bSize<<=20, argument++;
if (*argument=='B') argument++;
if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */
if (toupper(*argument)=='M') bSize<<=20, argument++;
if (toupper(*argument)=='B') argument++;
BMK_setNotificationLevel(displayLevel);
BMK_SetBlockSize(bSize);
}
@ -368,8 +369,7 @@ int main(int argCount, const char** argv)
maxDictSize = 0;
while ((*argument>='0') && (*argument<='9'))
maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
if (*argument=='k' || *argument=='K')
maxDictSize <<= 10;
if (toupper(*argument)=='K') maxDictSize <<= 10;
continue;
}