Merge pull request #1258 from jennifermliu/fastCover

Use CDict instead of CCtx in analyzeEntropy
dev
Nick Terrell 2018-07-31 14:31:05 -07:00 committed by GitHub
commit 1420129fda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 116 additions and 120 deletions

View File

@ -18,109 +18,109 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
- Fourth column is chosen d and fifth column is chosen k - Fourth column is chosen d and fifth column is chosen k
github: github:
NODICT 0.000004 2.999642 NODICT 0.000005 2.999642
RANDOM 0.161907 8.786957 RANDOM 0.036114 8.791189
LEGACY 0.960128 8.989482 LEGACY 1.111024 8.173529
COVER 69.031037 10.641263 8 1298 COVER 57.856477 10.652243 8 1298
COVER 7.017782 10.641263 8 1298 COVER 5.769965 10.652243 8 1298
FAST15 24.710713 10.547583 8 1874 FAST15 9.965877 10.555630 8 1874
FAST15 0.271657 10.547583 8 1874 FAST15 0.140285 10.555630 8 1874
FAST16 23.906902 10.690723 8 1106 FAST16 10.337194 10.701698 8 1106
FAST16 0.315039 10.690723 8 1106 FAST16 0.114887 10.701698 8 1106
FAST17 25.384572 10.642322 8 1106 FAST17 10.207121 10.650652 8 1106
FAST17 0.319237 10.642322 8 1106 FAST17 0.135424 10.650652 8 1106
FAST18 21.935494 10.491283 8 1826 FAST18 11.463120 10.499142 8 1826
FAST18 0.255488 10.491283 8 1826 FAST18 0.154287 10.499142 8 1826
FAST19 21.349385 10.522182 8 1826 FAST19 12.143020 10.527140 8 1826
FAST19 0.311369 10.522182 8 1826 FAST19 0.158889 10.527140 8 1826
FAST20 23.124955 10.487431 8 1826 FAST20 12.510857 10.494710 8 1826
FAST20 0.317411 10.487431 8 1826 FAST20 0.171334 10.494710 8 1826
FAST21 27.311387 10.491047 8 1778 FAST21 13.201432 10.503488 8 1778
FAST21 0.398483 10.491047 8 1778 FAST21 0.192867 10.503488 8 1778
FAST22 23.993620 10.502191 8 1826 FAST22 13.754560 10.509284 8 1826
FAST22 0.329767 10.502191 8 1826 FAST22 0.206276 10.509284 8 1826
FAST23 27.793381 10.502191 8 1826 FAST23 14.708633 10.509284 8 1826
FAST23 0.359659 10.502191 8 1826 FAST23 0.221751 10.509284 8 1826
FAST24 29.281399 10.509461 8 1826 FAST24 15.134848 10.512369 8 1826
FAST24 0.398369 10.509461 8 1826 FAST24 0.234242 10.512369 8 1826
hg-commands: hg-commands:
NODICT 0.000007 2.425291 NODICT 0.000004 2.425291
RANDOM 0.083477 3.489515 RANDOM 0.055073 3.490331
LEGACY 0.941867 3.911896 LEGACY 0.927414 3.911682
COVER 67.314295 4.131136 8 386 COVER 72.749028 4.132653 8 386
COVER 2.757895 4.131136 8 386 COVER 3.391066 4.132653 8 386
FAST15 13.466983 3.920128 6 1106 FAST15 10.910989 3.920720 6 1106
FAST15 0.162656 3.920128 6 1106 FAST15 0.130480 3.920720 6 1106
FAST16 12.618110 4.032422 8 674 FAST16 10.565224 4.033306 8 674
FAST16 0.159073 4.032422 8 674 FAST16 0.146228 4.033306 8 674
FAST17 12.883772 4.063581 8 1490 FAST17 11.394137 4.064132 8 1490
FAST17 0.183131 4.063581 8 1490 FAST17 0.175567 4.064132 8 1490
FAST18 13.904432 4.085034 8 290 FAST18 11.040248 4.086714 8 290
FAST18 0.161078 4.085034 8 290 FAST18 0.132692 4.086714 8 290
FAST19 13.762269 4.097054 8 578 FAST19 11.335856 4.097947 8 578
FAST19 0.179906 4.097054 8 578 FAST19 0.181441 4.097947 8 578
FAST20 15.303927 4.101575 8 434 FAST20 14.166272 4.102851 8 434
FAST20 0.213146 4.101575 8 434 FAST20 0.203632 4.102851 8 434
FAST21 19.619482 4.104879 8 530 FAST21 15.848896 4.105350 8 530
FAST21 0.289158 4.104879 8 530 FAST21 0.269518 4.105350 8 530
FAST22 23.187937 4.102448 8 530 FAST22 15.570995 4.104100 8 530
FAST22 0.335220 4.102448 8 530 FAST22 0.238512 4.104100 8 530
FAST23 24.946655 4.095162 8 914 FAST23 17.437566 4.098110 8 914
FAST23 0.396927 4.095162 8 914 FAST23 0.270788 4.098110 8 914
FAST24 27.634065 4.114624 8 722 FAST24 18.836604 4.117367 8 722
FAST24 0.434278 4.114624 8 722 FAST24 0.323618 4.117367 8 722
hg-changelog: hg-changelog:
NODICT 0.000027 1.377613 NODICT 0.000006 1.377613
RANDOM 0.676272 2.096785 RANDOM 0.253393 2.097487
LEGACY 2.871887 2.058273 LEGACY 2.410568 2.058907
COVER 226.371004 2.188654 8 98 COVER 203.550681 2.189685 8 98
COVER 5.359820 2.188654 8 98 COVER 7.381697 2.189685 8 98
FAST15 66.776425 2.130548 6 386 FAST15 45.960609 2.130794 6 386
FAST15 0.796836 2.130548 6 386 FAST15 0.512057 2.130794 6 386
FAST16 64.405113 2.144136 8 194 FAST16 44.594817 2.144845 8 194
FAST16 0.778969 2.144136 8 194 FAST16 0.601258 2.144845 8 194
FAST17 65.062292 2.155745 8 98 FAST17 45.852992 2.156099 8 242
FAST17 0.822089 2.155745 8 98 FAST17 0.500844 2.156099 8 242
FAST18 65.819104 2.172062 6 98 FAST18 46.624930 2.172439 6 98
FAST18 0.804247 2.172062 6 98 FAST18 0.680501 2.172439 6 98
FAST19 66.184016 2.179446 6 98 FAST19 47.754905 2.180321 6 98
FAST19 0.883526 2.179446 6 98 FAST19 0.606180 2.180321 6 98
FAST20 72.900924 2.187017 6 98 FAST20 56.733632 2.187431 6 98
FAST20 0.908220 2.187017 6 98 FAST20 0.710149 2.187431 6 98
FAST21 77.869945 2.183583 6 146 FAST21 59.723173 2.184185 6 146
FAST21 0.932666 2.183583 6 146 FAST21 0.875562 2.184185 6 146
FAST22 84.041413 2.182030 6 98 FAST22 66.570788 2.182830 6 98
FAST22 1.092310 2.182030 6 98 FAST22 1.061013 2.182830 6 98
FAST23 89.539265 2.185291 8 98 FAST23 73.817645 2.186399 8 98
FAST23 1.294779 2.185291 8 98 FAST23 0.838496 2.186399 8 98
FAST24 97.193482 2.184939 6 98 FAST24 78.059933 2.185608 6 98
FAST24 1.270493 2.184939 6 98 FAST24 0.843158 2.185608 6 98
hg-manifest: hg-manifest:
NODICT 0.000004 1.866385 NODICT 0.000005 1.866385
RANDOM 0.969045 2.309485 RANDOM 0.735840 2.309436
LEGACY 8.849052 2.506775 LEGACY 9.322081 2.506977
COVER 905.855524 2.582597 8 434 COVER 885.961515 2.582528 8 434
COVER 34.951973 2.582597 8 434 COVER 32.678552 2.582528 8 434
FAST15 154.816926 2.391764 6 1826 FAST15 114.414413 2.392920 6 1826
FAST15 1.932845 2.391764 6 1826 FAST15 1.412690 2.392920 6 1826
FAST16 142.197120 2.480738 6 1922 FAST16 113.869718 2.480762 6 1922
FAST16 1.759330 2.480738 6 1922 FAST16 1.539424 2.480762 6 1922
FAST17 147.276099 2.548313 6 1682 FAST17 113.333636 2.548285 6 1682
FAST17 1.819175 2.548313 6 1682 FAST17 1.473196 2.548285 6 1682
FAST18 164.543366 2.567448 6 386 FAST18 111.717871 2.567634 6 386
FAST18 2.728845 2.567448 6 386 FAST18 1.421200 2.567634 6 386
FAST19 195.670852 2.581170 8 338 FAST19 112.428344 2.581653 8 338
FAST19 2.439487 2.581170 8 338 FAST19 1.412185 2.581653 8 338
FAST20 195.716408 2.587062 6 194 FAST20 128.897480 2.586881 8 194
FAST20 2.056303 2.587062 6 194 FAST20 1.586570 2.586881 8 194
FAST21 211.483191 2.590136 6 242 FAST21 168.465684 2.590051 6 242
FAST21 2.983587 2.590136 6 242 FAST21 2.190732 2.590051 6 242
FAST22 239.562966 2.591033 6 194 FAST22 202.320435 2.591376 6 194
FAST22 3.355746 2.591033 6 194 FAST22 2.667877 2.591376 6 194
FAST23 264.547195 2.590403 8 434 FAST23 228.952201 2.591131 8 434
FAST23 3.667851 2.590403 8 434 FAST23 3.315501 2.591131 8 434
FAST24 296.258379 2.591723 6 290 FAST24 327.320020 2.591548 6 290
FAST24 3.858688 2.591723 6 290 FAST24 5.048348 2.591548 6 290

View File

@ -293,7 +293,7 @@ static dictItem ZDICT_analyzePos(
refinedEnd = refinedStart + selectedCount; refinedEnd = refinedStart + selectedCount;
} }
/* evaluate gain based on new ref */ /* evaluate gain based on new dict */
start = refinedStart; start = refinedStart;
pos = suffix[refinedStart]; pos = suffix[refinedStart];
end = start; end = start;
@ -341,7 +341,7 @@ static dictItem ZDICT_analyzePos(
for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
savings[i] = savings[i-1] + (lengthList[i] * (i-3)); savings[i] = savings[i-1] + (lengthList[i] * (i-3));
DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n", DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
(U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength); (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
solution.pos = (U32)pos; solution.pos = (U32)pos;
@ -581,7 +581,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
typedef struct typedef struct
{ {
ZSTD_CCtx* ref; /* contains reference to dictionary */ ZSTD_CDict* dict; /* dictionary */
ZSTD_CCtx* zc; /* working context */ ZSTD_CCtx* zc; /* working context */
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
} EStats_ress_t; } EStats_ress_t;
@ -597,8 +597,9 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
size_t cSize; size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
} }
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; } if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
@ -708,14 +709,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* init */ /* init */
DEBUGLOG(4, "ZDICT_analyzeEntropy"); DEBUGLOG(4, "ZDICT_analyzeEntropy");
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
if (!esr.ref || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@ -726,12 +719,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
memset(bestRepOffset, 0, sizeof(bestRepOffset)); memset(bestRepOffset, 0, sizeof(bestRepOffset));
if (compressionLevel==0) compressionLevel = g_compressionLevel_default; if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
if (ZSTD_isError(beginResult)) { esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult)); esr.zc = ZSTD_createCCtx();
eSize = ERROR(GENERIC); esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
goto _cleanup; if (!esr.dict || !esr.zc || !esr.workPlace) {
} } eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
/* collect stats on all samples */ /* collect stats on all samples */
for (u=0; u<nbFiles; u++) { for (u=0; u<nbFiles; u++) {
@ -856,7 +852,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
eSize += 12; eSize += 12;
_cleanup: _cleanup:
ZSTD_freeCCtx(esr.ref); ZSTD_freeCDict(esr.dict);
ZSTD_freeCCtx(esr.zc); ZSTD_freeCCtx(esr.zc);
free(esr.workPlace); free(esr.workPlace);