Merge pull request #2887 from terrelln/issue-2815

[zdict] Remove ZDICT_CONTENTSIZE_MIN restriction for ZDICT_finalizeDictionary
dev
Nick Terrell 2021-12-01 18:15:53 -05:00 committed by GitHub
commit 9b97fdf74f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 13 deletions

View File

@ -915,6 +915,17 @@ _cleanup:
}
/**
* @returns the maximum repcode value
*/
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
{
U32 maxRep = reps[0];
int r;
for (r = 1; r < ZSTD_REP_NUM; ++r)
maxRep = MAX(maxRep, reps[r]);
return maxRep;
}
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* customDictContent, size_t dictContentSize,
@ -926,11 +937,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
/* The final dictionary content must be at least as large as the largest repcode */
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
size_t paddingSize;
/* check conditions */
DEBUGLOG(4, "ZDICT_finalizeDictionary");
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
/* dictionary header */
@ -954,12 +967,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
hSize += eSize;
}
/* copy elements in final buffer ; note : src and dst buffer can overlap */
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
{ size_t const dictSize = hSize + dictContentSize;
char* dictEnd = (char*)dictBuffer + dictSize;
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
memcpy(dictBuffer, header, hSize);
/* Shrink the content size if it doesn't fit in the buffer */
if (hSize + dictContentSize > dictBufferCapacity) {
dictContentSize = dictBufferCapacity - hSize;
}
/* Pad the dictionary content with zeros if it is too small */
if (dictContentSize < minContentSize) {
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
"dictBufferCapacity too small to fit max repcode");
paddingSize = minContentSize - dictContentSize;
} else {
paddingSize = 0;
}
{
size_t const dictSize = hSize + paddingSize + dictContentSize;
/* The dictionary consists of the header, optional padding, and the content.
* The padding comes before the content because the "best" position in the
* dictionary is the last byte.
*/
BYTE* const outDictHeader = (BYTE*)dictBuffer;
BYTE* const outDictPadding = outDictHeader + hSize;
BYTE* const outDictContent = outDictPadding + paddingSize;
assert(dictSize <= dictBufferCapacity);
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
/* First copy the customDictContent into its final location.
* `customDictContent` and `dictBuffer` may overlap, so we must
* do this before any other writes into the output buffer.
* Then copy the header & padding into the output buffer.
*/
memmove(outDictContent, customDictContent, dictContentSize);
memcpy(outDictHeader, header, hSize);
memset(outDictPadding, 0, paddingSize);
return dictSize;
}
}

View File

@ -237,7 +237,6 @@ typedef struct {
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
@ -272,8 +271,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
* Use them only in association with static linking.
* ==================================================================================== */
#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
/* Deprecated: Remove in v1.6.0 */
#define ZDICT_CONTENTSIZE_MIN 128
/*! ZDICT_cover_params_t:
* k and d are the only required parameters.

View File

@ -1022,12 +1022,8 @@ zstd -o tmpDict --train "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f tmpDict
zstd --train "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f dictionary
println "- Test dictionary training fails"
echo "000000000000000000000000000000000" > tmpz
zstd --train tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros"
if [ -n "$hasMT" ]
then
zstd --train -T0 tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros"
println "- Create dictionary with multithreading enabled"
zstd --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
fi