Update function names for consistency

This commit is contained in:
senhuang42 2020-12-08 13:06:18 -05:00 committed by Sen Huang
parent c56d6e49e8
commit f06f6626ed
5 changed files with 126 additions and 102 deletions

View File

@ -2192,6 +2192,32 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
return (cctxParams->targetCBlockSize != 0); return (cctxParams->targetCBlockSize != 0);
} }
/* Pseudocode algorithm for finding the optimal partition:
* Given n sequences:
* Let epsilon = 1
*/
/* ZSTD_sequenceWindow :
 * A pair of indices, startIdx and endIdx, each advanced one step at a time
 * by the helper functions that follow.
 * NOTE(review): presumably both fields index into a sequence array and
 * delimit the window examined by the partition search - confirm with callers.
 */
typedef struct {
    size_t startIdx;
    size_t endIdx;
} ZSTD_sequenceWindow;

/* ZSTD_sequenceWindow_moveStartIdx() :
 * Advances sequenceWindow->startIdx by one.
 * @return : the updated startIdx (the function is declared to return size_t,
 *           so it must produce a value on every path).
 */
size_t ZSTD_sequenceWindow_moveStartIdx(ZSTD_sequenceWindow* sequenceWindow) {
    return ++sequenceWindow->startIdx;
}

/* ZSTD_sequenceWindow_moveEndIdx() :
 * Advances sequenceWindow->endIdx by one.
 * @return : the updated endIdx.
 */
size_t ZSTD_sequenceWindow_moveEndIdx(ZSTD_sequenceWindow* sequenceWindow) {
    return ++sequenceWindow->endIdx;
}

/* ZSTD_sequenceWindow_currentCost() :
 * Placeholder: the window is not inspected yet and the reported cost is always 0.
 * @return : 0
 */
size_t ZSTD_sequenceWindow_currentCost(ZSTD_sequenceWindow* sequenceWindow) {
    (void)sequenceWindow;   /* unused until a real cost model is implemented */
    return 0;
}
/* ZSTD_buildSequencesStatistics():
* Returns the size of the statistics for a given set of sequences, or a ZSTD error code
*/
MEM_STATIC size_t MEM_STATIC size_t
ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable,
const BYTE* const llCodeTable, const BYTE* const llCodeTable,
@ -2325,7 +2351,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
U32 entropyStatisticsSize; U32 entropyStatisticsSize;
const seqDef* const sequences = seqStorePtr->sequencesStart; const seqDef* const sequences = seqStorePtr->sequencesStart;
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
@ -2335,7 +2360,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity; BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart; BYTE* op = ostart;
BYTE* seqHead;
BYTE* lastNCount = NULL; BYTE* lastNCount = NULL;
entropyWorkspace = count + (MaxSeq + 1); entropyWorkspace = count + (MaxSeq + 1);
@ -2528,16 +2552,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static U32 countSeqStoreLiteralsBytes2(const seqStore_t* seqStore) {
U32 literalsBytes = 0;
U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart;
for (int i = 0; i < nbSeqs; ++i) {
seqDef seq = seqStore->sequencesStart[i];
literalsBytes += seq.litLength;
}
return literalsBytes;
}
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{ {
ZSTD_matchState_t* const ms = &zc->blockState.matchState; ZSTD_matchState_t* const ms = &zc->blockState.matchState;
@ -2761,19 +2775,19 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
/* Writes the block header */ /* Writes the block header */
static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
U32 const cBlockHeader = cSize == 1 ? U32 const cBlockHeader = cSize == 1 ?
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(op, cBlockHeader); MEM_writeLE24(op, cBlockHeader);
DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
} }
/** ZSTD_buildSuperBlockEntropy_literal() : /** ZSTD_buildBlockEntropyStats_literals() :
* Builds entropy for the super-block literals. * Builds entropy for the super-block literals.
* Stores literals block type (raw, rle, compressed, repeat) and * Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata. * huffman description table to hufMetadata.
* @return : size of huffman description table or error code */ * @return : size of huffman description table or error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
const ZSTD_hufCTables_t* prevHuf, const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf, ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata, ZSTD_hufCTablesMetadata_t* hufMetadata,
@ -2791,7 +2805,7 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
unsigned huffLog = HUF_TABLELOG_DEFAULT; unsigned huffLog = HUF_TABLELOG_DEFAULT;
HUF_repeat repeat = prevHuf->repeatMode; HUF_repeat repeat = prevHuf->repeatMode;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
/* Prepare nextEntropy assuming reusing the existing table */ /* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@ -2871,11 +2885,11 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz
} }
} }
/** ZSTD_buildSuperBlockEntropy_sequences() : /** ZSTD_buildBlockEntropyStats_sequences() :
* Builds entropy for the super-block sequences. * Builds entropy for the super-block sequences.
* Stores symbol compression modes and fse table to fseMetadata. * Stores symbol compression modes and fse table to fseMetadata.
* @return : size of fse tables or error code */ * @return : size of fse tables or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
const ZSTD_fseCTables_t* prevEntropy, const ZSTD_fseCTables_t* prevEntropy,
ZSTD_fseCTables_t* nextEntropy, ZSTD_fseCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams, const ZSTD_CCtx_params* cctxParams,
@ -2902,11 +2916,10 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart; BYTE* op = ostart;
U32 entropyStatisticsSize;
BYTE* lastNCount = NULL; BYTE* lastNCount = NULL;
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
ZSTD_memset(workspace, 0, wkspSize); ZSTD_memset(workspace, 0, wkspSize);
fseMetadata->lastCountSize = 0; fseMetadata->lastCountSize = 0;
@ -2918,10 +2931,10 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
} }
/** ZSTD_buildSuperBlockEntropy() : /** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the super-block. * Builds entropy for the super-block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy, const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy, ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams, const ZSTD_CCtx_params* cctxParams,
@ -2929,21 +2942,21 @@ size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
void* workspace, size_t wkspSize) void* workspace, size_t wkspSize)
{ {
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); DEBUGLOG(5, "ZSTD_buildBlockEntropyStats");
entropyMetadata->hufMetadata.hufDesSize = entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf, &prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata, &entropyMetadata->hufMetadata,
ZSTD_disableLiteralsCompression(cctxParams), ZSTD_disableLiteralsCompression(cctxParams),
workspace, wkspSize); workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
entropyMetadata->fseMetadata.fseTablesSize = entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
&prevEntropy->fse, &nextEntropy->fse, &prevEntropy->fse, &nextEntropy->fse,
cctxParams, cctxParams,
&entropyMetadata->fseMetadata, &entropyMetadata->fseMetadata,
workspace, wkspSize); workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
return 0; return 0;
} }
@ -3064,7 +3077,7 @@ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx* zc, seqStore_t* seqStore) { static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx* zc, seqStore_t* seqStore) {
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
size_t estimatedSize; size_t estimatedSize;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(seqStore, FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
&zc->blockState.prevCBlock->entropy, &zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy, &zc->blockState.nextCBlock->entropy,
&zc->appliedParams, &zc->appliedParams,
@ -3078,10 +3091,12 @@ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx
return estimatedSize; return estimatedSize;
} }
static U32 countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { /* Returns literals bytes represented in a seqStore */
U32 literalsBytes = 0; static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* seqStore) {
U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; size_t literalsBytes = 0;
for (int i = 0; i < nbSeqs; ++i) { size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart;
size_t i;
for (i = 0; i < nbSeqs; ++i) {
seqDef seq = seqStore->sequencesStart[i]; seqDef seq = seqStore->sequencesStart[i];
literalsBytes += seq.litLength; literalsBytes += seq.litLength;
if (i == seqStore->longLengthPos && seqStore->longLengthID == 1) { if (i == seqStore->longLengthPos && seqStore->longLengthID == 1) {
@ -3091,10 +3106,12 @@ static U32 countSeqStoreLiteralsBytes(const seqStore_t* seqStore) {
return literalsBytes; return literalsBytes;
} }
static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) { /* Returns match bytes represented in a seqStore */
U32 matchBytes = 0; static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) {
U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; size_t matchBytes = 0;
for (int i = 0; i < nbSeqs; ++i) { size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart;
size_t i;
for (i = 0; i < nbSeqs; ++i) {
seqDef seq = seqStore->sequencesStart[i]; seqDef seq = seqStore->sequencesStart[i];
matchBytes += seq.matchLength + MINMATCH; matchBytes += seq.matchLength + MINMATCH;
if (i == seqStore->longLengthPos && seqStore->longLengthID == 2) { if (i == seqStore->longLengthPos && seqStore->longLengthID == 2) {
@ -3104,12 +3121,17 @@ static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) {
return matchBytes; return matchBytes;
} }
static void splitSeqStores(const seqStore_t* originalSeqStore, /* ZSTD_splitSeqStores():
* Splits the original seqStore into two, with nbSeqFirstHalf sequences in the first
* seqStore, and the remainder in the second.
*/
static void ZSTD_splitSeqStores(const seqStore_t* originalSeqStore,
seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, seqStore_t* firstSeqStore, seqStore_t* secondSeqStore,
size_t nbSeqFirstHalf) { size_t nbSeqFirstHalf) {
BYTE* const litEnd = originalSeqStore->lit; BYTE* const litEnd = originalSeqStore->lit;
seqDef* const seqEnd = originalSeqStore->sequences; seqDef* const seqEnd = originalSeqStore->sequences;
U32 literalsBytesFirstHalf;
*firstSeqStore = *originalSeqStore; *firstSeqStore = *originalSeqStore;
*secondSeqStore = *originalSeqStore; *secondSeqStore = *originalSeqStore;
@ -3124,7 +3146,7 @@ static void splitSeqStores(const seqStore_t* originalSeqStore,
firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf;
U32 literalsBytesFirstHalf = countSeqStoreLiteralsBytes(firstSeqStore); literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(firstSeqStore);
firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf;
secondSeqStore->sequencesStart += nbSeqFirstHalf; secondSeqStore->sequencesStart += nbSeqFirstHalf;
@ -3134,15 +3156,21 @@ static void splitSeqStores(const seqStore_t* originalSeqStore,
secondSeqStore->llCode += nbSeqFirstHalf; secondSeqStore->llCode += nbSeqFirstHalf;
secondSeqStore->mlCode += nbSeqFirstHalf; secondSeqStore->mlCode += nbSeqFirstHalf;
secondSeqStore->ofCode += nbSeqFirstHalf; secondSeqStore->ofCode += nbSeqFirstHalf;
DEBUGLOG(2, "Split into: %u and %u", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart), DEBUGLOG(2, "Split into: %u and %u seqs", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart),
(U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart));
} }
#define NB_SPLIT_POINTS_TO_TEST 2 /* ZSTD_deriveSplitSeqstores()
static int setUpSeqStores(ZSTD_CCtx* zc, * Simple block splitting approach: test a set number of fixed block partitions.
* For now, just a single split down the middle of the block.
*
* Returns 1 if a split was performed, 0 if not.
*/
#define NB_BLOCK_SEGMENTS_TO_TEST 2
static int ZSTD_deriveSplitSeqstores(ZSTD_CCtx* zc,
seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, seqStore_t* firstSeqStore, seqStore_t* secondSeqStore,
U32 nbSeq, U32 srcSize) { U32 nbSeq) {
size_t increment = nbSeq/NB_SPLIT_POINTS_TO_TEST + 1; size_t increment = nbSeq/NB_BLOCK_SEGMENTS_TO_TEST + 1;
size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore);
size_t minEstimatedCSize = estimatedOriginalSize; size_t minEstimatedCSize = estimatedOriginalSize;
size_t minEstimatedCSizeIdx = 0; size_t minEstimatedCSizeIdx = 0;
@ -3152,6 +3180,7 @@ static int setUpSeqStores(ZSTD_CCtx* zc,
return 0; return 0;
} }
DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize);
DEBUGLOG(2, "total nbseq: %u, increment: %zu", nbSeq, increment); DEBUGLOG(2, "total nbseq: %u, increment: %zu", nbSeq, increment);
for (i = increment; i < nbSeq; i += increment) { for (i = increment; i < nbSeq; i += increment) {
/* Check that splitting would actually improve compression. Return 0 if not */ /* Check that splitting would actually improve compression. Return 0 if not */
@ -3159,12 +3188,11 @@ static int setUpSeqStores(ZSTD_CCtx* zc,
size_t estimatedSecondHalfSize; size_t estimatedSecondHalfSize;
size_t estimatedSplitBlocksCompressedSize; size_t estimatedSplitBlocksCompressedSize;
size_t nbSeqFirstHalf = i; size_t nbSeqFirstHalf = i;
splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf); ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf);
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore); estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore);
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore); estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore);
estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize;
DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize);
DEBUGLOG(2, "Estimated split block size is: %zu - split: %zu - %zu", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); DEBUGLOG(2, "Estimated split block size is: %zu - split: %zu - %zu", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
if (estimatedSplitBlocksCompressedSize < minEstimatedCSize) { if (estimatedSplitBlocksCompressedSize < minEstimatedCSize) {
minEstimatedCSizeIdx = i; minEstimatedCSizeIdx = i;
@ -3174,7 +3202,7 @@ static int setUpSeqStores(ZSTD_CCtx* zc,
if (minEstimatedCSizeIdx != 0) { if (minEstimatedCSizeIdx != 0) {
DEBUGLOG(2, "WILL SPLIT"); DEBUGLOG(2, "WILL SPLIT");
splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx); ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx);
return 1; return 1;
} else { } else {
DEBUGLOG(2, "NOT SPLITTING"); DEBUGLOG(2, "NOT SPLITTING");
@ -3182,6 +3210,13 @@ static int setUpSeqStores(ZSTD_CCtx* zc,
} }
} }
/* ZSTD_compressSequences_singleBlock():
* Compresses a seqStore into a block with a block header, into the buffer dst.
*
* Returns the size of that block or a ZSTD error code
*/
/* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well */
static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore, static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
@ -3226,7 +3261,6 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
FORWARD_IF_ERROR(cSize, "RLE compress block failed"); FORWARD_IF_ERROR(cSize, "RLE compress block failed");
DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize); DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize);
} else { } else {
U32 cBlockHeader;
/* Error checking and repcodes update */ /* Error checking and repcodes update */
ZSTD_confirmRepcodesAndEntropyTables(zc); ZSTD_confirmRepcodesAndEntropyTables(zc);
writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
@ -3236,52 +3270,56 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
return cSize; return cSize;
} }
/* ZSTD_compressBlock_splitBlock():
* Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio.
*
* Returns 0 if it would not be advantageous to split the block. Otherwise, returns the combined size
* of the multiple blocks, or a ZSTD error code.
*/
static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { const void* src, size_t srcSize, U32 lastBlock, U32 nbSeq) {
const U32 rleMaxLength = 25;
size_t cSize; size_t cSize;
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
seqStore_t firstHalfSeqStore;
seqStore_t secondHalfSeqStore;
size_t cSizeFirstHalf;
size_t cSizeSecondHalf;
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate); (unsigned)zc->blockState.matchState.nextToUpdate);
/* Attempt block splitting here */
DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u", srcSize, lastBlock); DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u", srcSize, lastBlock);
DEBUGLOG(3, "srcSize: %zu seq store size: %u", srcSize, countSeqStoreLiteralsBytes(&zc->seqStore) + countSeqStoreMatchBytes(&zc->seqStore)); DEBUGLOG(3, "srcSize: %zu seq store size: %zu", srcSize, ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore) + ZSTD_countSeqStoreMatchBytes(&zc->seqStore));
seqStore_t firstHalfSeqStore; /* Attempt block splitting here */
seqStore_t secondHalfSeqStore; if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) {
if (setUpSeqStores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize) != 1) { /* Not advantageous to split blocks */
return 0; return 0;
} }
assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart));
assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart)
== (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart));
size_t cSizeFirstHalf;
size_t cSizeSecondHalf;
size_t literalsBytesFirstHalf = countSeqStoreLiteralsBytes(&firstHalfSeqStore);
size_t srcBytesFirstHalf = literalsBytesFirstHalf + countSeqStoreMatchBytes(&firstHalfSeqStore);
size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf;
DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %u, orig: %u", literalsBytesFirstHalf, countSeqStoreLiteralsBytes(&secondHalfSeqStore), countSeqStoreLiteralsBytes(&zc->seqStore));
DEBUGLOG(3, "match bytes first half: %u match bytes second half: %u, orig: %u", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore));
DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf);
cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */);
{ {
int i; size_t literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(&firstHalfSeqStore);
for (i = 0; i < ZSTD_REP_NUM; ++i) size_t srcBytesFirstHalf = literalsBytesFirstHalf + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore);
zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf;
ip += srcBytesFirstHalf; DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore), ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore));
op += cSizeFirstHalf; DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&secondHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&zc->seqStore));
dstCapacity -= cSizeFirstHalf; DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf);
cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */);
{   /* Perform necessary updates before compressing next block */
    /* cSizeFirstHalf may be an error code rather than a size; bail out before
     * it is used to advance op and shrink dstCapacity. */
    FORWARD_IF_ERROR(cSizeFirstHalf, "Compressing first half of split block failed");
    /* Carry over every rep[] entry: the copy length is a byte count, so scale
     * ZSTD_REP_NUM by the element size (a bare ZSTD_REP_NUM would only cover
     * that many bytes, not ZSTD_REP_NUM entries). */
    ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep,
                ZSTD_REP_NUM * sizeof(zc->blockState.prevCBlock->rep[0]));
    ip += srcBytesFirstHalf;
    op += cSizeFirstHalf;
    dstCapacity -= cSizeFirstHalf;
}
cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */);
DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf);
cSize = cSizeFirstHalf + cSizeSecondHalf;
} }
cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */);
DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf);
cSize = cSizeFirstHalf + cSizeSecondHalf;
return cSize; return cSize;
} }
@ -3311,7 +3349,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
zc->appliedParams.splitBlocks = 1; /* remove */ zc->appliedParams.splitBlocks = 1; /* remove */
if (zc->appliedParams.splitBlocks && nbSeq >= 2) { if (zc->appliedParams.splitBlocks && nbSeq >= 2) {
size_t splitBlocksCompressedSize; size_t splitBlocksCompressedSize;
splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
if (splitBlocksCompressedSize != 0) { if (splitBlocksCompressedSize != 0) {
return splitBlocksCompressedSize; return splitBlocksCompressedSize;
} }

View File

@ -89,7 +89,7 @@ typedef struct {
* Stores Literals Block Type for a super-block in hType, and * Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer. * huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes. * hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct { typedef struct {
symbolEncodingType_e hType; symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
@ -100,7 +100,7 @@ typedef struct {
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer. * fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes. * fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct { typedef struct {
symbolEncodingType_e llType; symbolEncodingType_e llType;
symbolEncodingType_e ofType; symbolEncodingType_e ofType;
@ -115,10 +115,10 @@ typedef struct {
ZSTD_fseCTablesMetadata_t fseMetadata; ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t; } ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy() : /** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the super-block. * Builds entropy for the super-block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy, const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy, ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams, const ZSTD_CCtx_params* cctxParams,

View File

@ -727,7 +727,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
unsigned lastBlock) { unsigned lastBlock) {
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
&zc->blockState.prevCBlock->entropy, &zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy, &zc->blockState.nextCBlock->entropy,
&zc->appliedParams, &zc->appliedParams,

View File

@ -763,7 +763,7 @@ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSiz
static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
DEBUGLOG(2, "ZSTD_copyRawBlock: %u", srcSize); DEBUGLOG(5, "ZSTD_copyRawBlock");
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
if (dst == NULL) { if (dst == NULL) {
if (srcSize == 0) return 0; if (srcSize == 0) return 0;
@ -847,7 +847,6 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
/* Loop on each block */ /* Loop on each block */
while (1) { while (1) {
DEBUGLOG(2, "Remaining dstCap: %u", (size_t)(oend-op));
size_t decodedSize; size_t decodedSize;
blockProperties_t blockProperties; blockProperties_t blockProperties;
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
@ -876,10 +875,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
if (ZSTD_isError(decodedSize)) return decodedSize; if (ZSTD_isError(decodedSize)) return decodedSize;
if (dctx->validateChecksum) if (dctx->validateChecksum)
XXH64_update(&dctx->xxhState, op, decodedSize); XXH64_update(&dctx->xxhState, op, decodedSize);
if (decodedSize != 0) { if (decodedSize != 0)
DEBUGLOG(2, "Decoded: %u", decodedSize);
op += decodedSize; op += decodedSize;
}
assert(ip != NULL); assert(ip != NULL);
ip += cBlockSize; ip += cBlockSize;
remainingSrcSize -= cBlockSize; remainingSrcSize -= cBlockSize;
@ -1192,7 +1189,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
} }
FORWARD_IF_ERROR(rSize, ""); FORWARD_IF_ERROR(rSize, "");
RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
DEBUGLOG(2, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
dctx->decodedSize += rSize; dctx->decodedSize += rSize;
if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
dctx->previousDstEnd = (char*)dst + rSize; dctx->previousDstEnd = (char*)dst + rSize;

View File

@ -775,9 +775,6 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
/* bounds checks : careful of address space overflow in 32-bit mode */ /* bounds checks : careful of address space overflow in 32-bit mode */
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
DEBUGLOG(2, "sequence length: %u", sequenceLength);
DEBUGLOG(2, "oLitEnd: %u iLitEnd: %u match: %u", oLitEnd, iLitEnd, match);
DEBUGLOG(2, "seq ll: %u, condition: %u", sequence.litLength, (size_t)(litLimit - *litPtr));
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
assert(op < op + sequenceLength); assert(op < op + sequenceLength);
assert(oLitEnd < op + sequenceLength); assert(oLitEnd < op + sequenceLength);
@ -853,13 +850,8 @@ size_t ZSTD_execSequence(BYTE* op,
op = oLitEnd; op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */ *litPtr = iLitEnd; /* update for next sequence */
/* Copy Match */ /* Copy Match */
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
/*DEBUGLOG(2, "oLitEnd: %u, oMatchEnd: %u iLitEnd: %u matchPos: %u", oLitEnd, oMatchEnd, iLitEnd, match);
DEBUGLOG(2, "off: %u ml: %u ll: %u", sequence.offset, sequence.matchLength, sequence.litLength);
DEBUGLOG(2, "first condition: %u", (size_t)(oLitEnd - prefixStart));
DEBUGLOG(2, "break condition: %u", (size_t)(oLitEnd - virtualStart));*/
/* offset beyond prefix -> go into extDict */ /* offset beyond prefix -> go into extDict */
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
match = dictEnd + (match - prefixStart); match = dictEnd + (match - prefixStart);
@ -1218,9 +1210,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* last literal segment */ /* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr; { size_t const lastLLSize = litEnd - litPtr;
if (lastLLSize > (size_t)(oend-op)) {
DEBUGLOG(2, "too small lastll");
}
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
if (op != NULL) { if (op != NULL) {
ZSTD_memcpy(op, litPtr, lastLLSize); ZSTD_memcpy(op, litPtr, lastLLSize);
@ -1469,7 +1458,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
* (note: but it could be evaluated from current-lowLimit) * (note: but it could be evaluated from current-lowLimit)
*/ */
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
DEBUGLOG(2, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");