Merge pull request #1060 from facebook/compressImpl
merge bmi2 implementation of encodeSequence into zstd_compress.c
This commit is contained in:
commit
e1c52faace
@ -1582,19 +1582,126 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FUNCTION(fn) fn##_default
|
FORCE_INLINE_TEMPLATE size_t
|
||||||
#define TARGET
|
ZSTD_encodeSequences_body(
|
||||||
#include "zstd_compress_impl.h"
|
void* dst, size_t dstCapacity,
|
||||||
#undef TARGET
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||||
#undef FUNCTION
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||||
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||||
|
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||||
|
{
|
||||||
|
BIT_CStream_t blockStream;
|
||||||
|
FSE_CState_t stateMatchLength;
|
||||||
|
FSE_CState_t stateOffsetBits;
|
||||||
|
FSE_CState_t stateLitLength;
|
||||||
|
|
||||||
|
CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
|
||||||
|
|
||||||
|
/* first symbols */
|
||||||
|
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
||||||
|
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
|
||||||
|
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||||
|
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
||||||
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||||
|
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||||
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||||
|
if (longOffsets) {
|
||||||
|
U32 const ofBits = ofCodeTable[nbSeq-1];
|
||||||
|
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||||
|
if (extraBits) {
|
||||||
|
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
||||||
|
BIT_flushBits(&blockStream);
|
||||||
|
}
|
||||||
|
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
||||||
|
ofBits - extraBits);
|
||||||
|
} else {
|
||||||
|
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
||||||
|
}
|
||||||
|
BIT_flushBits(&blockStream);
|
||||||
|
|
||||||
|
{ size_t n;
|
||||||
|
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
|
||||||
|
BYTE const llCode = llCodeTable[n];
|
||||||
|
BYTE const ofCode = ofCodeTable[n];
|
||||||
|
BYTE const mlCode = mlCodeTable[n];
|
||||||
|
U32 const llBits = LL_bits[llCode];
|
||||||
|
U32 const ofBits = ofCode;
|
||||||
|
U32 const mlBits = ML_bits[mlCode];
|
||||||
|
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
||||||
|
sequences[n].litLength,
|
||||||
|
sequences[n].matchLength + MINMATCH,
|
||||||
|
sequences[n].offset);
|
||||||
|
/* 32b*/ /* 64b*/
|
||||||
|
/* (7)*/ /* (7)*/
|
||||||
|
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
||||||
|
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
|
||||||
|
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
||||||
|
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
|
||||||
|
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
|
||||||
|
BIT_flushBits(&blockStream); /* (7)*/
|
||||||
|
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
||||||
|
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
||||||
|
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
||||||
|
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
||||||
|
if (longOffsets) {
|
||||||
|
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||||
|
if (extraBits) {
|
||||||
|
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
||||||
|
BIT_flushBits(&blockStream); /* (7)*/
|
||||||
|
}
|
||||||
|
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
||||||
|
ofBits - extraBits); /* 31 */
|
||||||
|
} else {
|
||||||
|
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
||||||
|
}
|
||||||
|
BIT_flushBits(&blockStream); /* (7)*/
|
||||||
|
} }
|
||||||
|
|
||||||
|
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
|
||||||
|
FSE_flushCState(&blockStream, &stateMatchLength);
|
||||||
|
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
|
||||||
|
FSE_flushCState(&blockStream, &stateOffsetBits);
|
||||||
|
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
|
||||||
|
FSE_flushCState(&blockStream, &stateLitLength);
|
||||||
|
|
||||||
|
{ size_t const streamSize = BIT_closeCStream(&blockStream);
|
||||||
|
if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
|
||||||
|
return streamSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
ZSTD_encodeSequences_default(
|
||||||
|
void* dst, size_t dstCapacity,
|
||||||
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||||
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||||
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||||
|
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||||
|
{
|
||||||
|
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
||||||
|
CTable_MatchLength, mlCodeTable,
|
||||||
|
CTable_OffsetBits, ofCodeTable,
|
||||||
|
CTable_LitLength, llCodeTable,
|
||||||
|
sequences, nbSeq, longOffsets);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#if DYNAMIC_BMI2
|
#if DYNAMIC_BMI2
|
||||||
|
|
||||||
#define FUNCTION(fn) fn##_bmi2
|
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||||
#define TARGET TARGET_ATTRIBUTE("bmi2")
|
ZSTD_encodeSequences_bmi2(
|
||||||
#include "zstd_compress_impl.h"
|
void* dst, size_t dstCapacity,
|
||||||
#undef TARGET
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||||
#undef FUNCTION
|
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||||
|
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||||
|
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||||
|
{
|
||||||
|
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
||||||
|
CTable_MatchLength, mlCodeTable,
|
||||||
|
CTable_OffsetBits, ofCodeTable,
|
||||||
|
CTable_LitLength, llCodeTable,
|
||||||
|
sequences, nbSeq, longOffsets);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,106 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2018-present, Facebook, Inc.
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* This source code is licensed under both the BSD-style license (found in the
|
|
||||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
||||||
* in the COPYING file in the root directory of this source tree).
|
|
||||||
* You may select, at your option, one of the above-listed licenses.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef FUNCTION
|
|
||||||
# error "FUNCTION(name) must be defined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TARGET
|
|
||||||
# error "TARGET must be defined"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
MEM_STATIC TARGET
|
|
||||||
size_t FUNCTION(ZSTD_encodeSequences)(
|
|
||||||
void* dst, size_t dstCapacity,
|
|
||||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
||||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
|
||||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
|
||||||
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
|
||||||
{
|
|
||||||
BIT_CStream_t blockStream;
|
|
||||||
FSE_CState_t stateMatchLength;
|
|
||||||
FSE_CState_t stateOffsetBits;
|
|
||||||
FSE_CState_t stateLitLength;
|
|
||||||
|
|
||||||
CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
|
|
||||||
|
|
||||||
/* first symbols */
|
|
||||||
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
|
||||||
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
|
|
||||||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
|
||||||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
|
||||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
|
||||||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
|
||||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
|
||||||
if (longOffsets) {
|
|
||||||
U32 const ofBits = ofCodeTable[nbSeq-1];
|
|
||||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
|
||||||
if (extraBits) {
|
|
||||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
|
||||||
BIT_flushBits(&blockStream);
|
|
||||||
}
|
|
||||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
|
||||||
ofBits - extraBits);
|
|
||||||
} else {
|
|
||||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
|
||||||
}
|
|
||||||
BIT_flushBits(&blockStream);
|
|
||||||
|
|
||||||
{ size_t n;
|
|
||||||
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
|
|
||||||
BYTE const llCode = llCodeTable[n];
|
|
||||||
BYTE const ofCode = ofCodeTable[n];
|
|
||||||
BYTE const mlCode = mlCodeTable[n];
|
|
||||||
U32 const llBits = LL_bits[llCode];
|
|
||||||
U32 const ofBits = ofCode;
|
|
||||||
U32 const mlBits = ML_bits[mlCode];
|
|
||||||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
|
||||||
sequences[n].litLength,
|
|
||||||
sequences[n].matchLength + MINMATCH,
|
|
||||||
sequences[n].offset);
|
|
||||||
/* 32b*/ /* 64b*/
|
|
||||||
/* (7)*/ /* (7)*/
|
|
||||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
|
||||||
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
|
|
||||||
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
|
||||||
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
|
|
||||||
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
|
|
||||||
BIT_flushBits(&blockStream); /* (7)*/
|
|
||||||
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
|
||||||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
|
||||||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
|
||||||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
|
||||||
if (longOffsets) {
|
|
||||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
|
||||||
if (extraBits) {
|
|
||||||
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
|
||||||
BIT_flushBits(&blockStream); /* (7)*/
|
|
||||||
}
|
|
||||||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
|
||||||
ofBits - extraBits); /* 31 */
|
|
||||||
} else {
|
|
||||||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
|
||||||
}
|
|
||||||
BIT_flushBits(&blockStream); /* (7)*/
|
|
||||||
} }
|
|
||||||
|
|
||||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
|
|
||||||
FSE_flushCState(&blockStream, &stateMatchLength);
|
|
||||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
|
|
||||||
FSE_flushCState(&blockStream, &stateOffsetBits);
|
|
||||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
|
|
||||||
FSE_flushCState(&blockStream, &stateLitLength);
|
|
||||||
|
|
||||||
{ size_t const streamSize = BIT_closeCStream(&blockStream);
|
|
||||||
if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
|
|
||||||
return streamSize;
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user