Merge remote-tracking branch 'refs/remotes/Cyan4973/dev060' into dev

# Conflicts:
#	lib/zstd_compress.c
dev
inikep 2016-03-04 20:10:09 +01:00
commit 01060bc477
12 changed files with 316 additions and 300 deletions

3
.gitignore vendored
View File

@ -20,6 +20,7 @@
# Visual solution files
*.suo
*.user
*.VC.db
# Build results
[Dd]ebug/
@ -49,4 +50,4 @@ _zstdbench
lib/zstd_opt_LZ5.c
lib/zstd_opt_llen.c
lib/zstd_opt_nollen.c
lib/zstd_opt_nollen.c

View File

@ -628,12 +628,12 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
/*-**************************************************************
* FSE Compression Code
****************************************************************/
/*!
FSE_CTable is a variable size structure which contains :
U16 tableLog;
U16 maxSymbolValue;
U16 nextStateNumber[1 << tableLog]; // This size is variable
FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
/*! FSE_sizeof_CTable() :
FSE_CTable is a variable size structure which contains :
`U16 tableLog;`
`U16 maxSymbolValue;`
`U16 nextStateNumber[1 << tableLog];` // This size is variable
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
Allocation is manual, since C standard does not support variable-size structures.
*/
@ -654,10 +654,7 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
return (FSE_CTable*)malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct)
{
free(ct);
}
void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@ -888,31 +885,32 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
const FSE_CTable* ct, const unsigned fast)
{
const BYTE* const istart = (const BYTE*) src;
const BYTE* ip;
const BYTE* const iend = istart + srcSize;
const BYTE* ip=iend;
size_t errorCode;
BIT_CStream_t bitC;
FSE_CState_t CState1, CState2;
/* init */
if (srcSize <= 2) return 0;
errorCode = BIT_initCStream(&bitC, dst, dstSize);
if (FSE_isError(errorCode)) return 0;
FSE_initCState(&CState1, ct);
CState2 = CState1;
ip=iend;
#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
/* join to even */
if (srcSize & 1) {
FSE_initCState2(&CState1, ct, *--ip);
FSE_initCState2(&CState2, ct, *--ip);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
FSE_FLUSHBITS(&bitC);
} else {
FSE_initCState2(&CState2, ct, *--ip);
FSE_initCState2(&CState1, ct, *--ip);
}
/* join to mod 4 */
srcSize -= 2;
if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
FSE_encodeSymbol(&bitC, &CState2, *--ip);
FSE_encodeSymbol(&bitC, &CState1, *--ip);
@ -1106,24 +1104,25 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
/* tail */
/* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
while (1) {
if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
break;
if (op>(omax-2)) return ERROR(dstSize_tooSmall);
*op++ = FSE_GETSYMBOL(&state1);
if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
*op++ = FSE_GETSYMBOL(&state2);
break;
}
if (op>(omax-2)) return ERROR(dstSize_tooSmall);
*op++ = FSE_GETSYMBOL(&state2);
}
/* end ? */
if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
return op-ostart;
if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
*op++ = FSE_GETSYMBOL(&state1);
break;
} }
if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */
return ERROR(corruption_detected);
return op-ostart;
}

164
lib/fse.h
View File

@ -1,7 +1,7 @@
/* ******************************************************************
FSE : Finite State Entropy coder
header file
Copyright (C) 2013-2015, Yann Collet.
FSE : Finite State Entropy codec
Public Prototypes declaration
Copyright (C) 2013-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -30,7 +30,6 @@
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
#ifndef FSE_H
#define FSE_H
@ -40,8 +39,8 @@ extern "C" {
#endif
/* *****************************************
* Includes
/*-*****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
@ -49,32 +48,32 @@ extern "C" {
/*-****************************************
* FSE simple functions
******************************************/
size_t FSE_compress(void* dst, size_t maxDstSize,
const void* src, size_t srcSize);
size_t FSE_decompress(void* dst, size_t maxDstSize,
const void* cSrc, size_t cSrcSize);
/*!
FSE_compress():
/*! FSE_compress() :
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
return : size of compressed data (<= maxDstSize)
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
@return : size of compressed data (<= dstCapacity).
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
*/
size_t FSE_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
FSE_decompress():
/*! FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'maxDstSize'.
return : size of regenerated data (<= maxDstSize)
or an error code, which can be tested using FSE_isError()
into already allocated destination buffer 'dst', of size 'dstCapacity'.
@return : size of regenerated data (<= maxDstSize),
or an error code, which can be tested using FSE_isError() .
** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
Why ? : making this distinction requires a header.
Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
size_t FSE_decompress(void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize);
/* *****************************************
/*-*****************************************
* Tool functions
******************************************/
size_t FSE_compressBound(size_t size); /* maximum compressed size */
@ -84,14 +83,13 @@ unsigned FSE_isError(size_t code); /* tells if a return value is an er
const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/* *****************************************
/*-*****************************************
* FSE advanced functions
******************************************/
/*!
FSE_compress2():
/*! FSE_compress2() :
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
return : size of compressed data
@return : size of compressed data
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
@ -99,7 +97,7 @@ FSE_compress2():
size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/* *****************************************
/*-*****************************************
* FSE detailed API
******************************************/
/*!
@ -122,65 +120,56 @@ or to save and provide normalized distribution using external method.
/* *** COMPRESSION *** */
/*!
FSE_count():
Provides the precise count of each byte within a table 'count'
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
*maxSymbolValuePtr will be updated if detected smaller than initial value.
@return : the count of the most frequent symbol (which is not identified)
if return == srcSize, there is only one symbol.
Can also return an error code, which can be tested with FSE_isError() */
/*! FSE_count():
Provides the precise count of each byte within a table 'count'.
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
*maxSymbolValuePtr will be updated if detected smaller than initial value.
@return : the count of the most frequent symbol (which is not identified).
if return == srcSize, there is only one symbol.
Can also return an error code, which can be tested with FSE_isError(). */
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/*!
FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
return : recommended tableLog (necessarily <= initial 'tableLog') */
/*! FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@return : recommended tableLog (necessarily <= initial 'tableLog') */
unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
/*!
FSE_normalizeCount():
normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_normalizeCount():
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
/*!
FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
Typically useful for allocation purpose. */
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
Typically useful for allocation purpose. */
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
/*!
FSE_writeNCount():
Compactly save 'normalizedCounter' into 'buffer'.
return : size of the compressed table
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_writeNCount():
Compactly save 'normalizedCounter' into 'buffer'.
@return : size of the compressed table,
or an errorCode, which can be tested using FSE_isError(). */
size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*!
Constructor and Destructor of type FSE_CTable
Note that its size depends on 'tableLog' and 'maxSymbolValue' */
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
void FSE_freeCTable (FSE_CTable* ct);
/*!
FSE_buildCTable():
Builds @ct, which must be already allocated, using FSE_createCTable()
return : 0
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_buildCTable():
Builds `ct`, which must be already allocated, using FSE_createCTable().
@return : 0, or an errorCode, which can be tested using FSE_isError() */
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*!
FSE_compress_usingCTable():
Compress @src using @ct into @dst which must be already allocated
return : size of compressed data (<= @dstCapacity)
or 0 if compressed data could not fit into @dst
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_compress_usingCTable():
Compress `src` using `ct` into `dst` which must be already allocated.
@return : size of compressed data (<= `dstCapacity`),
or 0 if compressed data could not fit into `dst`,
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
/*!
@ -221,7 +210,7 @@ If there is an error, both functions will return an ErrorCode (which can be test
'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
The function returns the size of compressed data (without header), necessarily <= @dstCapacity.
The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
If it returns '0', compressed data could not fit into 'dst'.
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
*/
@ -229,34 +218,29 @@ If there is an error, the function will return an ErrorCode (which can be tested
/* *** DECOMPRESSION *** */
/*!
FSE_readNCount():
Read compactly saved 'normalizedCounter' from 'rBuffer'.
return : size read from 'rBuffer'
or an errorCode, which can be tested using FSE_isError()
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
/*! FSE_readNCount():
Read compactly saved 'normalizedCounter' from 'rBuffer'.
@return : size read from 'rBuffer',
or an errorCode, which can be tested using FSE_isError().
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
/*!
Constructor and Destructor of type FSE_DTable
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
FSE_DTable* FSE_createDTable(unsigned tableLog);
void FSE_freeDTable(FSE_DTable* dt);
/*!
FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable()
return : 0,
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable().
return : 0, or an errorCode, which can be tested using FSE_isError() */
size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*!
FSE_decompress_usingDTable():
Decompress compressed source @cSrc of size @cSrcSize using @dt
into @dst which must be already allocated.
return : size of regenerated data (necessarily <= @dstCapacity)
or an errorCode, which can be tested using FSE_isError() */
/*! FSE_decompress_usingDTable():
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
into `dst` which must be already allocated.
@return : size of regenerated data (necessarily <= `dstCapacity`),
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
/*!
@ -281,9 +265,9 @@ This is performed by the function FSE_buildDTable().
The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
If there is an error, the function will return an error code, which can be tested using FSE_isError().
'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
'cSrcSize' must be strictly correct, otherwise decompression will fail.
FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
`cSrcSize` must be strictly correct, otherwise decompression will fail.
FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
*/

View File

@ -810,91 +810,92 @@ size_t HUF_decompress4X2_usingDTable(
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
@ -1381,7 +1382,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable is too small */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; maxW && rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
/* Get start index of each weight */
{
@ -1559,95 +1560,97 @@ size_t HUF_decompress4X6_usingDTable(
const void* cSrc, size_t cSrcSize,
const U32* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const U32 dtLog = DTable[0];
const void* const ddPtr = DTable+1;
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
size_t errorCode;
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode;
const U32 dtLog = DTable[0];
const void* const ddPtr = DTable+1;
const HUF_DDescX6* dd = (const HUF_DDescX6*)ddPtr;
const void* const dsPtr = DTable + 1 + ((size_t)1<<(dtLog-1));
const HUF_DSeqX6* ds = (const HUF_DSeqX6*)dsPtr;
size_t errorCode;
/* 16-64 symbols per loop (4-16 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode;
errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode;
/* 16-64 symbols per loop (4-16 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (op3 <= opStart4) && (endSignal==BIT_DStream_unfinished) && (op4<=(oend-16)) ; ) {
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_1(op1, &bitD1);
HUF_DECODE_SYMBOLX6_1(op2, &bitD2);
HUF_DECODE_SYMBOLX6_1(op3, &bitD3);
HUF_DECODE_SYMBOLX6_1(op4, &bitD4);
HUF_DECODE_SYMBOLX6_2(op1, &bitD1);
HUF_DECODE_SYMBOLX6_2(op2, &bitD2);
HUF_DECODE_SYMBOLX6_2(op3, &bitD3);
HUF_DECODE_SYMBOLX6_2(op4, &bitD4);
HUF_DECODE_SYMBOLX6_0(op1, &bitD1);
HUF_DECODE_SYMBOLX6_0(op2, &bitD2);
HUF_DECODE_SYMBOLX6_0(op3, &bitD3);
HUF_DECODE_SYMBOLX6_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX6(op1, &bitD1, opStart2, DTable, dtLog);
HUF_decodeStreamX6(op2, &bitD2, opStart3, DTable, dtLog);
HUF_decodeStreamX6(op3, &bitD3, opStart4, DTable, dtLog);
HUF_decodeStreamX6(op4, &bitD4, oend, DTable, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}

View File

@ -618,6 +618,14 @@ static void ZDICT_countEStats(EStats_ress_t esr,
litlengthCount[*bytePtr]++;
}
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
{
unsigned u;
size_t max=0;
for (u=0; u<nbFiles; u++)
if (max < fileSizes[u]) max = fileSizes[u];
return max;
}
#define OFFCODE_MAX 18 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
@ -653,7 +661,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
goto _cleanup;
}
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, dictBufferSize + 15 KB);
params = ZSTD_getParams(compressionLevel, MAX(dictBufferSize, ZDICT_maxSampleSize(fileSizes, nbFiles)));
params.strategy = ZSTD_greedy;
ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params);
@ -800,7 +808,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
unsigned selectivity = params.selectivityLevel;
unsigned compressionLevel = params.compressionLevel;
size_t targetDictSize = maxDictSize - g_provision_entropySize;
size_t targetDictSize = maxDictSize;
size_t sBuffSize;
size_t dictSize = 0;
@ -859,8 +867,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
dictContentSize = (U32)ZDICT_fastSampling((char*)dictBuffer + g_provision_entropySize,
targetDictSize, samplesBuffer, sBuffSize);
dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
samplesBuffer, sBuffSize);
}
/* dictionary header */

View File

@ -60,8 +60,8 @@ extern "C" {
* Version
***************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 5 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_MINOR 6 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber (void);

View File

@ -145,6 +145,8 @@ static unsigned ZSTD_highbit(U32 val);
void ZSTD_validateParams(ZSTD_parameters* params)
{
const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
const U32 searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
const U32 searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1;
/* validate params */
if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */
@ -153,7 +155,7 @@ void ZSTD_validateParams(ZSTD_parameters* params)
CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMP(params->hashLog3, ZSTD_HASHLOG3_MIN, ZSTD_HASHLOG3_MAX);
CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
CLAMP(params->searchLength, searchLengthMin, searchLengthMax);
CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt;
@ -233,7 +235,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
{
const U32 contentLog = (srcCCtx->params.strategy == ZSTD_fast) ? 1 : srcCCtx->params.contentLog;
const size_t tableSpace = ((1 << contentLog) + (1 << srcCCtx->params.hashLog) + (1 << srcCCtx->params.hashLog3)) * sizeof(U32);
if (srcCCtx->stage!=0) return ERROR(stage_wrong);
ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params);
@ -547,17 +549,11 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
op += cSize;
}
#if ZSTD_OPT_DEBUG >= 5
if (nbSeq >= 32768)
printf("ERROR: nbSeq=%d\n", (int)nbSeq);
#endif
/* Sequences Header */
if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall);
if (nbSeq < 128) *op++ = (BYTE)nbSeq;
else {
op[0] = (BYTE)((nbSeq>>8) + 128); op[1] = (BYTE)nbSeq; op+=2;
}
if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
if (nbSeq==0) goto _check_compressibility;
/* dumps : contains rests of large lengths */

View File

@ -496,9 +496,14 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen
/* SeqHead */
*nbSeq = *ip++;
if (*nbSeq==0) return 1;
if (*nbSeq >= 128)
*nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
if (*nbSeq >= 0x7F) {
if (*nbSeq == 0xFF)
*nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
else
*nbSeq = ((nbSeq[0]-0x80)<<8) + *ip++;
}
/* FSE table descriptors */
LLtype = *ip >> 6;
Offtype = (*ip >> 4) & 3;
MLtype = (*ip >> 2) & 3;

View File

@ -51,7 +51,7 @@ extern "C" {
/*-*************************************
* Constants
***************************************/
#define ZSTD_MAGICNUMBER 0xFD2FB525 /* v0.5 */
#define ZSTD_MAGICNUMBER 0xFD2FB526 /* v0.6 */
/*-*************************************

3
programs/.gitignore vendored
View File

@ -29,3 +29,6 @@ datagen
# Visual solution files
*.suo
*.user
# Default dictionary name
dictionary

View File

@ -259,7 +259,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
srcBuffer, fileSizes, nbFiles,
params);
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s", ZDICT_getErrorName(dictSize)); /* should not happen */
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _cleanup;
}

View File

@ -171,6 +171,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
if (ZSTD_isError(result)) goto _output_error;
if (result != COMPRESSIBLE_NOISE_LENGTH) goto _output_error;
DISPLAYLEVEL(4, "OK \n");
{
@ -195,6 +196,22 @@ static int basicUnitTests(U32 seed, double compressibility)
if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error;
DISPLAYLEVEL(4, "OK \n");
/* All zeroes test (#137 verif) */
#define ZEROESLENGTH 100
DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
memset(CNBuffer, 0, ZEROESLENGTH);
result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1);
if (ZSTD_isError(result)) goto _output_error;
cSize = result;
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100);
DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize);
if (ZSTD_isError(result)) goto _output_error;
if (result != ZEROESLENGTH) goto _output_error;
DISPLAYLEVEL(4, "OK \n");
/* Dictionary and Duplication tests */
{
ZSTD_CCtx* ctxOrig = ZSTD_createCCtx();