From 38b75ddeb2ed8463f09cef43ea0270ae1e79c2cb Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Sun, 24 Jul 2016 15:35:59 +0200
Subject: [PATCH] removed special case all-1 huffman distribution

---
 lib/common/entropy_common.c | 45 ++++++++++---------------
 lib/compress/huf_compress.c | 67 +++++++++++--------------------------
 zstd_compression_format.md  | 42 +++++++----------------
 3 files changed, 50 insertions(+), 104 deletions(-)

diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index 3136534b..4b79324b 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -38,10 +38,9 @@
 #include "mem.h"
 #include "error_private.h"       /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
-#include "fse.h"   /* FSE_isError, FSE_getErrorName */
+#include "fse.h"
 #define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
-#include "huf.h"   /* HUF_isError, HUF_getErrorName */
-
+#include "huf.h"
 
 
 /*-****************************************
@@ -90,7 +89,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
     threshold = 1<<nbBits;
     nbBits++;
 
-    while ((remaining>1) && (charnum<=*maxSVPtr)) {
+    while ((remaining>1) & (charnum<=*maxSVPtr)) {
         if (previous0) {
             unsigned n0 = charnum;
             while ((bitStream & 0xFFFF) == 0xFFFF) {
@@ -115,10 +114,9 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
                 ip += bitCount>>3;
                 bitCount &= 7;
                 bitStream = MEM_readLE32(ip) >> bitCount;
-            }
-            else
+            } else {
                 bitStream >>= 2;
-        }
+        }   }
         {   short const max = (short)((2*threshold-1)-remaining);
             short count;
 
@@ -148,12 +146,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
                 ip = iend - 4;
             }
             bitStream = MEM_readLE32(ip) >> (bitCount & 31);
-    }   }   /* while ((remaining>1) && (charnum<=*maxSVPtr)) */
-    if (remaining != 1) return ERROR(GENERIC);
+    }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
+    if (remaining != 1) return ERROR(corruption_detected);
     *maxSVPtr = charnum-1;
 
     ip += (bitCount+7)>>3;
-    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
     return ip-istart;
 }
 
@@ -162,7 +159,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
     Read compact Huffman tree, saved by HUF_writeCTable().
     `huffWeight` is destination buffer.
     @return : size read from `src` , or an error Code .
-    Note : Needed by HUF_readCTable() and HUF_readDTableXn() .
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
 */
 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      U32* nbSymbolsPtr, U32* tableLogPtr,
@@ -176,22 +173,16 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128) {  /* special header */
-        if (iSize >= (242)) {  /* RLE */
-            static U32 l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
-            oSize = l[iSize-242];
-            memset(huffWeight, 1, hwSize);
-            iSize = 0;
-        } else {   /* Incompressible */
-            oSize = iSize - 127;
-            iSize = ((oSize+1)/2);
-            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-            if (oSize >= hwSize) return ERROR(corruption_detected);
-            ip += 1;
-            {   U32 n;
-                for (n=0; n<oSize; n+=2) {
-                    huffWeight[n]   = ip[n/2] >> 4;
-                    huffWeight[n+1] = ip[n/2] & 15;
-    }   }   }   }
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
     else  {   /* header compressed with FSE (normal case) */
         if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
         oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index e50b2979..86a53c2e 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -105,68 +105,39 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
 {
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
-    U32 n;
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
     BYTE* op = (BYTE*)dst;
-    size_t size;
+    U32 n;
 
      /* check conditions */
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX + 1)
-        return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
 
     /* convert to weight */
     bitsToWeight[0] = 0;
-    for (n=1; n<=huffLog; n++)
+    for (n=1; n<huffLog+1; n++)
         bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
         huffWeight[n] = bitsToWeight[CTable[n].nbBits];
 
-    size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);   /* don't need last symbol stat : implied */
-    if (HUF_isError(size)) return size;
-    if (size >= 128) return ERROR(GENERIC);   /* should never happen, since maxSymbolValue <= 255 */
-    if ((size <= 1) || (size >= maxSymbolValue/2)) {
-        if (size==1) {  /* RLE */
-            /* only possible case : series of 1 (because there are at least 2) */
-            /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
-            BYTE code;
-            switch(maxSymbolValue)
-            {
-            case 1: code = 0; break;
-            case 2: code = 1; break;
-            case 3: code = 2; break;
-            case 4: code = 3; break;
-            case 7: code = 4; break;
-            case 8: code = 5; break;
-            case 15: code = 6; break;
-            case 16: code = 7; break;
-            case 31: code = 8; break;
-            case 32: code = 9; break;
-            case 63: code = 10; break;
-            case 64: code = 11; break;
-            case 127: code = 12; break;
-            case 128: code = 13; break;
-            default : return ERROR(corruption_detected);
-            }
-            op[0] = (BYTE)(255-13 + code);
-            return 1;
-        }
-         /* Not compressible */
-        if (maxSymbolValue > (241-128)) return ERROR(GENERIC);   /* not implemented (not possible with current format) */
-        if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
-        op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
-        huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
-        for (n=0; n<maxSymbolValue; n+=2)
-            op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
-        return ((maxSymbolValue+1)/2) + 1;
-    }
+    {   size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);
+        if (FSE_isError(size)) return size;
+        if ((size>1) & (size < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)size;
+            return size+1;
+    }   }
+
+    /* raw values */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
 
-    /* normal header case */
-    op[0] = (BYTE)size;
-    return size+1;
 }
 
 
-
 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
 {
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
diff --git a/zstd_compression_format.md b/zstd_compression_format.md
index 1aa4268b..148b8ec7 100644
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@@ -565,21 +565,12 @@ Therefore, `maxBits = 4` and `weight[5] = 1`.
 This is a single byte value (0-255),
 which tells how to decode the list of weights.
 
-- if headerByte >= 242 : this is one of 14 pre-defined weight distributions :
-
-| value    |242|243|244|245|246|247|248|249|250|251|252|253|254|255|
-| -------- |---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-| Nb of 1s | 1 | 2 | 3 | 4 | 7 | 8 | 15| 16| 31| 32| 63| 64|127|128|
-|Complement| 1 | 2 | 1 | 4 | 1 | 8 | 1 | 16| 1 | 32| 1 | 64| 1 |128|
-
-_Note_ : complement is found by using "join to nearest power of 2" rule.
-
 - if headerByte >= 128 : this is a direct representation,
   where each weight is written directly as a 4 bits field (0-15).
   The full representation occupies `((nbSymbols+1)/2)` bytes,
   meaning it uses a last full byte even if nbSymbols is odd.
   `nbSymbols = headerByte - 127;`.
-  Note that maximum nbSymbols is 241-127 = 114.
+  Note that the maximum nbSymbols is 255-127 = 128.
   A larger serie must necessarily use FSE compression.
 
 - if headerByte < 128 :
@@ -594,20 +585,20 @@ sharing a single distribution table.
 
 To decode an FSE bitstream, it is necessary to know its compressed size.
 Compressed size is provided by `headerByte`.
-It's also necessary to know its maximum decompressed size,
+It's also necessary to know its _maximum possible_ decompressed size,
 which is `255`, since literal values span from `0` to `255`,
 and last symbol value is not represented.
 
 An FSE bitstream starts by a header, describing probabilities distribution.
 It will create a Decoding Table.
 Table must be pre-allocated, which requires to support a maximum accuracy.
-For a list of huffman weights, recommended maximum is 7 bits.
+For a list of huffman weights, the maximum accuracy is 7 bits.
 
 FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format),
 and so is [FSE bitstream](#bitstream).
 The main difference is that Huffman header compression uses 2 states,
 which share the same FSE distribution table.
-Bitstream contains only FSE symbols, there are no interleaved "raw bitfields".
+Bitstream contains only FSE symbols (no interleaved "raw bitfields").
 The number of symbols to decode is discovered
 by tracking bitStream overflow condition.
 When both states have overflowed the bitstream, end is reached.
@@ -616,16 +607,12 @@ When both states have overflowed the bitstream, end is reached.
 ##### Conversion from weights to huffman prefix codes
 
 All present symbols shall now have a `weight` value.
-Symbols are sorted by weight.
-Symbols with a weight of zero are removed.
-Within same weight, symbols keep natural order.
-Starting from lowest weight,
-symbols are being allocated to a `range`.
-A `weight` directly represents a `range`,
-following the formulae : `range = weight ? 1 << (weight-1) : 0 ;`
-Similarly, it is possible to transform weights into nbBits :
+It is possible to transform weights into nbBits, using this formula :
 `nbBits = nbBits ? maxBits + 1 - weight : 0;` .
 
+Symbols are sorted by weight. Within the same weight, symbols keep their natural order.
+Symbols with a weight of zero are removed.
+Then, starting from the lowest weight, prefix codes are distributed in order.
 
 __Example__ :
 Let's presume the following list of weights has been decoded :
@@ -640,8 +627,6 @@ it gives the following distribution :
 | Literal      |  3  |  4  |  5  |  2  |  1  |   0  |
 | ------------ | --- | --- | --- | --- | --- | ---- |
 | weight       |  0  |  1  |  1  |  2  |  3  |   4  |
-| range        |  0  |  1  |  1  |  2  |  4  |   8  |
-| table entries| N/A |  0  |  1  | 2-3 | 4-7 | 8-15 |
 | nb bits      |  0  |  4  |  4  |  3  |  2  |   1  |
 | prefix codes | N/A | 0000| 0001| 001 | 01  |   1  |
 
@@ -665,15 +650,14 @@ header only provides compressed and regenerated size of all 4 streams combined.
 In order to properly decode the 4 streams,
 it's necessary to know the compressed and regenerated size of each stream.
 
-Regenerated size is easiest :
-each stream has a size of `(totalSize+3)/4`,
-except the last one, which is up to 3 bytes smaller, to reach `totalSize`.
+The regenerated size of each stream can be calculated as `(totalSize+3)/4`,
+except for the last one, which can be up to 3 bytes smaller, to reach `totalSize`.
 
-Compressed size must be provided explicitly : in the 4-streams variant,
+Compressed size is provided explicitly : in the 4-streams variant,
 bitstreams are preceded by 3 unsigned Little Endian 16-bits values.
 Each value represents the compressed size of one stream, in order.
 The last stream size is deducted from total compressed size
-and from already known stream sizes :
+and from previously decoded stream sizes :
 `stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize;`
 
 ##### Bitstreams read and decode
@@ -687,7 +671,7 @@ This is detected by a final bit flag :
 the highest bit of latest byte is a final-bit-flag.
 Consequently, a last byte of `0` is not possible.
 And the final-bit-flag itself is not part of the useful bitstream.
-Hence, the last byte contain between 0 and 7 useful bits.
+Hence, the last byte contains between 0 and 7 useful bits.
 
 Starting from the end,
 it's possible to read the bitstream in a little-endian fashion,