diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index cc54d772..7dc843e3 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -67,9 +67,10 @@ #define ZSTD_OPT_NUM (1<<12) #define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM-0) /* number of repcodes to check by the optimal parser */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0f79fc85..e218b637 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2205,8 +2205,13 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ ZSTD_resetSeqStore(&(zc->seqStore)); + if (current > zc->nextToUpdate + 384) + zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */ blockCompressor(zc, src, srcSize); return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 3a1e9e19..f30cdbfd 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -276,7 +276,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( /* save best solution */ if (currentMl > bestLength) { bestLength = currentMl; - matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex3; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; matches[mnum].len = (U32)currentMl; mnum++; if (currentMl > ZSTD_OPT_NUM) goto update; @@ -321,7 +321,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; matches[mnum].len = (U32)matchLength; mnum++; if (matchLength > ZSTD_OPT_NUM) break; @@ -452,11 +452,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, litlen = (U32)(ip - anchor); /* check repCode */ - { U32 i; - for (i=(ip == anchor); i 0) && (repCur < (S32)(ip-prefixStart)) + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { + mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen); if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; @@ -491,7 +492,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, best_mlen = matches[u].len; ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), 
matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ mlen++; @@ -528,26 +529,27 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, continue; mlen = opt[cur].mlen; - if (opt[cur].off >= ZSTD_REP_NUM) { + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { opt[cur].rep[2] = opt[cur-mlen].rep[1]; opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = minMatch; - { U32 i; - for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { @@ -600,12 +602,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -652,27 +654,28 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ litLength = (U32)(ip - anchor); // ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); - if (offset >= ZSTD_REP_NUM) { + if (offset > ZSTD_REP_MOVE_OPT) { rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE; + rep[0] = offset - 
ZSTD_REP_MOVE_OPT; + offset--; } else { if (offset != 0) { - best_off = rep[offset]; + best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); if (offset != 1) rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = best_off; } - if ((litLength == 0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */ - if ((litLength == 0) & (offset<=2)) offset--; + if ((litLength==0) & (offset==0)) { ZSTD_LOG_ENCODE("ERROR (litLength==0) & (offset==0)\n"); }; + if (litLength==0) offset--; } ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); #if ZSTD_OPT_DEBUG >= 5 U32 ml2; - if (offset >= ZSTD_REP_NUM) - ml2 = (U32)ZSTD_count(ip, ip-(offset-ZSTD_REP_MOVE), iend); + if (offset+1 > ZSTD_REP_MOVE_OPT) + ml2 = (U32)ZSTD_count(ip, ip-(offset+1-ZSTD_REP_MOVE_OPT), iend); else ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend); if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { @@ -748,12 +751,13 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, opt[0].litlen = (U32)(ip - anchor); /* check repCode */ - { U32 i; - for (i = (ip==anchor); i 0 && repCur <= (S32)current) && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { /* repcode detected we should take it */ @@ -801,7 +805,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -836,27 +840,28 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, continue; mlen = opt[cur].mlen; - if (opt[cur].off >= ZSTD_REP_NUM) { + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { opt[cur].rep[2] = opt[cur-mlen].rep[1]; opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } else { opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? 
(opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); } ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); best_mlen = 0; - { U32 i; - for (i = (opt[cur].mlen != 1); i 0 && repCur <= (S32)(current+cur)) && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { /* repcode detected */ @@ -914,12 +919,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); } // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); @@ -966,27 +971,29 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ litLength = (U32)(ip - anchor); // ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); - if (offset >= ZSTD_REP_NUM) { + if (offset > ZSTD_REP_MOVE_OPT) { rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; } else { if (offset != 0) { - best_off = rep[offset]; + best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); if (offset != 1) rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = best_off; } - if ((litLength==0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */ - if ((litLength==0) & (offset<=2)) offset --; + + if ((litLength==0) & (offset==0)) { ZSTD_LOG_ENCODE("ERROR (litLength==0) & (offset==0)\n"); }; + if (litLength==0) offset--; } ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); #if ZSTD_OPT_DEBUG >= 5 U32 ml2; - if (offset >= ZSTD_REP_NUM) { - best_off = offset - ZSTD_REP_MOVE; + if (offset+1 > ZSTD_REP_MOVE_OPT) { + best_off = offset+1 - ZSTD_REP_MOVE_OPT; if (best_off > (size_t)(ip - prefixStart)) { const BYTE* match = dictEnd - (best_off - (ip - prefixStart)); ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); diff --git a/programs/bench.c b/programs/bench.c index 2364348d..c85ab2ed 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -133,14 +133,6 @@ typedef struct size_t resSize; } blockParam_t; -typedef struct -{ - double ratio; - size_t cSize; - double cSpeed; - double dSpeed; -} benchResult_t; - #define MIN(a,b) ((a)<(b) ? 
(a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) @@ -148,7 +140,7 @@ typedef struct static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, const size_t* fileSizes, U32 nbFiles, - const void* dictBuffer, size_t dictBufferSize, benchResult_t *result) + const void* dictBuffer, size_t dictBufferSize) { size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; size_t const avgSize = MIN(g_blockSize, (srcSize / nbFiles)); @@ -332,10 +324,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, #endif } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ - result->ratio = ratio; - result->cSize = cSize; - result->cSpeed = (double)srcSize / fastestC; - result->dSpeed = (double)srcSize / fastestD; + if (g_displayLevel == 1) { + double cSpeed = (double)srcSize / fastestC; + double dSpeed = (double)srcSize / fastestD; + if (g_additionalParam) + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); + else + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ @@ -372,7 +368,6 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, const size_t* fileSizes, unsigned nbFiles, const void* dictBuffer, size_t dictBufferSize) { - benchResult_t result; int l; const char* pch = strrchr(displayName, '\\'); /* Windows */ @@ -381,8 +376,6 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, SET_HIGH_PRIORITY; - memset(&result, 0, sizeof(result)); - if (g_displayLevel == 1 && !g_additionalParam) DISPLAY("bench %s %s: input %u bytes, %i iterations, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbIterations, (U32)(g_blockSize>>10)); @@ -392,13 +385,8 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, BMK_benchMem(srcBuffer, benchedSize, displayName, l, fileSizes, nbFiles, - dictBuffer, dictBufferSize, &result); - if (g_displayLevel == 1) { - if (g_additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam); - else - DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); - } } + dictBuffer, dictBufferSize); + } } diff --git a/zstd_compression_format.md b/zstd_compression_format.md index f519c558..a04730be 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -403,8 +403,8 @@ in order to properly allocate destination buffer. See [`Data_Block`](#the-structure-of-data_block) for more details. A compressed block consists of 2 sections : -- [Literals section](#literals-section) -- [Sequences section](#sequences-section) +- [`Literals_Section`](#literals_section) +- [`Sequences_Section`](#sequences_section) ### Prerequisites To decode a compressed block, the following elements are necessary : @@ -415,95 +415,96 @@ To decode a compressed block, the following elements are necessary : (literals, litLength, matchLength, offset). -### Literals section +### `Literals_Section` During sequence phase, literals will be entangled with match copy operations. All literals are regrouped in the first part of the block. 
They can be decoded first, and then copied during sequence operations, or they can be decoded on the flow, as needed by sequence commands. -| Literals section header | [Huffman Tree Description] | Stream1 | [Stream2] | [Stream3] | [Stream4] | -| ----------------------- | -------------------------- | ------- | --------- | --------- | --------- | +| `Literals_Section_Header` | [`Huffman_Tree_Description`] | Stream1 | [Stream2] | [Stream3] | [Stream4] | +| ------------------------- | ---------------------------- | ------- | --------- | --------- | --------- | Literals can be stored uncompressed or compressed using Huffman prefix codes. When compressed, an optional tree description can be present, followed by 1 or 4 streams. -#### Literals section header +#### `Literals_Section_Header` Header is in charge of describing how literals are packed. It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using little-endian convention. -| Literals Block Type | sizes format | regenerated size | [compressed size] | -| ------------------- | ------------ | ---------------- | ----------------- | -| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits | +| `Literals_Block_Type` | `Size_Format` | `Regenerated_Size` | [`Compressed_Size`] | +| --------------------- | ------------- | ------------------ | ----------------- | +| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits | In this representation, bits on the left are smallest bits. -__Literals Block Type__ : +__`Literals_Block_Type`__ This field uses 2 lowest bits of first byte, describing 4 different block types : -| Value | 0 | 1 | 2 | 3 | -| ------------------- | --- | --- | ---------- | ----------- | -| Literals Block Type | Raw | RLE | Compressed | RepeatStats | +| Value | 0 | 1 | 2 | 3 | +| --------------------- | -------------------- | -------------------- | --------------------------- | ----------------------------- | +| `Literals_Block_Type` | `Raw_Literals_Block` | `RLE_Literals_Block` | `Compressed_Literals_Block` | `Repeat_Stats_Literals_Block` | -- Raw literals block - Literals are stored uncompressed. -- RLE literals block - Literals consist of a single byte value repeated N times. -- Compressed literals block - This is a standard huffman-compressed block, - starting with a huffman tree description. +- `Raw_Literals_Block` - Literals are stored uncompressed. +- `RLE_Literals_Block` - Literals consist of a single byte value repeated N times. +- `Compressed_Literals_Block` - This is a standard Huffman-compressed block, + starting with a Huffman tree description. See details below. -- Repeat Stats literals block - This is a huffman-compressed block, - using huffman tree _from previous huffman-compressed literals block_. +- `Repeat_Stats_Literals_Block` - This is a Huffman-compressed block, + using Huffman tree _from previous Huffman-compressed literals block_. Huffman tree description will be skipped. -__Sizes format__ : +__`Size_Format`__ -Sizes format are divided into 2 families : +`Size_Format` is divided into 2 families : -- For compressed block, it requires to decode both the compressed size - and the decompressed size. It will also decode the number of streams. -- For Raw or RLE blocks, it's enough to decode the size to regenerate. +- For `Compressed_Block`, it requires to decode both `Compressed_Size` + and `Regenerated_Size` (the decompressed size). It will also decode the number of streams. +- For `Raw_Block` and `RLE_Block` it's enough to decode `Regenerated_Size`. 
For values spanning several bytes, convention is Little-endian. -__Sizes format for Raw and RLE literals block__ : +__`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ : -- Value : x0 : Regenerated size uses 5 bits (0-31). +- Value : x0 : `Regenerated_Size` uses 5 bits (0-31). Total literal header size is 1 byte. `size = h[0]>>3;` -- Value : 01 : Regenerated size uses 12 bits (0-4095). +- Value : 01 : `Regenerated_Size` uses 12 bits (0-4095). Total literal header size is 2 bytes. `size = (h[0]>>4) + (h[1]<<4);` -- Value : 11 : Regenerated size uses 20 bits (0-1048575). +- Value : 11 : `Regenerated_Size` uses 20 bits (0-1048575). Total literal header size is 3 bytes. `size = (h[0]>>4) + (h[1]<<4) + (h[2]<<12);` Note : it's allowed to represent a short value (ex : `13`) using a long format, accepting the reduced compacity. -__Sizes format for Compressed literals block and Repeat Stats literals block__ : +__`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block`__ : - Value : 00 : _Single stream_. - Compressed and regenerated sizes use 10 bits (0-1023). + `Compressed_Size` and `Regenerated_Size` use 10 bits (0-1023). Total literal header size is 3 bytes. - Value : 01 : 4 streams. - Compressed and regenerated sizes use 10 bits (0-1023). + `Compressed_Size` and `Regenerated_Size` use 10 bits (0-1023). Total literal header size is 3 bytes. - Value : 10 : 4 streams. - Compressed and regenerated sizes use 14 bits (0-16383). + `Compressed_Size` and `Regenerated_Size` use 14 bits (0-16383). Total literal header size is 4 bytes. - Value : 11 : 4 streams. - Compressed and regenerated sizes use 18 bits (0-262143). + `Compressed_Size` and `Regenerated_Size` use 18 bits (0-262143). Total literal header size is 5 bytes. -Compressed and regenerated size fields follow little-endian convention. +`Compressed_Size` and `Regenerated_Size` fields follow little-endian convention. -#### Huffman Tree description -This section is only present when literals block type is `Compressed` (`2`). +#### `Huffman_Tree_Description` + +This section is only present when `Literals_Block_Type` type is `Compressed_Block` (`2`). Prefix coding represents symbols from an a priori known alphabet by bit sequences (codewords), one codeword for each symbol, @@ -533,7 +534,7 @@ by completing to the nearest power of 2. This power of 2 gives `maxBits`, the depth of the current tree. __Example__ : -Let's presume the following huffman tree must be described : +Let's presume the following Huffman tree must be described : | literal | 0 | 1 | 2 | 3 | 4 | 5 | | ------- | --- | --- | --- | --- | --- | --- | @@ -575,7 +576,7 @@ which tells how to decode the list of weights. the serie of weights is compressed by FSE. The length of the FSE-compressed serie is `headerByte` (0-127). -##### FSE (Finite State Entropy) compression of huffman weights +##### FSE (Finite State Entropy) compression of Huffman weights The serie of weights is compressed using FSE compression. It's a single bitstream with 2 interleaved states, @@ -590,7 +591,7 @@ and last symbol value is not represented. An FSE bitstream starts by a header, describing probabilities distribution. It will create a Decoding Table. Table must be pre-allocated, which requires to support a maximum accuracy. -For a list of huffman weights, maximum accuracy is 7 bits. +For a list of Huffman weights, maximum accuracy is 7 bits. FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format), and so is [FSE bitstream](#bitstream). 
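To make the `Size_Format` rules for `Raw_Literals_Block` and `RLE_Literals_Block` concrete, here is a minimal decoding sketch. It is illustrative only, not zstd's implementation; the function name, parameters and return convention are invented for this example.

```c
#include <stddef.h>
#include <stdint.h>

/* Minimal sketch : decode Regenerated_Size from a Raw_Literals_Block or
 * RLE_Literals_Block header, following the Size_Format rules quoted above.
 * Returns the header size in bytes (1, 2 or 3), or 0 if srcSize is too small.
 * Names and conventions are illustrative, not zstd's actual API. */
static size_t LIT_decodeRawRleHeader(const uint8_t* h, size_t srcSize, uint32_t* regenSize)
{
    uint32_t sizeFormat;

    if (srcSize < 1) return 0;
    sizeFormat = (h[0] >> 2) & 3;              /* bits 2-3 : Size_Format (bits 0-1 are the block type, assumed already checked) */

    if ((sizeFormat & 1) == 0) {               /* x0 : 5-bit size, 1-byte header */
        *regenSize = h[0] >> 3;
        return 1;
    }
    if (sizeFormat == 1) {                     /* 01 : 12-bit size, 2-byte header */
        if (srcSize < 2) return 0;
        *regenSize = (h[0] >> 4) + ((uint32_t)h[1] << 4);
        return 2;
    }
    /* 11 : 20-bit size, 3-byte header */
    if (srcSize < 3) return 0;
    *regenSize = (h[0] >> 4) + ((uint32_t)h[1] << 4) + ((uint32_t)h[2] << 12);
    return 3;
}
```

For instance, a first byte of `0x28` (`Raw_Literals_Block`, `Size_Format` = x0) gives a one-byte header and `Regenerated_Size = 0x28 >> 3 = 5`.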
@@ -602,7 +603,7 @@ by tracking bitStream overflow condition. When both states have overflowed the bitstream, end is reached. -##### Conversion from weights to huffman prefix codes +##### Conversion from weights to Huffman prefix codes All present symbols shall now have a `weight` value. It is possible to transform weights into nbBits, using this formula : @@ -634,7 +635,7 @@ it gives the following distribution : ##### Bitstreams sizes As seen in a previous paragraph, -there are 2 flavors of huffman-compressed literals : +there are 2 flavors of Huffman-compressed literals : single stream, and 4-streams. 4-streams is useful for CPU with multiple execution units and OoO operations. @@ -685,7 +686,7 @@ hence reaching exactly its beginning position with _all_ bits consumed, the decoding process is considered faulty. -### Sequences section +### `Sequences_Section` A compressed block is a succession of _sequences_ . A sequence is a literal copy command, followed by a match copy command. @@ -1144,6 +1145,6 @@ __Content__ : Where the actual dictionary content is. Version changes --------------- - 0.2.0 : numerous format adjustments for zstd v0.8 -- 0.1.2 : limit huffman tree depth to 11 bits +- 0.1.2 : limit Huffman tree depth to 11 bits - 0.1.1 : reserved dictID ranges - 0.1.0 : initial release
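The central change in `zstd_opt.h` above is the new offset convention built around `ZSTD_REP_MOVE_OPT`. The following simplified sketch (a hypothetical helper, not the actual zstd code) shows how an offset value carried by the optimal parser maps to a match distance and how the repcode history is rotated, mirroring the `_storeSequence` logic in the diff:

```c
#include <stdint.h>

#define REP_NUM      3         /* mirrors ZSTD_REP_NUM */
#define REP_MOVE_OPT REP_NUM   /* mirrors ZSTD_REP_MOVE_OPT */

/* Simplified sketch (not the actual zstd code) of the offset convention the
 * optimal parser uses after this patch :
 *   off >  REP_MOVE_OPT : real match, distance = off - REP_MOVE_OPT
 *   off == REP_MOVE_OPT : the "rep[0] - 1" candidate, only generated when
 *                         no literal precedes the match (litLength == 0)
 *   off in 0..2         : repcode rep[off]
 * Returns the match distance and rotates the repcode history in place. */
static uint32_t OPT_resolveOffset(uint32_t off, uint32_t rep[REP_NUM])
{
    uint32_t distance;

    if (off > REP_MOVE_OPT) {          /* real match : shift history, insert new distance */
        distance = off - REP_MOVE_OPT;
        rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = distance;
        return distance;
    }

    distance = (off == REP_MOVE_OPT) ? rep[0] - 1 : rep[off];
    if (off != 0) {                    /* off == 0 reuses rep[0] without touching history */
        if (off != 1) rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = distance;
    }
    return distance;
}
```

Note that the diff then decrements this value once more (`offset--`) before storing the sequence, for real matches and for repcodes when `litLength == 0`, which matches the `matches[u].off-1` adjustment in the price calls.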