From 1daf3c8dbc71027b2d57ac6a01f161247fdb70aa Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 9 Sep 2021 08:28:12 -0700 Subject: [PATCH] Use 32 buckets for log2 bucketing in huffman sort --- lib/compress/huf_compress.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 67b7ef95..db11ab3d 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -435,7 +435,7 @@ typedef struct { typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; /* Number of buckets available for HUF_sort() */ -#define RANK_POSITION_TABLE_SIZE 128 +#define RANK_POSITION_TABLE_SIZE 192 typedef struct { huffNodeTable huffNodeTbl; @@ -444,18 +444,15 @@ typedef struct { /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing. * Strategy is to use as many buckets as possible for representing distinct - * counts while using the remainder to represent all counts up to HUF_BLOCKSIZE_MAX - * using log2 bucketing. + * counts while using the remainder to represent all "large" counts. * - * To satisfy this requirement for 128 buckets, we can do the following: - * Let buckets 0-114 represent distinct counts of [0, 114] - * Let buckets 115 to 126 represent counts of [115, HUF_BLOCKSIZE_MAX]. (the final bucket 127 must remain empty) - * - * Note that we don't actually need 17 buckets (assuming 2^17 maxcount) for log2 bucketing since - * the first few buckets in the log2 bucketing representation are already covered by the distinct count bucketing. + * To satisfy this requirement for 192 buckets, we can do the following: + * Let the buckets below RANK_POSITION_DISTINCT_COUNT_CUTOFF each represent one distinct count + * Let the buckets from RANK_POSITION_DISTINCT_COUNT_CUTOFF onward represent all remaining counts below 2^RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. 
*/ -#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - BIT_highbit32(HUF_BLOCKSIZE_MAX) - 1 -#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) +#define RANK_POSITION_MAX_COUNT_LOG 32 +#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ +#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 165 */ /* Return the appropriate bucket index for a given count. See definition of * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.