Use 32 buckets for log2 bucketing in huffman sort
parent
c10067c44e
commit
1daf3c8dbc
|
@ -435,7 +435,7 @@ typedef struct {
|
||||||
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
||||||
|
|
||||||
/* Number of buckets available for HUF_sort() */
|
/* Number of buckets available for HUF_sort() */
|
||||||
#define RANK_POSITION_TABLE_SIZE 128
|
#define RANK_POSITION_TABLE_SIZE 192
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
huffNodeTable huffNodeTbl;
|
huffNodeTable huffNodeTbl;
|
||||||
|
@ -444,18 +444,15 @@ typedef struct {
|
||||||
|
|
||||||
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
||||||
* Strategy is to use as many buckets as possible for representing distinct
|
* Strategy is to use as many buckets as possible for representing distinct
|
||||||
* counts while using the remainder to represent all counts up to HUF_BLOCKSIZE_MAX
|
* counts while using the remainder to represent all "large" counts.
|
||||||
* using log2 bucketing.
|
|
||||||
*
|
*
|
||||||
* To satisfy this requirement for 128 buckets, we can do the following:
|
* To satisfy this requirement for 192 buckets, we can do the following:
|
||||||
* Let buckets 0-114 represent distinct counts of [0, 114]
|
* Let buckets 0-164 represent distinct counts of [0, 164]
|
||||||
* Let buckets 115 to 126 represent counts of [115, HUF_BLOCKSIZE_MAX]. (the final bucket 127 must remain empty)
|
* Let buckets 165 to 189 represent all remaining counts (below 2^RANK_POSITION_MAX_COUNT_LOG) using log2 bucketing.
|
||||||
*
|
|
||||||
* Note that we don't actually need 17 buckets (assuming 2^17 maxcount) for log2 bucketing since
|
|
||||||
* the first few buckets in the log2 bucketing representation are already covered by the distinct count bucketing.
|
|
||||||
*/
|
*/
|
||||||
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - BIT_highbit32(HUF_BLOCKSIZE_MAX) - 1
|
#define RANK_POSITION_MAX_COUNT_LOG 32
|
||||||
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN)
|
/* The replacement list is fully parenthesized so the macro expands as a single
 * operand inside any larger expression (e.g. `x - MACRO` or `2 * MACRO`).
 * The value noted below holds for RANK_POSITION_TABLE_SIZE == 192 and
 * RANK_POSITION_MAX_COUNT_LOG == 32. */
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1) /* == 158 */
|
||||||
|
/* The replacement list is fully parenthesized so the macro expands as a single
 * operand inside any larger expression.
 * Value: 158 + BIT_highbit32(158) = 158 + 7, assuming BIT_highbit32() returns
 * the 0-based index of the highest set bit (floor(log2(x))) -- TODO confirm. */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN)) /* == 165 */
|
||||||
|
|
||||||
/* Return the appropriate bucket index for a given count. See definition of
|
/* Return the appropriate bucket index for a given count. See definition of
|
||||||
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
||||||
|
|
Loading…
Reference in New Issue