This change introduces a "block cache" to avoid the cost of TLB manipulation
and of page initialization via zero-filling.

  Suppose an allocated block B1, whose virtual address range is [ad1, ad2],
is going to be deallocated. On Linux, it seems the only way to deallocate the
pages associated with the block is to call madvise(..MADV_DONTNEED...)
(hereinafter called madvise() for short unless otherwise noted).

 madvise() *immediately* removes all the pages involved and invalidates the
related TLB entries. So, if we later allocate a block that overlaps B1 in
virtual address space, accessing the overlapping range will re-establish TLB
entries and zero-fill the pages, which is a bit expensive.

 This cost can be reduced by keeping a few blocks in memory and reusing the
memory-resident pages over and over again. This is the rationale behind the
"block cache". "Cache" here may be a misnomer: it doesn't cache any data; it
just provides a way to keep a small number of idle pages resident in memory,
to avoid the cost of TLB manipulation and page initialization via zero-filling.
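 To illustrate the round trip being avoided, here is a minimal standalone
sketch (not part of this change): after madvise(MADV_DONTNEED), the very next
touch of the range takes a minor fault and the kernel hands back a freshly
zero-filled page.

#include <sys/mman.h>
#include <string.h>
#include <assert.h>

int main(void) {
    size_t len = (size_t)1 << 21;       /* 2MB of anonymous memory */
    char* p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(p != MAP_FAILED);

    memset(p, 1, len);                  /* fault the pages in */
    madvise(p, len, MADV_DONTNEED);     /* pages dropped, TLB entries shot down */

    assert(p[0] == 0);                  /* minor fault + zero-fill on re-access */
    munmap(p, len);
    return 0;
}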
Shuxin Yang 2014-08-26 22:24:30 -07:00
parent e6c87e266c
commit f6c4ca0d94
11 changed files with 302 additions and 21 deletions

Makefile

@@ -6,7 +6,7 @@ default : all
AR_NAME := libljmm.a
SO_NAME := libljmm.so
-OPT_FLAGS = -O3 -g -DDEBUG
+OPT_FLAGS = -O3 -march=native -g -DDEBUG
CFLAGS = -fvisibility=hidden -MMD -Wall $(OPT_FLAGS)
CXXFLAGS = $(CFLAGS)
@@ -23,7 +23,7 @@ BUILD_AR_DIR = obj/lib
BUILD_SO_DIR = obj/so
RB_TREE_SRCS = rbtree.c
-ALLOC_SRCS = chunk.c page_alloc.c mem_map.c
+ALLOC_SRCS = chunk.c block_cache.c page_alloc.c mem_map.c
C_SRCS = $(RB_TREE_SRCS) $(ALLOC_SRCS)
C_OBJS = ${C_SRCS:%.c=%.o}

block_cache.c (new file, 246 lines)

@@ -0,0 +1,246 @@
/* Suppose an allocated block B1, whose virtual address range is [ad1, ad2],
 * is going to be deallocated. On Linux, it seems the only way to deallocate
 * the pages associated with the block is to call madvise(..MADV_DONTNEED...)
 * (hereinafter called madvise() for short unless otherwise noted).
 *
 * madvise() *immediately* removes all the pages involved and invalidates the
 * related TLB entries. So, if we later allocate a block that overlaps B1 in
 * virtual address space, accessing the overlapping range will re-establish
 * TLB entries and zero-fill the pages, which is a bit expensive.
 *
 * This cost can be reduced by keeping a few blocks in memory and reusing the
 * memory-resident pages over and over again. This is the rationale behind the
 * "block cache". "Cache" here may be a misnomer: it doesn't cache any data;
 * it just provides a way to keep a small number of idle pages resident in
 * memory, to avoid the cost of TLB manipulation and page initialization via
 * zero-filling.
 */
#include <sys/mman.h>
#include <stdlib.h>
#include "util.h"
#include "page_alloc.h"
#include "block_cache.h"
#define LRU_MAX_ENTRY 64
#define INVALID_LRU_IDX (-1)
/* About 2MB when the page size is 4KB. */
#define MAX_CACHE_PAGE_NUM 512
typedef struct blk_lru {
    page_idx_t start_page;
    short order;
    short next;
    short prev;
} blk_lru_t;

typedef struct {
    /* Free blocks in the ascending order of their starting page. */
    rb_tree_t* blks;
    blk_lru_t lru_v[LRU_MAX_ENTRY];
    short lru_hdr;
    short lru_tail;
    short lru_free_list;
    int total_page_num;
} block_cache_t;
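
/* The LRU machinery is index-based: entries live in the fixed lru_v[] array,
 * and next/prev hold array indices (INVALID_LRU_IDX terminates a chain), so
 * no dynamic allocation is needed. Unused entries are chained through their
 * next field on lru_free_list; lru_hdr points at the oldest cached block
 * (the eviction victim), and lru_append() adds new blocks at lru_tail. */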
static block_cache_t* blk_cache;
static char enable_blk_cache = 1;
static char blk_cache_init = 0;
static void
lru_init() {
    int i;
    blk_lru_t* lru = blk_cache->lru_v;
    for (i = 0; i < LRU_MAX_ENTRY; i++) {
        lru[i].next = i + 1;
        lru[i].prev = i - 1;
    }
    lru[0].prev = INVALID_LRU_IDX;
    lru[LRU_MAX_ENTRY-1].next = INVALID_LRU_IDX;

    blk_cache->lru_hdr = blk_cache->lru_tail = INVALID_LRU_IDX;
    blk_cache->lru_free_list = 0;
}

static int
lru_is_full() {
    return blk_cache->lru_free_list == INVALID_LRU_IDX;
}

static int
lru_is_empty() {
    return blk_cache->lru_hdr == INVALID_LRU_IDX;
}

static int
lru_append(page_idx_t start_page, int order) {
    if (unlikely(lru_is_full())) {
        ASSERT(0);
        return INVALID_LRU_IDX;
    }

    /* carve an entry off the free list */
    blk_lru_t *lru = blk_cache->lru_v;
    int new_item = blk_cache->lru_free_list;
    blk_cache->lru_free_list = lru[new_item].next;

    /* link it in at the tail of the LRU list */
    int lru_tail = blk_cache->lru_tail;
    if (lru_tail != INVALID_LRU_IDX) {
        lru[lru_tail].next = new_item;
    } else {
        ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX);
        blk_cache->lru_hdr = new_item;
    }
    lru[new_item].prev = lru_tail;
    lru[new_item].next = INVALID_LRU_IDX;
    blk_cache->lru_tail = new_item;

    lru[new_item].start_page = start_page;
    lru[new_item].order = order;
    return new_item;
}

static void
lru_remove(int idx) {
    if (!blk_cache_init || !enable_blk_cache)
        return;

    blk_lru_t* lru = blk_cache->lru_v;
    blk_lru_t* lru_entry = lru + idx;
    int prev = lru_entry->prev;
    int next = lru_entry->next;
    if (prev != INVALID_LRU_IDX) {
        lru[prev].next = next;
    } else {
        ASSERT(blk_cache->lru_hdr == idx);
        blk_cache->lru_hdr = next;
    }

    if (next != INVALID_LRU_IDX) {
        lru[next].prev = prev;
    } else {
        ASSERT(blk_cache->lru_tail == idx);
        blk_cache->lru_tail = prev;
    }

    /* return the entry to the free list */
    lru_entry->order = -1; /* for debugging purposes */
    lru_entry->next = blk_cache->lru_free_list;
    blk_cache->lru_free_list = idx;
}

static inline int
lru_popback(void) {
    if (likely(blk_cache->lru_tail != INVALID_LRU_IDX)) {
        lru_remove(blk_cache->lru_tail);
        return 1;
    }
    ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX);
    return 0;
}

int
bc_init(void) {
    if (unlikely(blk_cache_init))
        return 1;

    if (unlikely(!enable_blk_cache))
        return 0;

    if (!(blk_cache = (block_cache_t*)malloc(sizeof(block_cache_t))))
        return 0;

    blk_cache->blks = rbt_create();
    if (!blk_cache->blks) {
        free(blk_cache);
        return 0;
    }

    lru_init();
    blk_cache->total_page_num = 0; /* malloc() does not zero the struct */
    blk_cache_init = 1;
    return 1;
}

int
bc_fini(void) {
    if (unlikely(!enable_blk_cache))
        return 1;

    if (unlikely(!blk_cache_init))
        return 0;

    rbt_destroy(blk_cache->blks);
    free(blk_cache);
    blk_cache_init = 0;
    return 1;
}

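/* Cache the free block starting at start_page. If no LRU slot is left, or if
 * the cached total would exceed MAX_CACHE_PAGE_NUM pages, the oldest cached
 * block is evicted (its pages are zapped via madvise()). Returns 1 on
 * success. */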
int
bc_add_blk(page_idx_t start_page, int order) {
    if (!blk_cache_init || !enable_blk_cache)
        return INVALID_LRU_IDX;

    if (unlikely(lru_is_full())) {
        bc_evict_oldest();
        ASSERT(!lru_is_full());
    }

    int idx = lru_append(start_page, order);
    ASSERT(idx != INVALID_LRU_IDX);

    int rv = rbt_insert(blk_cache->blks, start_page, idx);
    if (likely(rv)) {
        blk_cache->total_page_num += 1 << order;
        if (blk_cache->total_page_num > MAX_CACHE_PAGE_NUM &&
            blk_cache->lru_hdr != blk_cache->lru_tail) {
            bc_evict_oldest();
        }
        return 1;
    }

    ASSERT(0);
    lru_popback();
    return 0;
}

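/* Drop the block from the cache, if it is cached. When zap_page is set, the
 * block's pages are released via madvise() first, whether or not the block is
 * in the cache. Returns 1 iff the block was found in the cache. */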
int
bc_remove_block(page_idx_t start_page, int order, int zap_page) {
    if (zap_page) {
        char* p = get_page_addr(start_page);
        size_t len = ((size_t)(1 << order)) << alloc_info->page_size_log2;
        /* madvise() takes a single advice value, not a bit mask */
        madvise(p, len, MADV_DONTNEED);
        madvise(p, len, MADV_DONTDUMP);
    }

    if (!blk_cache_init || !enable_blk_cache)
        return 0;

    intptr_t idx;
    if (!rbt_delete(blk_cache->blks, start_page, &idx))
        return 0;

    blk_lru_t* lru = blk_cache->lru_v + idx;
    ASSERT(lru->order == order);
    blk_cache->total_page_num -= (1 << order);
    ASSERT(blk_cache->total_page_num >= 0);
    lru_remove(idx);
    return 1;
}

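/* Zap and drop the least-recently cached block, if any. */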
int
bc_evict_oldest(void) {
    if (!blk_cache_init || !enable_blk_cache)
        return 0;

    if (!lru_is_empty()) {
        blk_lru_t* lru = blk_cache->lru_v + blk_cache->lru_hdr;
        page_idx_t page = lru->start_page;
        return bc_remove_block(page, lru->order, 1);
    }
    return 1;
}

block_cache.h (new file, 15 lines)

@@ -0,0 +1,15 @@
#ifndef _BLOCK_CACHE_H_
#define _BLOCK_CACHE_H_

#include "ljmm_conf.h"

struct blk_lru;
typedef struct blk_lru blk_lru_t;

int bc_init(void);
int bc_fini(void);
int bc_add_blk(page_idx_t start_page, int order);
int bc_evict_oldest(void);
int bc_remove_block(page_idx_t start_page, int order, int zap_page);

#endif /* _BLOCK_CACHE_H_ */
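
How the allocator is expected to drive this interface, as the page_alloc.h
hunks below show: blocks entering the buddy free lists are added to the cache,
blocks leaving them are removed without zapping, and eviction is what finally
releases pages. A hypothetical sketch (the on_* wrappers are illustrative and
not part of this commit):

#include "block_cache.h"

/* hypothetical wrappers mirroring the add_free_block()/add_alloc_block()
 * call sites added in page_alloc.h */
static void on_block_freed(page_idx_t start, int order) {
    bc_add_blk(start, order);          /* keep the pages resident */
}

static void on_block_allocated(page_idx_t start, int order) {
    bc_remove_block(start, order, 0);  /* forget the block, but don't zap:
                                        * its resident pages are reused as-is */
}

static void on_page_budget_exceeded(void) {
    bc_evict_oldest();                 /* madvise()s away the oldest block */
}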

chunk.c

@@ -41,6 +41,8 @@ lm_alloc_chunk (void) {
if (!chunk)
return NULL;
+madvise((void*)chunk, avail, MADV_DONTNEED);
+madvise((void*)chunk, avail, MADV_DONTDUMP); /* advice is a single value, not a bit mask */
big_chunk.base = (char*)chunk;
big_chunk.start = (char*)chunk;
big_chunk.alloc_size = avail;


@@ -52,7 +52,7 @@ lm_malloc(size_t sz) {
if (blk_idx == -1)
return NULL;
-remove_free_block(blk_idx, blk_order);
+remove_free_block(blk_idx, blk_order, 0);
/* The free block may be too big. If this is the case, keep splitting
* the block until it tightly fits the allocation request.

page_alloc.c

@@ -8,6 +8,7 @@
#include "lj_mm.h"
#include "chunk.h"
#include "page_alloc.h"
#include "block_cache.h"
/* Forward Decl */
lm_alloc_t* alloc_info = NULL;
@@ -103,6 +104,9 @@ lm_init_page_alloc(lm_chunk_t* chunk, lj_mm_opt_t* mm_opt) {
}
}
+/* init the block cache */
+bc_init();
return 1;
}
@@ -119,6 +123,8 @@ lm_fini_page_alloc(void) {
free(alloc_info);
alloc_info = 0;
}
+bc_fini();
}
/* Extend the given existing allocated block such that it can accommodate
@@ -138,7 +144,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) {
page_id_t blk_id = page_idx_to_id(block_idx);
int order = alloc_info->page_info[block_idx].order;
-/* step 1: perfrom try run to see if we have luck. */
+/* step 1: perform dry-run to see if we have luck. */
int succ = 0;
int ord;
for (ord = order; ord <= alloc_info->max_order; ord++) {
@@ -169,7 +175,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) {
for (t = order; t < ord; t++) {
page_id_t buddy_id = blk_id ^ (1 << t);
int buddy_idx = page_id_to_idx(buddy_id);
-remove_free_block(buddy_idx, t);
+remove_free_block(buddy_idx, t, 0);
reset_page_leader(alloc_info->page_info + buddy_idx);
}
@@ -187,11 +193,6 @@ free_block(page_idx_t page_idx) {
int order = page->order;
ASSERT (find_block(page_idx, order, NULL) == 0);
-char* block_addr = alloc_info->first_page +
-    (page_idx << alloc_info->page_size_log2);
-size_t block_len = (1<<order) << alloc_info->page_size_log2;
-madvise(block_addr, block_len, MADV_DONTNEED);
/* Consolidate adjacent buddies */
int page_num = alloc_info->page_num;
page_id_t page_id = page_idx_to_id(page_idx);
@@ -208,7 +209,7 @@ free_block(page_idx_t page_idx) {
is_allocated_blk(pi + buddy_idx)) {
break;
}
-remove_free_block(buddy_idx, order);
+remove_free_block(buddy_idx, order, 0);
reset_page_leader(alloc_info->page_info + buddy_idx);
page_id = page_id < buddy_id ? page_id : buddy_id;

page_alloc.h

@@ -5,6 +5,7 @@
#include "util.h"
#include "chunk.h" /* for lm_chunk_t */
#include "lj_mm.h"
#include "block_cache.h"
/**************************************************************************
*
@@ -79,10 +80,6 @@ typedef struct {
extern lm_alloc_t* alloc_info;
/* Page index to ID conversion */
-typedef int page_id_t;
-typedef int page_idx_t;
static inline page_id_t
page_idx_to_id(page_idx_t idx) {
ASSERT(idx >= 0 && idx < alloc_info->page_num);
@@ -96,6 +93,11 @@ page_id_to_idx(page_id_t id) {
return idx;
}
+static inline char*
+get_page_addr(page_idx_t pg) {
+    return alloc_info->first_page + (pg << alloc_info->page_size_log2);
+}
static inline int
verify_order(page_idx_t blk_leader, int order) {
return 0 == (page_idx_to_id(blk_leader) & ((1<<order) - 1));
@@ -109,14 +111,17 @@ find_block(page_idx_t block, int order, intptr_t* value) {
return rbt_search(&alloc_info->free_blks[order], block, value);
}
+/* If zap_pages is set, the corresponding pages will be removed via madvise() */
static inline int
-remove_free_block(page_idx_t block, int order) {
+remove_free_block(page_idx_t block, int order, int zap_pages) {
lm_page_t* page = alloc_info->page_info + block;
ASSERT(page->order == order && find_block(block, order, NULL));
ASSERT(!is_allocated_blk(page) && verify_order(block, order));
return rbt_delete(&alloc_info->free_blks[order], block);
bc_remove_block(block, order, zap_pages);
return rbt_delete(&alloc_info->free_blks[order], block, NULL);
}
/* Add the free block of the given "order" to the buddy system */
@@ -131,6 +136,7 @@ add_free_block(page_idx_t block, int order) {
set_page_leader(page);
reset_allocated_blk(page);
+bc_add_blk(block, order);
return rbt_insert(&alloc_info->free_blks[order], block, 0);
}
@@ -149,13 +155,18 @@ add_alloc_block(page_idx_t block, intptr_t sz, int order) {
set_page_leader(pg);
set_allocated_blk(pg);
+bc_remove_block(block, order, 0);
+/* the pages are in use again; make them eligible for core dumps */
+madvise(get_page_addr(block),
+        ((size_t)(1 << order)) << alloc_info->page_size_log2,
+        MADV_DODUMP);
return res;
}
static inline int
remove_alloc_block(page_idx_t block) {
ASSERT(is_page_leader(alloc_info->page_info + block));
int res = rbt_delete(&alloc_info->alloc_blks, block);
int res = rbt_delete(&alloc_info->alloc_blks, block, NULL);
ASSERT(res);
return res;
}


@@ -99,7 +99,7 @@ private:
if (!rbt_verify(_rbt))
return false;
-int ret = rbt_delete(_rbt, val);
+int ret = rbt_delete(_rbt, val, 0);
if (_dump_tree)
Dump_Tree("after_del");

rbtree.c

@@ -591,12 +591,15 @@ rbt_delete_fixup(rb_tree_t* rbt, int node_idx) {
}
int
-rbt_delete(rb_tree_t* rbt, int key) {
+rbt_delete(rb_tree_t* rbt, int key, intptr_t* val) {
/* step 1: find the element to be deleted */
int nd_idx = bst_search(rbt, key);
if (nd_idx == INVALID_IDX)
return 0;
+if (val)
+    *val = rbt->tree[nd_idx].value;
/* step 2: delete the element as we normally do with a binary-search tree */
rb_node_t* nd_vect = rbt->tree;
rb_node_t* node = nd_vect + nd_idx; /* the node being deleted*/

rbtree.h

@@ -39,7 +39,7 @@ void rbt_fini(rb_tree_t*);
/* RB-tree operations */
int rbt_insert(rb_tree_t*, int key, intptr_t val);
-int rbt_delete(rb_tree_t*, int key);
+int rbt_delete(rb_tree_t*, int key, intptr_t* val);
int rbt_get_min(rb_tree_t*);
int rbt_get_max(rb_tree_t*);
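
The extra out-parameter on rbt_delete() lets a caller fetch the value stored
under a key in the same pass as the deletion; bc_remove_block() relies on this
to map a starting page back to its LRU slot. A hypothetical call (the tree and
key below are made up for illustration):

intptr_t slot;
if (rbt_delete(tree, start_page, &slot)) {
    /* key was present; slot now holds the value passed to rbt_insert() */
} else {
    /* key absent; slot is left untouched */
}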

util.h

@@ -17,6 +17,9 @@
typedef unsigned int uint;
+typedef int page_id_t;
+typedef int page_idx_t;
static inline int
ceil_log2_int32 (unsigned num) {
int res = 31 - __builtin_clz(num);