diff --git a/Makefile b/Makefile index ed3e757..190aafa 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ default : all AR_NAME := libljmm.a SO_NAME := libljmm.so -OPT_FLAGS = -O3 -g -DDEBUG +OPT_FLAGS = -O3 -march=native -g -DDEBUG CFLAGS = -fvisibility=hidden -MMD -Wall $(OPT_FLAGS) CXXFLAGS = $(CFLAGS) @@ -23,7 +23,7 @@ BUILD_AR_DIR = obj/lib BUILD_SO_DIR = obj/so RB_TREE_SRCS = rbtree.c -ALLOC_SRCS = chunk.c page_alloc.c mem_map.c +ALLOC_SRCS = chunk.c block_cache.c page_alloc.c mem_map.c C_SRCS = $(RB_TREE_SRCS) $(ALLOC_SRCS) C_OBJS = ${C_SRCS:%.c=%.o} diff --git a/block_cache.c b/block_cache.c new file mode 100644 index 0000000..6026967 --- /dev/null +++ b/block_cache.c @@ -0,0 +1,246 @@ +/* Suppose an allocated block B1, whose virtual address is [ad1, ad2], is going + * to be deallocated. On Linux, it seems the only way to deallocate the pages + * associated with the block is to call madvise(..MADV_DONTNEED...) ( + * hereinafter, call it madvise() for short unless otherwise noted). + * + * madvise() *immediately* removes all the pages involved, and invalidates the + * related TLB entries. So, if later on we allocate a block overlapping with + * B1 in virtual address, accessing the overlapping space will result in + * re-establishing TLB entries, and zero-fill-pages, which is a bit expensive. + * + * This cost can be reduced by keeping a few blocks in memory, and re-using the + * memory resident pages over and over again. This is the rationale behind the + * "block cache". The "cache" here may be a misnomer; it doesn't cache any data, + * it just provides a way to keep a small number of idle pages in memory to avoid + * the cost of TLB manipulation and page initialization via zero-filling. + */ +#include <stdlib.h> +#include <sys/mman.h> +#include "util.h" +#include "page_alloc.h" +#include "block_cache.h" + +#define LRU_MAX_ENTRY 64 +#define INVALID_LRU_IDX (-1) + +/* About 2MB if the page size is 4KB, in bytes. 
*/ +#define MAX_CACHE_PAGE_NUM 512 + +typedef struct blk_lru { + page_idx_t start_page; + short order; + short next; + short prev; +} blk_lru_t; + +typedef struct { + /* Free blocks in the ascending order of their starting page.*/ + rb_tree_t* blks; + blk_lru_t lru_v[LRU_MAX_ENTRY]; + short lru_hdr; + short lru_tail; + short lru_free_list; + int total_page_num; +} block_cache_t; + +static block_cache_t* blk_cache; +static char enable_blk_cache = 1; +static char blk_cache_init = 0; + +static void +lru_init() { + int i; + blk_lru_t* lru = blk_cache->lru_v; + for (i = 0; i < LRU_MAX_ENTRY; i++) { + lru[i].next = i + 1; + lru[i].prev = i - 1; + } + lru[0].prev = INVALID_LRU_IDX; + lru[LRU_MAX_ENTRY-1].next = INVALID_LRU_IDX; + + blk_cache->lru_hdr = blk_cache->lru_tail = INVALID_LRU_IDX; + blk_cache->lru_free_list = 0; +} + +static int +lru_is_full() { + return blk_cache->lru_free_list == INVALID_LRU_IDX; +} + +static int +lru_is_empty() { + return blk_cache->lru_hdr == INVALID_LRU_IDX; +} + +static int +lru_append(page_idx_t start_page, int order) { + if (unlikely(lru_is_full())) { + ASSERT(0); + return INVALID_LRU_IDX; + } + + blk_lru_t *lru = blk_cache->lru_v; + int new_item = blk_cache->lru_free_list; + blk_cache->lru_free_list = lru[new_item].next; + + int lru_tail = blk_cache->lru_tail; + if (lru_tail != INVALID_LRU_IDX) + lru[lru_tail].next = new_item; + else { + ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX); + blk_cache->lru_hdr = new_item; + } + + lru[new_item].prev = lru_tail; + lru[new_item].next = INVALID_LRU_IDX; + blk_cache->lru_tail = new_item; + + lru[new_item].start_page = start_page; + lru[new_item].order = order; + + return new_item; +} + +static void +lru_remove(int idx) { + if (!blk_cache_init || !enable_blk_cache) + return; + + blk_lru_t* lru = blk_cache->lru_v; + blk_lru_t* lru_entry = lru + idx; + int prev = lru_entry->prev; + int next = lru_entry->next; + + if (prev != INVALID_LRU_IDX) { + lru[prev].next = next; + } else { + 
ASSERT(blk_cache->lru_hdr == idx); + blk_cache->lru_hdr = next; + } + + if (next != INVALID_LRU_IDX) { + lru[next].prev = prev; + } else { + ASSERT(blk_cache->lru_tail == idx); + blk_cache->lru_tail = prev; + } + + lru_entry->order = -1; /* for debugging purpose */ + lru_entry->next = blk_cache->lru_free_list; + blk_cache->lru_free_list = idx; +} + +static inline int +lru_popback(void) { + if (likely(blk_cache->lru_tail) != INVALID_LRU_IDX) { + lru_remove(blk_cache->lru_tail); + return 1; + } + ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX); + return 0; +} + +int +bc_init(void) { + if (unlikely(blk_cache_init)) + return 1; + + if (unlikely(!enable_blk_cache)) + return 0; + + if (!(blk_cache = (block_cache_t*)malloc(sizeof(block_cache_t)))) + return 0; + + blk_cache->blks = rbt_create(); + if (!blk_cache->blks) { + free(blk_cache); + return 0; + } + + lru_init(); + blk_cache_init = 1; + + return 1; +} + +int +bc_fini(void) { + if (unlikely(!enable_blk_cache)) + return 1; + + if (unlikely(!blk_cache_init)) + return 0; + + rbt_destroy(blk_cache->blks); + free(blk_cache); + blk_cache_init = 0; + + return 1; +} + +int +bc_add_blk(page_idx_t start_page, int order) { + if (!blk_cache_init || !enable_blk_cache) + return INVALID_LRU_IDX; + + if (unlikely(lru_is_full())) { + bc_evict_oldest(); + ASSERT(!lru_is_full()); + } + + int idx = lru_append(start_page, order); + ASSERT(idx != INVALID_LRU_IDX); + + int rv = rbt_insert(blk_cache->blks, start_page, idx); + if (likely(rv)) { + blk_cache->total_page_num += 1 << order; + if (blk_cache->total_page_num > MAX_CACHE_PAGE_NUM && + blk_cache->lru_hdr != blk_cache->lru_tail) { + bc_evict_oldest(); + } + return 1; + } + + ASSERT(0); + lru_popback(); + return 0; +} + +int +bc_remove_block(page_idx_t start_page, int order, int zap_page) { + if (zap_page) { + char* p = get_page_addr(start_page); + size_t len = ((size_t)(1 << order)) << alloc_info->page_size_log2; + madvise(p, len, MADV_DONTDUMP|MADV_DONTNEED); + } + + if 
(!blk_cache_init || !enable_blk_cache) + return 0; + + intptr_t idx; + if (!rbt_delete(blk_cache->blks, start_page, &idx)) + return 0; + + blk_lru_t* lru = blk_cache->lru_v + idx; + ASSERT(lru->order == order); + blk_cache->total_page_num -= (1 << order); + ASSERT(blk_cache->total_page_num >= 0); + + lru_remove(idx); + + return 1; +} + +int +bc_evict_oldest() { + if (!blk_cache_init || !enable_blk_cache) + return 0; + + if (!lru_is_empty()) { + blk_lru_t* lru = blk_cache->lru_v + blk_cache->lru_hdr; + page_idx_t page = lru->start_page; + return bc_remove_block(page, lru->order, 1); + } + + return 1; +} diff --git a/block_cache.h b/block_cache.h new file mode 100644 index 0000000..45f6cc3 --- /dev/null +++ b/block_cache.h @@ -0,0 +1,15 @@ +#ifndef _BLOCK_CACHE_H_ +#define _BLOCK_CACHE_H_ + +#include "ljmm_conf.h" + +struct blk_lru; +typedef struct blk_lru blk_lru_t; + +int bc_init(void); +int bc_fini(void); +int bc_add_blk(page_idx_t start_page, int order); +int bc_evict_oldest(void); +int bc_remove_block(page_idx_t start_page, int order, int zap_page); + +#endif /* _BLOCK_CACHE_H_ */ diff --git a/chunk.c b/chunk.c index f989d4b..f8df8cd 100644 --- a/chunk.c +++ b/chunk.c @@ -41,6 +41,8 @@ lm_alloc_chunk (void) { if (!chunk) return NULL; + madvise((void*)chunk, avail, MADV_DONTNEED|MADV_DONTDUMP); + big_chunk.base = (char*)chunk; big_chunk.start = (char*)chunk; big_chunk.alloc_size = avail; diff --git a/mem_map.c b/mem_map.c index 5b50611..eb9c8f7 100644 --- a/mem_map.c +++ b/mem_map.c @@ -52,7 +52,7 @@ lm_malloc(size_t sz) { if (blk_idx == -1) return NULL; - remove_free_block(blk_idx, blk_order); + remove_free_block(blk_idx, blk_order, 0); /* The free block may be too big. If this is the case, keep splitting * the block until it tightly fit the allocation request. 
diff --git a/page_alloc.c b/page_alloc.c index 619d35b..1d337cc 100644 --- a/page_alloc.c +++ b/page_alloc.c @@ -8,6 +8,7 @@ #include "lj_mm.h" #include "chunk.h" #include "page_alloc.h" +#include "block_cache.h" /* Forward Decl */ lm_alloc_t* alloc_info = NULL; @@ -103,6 +104,9 @@ lm_init_page_alloc(lm_chunk_t* chunk, lj_mm_opt_t* mm_opt) { } } + /* init the block cache */ + bc_init(); + return 1; } @@ -119,6 +123,8 @@ lm_fini_page_alloc(void) { free(alloc_info); alloc_info = 0; } + + bc_fini(); } /* The extend given the exiting allocated block such that it could accommodate @@ -138,7 +144,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) { page_id_t blk_id = page_idx_to_id(block_idx); int order = alloc_info->page_info[block_idx].order; - /* step 1: perfrom try run to see if we have luck. */ + /* step 1: perform dry-run to see if we have luck. */ int succ = 0; int ord; for (ord = order; ord <= alloc_info->max_order; ord++) { @@ -169,7 +175,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) { for (t = order; t < ord; t++) { page_id_t buddy_id = blk_id ^ (1 << t); int buddy_idx = page_id_to_idx(buddy_id); - remove_free_block(buddy_idx, t); + remove_free_block(buddy_idx, t, 0); reset_page_leader(alloc_info->page_info + buddy_idx); } @@ -187,11 +193,6 @@ free_block(page_idx_t page_idx) { int order = page->order; ASSERT (find_block(page_idx, order, NULL) == 0); - char* block_addr = alloc_info->first_page + - (page_idx << alloc_info->page_size_log2); - size_t block_len = (1<<order) << alloc_info->page_size_log2; - madvise(block_addr, block_len, MADV_DONTNEED); - /* Consolidate adjacent buddies */ int page_num = alloc_info->page_num; page_id_t page_id = page_idx_to_id(page_idx); @@ -208,7 +209,7 @@ free_block(page_idx_t page_idx) { is_allocated_blk(pi + buddy_idx)) { break; } - remove_free_block(buddy_idx, order); + remove_free_block(buddy_idx, order, 0); reset_page_leader(alloc_info->page_info + buddy_idx); page_id = page_id < buddy_id ? 
page_id : buddy_id; diff --git a/page_alloc.h b/page_alloc.h index ca4dffd..fc75a1a 100644 --- a/page_alloc.h +++ b/page_alloc.h @@ -5,6 +5,7 @@ #include "util.h" #include "chunk.h" /* for lm_chunk_t */ #include "lj_mm.h" +#include "block_cache.h" /************************************************************************** * @@ -79,10 +80,6 @@ typedef struct { extern lm_alloc_t* alloc_info; -/* Page index to ID conversion */ -typedef int page_id_t; -typedef int page_idx_t; - static inline page_id_t page_idx_to_id(page_idx_t idx) { ASSERT(idx >= 0 && idx < alloc_info->page_num); @@ -96,6 +93,11 @@ page_id_to_idx(page_id_t id) { return idx; } +static inline char* +get_page_addr(page_idx_t pg) { + return alloc_info->first_page + (pg << alloc_info->page_size_log2); +} + static inline int verify_order(page_idx_t blk_leader, int order) { return 0 == (page_idx_to_id(blk_leader) & ((1<free_blks[order], block, value); } +/* If zap_pages is set, the corresponding pages will be removed via madvise()*/ static inline int -remove_free_block(page_idx_t block, int order) { +remove_free_block(page_idx_t block, int order, int zap_pages) { lm_page_t* page = alloc_info->page_info + block; ASSERT(page->order == order && find_block(block, order, NULL)); ASSERT(!is_allocated_blk(page) && verify_order(block, order)); - return rbt_delete(&alloc_info->free_blks[order], block); + bc_remove_block(block, order, zap_pages); + + return rbt_delete(&alloc_info->free_blks[order], block, NULL); } /* Add the free block of the given "order" to the buddy system */ @@ -131,6 +136,7 @@ add_free_block(page_idx_t block, int order) { set_page_leader(page); reset_allocated_blk(page); + bc_add_blk(block, order); return rbt_insert(&alloc_info->free_blks[order], block, 0); } @@ -149,13 +155,18 @@ add_alloc_block(page_idx_t block, intptr_t sz, int order) { set_page_leader(pg); set_allocated_blk(pg); + bc_remove_block(block, order, 0); + madvise(alloc_info->first_page + block, + (1 << order) << 
alloc_info->page_size_log2, + MADV_DODUMP); + return res; } static inline int remove_alloc_block(page_idx_t block) { ASSERT(is_page_leader(alloc_info->page_info + block)); - int res = rbt_delete(&alloc_info->alloc_blks, block); + int res = rbt_delete(&alloc_info->alloc_blks, block, NULL); ASSERT(res); return res; } diff --git a/rb_test.cxx b/rb_test.cxx index b2fb420..b14e599 100644 --- a/rb_test.cxx +++ b/rb_test.cxx @@ -99,7 +99,7 @@ private: if (!rbt_verify(_rbt)) return false; - int ret = rbt_delete(_rbt, val); + int ret = rbt_delete(_rbt, val, 0); if (_dump_tree) Dump_Tree("after_del"); diff --git a/rbtree.c b/rbtree.c index 8861332..577331f 100644 --- a/rbtree.c +++ b/rbtree.c @@ -591,12 +591,15 @@ rbt_delete_fixup(rb_tree_t* rbt, int node_idx) { } int -rbt_delete(rb_tree_t* rbt, int key) { +rbt_delete(rb_tree_t* rbt, int key, intptr_t* val) { /* step 1: find the element to be deleted */ int nd_idx = bst_search(rbt, key); if (nd_idx == INVALID_IDX) return 0; + if (val) + *val = rbt->tree[nd_idx].value; + /* step 2: delete the element as we normally do with a binary-search tree */ rb_node_t* nd_vect = rbt->tree; rb_node_t* node = nd_vect + nd_idx; /* the node being deleted*/ diff --git a/rbtree.h b/rbtree.h index dd615a0..3e0d97f 100644 --- a/rbtree.h +++ b/rbtree.h @@ -39,7 +39,7 @@ void rbt_fini(rb_tree_t*); /* RB-tree operations */ int rbt_insert(rb_tree_t*, int key, intptr_t val); -int rbt_delete(rb_tree_t*, int key); +int rbt_delete(rb_tree_t*, int key, intptr_t* val); int rbt_get_min(rb_tree_t*); int rbt_get_max(rb_tree_t*); diff --git a/util.h b/util.h index 1568a47..969acd4 100644 --- a/util.h +++ b/util.h @@ -17,6 +17,9 @@ typedef unsigned int uint; +typedef int page_id_t; +typedef int page_idx_t; + static inline int ceil_log2_int32 (unsigned num) { int res = 31 - __builtin_clz(num);