This change introduces "block cache" to avoid the cost of TLB manipulation,
and page initialization via zero-filling. Suppose an allocated block B1, whose virtual address range is [ad1, ad2], is going to be deallocated. On Linux, it seems the only way to deallocate the pages associated with the block is to call madvise(..MADV_DONTNEED...) (hereinafter called madvise() for short unless otherwise noted). madvise() *immediately* removes all the pages involved, and invalidates the related TLB entries. So, if later on we allocate a block overlapping with B1 in virtual address, accessing the overlapping space will result in re-establishing TLB entries and zero-filling pages, which is a bit expensive. This cost can be reduced by keeping a few blocks in memory, and re-using the memory-resident pages over and over again. This is the rationale behind the "block cache". The "cache" here may be a misnomer; it doesn't cache any data, it just provides a way to keep a small number of idle pages in memory to avoid the cost of TLB manipulation and page initialization via zero-filling.
parent
e6c87e266c
commit
f6c4ca0d94
4
Makefile
4
Makefile
|
@ -6,7 +6,7 @@ default : all
|
|||
AR_NAME := libljmm.a
|
||||
SO_NAME := libljmm.so
|
||||
|
||||
OPT_FLAGS = -O3 -g -DDEBUG
|
||||
OPT_FLAGS = -O3 -march=native -g -DDEBUG
|
||||
CFLAGS = -fvisibility=hidden -MMD -Wall $(OPT_FLAGS)
|
||||
CXXFLAGS = $(CFLAGS)
|
||||
|
||||
|
@ -23,7 +23,7 @@ BUILD_AR_DIR = obj/lib
|
|||
BUILD_SO_DIR = obj/so
|
||||
|
||||
RB_TREE_SRCS = rbtree.c
|
||||
ALLOC_SRCS = chunk.c page_alloc.c mem_map.c
|
||||
ALLOC_SRCS = chunk.c block_cache.c page_alloc.c mem_map.c
|
||||
|
||||
C_SRCS = $(RB_TREE_SRCS) $(ALLOC_SRCS)
|
||||
C_OBJS = ${C_SRCS:%.c=%.o}
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
/* Suppose an allocated block B1, whose virtual address range is [ad1, ad2],
 * is going to be deallocated. On Linux, it seems the only way to deallocate
 * the pages associated with the block is to call madvise(..MADV_DONTNEED...)
 * (hereinafter called madvise() for short unless otherwise noted).
 *
 * madvise() *immediately* removes all the pages involved, and invalidates
 * the related TLB entries. So, if later on we allocate a block overlapping
 * with B1 in virtual address, accessing the overlapping space will result
 * in re-establishing TLB entries and zero-filling pages, which is a bit
 * expensive.
 *
 * This cost can be reduced by keeping a few blocks in memory, and re-using
 * the memory-resident pages over and over again. This is the rationale
 * behind the "block cache". The "cache" here may be a misnomer; it doesn't
 * cache any data, it just provides a way to keep a small number of idle
 * pages in memory to avoid the cost of TLB manipulation and page
 * initialization via zero-filling.
 */
|
||||
#include <sys/mman.h>
|
||||
#include <stdlib.h>
|
||||
#include "util.h"
|
||||
#include "page_alloc.h"
|
||||
#include "block_cache.h"
|
||||
|
||||
#define LRU_MAX_ENTRY 64
|
||||
#define INVALID_LRU_IDX (-1)
|
||||
|
||||
/* About 2M if page-size is 4k in byte. */
|
||||
#define MAX_CACHE_PAGE_NUM 512
|
||||
|
||||
/* One cached free block, threaded on a doubly-linked LRU list. */
typedef struct blk_lru {
    page_idx_t start_page;  /* first page of the cached free block */
    short order;            /* block spans (1 << order) pages; set to -1 while
                             * the entry sits on the free list (debug aid) */
    short next;             /* next entry in the LRU list (or the free list);
                             * INVALID_LRU_IDX terminates */
    short prev;             /* previous entry in the LRU list */
} blk_lru_t;

typedef struct {
    /* Free blocks in the ascending order of their starting page.*/
    /* The value stored per key is the block's index into lru_v. */
    rb_tree_t* blks;
    blk_lru_t lru_v[LRU_MAX_ENTRY];  /* fixed pool of LRU entries */
    short lru_hdr;                   /* oldest entry; eviction candidate */
    short lru_tail;                  /* most recently appended entry */
    short lru_free_list;             /* head of the singly-linked free-entry
                                      * list (links via blk_lru_t.next) */
    int total_page_num;              /* pages currently cached; kept near
                                      * MAX_CACHE_PAGE_NUM by eviction in
                                      * bc_add_blk() */
} block_cache_t;

static block_cache_t* blk_cache;     /* singleton; NULL until bc_init() */
static char enable_blk_cache = 1;    /* master on/off switch for the cache */
static char blk_cache_init = 0;      /* set once bc_init() succeeds */
|
||||
|
||||
static void
|
||||
lru_init() {
|
||||
int i;
|
||||
blk_lru_t* lru = blk_cache->lru_v;
|
||||
for (i = 0; i < LRU_MAX_ENTRY; i++) {
|
||||
lru[i].next = i + 1;
|
||||
lru[i].prev = i - 1;
|
||||
}
|
||||
lru[0].prev = INVALID_LRU_IDX;
|
||||
lru[LRU_MAX_ENTRY-1].next = INVALID_LRU_IDX;
|
||||
|
||||
blk_cache->lru_hdr = blk_cache->lru_tail = INVALID_LRU_IDX;
|
||||
blk_cache->lru_free_list = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
lru_is_full() {
|
||||
return blk_cache->lru_free_list == INVALID_LRU_IDX;
|
||||
}
|
||||
|
||||
static int
|
||||
lru_is_empty() {
|
||||
return blk_cache->lru_hdr == INVALID_LRU_IDX;
|
||||
}
|
||||
|
||||
static int
|
||||
lru_append(page_idx_t start_page, int order) {
|
||||
if (unlikely(lru_is_full())) {
|
||||
ASSERT(0);
|
||||
return INVALID_LRU_IDX;
|
||||
}
|
||||
|
||||
blk_lru_t *lru = blk_cache->lru_v;
|
||||
int new_item = blk_cache->lru_free_list;
|
||||
blk_cache->lru_free_list = lru[new_item].next;
|
||||
|
||||
int lru_tail = blk_cache->lru_tail;
|
||||
if (lru_tail != INVALID_LRU_IDX)
|
||||
lru[lru_tail].next = new_item;
|
||||
else {
|
||||
ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX);
|
||||
blk_cache->lru_hdr = new_item;
|
||||
}
|
||||
|
||||
lru[new_item].prev = lru_tail;
|
||||
lru[new_item].next = INVALID_LRU_IDX;
|
||||
blk_cache->lru_tail = new_item;
|
||||
|
||||
lru[new_item].start_page = start_page;
|
||||
lru[new_item].order = order;
|
||||
|
||||
return new_item;
|
||||
}
|
||||
|
||||
static void
|
||||
lru_remove(int idx) {
|
||||
if (!blk_cache_init || !enable_blk_cache)
|
||||
return;
|
||||
|
||||
blk_lru_t* lru = blk_cache->lru_v;
|
||||
blk_lru_t* lru_entry = lru + idx;
|
||||
int prev = lru_entry->prev;
|
||||
int next = lru_entry->next;
|
||||
|
||||
if (prev != INVALID_LRU_IDX) {
|
||||
lru[prev].next = next;
|
||||
} else {
|
||||
ASSERT(blk_cache->lru_hdr == idx);
|
||||
blk_cache->lru_hdr = next;
|
||||
}
|
||||
|
||||
if (next != INVALID_LRU_IDX) {
|
||||
lru[next].prev = prev;
|
||||
} else {
|
||||
ASSERT(blk_cache->lru_tail == idx);
|
||||
blk_cache->lru_tail = prev;
|
||||
}
|
||||
|
||||
lru_entry->order = -1; /* for debugging purpose */
|
||||
lru_entry->next = blk_cache->lru_free_list;
|
||||
blk_cache->lru_free_list = idx;
|
||||
}
|
||||
|
||||
static inline int
|
||||
lru_popback(void) {
|
||||
if (likely(blk_cache->lru_tail) != INVALID_LRU_IDX) {
|
||||
lru_remove(blk_cache->lru_tail);
|
||||
return 1;
|
||||
}
|
||||
ASSERT(blk_cache->lru_hdr == INVALID_LRU_IDX);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
bc_init(void) {
|
||||
if (unlikely(blk_cache_init))
|
||||
return 1;
|
||||
|
||||
if (unlikely(!enable_blk_cache))
|
||||
return 0;
|
||||
|
||||
if (!(blk_cache = (block_cache_t*)malloc(sizeof(block_cache_t))))
|
||||
return 0;
|
||||
|
||||
blk_cache->blks = rbt_create();
|
||||
if (!blk_cache->blks) {
|
||||
free(blk_cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
lru_init();
|
||||
blk_cache_init = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
bc_fini(void) {
|
||||
if (unlikely(!enable_blk_cache))
|
||||
return 1;
|
||||
|
||||
if (unlikely(!blk_cache_init))
|
||||
return 0;
|
||||
|
||||
rbt_destroy(blk_cache->blks);
|
||||
free(blk_cache);
|
||||
blk_cache_init = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Register a newly-freed block with the cache so its pages stay memory
 * resident.
 *
 * Returns 1 on success, 0 if the rb-tree insertion failed, and
 * INVALID_LRU_IDX (-1) when the cache is disabled or uninitialized.
 * NOTE(review): -1 is truthy, so a caller testing the result as a
 * boolean cannot distinguish "cache disabled" from "success" — confirm
 * this mixed contract is intended (current callers ignore the result).
 */
int
bc_add_blk(page_idx_t start_page, int order) {
    if (!blk_cache_init || !enable_blk_cache)
        return INVALID_LRU_IDX;

    /* Make room before appending: eviction zaps the oldest block's
     * pages and frees its LRU slot. */
    if (unlikely(lru_is_full())) {
        bc_evict_oldest();
        ASSERT(!lru_is_full());
    }

    int idx = lru_append(start_page, order);
    ASSERT(idx != INVALID_LRU_IDX);

    /* Index the block by its starting page; the stored value is the LRU
     * slot so bc_remove_block() can find the entry again. */
    int rv = rbt_insert(blk_cache->blks, start_page, idx);
    if (likely(rv)) {
        blk_cache->total_page_num += 1 << order;
        /* Bound the resident set.  The hdr != tail check keeps us from
         * evicting the block we just appended when it is the only one.
         * NOTE(review): only one block is evicted even if the budget is
         * still exceeded afterwards — confirm that is acceptable. */
        if (blk_cache->total_page_num > MAX_CACHE_PAGE_NUM &&
            blk_cache->lru_hdr != blk_cache->lru_tail) {
            bc_evict_oldest();
        }
        return 1;
    }

    /* Insertion failed (not expected to happen): undo the LRU append so
     * the two structures stay consistent. */
    ASSERT(0);
    lru_popback();
    return 0;
}
|
||||
|
||||
int
|
||||
bc_remove_block(page_idx_t start_page, int order, int zap_page) {
|
||||
if (zap_page) {
|
||||
char* p = get_page_addr(start_page);
|
||||
size_t len = ((size_t)(1 << order)) << alloc_info->page_size_log2;
|
||||
madvise(p, len, MADV_DONTDUMP|MADV_DONTNEED);
|
||||
}
|
||||
|
||||
if (!blk_cache_init || !enable_blk_cache)
|
||||
return 0;
|
||||
|
||||
intptr_t idx;
|
||||
if (!rbt_delete(blk_cache->blks, start_page, &idx))
|
||||
return 0;
|
||||
|
||||
blk_lru_t* lru = blk_cache->lru_v + idx;
|
||||
ASSERT(lru->order == order);
|
||||
blk_cache->total_page_num -= (1 << order);
|
||||
ASSERT(blk_cache->total_page_num >= 0);
|
||||
|
||||
lru_remove(idx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
bc_evict_oldest() {
|
||||
if (!blk_cache_init || !enable_blk_cache)
|
||||
return 0;
|
||||
|
||||
if (!lru_is_empty()) {
|
||||
blk_lru_t* lru = blk_cache->lru_v + blk_cache->lru_hdr;
|
||||
page_idx_t page = lru->start_page;
|
||||
return bc_remove_block(page, lru->order, 1);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
/* Public interface of the block cache: keeps a small set of freed blocks
 * memory resident to avoid madvise()/TLB/zero-fill costs on reuse.
 * NOTE(review): the guard name starts with '_' + uppercase, which is a
 * reserved identifier in C — consider renaming to BLOCK_CACHE_H.
 */
#ifndef _BLOCK_CACHE_H_
#define _BLOCK_CACHE_H_

#include "ljmm_conf.h"

/* Opaque LRU entry; defined in block_cache.c. */
struct blk_lru;
typedef struct blk_lru blk_lru_t;

/* Create the global cache.  Returns 1 on success or if already
 * initialized, 0 on failure or when the cache is disabled. */
int bc_init(void);
/* Destroy the global cache; returns 1 on success. */
int bc_fini(void);
/* Record the free block starting at start_page spanning 2^order pages. */
int bc_add_blk(page_idx_t start_page, int order);
/* Zap and drop the oldest cached block. */
int bc_evict_oldest(void);
/* Remove a block from the cache; if zap_page is set, release its pages
 * via madvise(). */
int bc_remove_block(page_idx_t start_page, int order, int zap_page);

#endif /* _BLOCK_CACHE_H_ */
|
2
chunk.c
2
chunk.c
|
@ -41,6 +41,8 @@ lm_alloc_chunk (void) {
|
|||
if (!chunk)
|
||||
return NULL;
|
||||
|
||||
madvise((void*)chunk, avail, MADV_DONTNEED|MADV_DONTDUMP);
|
||||
|
||||
big_chunk.base = (char*)chunk;
|
||||
big_chunk.start = (char*)chunk;
|
||||
big_chunk.alloc_size = avail;
|
||||
|
|
|
@ -52,7 +52,7 @@ lm_malloc(size_t sz) {
|
|||
if (blk_idx == -1)
|
||||
return NULL;
|
||||
|
||||
remove_free_block(blk_idx, blk_order);
|
||||
remove_free_block(blk_idx, blk_order, 0);
|
||||
|
||||
/* The free block may be too big. If this is the case, keep splitting
|
||||
* the block until it tightly fit the allocation request.
|
||||
|
|
17
page_alloc.c
17
page_alloc.c
|
@ -8,6 +8,7 @@
|
|||
#include "lj_mm.h"
|
||||
#include "chunk.h"
|
||||
#include "page_alloc.h"
|
||||
#include "block_cache.h"
|
||||
|
||||
/* Forward Decl */
|
||||
lm_alloc_t* alloc_info = NULL;
|
||||
|
@ -103,6 +104,9 @@ lm_init_page_alloc(lm_chunk_t* chunk, lj_mm_opt_t* mm_opt) {
|
|||
}
|
||||
}
|
||||
|
||||
/*init the block cache */
|
||||
bc_init();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -119,6 +123,8 @@ lm_fini_page_alloc(void) {
|
|||
free(alloc_info);
|
||||
alloc_info = 0;
|
||||
}
|
||||
|
||||
bc_fini();
|
||||
}
|
||||
|
||||
/* The extend given the exiting allocated block such that it could accommodate
|
||||
|
@ -138,7 +144,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) {
|
|||
page_id_t blk_id = page_idx_to_id(block_idx);
|
||||
int order = alloc_info->page_info[block_idx].order;
|
||||
|
||||
/* step 1: perfrom try run to see if we have luck. */
|
||||
/* step 1: perfrom dry-run to see if we have luck. */
|
||||
int succ = 0;
|
||||
int ord;
|
||||
for (ord = order; ord <= alloc_info->max_order; ord++) {
|
||||
|
@ -169,7 +175,7 @@ extend_alloc_block(page_idx_t block_idx, size_t new_sz) {
|
|||
for (t = order; t < ord; t++) {
|
||||
page_id_t buddy_id = blk_id ^ (1 << t);
|
||||
int buddy_idx = page_id_to_idx(buddy_id);
|
||||
remove_free_block(buddy_idx, t);
|
||||
remove_free_block(buddy_idx, t, 0);
|
||||
reset_page_leader(alloc_info->page_info + buddy_idx);
|
||||
}
|
||||
|
||||
|
@ -187,11 +193,6 @@ free_block(page_idx_t page_idx) {
|
|||
int order = page->order;
|
||||
ASSERT (find_block(page_idx, order, NULL) == 0);
|
||||
|
||||
char* block_addr = alloc_info->first_page +
|
||||
(page_idx << alloc_info->page_size_log2);
|
||||
size_t block_len = (1<<order) << alloc_info->page_size_log2;
|
||||
madvise(block_addr, block_len, MADV_DONTNEED);
|
||||
|
||||
/* Consolidate adjacent buddies */
|
||||
int page_num = alloc_info->page_num;
|
||||
page_id_t page_id = page_idx_to_id(page_idx);
|
||||
|
@ -208,7 +209,7 @@ free_block(page_idx_t page_idx) {
|
|||
is_allocated_blk(pi + buddy_idx)) {
|
||||
break;
|
||||
}
|
||||
remove_free_block(buddy_idx, order);
|
||||
remove_free_block(buddy_idx, order, 0);
|
||||
reset_page_leader(alloc_info->page_info + buddy_idx);
|
||||
|
||||
page_id = page_id < buddy_id ? page_id : buddy_id;
|
||||
|
|
25
page_alloc.h
25
page_alloc.h
|
@ -5,6 +5,7 @@
|
|||
#include "util.h"
|
||||
#include "chunk.h" /* for lm_chunk_t */
|
||||
#include "lj_mm.h"
|
||||
#include "block_cache.h"
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
|
@ -79,10 +80,6 @@ typedef struct {
|
|||
|
||||
extern lm_alloc_t* alloc_info;
|
||||
|
||||
/* Page index to ID conversion */
|
||||
typedef int page_id_t;
|
||||
typedef int page_idx_t;
|
||||
|
||||
static inline page_id_t
|
||||
page_idx_to_id(page_idx_t idx) {
|
||||
ASSERT(idx >= 0 && idx < alloc_info->page_num);
|
||||
|
@ -96,6 +93,11 @@ page_id_to_idx(page_id_t id) {
|
|||
return idx;
|
||||
}
|
||||
|
||||
static inline char*
|
||||
get_page_addr(page_idx_t pg) {
|
||||
return alloc_info->first_page + (pg << alloc_info->page_size_log2);
|
||||
}
|
||||
|
||||
static inline int
|
||||
verify_order(page_idx_t blk_leader, int order) {
|
||||
return 0 == (page_idx_to_id(blk_leader) & ((1<<order) - 1));
|
||||
|
@ -109,14 +111,17 @@ find_block(page_idx_t block, int order, intptr_t* value) {
|
|||
return rbt_search(&alloc_info->free_blks[order], block, value);
|
||||
}
|
||||
|
||||
/* If zap_pages is set, the corresponding pages will be removed via madvise()*/
|
||||
static inline int
|
||||
remove_free_block(page_idx_t block, int order) {
|
||||
remove_free_block(page_idx_t block, int order, int zap_pages) {
|
||||
lm_page_t* page = alloc_info->page_info + block;
|
||||
|
||||
ASSERT(page->order == order && find_block(block, order, NULL));
|
||||
ASSERT(!is_allocated_blk(page) && verify_order(block, order));
|
||||
|
||||
return rbt_delete(&alloc_info->free_blks[order], block);
|
||||
bc_remove_block(block, order, zap_pages);
|
||||
|
||||
return rbt_delete(&alloc_info->free_blks[order], block, NULL);
|
||||
}
|
||||
|
||||
/* Add the free block of the given "order" to the buddy system */
|
||||
|
@ -131,6 +136,7 @@ add_free_block(page_idx_t block, int order) {
|
|||
set_page_leader(page);
|
||||
reset_allocated_blk(page);
|
||||
|
||||
bc_add_blk(block, order);
|
||||
return rbt_insert(&alloc_info->free_blks[order], block, 0);
|
||||
}
|
||||
|
||||
|
@ -149,13 +155,18 @@ add_alloc_block(page_idx_t block, intptr_t sz, int order) {
|
|||
set_page_leader(pg);
|
||||
set_allocated_blk(pg);
|
||||
|
||||
bc_remove_block(block, order, 0);
|
||||
madvise(alloc_info->first_page + block,
|
||||
(1 << order) << alloc_info->page_size_log2,
|
||||
MADV_DODUMP);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline int
|
||||
remove_alloc_block(page_idx_t block) {
|
||||
ASSERT(is_page_leader(alloc_info->page_info + block));
|
||||
int res = rbt_delete(&alloc_info->alloc_blks, block);
|
||||
int res = rbt_delete(&alloc_info->alloc_blks, block, NULL);
|
||||
ASSERT(res);
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ private:
|
|||
if (!rbt_verify(_rbt))
|
||||
return false;
|
||||
|
||||
int ret = rbt_delete(_rbt, val);
|
||||
int ret = rbt_delete(_rbt, val, 0);
|
||||
|
||||
if (_dump_tree)
|
||||
Dump_Tree("after_del");
|
||||
|
|
5
rbtree.c
5
rbtree.c
|
@ -591,12 +591,15 @@ rbt_delete_fixup(rb_tree_t* rbt, int node_idx) {
|
|||
}
|
||||
|
||||
int
|
||||
rbt_delete(rb_tree_t* rbt, int key) {
|
||||
rbt_delete(rb_tree_t* rbt, int key, intptr_t* val) {
|
||||
/* step 1: find the element to be deleted */
|
||||
int nd_idx = bst_search(rbt, key);
|
||||
if (nd_idx == INVALID_IDX)
|
||||
return 0;
|
||||
|
||||
if (val)
|
||||
*val = rbt->tree[nd_idx].value;
|
||||
|
||||
/* step 2: delete the element as we normally do with a binary-search tree */
|
||||
rb_node_t* nd_vect = rbt->tree;
|
||||
rb_node_t* node = nd_vect + nd_idx; /* the node being deleted*/
|
||||
|
|
2
rbtree.h
2
rbtree.h
|
@ -39,7 +39,7 @@ void rbt_fini(rb_tree_t*);
|
|||
|
||||
/* RB-tree operations */
|
||||
int rbt_insert(rb_tree_t*, int key, intptr_t val);
|
||||
int rbt_delete(rb_tree_t*, int key);
|
||||
int rbt_delete(rb_tree_t*, int key, intptr_t* val);
|
||||
int rbt_get_min(rb_tree_t*);
|
||||
int rbt_get_max(rb_tree_t*);
|
||||
|
||||
|
|
Loading…
Reference in New Issue