From 7dbfb668f17dfd7d8bf130210ec342608994a24d Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Tue, 2 Sep 2014 10:30:04 -0700 Subject: [PATCH] Fix bunch of bugs! As of this revision. We are able to run CPU2000int with refernece input successfully! These benchmarks are linked against ptmalloc3 (not part of this project) and the libadaptor.so. Running CPU2000int is just a way to stress test this work. --- Makefile | 69 ++-- block_cache.c | 2 +- chunk.c | 2 +- demo.c | 2 +- lj_mm.h | 26 +- mem_map.c | 82 ++-- page_alloc.c | 16 +- page_alloc.h | 5 + rbtree.c | 19 +- tests/Makefile | 67 ++++ tests/README | 1 + adaptor.c => tests/adaptor.c | 13 +- tests/mymalloc.c | 551 +++++++++++++++++++++++++++ rb_test.cxx => tests/rb_test.cxx | 0 unit_test.cxx => tests/unit_test.cxx | 9 +- util.h | 16 + 16 files changed, 753 insertions(+), 127 deletions(-) create mode 100644 tests/Makefile create mode 100644 tests/README rename adaptor.c => tests/adaptor.c (96%) create mode 100644 tests/mymalloc.c rename rb_test.cxx => tests/rb_test.cxx (100%) rename unit_test.cxx => tests/unit_test.cxx (98%) diff --git a/Makefile b/Makefile index c182d7f..f0be641 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,9 @@ default : all AR_NAME := libljmm.a SO_NAME := libljmm.so +# For testing and benchmarking, see details in adaptor.c +ADAPTOR_SO_NAME := libljmm4adaptor.so + OPT_FLAGS = -O3 -g -march=native -DDEBUG CFLAGS = -DENABLE_TESTING -fvisibility=hidden -MMD -Wall $(OPT_FLAGS) CXXFLAGS = $(CFLAGS) @@ -30,27 +33,23 @@ C_OBJS = ${C_SRCS:%.c=%.o} AR_OBJ = $(addprefix obj/lib/, $(C_OBJS)) SO_OBJ = $(addprefix obj/so/, $(C_OBJS)) +ADAPTOR_SO_OBJ = $(addprefix obj/so/adaptor_, $(C_OBJS)) # Testing targets and Misc # -UNIT_TEST := unit_test -ADAPTOR := libadaptor.so -RBTREE_TEST := rbt_test DEMO_NAME := demo - -UNIT_TEST_SRCS = unit_test.cxx -ADAPTOR_SRCS = adaptor.c -RB_TEST_SRCS = rb_test.cxx DEMO_SRCS = demo.c # Highest level dependency -all: $(AR_NAME) $(SO_NAME) $(RBTREE_TEST) $(DEMO_NAME) $(UNIT_TEST) $(ADAPTOR) +all: $(AR_NAME) $(SO_NAME) $(ADAPTOR_SO_NAME) $(RBTREE_TEST) \ + $(DEMO_NAME) $(UNIT_TEST) -$(RBTREE_TEST) $(DEMO_NAME) $(UNIT_TEST) $(ADAPTOR): $(AR_NAME) $(SO_NAME) +test $(DEMO_NAME): $(AR_NAME) $(SO_NAME) $(SO_4_ADAPTOR_NAME) -include ar_dep.txt -include so_dep.txt --include adaptor_dep.txt +-include adaptor_so_dep.txt +-include demo_dep.txt ##################################################################### # @@ -70,13 +69,20 @@ $(AR_OBJ) : $(BUILD_AR_DIR)/%.o : %.c # Building shared lib # ##################################################################### -$(SO_NAME) : $(SO_OBJ) - $(CC) $(CFLAGS) $(AR_BUILD_CFLAGS) $(SO_OBJ) -shared -o $@ - cat $(BUILD_SO_DIR)/*.d > so_dep.txt - $(SO_OBJ) : $(BUILD_SO_DIR)/%.o : %.c $(CC) -c $(CFLAGS) $(SO_BUILD_CFLAGS) $< -o $@ +$(SO_NAME) : $(SO_OBJ) + $(CC) $(CFLAGS) $(SO_BUILD_CFLAGS) $(SO_OBJ) -shared -o $(SO_NAME) + +$(ADAPTOR_SO_OBJ) : $(BUILD_SO_DIR)/adaptor_%.o : %.c + $(CC) -c $(CFLAGS) $(SO_BUILD_CFLAGS) -DFOR_ADAPTOR $< -o $@ + +$(ADAPTOR_SO_NAME) : $(ADAPTOR_SO_OBJ) + $(CC) $(CFLAGS) $(SO_BUILD_CFLAGS) $(ADAPTOR_SO_OBJ) -DFOR_ADAPTOR\ + -shared -o $@ + cat ${ADAPTOR_SO_OBJ:%.o=%.d} > adaptor_so_dep.txt + ##################################################################### # # Building demo program @@ -84,38 +90,17 @@ $(SO_OBJ) : $(BUILD_SO_DIR)/%.o : %.c ##################################################################### $(DEMO_NAME) : ${DEMO_SRCS:%.c=%.o} $(AR_NAME) $(CC) $(filter %.o, $+) -L. 
-Wl,-static -lljmm -Wl,-Bdynamic -o $@ - -$(UNIT_TEST) : ${UNIT_TEST_SRCS:%.cxx=%.o} $(AR_NAME) - $(CXX) $(filter %.o, $+) -L. -Wl,-static -lljmm -Wl,-Bdynamic -o $@ - -$(RBTREE_TEST) : ${RB_TREE_SRCS:%.c=%.o} ${RB_TEST_SRCS:%.cxx=%.o} - $(CXX) $(filter %.o, $+) -o $@ - + cat ${DEMO_SRCS:%.c=%.d} > demo_dep.txt %.o : %.c $(CC) $(CFLAGS) -c $< %.o : %.cxx $(CXX) $(CXXFLAGS) -c $< -##################################################################### -# -# Building testing/benchmark stuff -# -##################################################################### -test : $(RBTREE_TEST) $(UNIT_TEST) - @echo "RB-tree unit testing" - ./$(RBTREE_TEST) - @echo "" - @echo "Memory management unit testing" - ./$(UNIT_TEST) - -${ADAPTOR_SRCS:%.c=%.o} : %.o : %.c - $(CC) $(CFLAGS) -fvisibility=default -MMD -Wall -fPIC -I. -c $< - -$(ADAPTOR) : ${ADAPTOR_SRCS:%.c=%.o} - $(CC) $(CFLAGS) -fvisibility=default -shared $(filter %.o, $+) -L. -lljmm -ldl -o $@ - cat ${ADAPTOR_SRCS:%.c=%.d} > adaptor_dep.txt - clean: - rm -f *.o *.d *_dep.txt $(BUILD_AR_DIR)/* $(BUILD_SO_DIR)/* - rm -f $(AR_NAME) $(SO_NAME) $(RBTREE_TEST) $(DEMO_NAME) $(ADAPTOR) + rm -f *.o *.d *_dep.txt $(BUILD_AR_DIR)/*.[do] $(BUILD_SO_DIR)/*.[od] + rm -f $(AR_NAME) $(SO_NAME) $(DEMO_NAME) + make -C tests clean + +test: + make all -C tests diff --git a/block_cache.c b/block_cache.c index 55de9ff..8cb958c 100644 --- a/block_cache.c +++ b/block_cache.c @@ -159,7 +159,7 @@ bc_init(void) { if (unlikely(!enable_blk_cache)) return 0; - if (!(blk_cache = (block_cache_t*)malloc(sizeof(block_cache_t)))) + if (!(blk_cache = (block_cache_t*)MYMALLOC(sizeof(block_cache_t)))) return 0; blk_cache->blks = rbt_create(); diff --git a/chunk.c b/chunk.c index d4c298d..bf41bec 100644 --- a/chunk.c +++ b/chunk.c @@ -45,7 +45,7 @@ lm_alloc_chunk (void) { mmap((void*)cur_brk, avail, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_32BIT | MAP_ANONYMOUS, -1, 0); - if (!chunk) + if (chunk == (uintptr_t)MAP_FAILED) return NULL; /* If the program linked to this lib generates code-dump, do not dump those diff --git a/demo.c b/demo.c index 1b99cdf..e9dd3e3 100644 --- a/demo.c +++ b/demo.c @@ -12,7 +12,7 @@ mmap_wrap(size_t len) { int main(int argc, char** argv) { #if defined(DEBUG) - lm_init(1); + lm_init(); dump_page_alloc(stderr); int size1 = 100; diff --git a/lj_mm.h b/lj_mm.h index dcd6264..8b376b2 100644 --- a/lj_mm.h +++ b/lj_mm.h @@ -35,20 +35,22 @@ lm_init_mm_opt(lj_mm_opt_t* opt) { * conflicting with applications being benchmarked. */ -#define lm_init ljmm_init -#define lm_init2 ljmm_init2 -#define lm_fini ljmm_fini -#define lm_mmap ljmm_mmap -#define lm_munmap ljmm_munmap -#define lm_mremap ljmm_mremap -#define lm_malloc ljmm_malloc -#define lm_free ljmm_free +#define lm_init ljmm_init +#define lm_init2 ljmm_init2 +#define lm_fini ljmm_fini +#define lm_mmap ljmm_mmap +#define lm_munmap ljmm_munmap +#define lm_mremap ljmm_mremap +#define lm_malloc ljmm_malloc +#define lm_free ljmm_free +#define lm_get_status ljmm_get_status +#define lm_free_status ljmm_free_status /* Inititalize the memory-management system. If auto_fini is set * (i.e. auto_fini != 0), there is no need to call lm_fini() at exit. 
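 * NOTE: as of this patch, the auto_fini parameter is gone from lm_init()/lm_init2();
 * cleanup is handled by a destructor (lm_fini2() in mem_map.c), so an explicit
 * lm_fini() call at exit is optional.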
*/ -int lm_init(int auto_fini) LJMM_EXPORT; -int lm_init2(int auto_fini, lj_mm_opt_t*) LJMM_EXPORT; +int lm_init(void) LJMM_EXPORT; +int lm_init2(lj_mm_opt_t*) LJMM_EXPORT; void lm_fini(void) LJMM_EXPORT; /* Same prototype as mmap(2), and munmap(2) */ @@ -79,8 +81,8 @@ typedef struct { block_info_t* alloc_blk_info; } lm_status_t; -const lm_status_t* lm_get_status(void); -void lm_free_status(lm_status_t*); +const lm_status_t* lm_get_status(void) LJMM_EXPORT; +void lm_free_status(lm_status_t*) LJMM_EXPORT; #ifdef DEBUG void dump_page_alloc(FILE*) LJMM_EXPORT; diff --git a/mem_map.c b/mem_map.c index ba864ce..a246045 100644 --- a/mem_map.c +++ b/mem_map.c @@ -21,7 +21,7 @@ void* lm_malloc(size_t sz) { errno = 0; if (!alloc_info) { - lm_init(1); + lm_init(); if (!alloc_info) return NULL; } @@ -432,66 +432,60 @@ lm_mmap(void *addr, size_t length, int prot, int flags, * ***************************************************************************** */ +static int finalized = 0; +/* "ignore_alloc_blk != 0": to unmap allocated chunk even if there are some + * allocated blocks not yet released. + */ +static inline void +fini_helper(int ignore_alloc_blk) { + if (finalized) + return; + + int no_alloc_blk = no_alloc_blocks(); + lm_fini_page_alloc(); + + if (no_alloc_blk || ignore_alloc_blk) + lm_free_chunk(); + + finalized = 1; +} + void lm_fini(void) { ENTER_MUTEX; - - lm_fini_page_alloc(); - lm_free_chunk(); - + fini_helper(1); LEAVE_MUTEX; } -/* The purpose of this variable is to workaround a link problem: If we were - * directly feeding lm_fini to atexit() in function lm_init(), we would be - * going to see a complaint like this: - * - * "...relocation R_X86_64_PC32 against protected symbol `lm_fini' can not - * be used when making a shared object"... - * - * I think it's perfectly fine using R_X86_64_PC32 as a relocation for - * the protected symbol lm_fini. It seems like it's GNU ld (I'm using 2.24) - * problem. Actually gold linker is able to link successfully. - * - * NOTE: This variable must be visible to other modules, otherwise, with - * higher optimization level, compiler can propagate its initial value (i.e. - * the lm_fini) to where it's referenced. - */ -void (*lm_fini_ptr)() __attribute__((visibility("protected"))) = lm_fini; - -static inline int -lm_init_helper(int auto_fini, lj_mm_opt_t* mm_opt) { - int res = 1; - if (auto_fini != 0) { - /* Do not directly feed lm_fini to atexit(), see the comment to - * variable "lm_fini_ptr" for why. - */ - res = atexit(lm_fini_ptr); - - /* Negate the sense of 'success' :-) */ - res = (res == 0) ? 1 : 0; - } - - if (res) - res = lm_init_page_alloc(lm_alloc_chunk(), mm_opt); - - return res; +__attribute__((destructor)) +static void +lm_fini2(void) { + /* It is unsafe to unmap the chunk as we are not sure if they are still alive. + * We don't need to worry about luajit. However, when we stress-test this lib + * with real-world applications, we find there are memory leakage, and at the + * time lm_fini2() is called, these allocated blocks are still alive (will be + * referenced by exit-handlers. + */ + fini_helper(0); } /* Initialize the allocation, return non-zero on success, 0 otherwise. 
*/ int -lm_init(int auto_fini) { +lm_init(void) { + int res; ENTER_MUTEX; - int res = lm_init_helper(auto_fini, NULL); + res = lm_init_page_alloc(lm_alloc_chunk(), NULL); + finalized = 0; LEAVE_MUTEX; - return res; } int -lm_init2(int auto_fini, lj_mm_opt_t* mm_opt) { +lm_init2(lj_mm_opt_t* mm_opt) { + int res; ENTER_MUTEX; - int res = lm_init_helper(auto_fini, mm_opt); + res = lm_init_page_alloc(lm_alloc_chunk(), mm_opt); + finalized = 0; LEAVE_MUTEX; return res; } diff --git a/page_alloc.c b/page_alloc.c index c705f98..de9a0da 100644 --- a/page_alloc.c +++ b/page_alloc.c @@ -42,7 +42,7 @@ lm_init_page_alloc(lm_chunk_t* chunk, lj_mm_opt_t* mm_opt) { int alloc_sz = sizeof(lm_alloc_t) + sizeof(lm_page_t) * (page_num + 1); - alloc_info = (lm_alloc_t*) malloc(alloc_sz); + alloc_info = (lm_alloc_t*) MYMALLOC(alloc_sz); if (!alloc_info) { errno = ENOMEM; return 0; @@ -125,7 +125,7 @@ lm_fini_page_alloc(void) { rbt_fini(&alloc_info->alloc_blks); - free(alloc_info); + MYFREE(alloc_info); alloc_info = 0; } @@ -240,7 +240,7 @@ lm_get_status(void) { if (!alloc_info) return NULL; - lm_status_t* s = (lm_status_t *)malloc(sizeof(lm_status_t)); + lm_status_t* s = (lm_status_t *)MYMALLOC(sizeof(lm_status_t)); s->first_page = alloc_info->first_page; s->page_num = alloc_info->page_num; s->idx_to_id = alloc_info->idx_2_id_adj; @@ -254,7 +254,7 @@ lm_get_status(void) { /* Populate allocated block info */ if (alloc_blk_num) { block_info_t* ai; - ai = (block_info_t*)malloc(sizeof(block_info_t) * alloc_blk_num); + ai = (block_info_t*)MYMALLOC(sizeof(block_info_t) * alloc_blk_num); rb_iter_t iter, iter_e; int idx = 0; @@ -280,7 +280,7 @@ lm_get_status(void) { } if (free_blk_num) { block_info_t* fi; - fi = (block_info_t*)malloc(sizeof(block_info_t) * free_blk_num); + fi = (block_info_t*)MYMALLOC(sizeof(block_info_t) * free_blk_num); int idx = 0; int page_size_log2 = alloc_info->page_size_log2; @@ -313,12 +313,12 @@ lm_free_status(lm_status_t* status) { return; if (status->free_blk_info) - free(status->free_blk_info); + MYFREE(status->free_blk_info); if (status->alloc_blk_info) - free(status->alloc_blk_info); + MYFREE(status->alloc_blk_info); - free(status); + MYFREE(status); } #ifdef DEBUG diff --git a/page_alloc.h b/page_alloc.h index a1a3e17..a9c8b9d 100644 --- a/page_alloc.h +++ b/page_alloc.h @@ -174,6 +174,11 @@ remove_alloc_block(page_idx_t block) { return res; } +static inline int +no_alloc_blocks(void) { + return rbt_is_empty(&alloc_info->alloc_blks); +} + static inline void migrade_alloc_block(page_idx_t block, int ord_was, int ord_is, size_t new_map_sz) { rb_tree_t* rbt = &alloc_info->alloc_blks; diff --git a/rbtree.c b/rbtree.c index 977f05e..d8d9c9b 100644 --- a/rbtree.c +++ b/rbtree.c @@ -2,6 +2,7 @@ #include #include "rbtree.h" +#include "util.h" #define INVALID_IDX (-1) #define SENTINEL_IDX 0 @@ -19,7 +20,7 @@ int rbt_init(rb_tree_t* rbt) { rbt->capacity = 16; - rbt->tree = (rb_node_t*)malloc(rbt->capacity * sizeof(rb_node_t)); + rbt->tree = (rb_node_t*)MYMALLOC(rbt->capacity * sizeof(rb_node_t)); if (rbt->tree == 0) return 0; @@ -37,7 +38,7 @@ rbt_init(rb_tree_t* rbt) { void rbt_fini(rb_tree_t* rbt) { if (rbt && rbt->tree) { - free((void*)rbt->tree); + MYFREE((void*)rbt->tree); rbt->tree = 0; rbt->capacity = rbt->node_num = 0; rbt->root = INVALID_IDX; @@ -46,7 +47,7 @@ rbt_fini(rb_tree_t* rbt) { rb_tree_t* rbt_create(void) { - rb_tree_t* rbt = (rb_tree_t*)malloc(sizeof(rb_tree_t)); + rb_tree_t* rbt = (rb_tree_t*)MYMALLOC(sizeof(rb_tree_t)); if (rbt && rbt_init(rbt)) return rbt; @@ -57,7 +58,7 @@ 
void rbt_destroy(rb_tree_t* rbt) { if (rbt) { rbt_fini(rbt); - free((void*)rbt); + MYFREE((void*)rbt); } } @@ -159,7 +160,7 @@ rbt_try_shrink(rb_tree_t* rbt) { return 1; int cap = rbt->node_num * 3 / 2; - rbt->tree = (rb_node_t*)realloc(rbt->tree, cap * sizeof(rb_node_t)); + rbt->tree = (rb_node_t*)MYREALLOC(rbt->tree, cap * sizeof(rb_node_t)); if (rbt->tree == 0) return 0; @@ -212,7 +213,7 @@ bst_insert(rb_tree_t* t, int key, intptr_t value) { if (cap <= 16) cap = 16; - nodes = t->tree = (rb_node_t*)realloc(nodes, 100* sizeof(rb_node_t)); + nodes = t->tree = (rb_node_t*)MYREALLOC(nodes, cap * sizeof(rb_node_t)); t->capacity = cap; if (!nodes) @@ -705,7 +706,7 @@ rbt_verify(rb_tree_t* rbt) { int node_num = rbt->node_num; rb_node_t* nd_vect = rbt->tree; - int* cnt = (int*)malloc(sizeof(int) * node_num); + int* cnt = (int*)MYMALLOC(sizeof(int) * node_num); int i; for (i = 0; i < node_num; i++) cnt[i] = 0; @@ -745,12 +746,12 @@ rbt_verify(rb_tree_t* rbt) { /* we either have multiple roots, or rbt->root is not set * properly. */ - free(cnt); + MYFREE(cnt); return 0; } } } - free(cnt); + MYFREE(cnt); cnt = 0; if (root_cnt != 1) { if (root_cnt == 0 && node_num != 1) diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..0946918 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,67 @@ +.PHONY = default all clean + +default : all + +OPT_FLAGS := -O3 -g -march=native -DENABLE_TESTING #-DDEBUG +CFLAGS := -I.. -fvisibility=hidden -MMD -Wall $(OPT_FLAGS) +CXXFLAGS = $(CFLAGS) + +CC = gcc +CXX = g++ + +# Targets to be built. +UNIT_TEST := unit_test +ADAPTOR := libadaptor.so +RBTREE_TEST := rbt_test +MYMALLOC := libmymalloc.so + +# Source codes +UNIT_TEST_SRCS = unit_test.cxx +ADAPTOR_SRCS = adaptor.c mymalloc.c +RB_TEST_SRCS = rb_test.cxx +MYMALLOC_SRCS = mymalloc.c + +-include adaptor_dep.txt +-include mymalloc_dep.txt + + +all : $(UNIT_TEST) $(ADAPTOR) $(RBTREE_TEST) $(MYMALLOC) + ./$(RBTREE_TEST) + ./$(UNIT_TEST) + +# Building unit-test +${UNIT_TEST_SRCS:%.cxx=%.o} : %.o : %.cxx + $(CXX) $(CXXFLAGS) $< -c + +$(UNIT_TEST) : ${UNIT_TEST_SRCS:%.cxx=%.o} ../libljmm.so + $(CXX) $(filter %.o, $^) -Wl,-rpath=.. -L.. -lljmm -o $@ + +# Building libadpator.so. +# +${ADAPTOR_SRCS:%.c=%.o} : %.o : %.c + $(CC) $(CFLAGS) -fvisibility=default -DFOR_ADAPTOR -fPIC -I.. -I. -c $< + +$(ADAPTOR) : ${ADAPTOR_SRCS:%.c=%.o} ../libljmm4adaptor.so + ln -fs ../libljmm4adaptor.so . + $(CC) $(CFLAGS) -fvisibility=default -shared $(filter %.o, $^) -L.. -lljmm4adaptor \ + -Wl,-rpath=.. -o $@ + cat ${ADAPTOR_SRCS:%.c=%.d} > adaptor_dep.txt + +# Building RB-tree unit-test + +${RB_TEST_SRCS:%.cxx=%.o} : %.o : %.cxx + $(CXX) $(CXXFLAGS) -I.. $< -c + +${RBTREE_TEST} : ${RB_TEST_SRCS:%.cxx=%.o} ../rbtree.o + $(CXX) $(CXXFLAGS) $^ -o $@ + +# Building mymalloc.so +${MYMALLOC_SRCS:%.c=my_%.o} : my_%.o : %.c + $(CC) $(CFLAGS) -fvisibility=default -fPIC -c $< -o $@ + +$(MYMALLOC) : ${MYMALLOC_SRCS:%.c=my_%.o} + $(CC) $+ $(CFLAGS) -fvisibility=default -shared -o $@ + cat ${MYMALLOC_SRCS:%.c=%.d} > mymalloc_dep.txt + +clean: + rm -rf *.o *.d *_dep.txt $(UNIT_TEST) $(ADAPTOR) $(RBTREE_TEST) $(MYMALLOC) *.so diff --git a/tests/README b/tests/README new file mode 100644 index 0000000..c3b097b --- /dev/null +++ b/tests/README @@ -0,0 +1 @@ +TODO: Describe the testing methordology, which is bit involved. diff --git a/adaptor.c b/tests/adaptor.c similarity index 96% rename from adaptor.c rename to tests/adaptor.c index db8cda0..698e233 100644 --- a/adaptor.c +++ b/tests/adaptor.c @@ -57,7 +57,8 @@ * * 3. 
build the ptmalloc3 with: * make -C src/dir linux-shared \ - * OPT_FLAGS='-O3 -march=native -pthread -Wl,--wrap=mmap -Wl,--wrap=munmap -Wl,--wrap=mremap' + * OPT_FLAGS='-O3 -march=native -pthread -Wl,--wrap=mmap \ + * -Wl,--wrap=munmap -Wl,--wrap=mremap' * * it will successfuly build libptmalloc3.so, but fail to build t-test1, * which we don't need. The failure is due to undefined symbol of _wrap_xxx() @@ -68,7 +69,8 @@ * the libptmalloc3.so, and will be automatically loaded by the system * dynamic loader. * - * o. the -Wl,--wrap=xxx is to let linker the replace symbol xxx with __wrap_xxx. + * o. the -Wl,--wrap=xxx is to let linker the replace symbol xxx with + * __wrap_xxx. * * 4. set LD_LIBRARY_PATH properly to include the path to libljmm.so. * @@ -78,7 +80,8 @@ * * Miscellaneous * ------------- - * Some functionalities can be turned on/off via following environment variables. + * Some functionalities can be turned on/off via following environment + * variables: * - ENABLE_LJMM = {0|1} * - ENABLE_LJMM_TRACE = {0|1} */ @@ -112,7 +115,7 @@ static int init_done = 0; static int __attribute__((noinline)) init_adaptor(void) { const char* func_name = __FUNCTION__; - if (!lm_init(1)) { + if (!lm_init()) { fprintf(stderr, "%s: fail to call lm_init()\n", func_name); return 0; } @@ -152,7 +155,6 @@ __wrap_mmap64(void *addr, size_t length, int prot, int flags, const char* func = __FUNCTION__; void* blk = NULL; - fprintf(stderr, "init_done = %d, addr = %p, flags = %d\n", init_done, addr, flags); if (init_done && !addr && (flags & (MAP_ANONYMOUS|MAP_ANON))) { blk = lm_mmap(addr, length, prot, flags|MAP_32BIT, fd, offset); if (unlikely(enable_trace)) { @@ -204,6 +206,7 @@ __wrap_munmap(void *addr, size_t length) { void* __wrap_mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) { + fprintf(stderr, "WTF!, remap is called\n"); if (!init_done || old_addr > (void*)LJMM_AS_UPBOUND) { void* p = NULL; if (!(flags & MREMAP_FIXED)) { diff --git a/tests/mymalloc.c b/tests/mymalloc.c new file mode 100644 index 0000000..0970cc5 --- /dev/null +++ b/tests/mymalloc.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include +#include /* for bzero() */ +#include /* for memcpy() */ + +#ifdef DEBUG + // Usage examples: ASSERT(a > b), ASSERT(foo() && "Opps, foo() reutrn 0"); + #define ASSERT(c) if (!(c))\ + { fprintf(stderr, "%s:%d Assert: %s\n", __FILE__, __LINE__, #c); abort(); } +#else + #define ASSERT(c) ((void)0) +#endif + +#ifndef FOR_ADAPTOR + #define MYMALLOC __wrap_malloc + #define MYFREE __wrap_free + #define MYCALLOC __wrap_calloc + #define MYREALLOC __wrap_realloc +#else + #define MYMALLOC __adaptor_malloc + #define MYFREE __adaptor_free + #define MYCALLOC __adaptor_calloc + #define MYREALLOC __adaptor_realloc +#endif + +#define MYMALLOC_EXPORT __attribute__ ((visibility ("default"))) +void* MYMALLOC(size_t) MYMALLOC_EXPORT; +void MYFREE(void*) MYMALLOC_EXPORT; +void* MYCALLOC(size_t, size_t) MYMALLOC_EXPORT; +void* MYREALLOC(void*, size_t) MYMALLOC_EXPORT; + +typedef int v4si __attribute__ ((vector_size (16))); + +static inline int +log2_int32(unsigned num) { + return 31 - __builtin_clz(num); +} + +static inline int +ceil_log2_int32 (unsigned num) { + int res = 31 - __builtin_clz(num); + res += (num & (num - 1)) ? 
1 : 0; + return res; +} + +#define ENABLE_TRACE 0 + +#define EXT_SZ (4096 * 2) +#define MIN_ORDER 5 +#define MAX_ORDER 31 +#define BIN_NUM (MAX_ORDER - MIN_ORDER + 1) +#define CHUNK_ALIGN (__alignof__(v4si)) + +typedef struct my_malloc_chunk my_chunk_t; +struct my_malloc_chunk { + unsigned prev_size; + unsigned this_size; +#ifdef DEBUG + int magic_number; +#endif + union { + struct { + my_chunk_t* prev_free; + my_chunk_t* next_free; + }; + v4si align_data; + }; +}; + +#ifdef DEBUG +#define MAGIC_NUM 0x5a5a5a + #define SET_MAGCI_NUM(c) {(c)->magic_number = MAGIC_NUM; } + #define VERIFY_MAGIC_NUM(c) ASSERT((c)->magic_number == MAGIC_NUM) +#else + #define SET_MAGCI_NUM(c) ((void)0) + #define VERIFY_MAGIC_NUM(c) ((void)0) +#endif + +#define IS_CHUNK_FREE(c) ((c)->this_size & 1) +#define SET_CHUNK_FREE(c) {(c)->this_size |= 1;} +#define RESET_CHUNK_FREE(c) {(c)->this_size &= ~1;} + +#define IS_CHUNK_MMAP(c) ((c)->this_size & 2) +#define SET_CHUNK_MMAP(c) {(c)->this_size |= 2;} +#define RESET_CHUNK_MMAP(c) {(c)->this_size &= ~2;} + +#define IS_LAST_CHUNK(c) ((c)->this_size & 4) +#define SET_LAST_CHUNK(c) {(c)->this_size |= 4; } +#define RESET_LAST_CHUNK(c) {(c)->this_size &= ~4; } + +#define CHUNK_SIZE(c) ((c)->this_size & ~7) +#define SET_CHUNK_SIZE(c, s) { typeof(c) t = (c);\ + t->this_size = (t->this_size & 7) + s; } + +#define offsetof(st, m) ((size_t)(&((st *)0)->m)) +#define CHUNK_OVERHEAD offsetof(my_chunk_t, align_data) + +typedef struct { + my_chunk_t list; + int min_size; +} bin_t; + +typedef struct my_malloc_info my_malloc_info_t; +struct my_malloc_info { + int initialized; + bin_t bins[BIN_NUM]; +}; + +static my_malloc_info_t malloc_info; + +#define MMAP_THRESHOLD (EXT_SZ - sizeof(malloc_info) - CHUNK_ALIGN) + +/* Return cur's previous adjacent chunk. If the chunk dose not have previous + * adjacent chunk, chunk itself is returned. 
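+ * (As implemented below, NULL is returned when there is no previous adjacent chunk.)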
+ */ +static inline my_chunk_t* +get_prev_adj_chunk(my_chunk_t* cur) { + if (cur->prev_size != 0) { + char* p = ((char*)(void*)cur) - cur->prev_size; + return (my_chunk_t*)(void*)p; + } + return NULL; +} + +static inline my_chunk_t* +get_next_adj_chunk(my_chunk_t* chunk) { + if (!IS_LAST_CHUNK(chunk)) + return (my_chunk_t*)(void*)(CHUNK_SIZE(chunk) + (void*)chunk); + return NULL; +} + +static inline int +is_bin_empty(bin_t* bin) { + my_chunk_t* list = &bin->list; + return list->next_free == list && list->prev_free == list; +} + +/* Return the min bin index, such that all chunk c in that bin have + * CHUNK_SIZE(c) > bin->min_size + */ +static inline int +get_bin_idx_for_chunk(int chunk_size) { + int idx = log2_int32(chunk_size) - MIN_ORDER; + ASSERT(idx >= 0); + if (idx >= BIN_NUM) + idx = BIN_NUM - 1; + + return idx; +} + +/* Return the min bin index, such that all chunk c in the bin have + * CHUNK_SIZE(c) > alloc_sz + */ +static inline int +get_bin_idx_for_alloc(int alloc_sz) { + int idx = ceil_log2_int32(alloc_sz) - MIN_ORDER; + ASSERT(idx >= 0); + if (idx >= BIN_NUM) + idx = BIN_NUM - 1; + + return idx; +} + +static inline void +append_to_bin(bin_t* bin, my_chunk_t* chunk) { + ASSERT((CHUNK_SIZE(chunk) & (CHUNK_ALIGN - 1)) == 0 && + (CHUNK_SIZE(chunk) >= bin->min_size)); + + my_chunk_t* insert_after = bin->list.prev_free; + my_chunk_t* insert_before = &bin->list; + + chunk->prev_free = insert_after; + chunk->next_free = insert_before; + + insert_before->prev_free = chunk; + insert_after->next_free = chunk; +} + +static inline my_chunk_t* +pop_from_bin(bin_t* bin) { + my_chunk_t* first = bin->list.next_free; + + if (first != &bin->list) { + my_chunk_t* before_1st = first->prev_free; + my_chunk_t* after_1st = first->next_free; + before_1st->next_free = after_1st; + after_1st->prev_free = before_1st; + first->prev_free = first->next_free = NULL; + return first; + } + + return NULL; +} + +static inline void +append_free_chunk(my_chunk_t* chunk) { + ASSERT(IS_CHUNK_FREE(chunk)); + /* look for the right bin for this chunk */ + int chunk_size = CHUNK_SIZE(chunk); + int bin_idx = get_bin_idx_for_chunk(chunk_size); + append_to_bin(malloc_info.bins + bin_idx, chunk); +} + +/* Remove the free chunk from bin */ +static inline void +remove_free_chunk(my_chunk_t* chunk) { + ASSERT(IS_CHUNK_FREE(chunk)); +#ifdef DEBUG + { + int chunk_size = CHUNK_SIZE(chunk); + bin_t* bin = malloc_info.bins + get_bin_idx_for_chunk(chunk_size); + int found = 0; + my_chunk_t* iter, *iter_e = &bin->list; + for (iter = bin->list.next_free; + iter != iter_e; iter = iter->next_free) { + if (iter == chunk) { + found = 1; break; + } + } + ASSERT(found); + } +#endif + + my_chunk_t* prev = chunk->prev_free; + my_chunk_t* next = chunk->next_free; + prev->next_free = next; + next->prev_free = prev; + + chunk->prev_free = chunk->next_free = NULL; +} + +static void +malloc_init(void) { + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + my_chunk_t* list = &bin->list; + list->prev_free = list->next_free = list; + bin->min_size = 1 << (i + MIN_ORDER); + } + malloc_info.initialized = 1; +} + +/* Split the given chunk into two at the specified splitting point, return + * the second one. 
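+ * split_point is a byte offset from the start of the chunk and must be
+ * CHUNK_ALIGN-aligned; the first chunk keeps split_point bytes and the
+ * second chunk receives the remainder.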
+ */ +static my_chunk_t* +split_chunk(my_chunk_t* chunk, int split_point) { + ASSERT((split_point & (CHUNK_ALIGN - 1)) == 0); + ASSERT(split_point + CHUNK_OVERHEAD <= CHUNK_SIZE(chunk)); + + int chunk_sz = CHUNK_SIZE(chunk); + int chunk2_sz = chunk_sz - split_point; + ASSERT(chunk2_sz >= sizeof(my_chunk_t)); + + my_chunk_t* chunk2; + chunk2 = (my_chunk_t*)(void*)(split_point + (char*)(void*)chunk); + chunk2->prev_size = chunk_sz - chunk2_sz; + SET_CHUNK_SIZE(chunk2, chunk2_sz); + SET_CHUNK_SIZE(chunk, split_point); + + /* Only the 1st chunk is marked with mapped*/ + RESET_CHUNK_MMAP(chunk2); + SET_MAGCI_NUM(chunk2); + + if (!IS_LAST_CHUNK(chunk)) { + my_chunk_t* follow; + follow = (my_chunk_t*)(void*)(chunk_sz + (char*)(void*)chunk); + follow->prev_size = chunk2_sz; + RESET_LAST_CHUNK(chunk2); + } else { + RESET_LAST_CHUNK(chunk); + SET_LAST_CHUNK(chunk2); + } + + if (IS_CHUNK_FREE(chunk)) + SET_CHUNK_FREE(chunk2); + + return chunk2; +} + +static my_chunk_t* +find_big_enough_chunk(size_t alloc_size, int* bin_idx) { + int bin_idx_tmp = get_bin_idx_for_alloc(alloc_size); + int i; + for (i = bin_idx_tmp; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + break; + } + + *bin_idx = i; + if (i < BIN_NUM - 1) + return pop_from_bin(malloc_info.bins + i); + + if (i == BIN_NUM -1) { + my_chunk_t* iter, *iter_e; + for (iter = malloc_info.bins[i].list.next_free, + iter_e = malloc_info.bins[i].list.prev_free; + iter != iter_e; iter = iter->next_free) { + if (CHUNK_SIZE(iter) >= alloc_size) { + remove_free_chunk(iter); + return iter; + } + } + } + + return NULL; +} + +/* The alloc_sz already take into account the chunk-overhead, and is + * properly aligned. + * + * NOTE: before calling this function, chunk should already be removed from bin. + */ +static void* +malloc_helper(my_chunk_t* chunk, size_t alloc_sz) { + RESET_CHUNK_FREE(chunk); + + /* Try to split the chunk. */ + unsigned chunk_size = CHUNK_SIZE(chunk); + ASSERT(((alloc_sz & (CHUNK_ALIGN - 1)) == 0) && chunk_size >= alloc_sz); + + unsigned remain_sz = chunk_size - alloc_sz; + if (remain_sz > sizeof(my_chunk_t)) { + my_chunk_t* split = split_chunk(chunk, alloc_sz); + SET_CHUNK_FREE(split); + append_free_chunk(split); + } + + return CHUNK_OVERHEAD + ((char*)(void*)chunk); +} + +void* +MYMALLOC(size_t size) { + if (ENABLE_TRACE) + fprintf(stderr, "\nmalloc(%lu)\n", size); + + if (!malloc_info.initialized) + malloc_init(); + + size_t norm_size = + (size + CHUNK_OVERHEAD + CHUNK_ALIGN - 1) & ~(CHUNK_ALIGN - 1); + + void* result = NULL; + + int bin_idx; + my_chunk_t* chunk = find_big_enough_chunk(norm_size, &bin_idx); + if (chunk) { + result = malloc_helper(chunk, norm_size); + goto malloc_exit; + } + + /* case 2: no free chunk big enough. 
Create one via mmap() */ + size_t mmap_sz = EXT_SZ; + if (mmap_sz < norm_size) + mmap_sz = norm_size; + + long page_sz = sysconf(_SC_PAGESIZE); + mmap_sz = (mmap_sz + page_sz - 1) & ~(page_sz - 1); + result = mmap(NULL, mmap_sz, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ENABLE_TRACE) + fprintf(stderr, " > %p = mmap(%ld)\n", result, mmap_sz); + + if (result == MAP_FAILED) + goto malloc_exit; + + chunk = (my_chunk_t*)result; + chunk->prev_size = 0; + chunk->this_size = mmap_sz; + SET_LAST_CHUNK(chunk); + SET_CHUNK_MMAP(chunk); + SET_CHUNK_FREE(chunk); + SET_MAGCI_NUM(chunk); + + result = malloc_helper(chunk, norm_size); + +malloc_exit: + if (ENABLE_TRACE) + fprintf(stderr, "%p = malloc(%ld)\n", result, size); + + return result; +} + +void +MYFREE(void* ptr) { + if (ENABLE_TRACE) + fprintf(stderr, "\nfree(%p)\n", ptr); + + my_chunk_t* chunk = (my_chunk_t*)(void*)(((char*)ptr) - CHUNK_OVERHEAD); + ASSERT(!IS_CHUNK_FREE(chunk)); + VERIFY_MAGIC_NUM(chunk); + + while (1) { + my_chunk_t* prev_adj = get_prev_adj_chunk(chunk); + my_chunk_t* next_adj = get_next_adj_chunk(chunk); + int change = 0; + + /* Consolidate with the adjacent following chunk */ + if (next_adj && IS_CHUNK_FREE(next_adj)) { + remove_free_chunk(next_adj); + if (IS_LAST_CHUNK(next_adj)) + SET_LAST_CHUNK(chunk); + + int new_sz = CHUNK_SIZE(chunk) + CHUNK_SIZE(next_adj); + SET_CHUNK_SIZE(chunk, new_sz); + change = 1; + } + + /* Consolidate with the previous adjacent chunk */ + if (prev_adj && IS_CHUNK_FREE(prev_adj)) { + remove_free_chunk(prev_adj); + + if (IS_LAST_CHUNK(chunk)) + SET_LAST_CHUNK(prev_adj); + + int new_sz = CHUNK_SIZE(chunk) + CHUNK_SIZE(prev_adj); + SET_CHUNK_SIZE(prev_adj, new_sz); + chunk = prev_adj; + change = 1; + } + + if (!change) + break; + } + + if (IS_CHUNK_MMAP(chunk) && IS_LAST_CHUNK(chunk)) { + if (ENABLE_TRACE) + fprintf(stderr, " > munmap(%p, %u)\n", chunk, CHUNK_SIZE(chunk)); + munmap((void*)chunk, CHUNK_SIZE(chunk)); + return; + } + SET_CHUNK_FREE(chunk); + if (!IS_LAST_CHUNK(chunk)) { + my_chunk_t* next_adj = get_next_adj_chunk(chunk); + next_adj->prev_size = CHUNK_SIZE(chunk); + } + + append_free_chunk(chunk); +} + +void* +MYREALLOC(void* ptr, size_t size) { + if (ENABLE_TRACE) + fprintf(stderr, "\nrealloc(%p, %lu)\n", ptr, size); + + void* result = ptr; + + /* normalize the size */ + size_t norm_size = (size + CHUNK_ALIGN - 1) & ~(CHUNK_ALIGN - 1); + norm_size += CHUNK_OVERHEAD; + + my_chunk_t* chunk = (my_chunk_t*)(void*)(((char*)ptr) - CHUNK_OVERHEAD); + size_t chunk_sz = CHUNK_SIZE(chunk); + if (norm_size > chunk_sz) { + result = (my_chunk_t*)MYMALLOC(norm_size); + if (result) { + memcpy(result, (void*)&chunk->align_data, + chunk_sz - CHUNK_OVERHEAD); + SET_CHUNK_FREE(chunk); + append_free_chunk(chunk); + } + goto realloc_exit; + } + + if (chunk_sz - norm_size >= sizeof(my_chunk_t)) { + /* shrink the allocated block */ + my_chunk_t* another = split_chunk(chunk, norm_size); + SET_CHUNK_FREE(another); + append_free_chunk(another); + } + +realloc_exit: + if (ENABLE_TRACE) + fprintf(stderr, "%p = realloc(%p, %lu)\n", result, ptr, size); + return result; +} + +void* +MYCALLOC(size_t nmemb, size_t size) { + size_t t = nmemb * size; + void* p = MYMALLOC(t); + if (p) + bzero(p, t); + + return p; +} + +#if 0 +static void +my_malloc_verify(void) { + if (!malloc_info.initialized) + return; + + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + + my_chunk_t* iter, *iter_e; + for (iter = bin->list.next_free, iter_e = 
&bin->list; + iter != iter_e; iter = iter->next_free) { + ASSERT(IS_CHUNK_FREE(iter)); + ASSERT(iter->next_free && iter->prev_free); + } + } +} +#endif + +void +my_malloc_dump(FILE* f) { + if (!malloc_info.initialized) { + return; + } + + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + + fprintf(f, "BIN:%3d, min_size:%d :", i, bin->min_size); + my_chunk_t* iter, *iter_e; + for (iter = bin->list.next_free, iter_e = &bin->list; + iter != iter_e; iter = iter->next_free) { + fprintf(f, "\n\t[chunk %p, size:%d, prev_size:%d, ", + iter, CHUNK_SIZE(iter), iter->prev_size); + + fprintf(f, "prev_free:%p, next_free:%p", + iter->prev_free, iter->next_free); + + if (IS_CHUNK_FREE(iter)) + fprintf(f, ", free"); + + if (IS_CHUNK_MMAP(iter)) + fprintf(f, ", mmap"); + + if (IS_LAST_CHUNK(iter)) + fprintf(f, ", last"); + fprintf(f, "] "); + } + + fprintf(f, "\n"); + } +} diff --git a/rb_test.cxx b/tests/rb_test.cxx similarity index 100% rename from rb_test.cxx rename to tests/rb_test.cxx diff --git a/unit_test.cxx b/tests/unit_test.cxx similarity index 98% rename from unit_test.cxx rename to tests/unit_test.cxx index 1350e51..a5a2bdf 100644 --- a/unit_test.cxx +++ b/tests/unit_test.cxx @@ -96,11 +96,11 @@ UNIT_TEST::UNIT_TEST(int test_id, int page_num) mm_opt.chunk_sz_in_page = _page_num = page_num; mm_opt.enable_block_cache = 0; - _init_succ = lm_init2(0, &mm_opt); + _init_succ = lm_init2(&mm_opt); _test_succ = _init_succ ? true : false; _page_size = sysconf(_SC_PAGESIZE); if (_init_succ) { - const lm_status_t* status = lm_get_status(); + const lm_status_t* status = ljmm_get_status(); _chunk_base = status->first_page; lm_free_status(const_cast(status)); } else { @@ -222,7 +222,7 @@ UNIT_TEST::VerifyStatus(blk_info2_t* alloc_blk_v, int alloc_blk_v_len, if (!_test_succ) return; - const lm_status_t* status = lm_get_status(); + const lm_status_t* status = ljmm_get_status(); if (free_blk_v_len != status->free_blk_num || alloc_blk_v_len != status->alloc_blk_num) { _test_succ = false; @@ -261,6 +261,7 @@ UNIT_TEST::VerifyStatus(blk_info2_t* alloc_blk_v, int alloc_blk_v_len, int main(int argc, char** argv) { fprintf(stdout, "\n>>Mmap unit testing\n"); +#if 0 // test1 // { @@ -287,7 +288,7 @@ main(int argc, char** argv) { ut.VerifyStatus(alloc_blk, ARRAY_SIZE(alloc_blk), free_blk, ARRAY_SIZE(free_blk)); } - +#endif fprintf(stdout, "\n>>Munmap unit testing\n"); // Notation for address. diff --git a/util.h b/util.h index 969acd4..2404b1a 100644 --- a/util.h +++ b/util.h @@ -32,6 +32,22 @@ log2_int32(unsigned num) { return 31 - __builtin_clz(num); } +#ifdef FOR_ADAPTOR + #define MYMALLOC __adaptor_malloc + #define MYFREE __adaptor_free + #define MYCALLOC __adaptor_calloc + #define MYREALLOC __adaptor_realloc + void* MYMALLOC(size_t); + void MYFREE(void*); + void* MYCALLOC(size_t, size_t); + void* MYREALLOC(void*, size_t); +#else + #define MYMALLOC malloc + #define MYFREE free + #define MYCALLOC calloc + #define MYREALLOC realloc +#endif + #ifdef DEBUG // Usage examples: ASSERT(a > b), ASSERT(foo() && "Opps, foo() reutrn 0"); #define ASSERT(c) if (!(c))\
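
The most visible interface change in this patch is that lm_init() and lm_init2() no longer take an auto_fini flag: finalization is now driven by the lm_fini2() destructor in mem_map.c, while lm_fini() remains available for explicit shutdown. Below is a minimal sketch of a caller against the new lj_mm.h interface; the 64 KiB length, the protection/flag bits, and the assumption that lm_mmap() returns NULL on failure are choices made for this illustration, not something the patch specifies.

/* Minimal sketch against the post-patch API: lm_init() takes no arguments,
 * and lm_fini() is optional because lm_fini2() now runs as a destructor.
 * The mapping size, the flags, and the NULL-on-failure check are assumptions
 * of this example. */
#include <stdio.h>
#include <sys/mman.h>
#include "lj_mm.h"

int main(void) {
    if (!lm_init()) {                 /* returns non-zero on success */
        fprintf(stderr, "lm_init() failed\n");
        return 1;
    }

    size_t len = 64 * 1024;
    void* p = lm_mmap(NULL, len, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (!p) {                         /* assumed failure convention */
        lm_fini();
        return 1;
    }

    lm_munmap(p, len);
    lm_fini();                        /* optional: the destructor also finalizes */
    return 0;
}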
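
tests/adaptor.c relies on the GNU ld --wrap mechanism described in its build notes: linking with -Wl,--wrap=mmap makes calls to mmap() resolve to __wrap_mmap(), while __real_mmap() refers to the original function. The fragment below is a generic illustration of that pattern (it must be linked with -Wl,--wrap=mmap); it is not a copy of the adaptor, which, per its comments, first tries lm_mmap() for anonymous requests and only then falls back to the real call.

/* Generic shape of a GNU ld --wrap interposer; link with -Wl,--wrap=mmap.
 * tests/adaptor.c follows this pattern for mmap64/munmap/mremap. */
#include <sys/mman.h>

void* __real_mmap(void* addr, size_t length, int prot, int flags,
                  int fd, off_t offset);

void* __wrap_mmap(void* addr, size_t length, int prot, int flags,
                  int fd, off_t offset) {
    /* An adaptor would try the 32-bit allocator here and fall back to
     * the original mapping on failure; this sketch just forwards the call. */
    return __real_mmap(addr, length, prot, flags, fd, offset);
}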
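
tests/mymalloc.c (the allocator that backs the library's internal MYMALLOC()/MYFREE() calls when everything is built with -DFOR_ADAPTOR) sizes each request by adding the chunk header, rounding up to CHUNK_ALIGN, and then picking a power-of-two bin whose smallest chunk is guaranteed to fit the request. The sketch below simply replays that arithmetic for a single request; the concrete values assume an x86-64 build, where the 16-byte-aligned v4si union puts CHUNK_OVERHEAD at 16 bytes.

/* Replays the size normalization and bin selection from tests/mymalloc.c
 * for one request. CHUNK_OVERHEAD = 16 and CHUNK_ALIGN = 16 are assumed
 * values for an x86-64 build. */
#include <stdio.h>

#define CHUNK_ALIGN    16
#define CHUNK_OVERHEAD 16
#define MIN_ORDER      5
#define MAX_ORDER      31
#define BIN_NUM        (MAX_ORDER - MIN_ORDER + 1)

static int ceil_log2_int32(unsigned num) {
    int res = 31 - __builtin_clz(num);
    return res + ((num & (num - 1)) ? 1 : 0);
}

int main(void) {
    size_t size = 100;   /* example request */
    /* Same rounding as MYMALLOC(): header plus payload, aligned up to 16. */
    size_t norm = (size + CHUNK_OVERHEAD + CHUNK_ALIGN - 1) & ~(size_t)(CHUNK_ALIGN - 1);
    int idx = ceil_log2_int32((unsigned)norm) - MIN_ORDER;
    if (idx >= BIN_NUM)
        idx = BIN_NUM - 1;
    /* malloc(100): norm = 128, ceil_log2(128) = 7, so bin index 2,
     * whose min_size is 1 << (2 + MIN_ORDER) = 128. */
    printf("request %zu -> normalized %zu -> bin %d\n", size, norm, idx);
    return 0;
}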