diff --git a/Makefile b/Makefile index c182d7f..f0be641 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,9 @@ default : all AR_NAME := libljmm.a SO_NAME := libljmm.so +# For testing and benchmarking, see details in adaptor.c +ADAPTOR_SO_NAME := libljmm4adaptor.so + OPT_FLAGS = -O3 -g -march=native -DDEBUG CFLAGS = -DENABLE_TESTING -fvisibility=hidden -MMD -Wall $(OPT_FLAGS) CXXFLAGS = $(CFLAGS) @@ -30,27 +33,23 @@ C_OBJS = ${C_SRCS:%.c=%.o} AR_OBJ = $(addprefix obj/lib/, $(C_OBJS)) SO_OBJ = $(addprefix obj/so/, $(C_OBJS)) +ADAPTOR_SO_OBJ = $(addprefix obj/so/adaptor_, $(C_OBJS)) # Testing targets and Misc # -UNIT_TEST := unit_test -ADAPTOR := libadaptor.so -RBTREE_TEST := rbt_test DEMO_NAME := demo - -UNIT_TEST_SRCS = unit_test.cxx -ADAPTOR_SRCS = adaptor.c -RB_TEST_SRCS = rb_test.cxx DEMO_SRCS = demo.c # Highest level dependency -all: $(AR_NAME) $(SO_NAME) $(RBTREE_TEST) $(DEMO_NAME) $(UNIT_TEST) $(ADAPTOR) +all: $(AR_NAME) $(SO_NAME) $(ADAPTOR_SO_NAME) $(RBTREE_TEST) \ + $(DEMO_NAME) $(UNIT_TEST) -$(RBTREE_TEST) $(DEMO_NAME) $(UNIT_TEST) $(ADAPTOR): $(AR_NAME) $(SO_NAME) +test $(DEMO_NAME): $(AR_NAME) $(SO_NAME) $(SO_4_ADAPTOR_NAME) -include ar_dep.txt -include so_dep.txt --include adaptor_dep.txt +-include adaptor_so_dep.txt +-include demo_dep.txt ##################################################################### # @@ -70,13 +69,20 @@ $(AR_OBJ) : $(BUILD_AR_DIR)/%.o : %.c # Building shared lib # ##################################################################### -$(SO_NAME) : $(SO_OBJ) - $(CC) $(CFLAGS) $(AR_BUILD_CFLAGS) $(SO_OBJ) -shared -o $@ - cat $(BUILD_SO_DIR)/*.d > so_dep.txt - $(SO_OBJ) : $(BUILD_SO_DIR)/%.o : %.c $(CC) -c $(CFLAGS) $(SO_BUILD_CFLAGS) $< -o $@ +$(SO_NAME) : $(SO_OBJ) + $(CC) $(CFLAGS) $(SO_BUILD_CFLAGS) $(SO_OBJ) -shared -o $(SO_NAME) + +$(ADAPTOR_SO_OBJ) : $(BUILD_SO_DIR)/adaptor_%.o : %.c + $(CC) -c $(CFLAGS) $(SO_BUILD_CFLAGS) -DFOR_ADAPTOR $< -o $@ + +$(ADAPTOR_SO_NAME) : $(ADAPTOR_SO_OBJ) + $(CC) $(CFLAGS) $(SO_BUILD_CFLAGS) $(ADAPTOR_SO_OBJ) -DFOR_ADAPTOR\ + -shared -o $@ + cat ${ADAPTOR_SO_OBJ:%.o=%.d} > adaptor_so_dep.txt + ##################################################################### # # Building demo program @@ -84,38 +90,17 @@ $(SO_OBJ) : $(BUILD_SO_DIR)/%.o : %.c ##################################################################### $(DEMO_NAME) : ${DEMO_SRCS:%.c=%.o} $(AR_NAME) $(CC) $(filter %.o, $+) -L. -Wl,-static -lljmm -Wl,-Bdynamic -o $@ - -$(UNIT_TEST) : ${UNIT_TEST_SRCS:%.cxx=%.o} $(AR_NAME) - $(CXX) $(filter %.o, $+) -L. -Wl,-static -lljmm -Wl,-Bdynamic -o $@ - -$(RBTREE_TEST) : ${RB_TREE_SRCS:%.c=%.o} ${RB_TEST_SRCS:%.cxx=%.o} - $(CXX) $(filter %.o, $+) -o $@ - + cat ${DEMO_SRCS:%.c=%.d} > demo_dep.txt %.o : %.c $(CC) $(CFLAGS) -c $< %.o : %.cxx $(CXX) $(CXXFLAGS) -c $< -##################################################################### -# -# Building testing/benchmark stuff -# -##################################################################### -test : $(RBTREE_TEST) $(UNIT_TEST) - @echo "RB-tree unit testing" - ./$(RBTREE_TEST) - @echo "" - @echo "Memory management unit testing" - ./$(UNIT_TEST) - -${ADAPTOR_SRCS:%.c=%.o} : %.o : %.c - $(CC) $(CFLAGS) -fvisibility=default -MMD -Wall -fPIC -I. -c $< - -$(ADAPTOR) : ${ADAPTOR_SRCS:%.c=%.o} - $(CC) $(CFLAGS) -fvisibility=default -shared $(filter %.o, $+) -L. -lljmm -ldl -o $@ - cat ${ADAPTOR_SRCS:%.c=%.d} > adaptor_dep.txt - clean: - rm -f *.o *.d *_dep.txt $(BUILD_AR_DIR)/* $(BUILD_SO_DIR)/* - rm -f $(AR_NAME) $(SO_NAME) $(RBTREE_TEST) $(DEMO_NAME) $(ADAPTOR) + rm -f *.o *.d *_dep.txt $(BUILD_AR_DIR)/*.[do] $(BUILD_SO_DIR)/*.[od] + rm -f $(AR_NAME) $(SO_NAME) $(DEMO_NAME) + make -C tests clean + +test: + make all -C tests diff --git a/block_cache.c b/block_cache.c index 55de9ff..8cb958c 100644 --- a/block_cache.c +++ b/block_cache.c @@ -159,7 +159,7 @@ bc_init(void) { if (unlikely(!enable_blk_cache)) return 0; - if (!(blk_cache = (block_cache_t*)malloc(sizeof(block_cache_t)))) + if (!(blk_cache = (block_cache_t*)MYMALLOC(sizeof(block_cache_t)))) return 0; blk_cache->blks = rbt_create(); diff --git a/chunk.c b/chunk.c index d4c298d..bf41bec 100644 --- a/chunk.c +++ b/chunk.c @@ -45,7 +45,7 @@ lm_alloc_chunk (void) { mmap((void*)cur_brk, avail, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_32BIT | MAP_ANONYMOUS, -1, 0); - if (!chunk) + if (chunk == (uintptr_t)MAP_FAILED) return NULL; /* If the program linked to this lib generates code-dump, do not dump those diff --git a/demo.c b/demo.c index 1b99cdf..e9dd3e3 100644 --- a/demo.c +++ b/demo.c @@ -12,7 +12,7 @@ mmap_wrap(size_t len) { int main(int argc, char** argv) { #if defined(DEBUG) - lm_init(1); + lm_init(); dump_page_alloc(stderr); int size1 = 100; diff --git a/lj_mm.h b/lj_mm.h index dcd6264..8b376b2 100644 --- a/lj_mm.h +++ b/lj_mm.h @@ -35,20 +35,22 @@ lm_init_mm_opt(lj_mm_opt_t* opt) { * conflicting with applications being benchmarked. */ -#define lm_init ljmm_init -#define lm_init2 ljmm_init2 -#define lm_fini ljmm_fini -#define lm_mmap ljmm_mmap -#define lm_munmap ljmm_munmap -#define lm_mremap ljmm_mremap -#define lm_malloc ljmm_malloc -#define lm_free ljmm_free +#define lm_init ljmm_init +#define lm_init2 ljmm_init2 +#define lm_fini ljmm_fini +#define lm_mmap ljmm_mmap +#define lm_munmap ljmm_munmap +#define lm_mremap ljmm_mremap +#define lm_malloc ljmm_malloc +#define lm_free ljmm_free +#define lm_get_status ljmm_get_status +#define lm_free_status ljmm_free_status /* Inititalize the memory-management system. If auto_fini is set * (i.e. auto_fini != 0), there is no need to call lm_fini() at exit. */ -int lm_init(int auto_fini) LJMM_EXPORT; -int lm_init2(int auto_fini, lj_mm_opt_t*) LJMM_EXPORT; +int lm_init(void) LJMM_EXPORT; +int lm_init2(lj_mm_opt_t*) LJMM_EXPORT; void lm_fini(void) LJMM_EXPORT; /* Same prototype as mmap(2), and munmap(2) */ @@ -79,8 +81,8 @@ typedef struct { block_info_t* alloc_blk_info; } lm_status_t; -const lm_status_t* lm_get_status(void); -void lm_free_status(lm_status_t*); +const lm_status_t* lm_get_status(void) LJMM_EXPORT; +void lm_free_status(lm_status_t*) LJMM_EXPORT; #ifdef DEBUG void dump_page_alloc(FILE*) LJMM_EXPORT; diff --git a/mem_map.c b/mem_map.c index ba864ce..a246045 100644 --- a/mem_map.c +++ b/mem_map.c @@ -21,7 +21,7 @@ void* lm_malloc(size_t sz) { errno = 0; if (!alloc_info) { - lm_init(1); + lm_init(); if (!alloc_info) return NULL; } @@ -432,66 +432,60 @@ lm_mmap(void *addr, size_t length, int prot, int flags, * ***************************************************************************** */ +static int finalized = 0; +/* "ignore_alloc_blk != 0": to unmap allocated chunk even if there are some + * allocated blocks not yet released. + */ +static inline void +fini_helper(int ignore_alloc_blk) { + if (finalized) + return; + + int no_alloc_blk = no_alloc_blocks(); + lm_fini_page_alloc(); + + if (no_alloc_blk || ignore_alloc_blk) + lm_free_chunk(); + + finalized = 1; +} + void lm_fini(void) { ENTER_MUTEX; - - lm_fini_page_alloc(); - lm_free_chunk(); - + fini_helper(1); LEAVE_MUTEX; } -/* The purpose of this variable is to workaround a link problem: If we were - * directly feeding lm_fini to atexit() in function lm_init(), we would be - * going to see a complaint like this: - * - * "...relocation R_X86_64_PC32 against protected symbol `lm_fini' can not - * be used when making a shared object"... - * - * I think it's perfectly fine using R_X86_64_PC32 as a relocation for - * the protected symbol lm_fini. It seems like it's GNU ld (I'm using 2.24) - * problem. Actually gold linker is able to link successfully. - * - * NOTE: This variable must be visible to other modules, otherwise, with - * higher optimization level, compiler can propagate its initial value (i.e. - * the lm_fini) to where it's referenced. - */ -void (*lm_fini_ptr)() __attribute__((visibility("protected"))) = lm_fini; - -static inline int -lm_init_helper(int auto_fini, lj_mm_opt_t* mm_opt) { - int res = 1; - if (auto_fini != 0) { - /* Do not directly feed lm_fini to atexit(), see the comment to - * variable "lm_fini_ptr" for why. - */ - res = atexit(lm_fini_ptr); - - /* Negate the sense of 'success' :-) */ - res = (res == 0) ? 1 : 0; - } - - if (res) - res = lm_init_page_alloc(lm_alloc_chunk(), mm_opt); - - return res; +__attribute__((destructor)) +static void +lm_fini2(void) { + /* It is unsafe to unmap the chunk as we are not sure if they are still alive. + * We don't need to worry about luajit. However, when we stress-test this lib + * with real-world applications, we find there are memory leakage, and at the + * time lm_fini2() is called, these allocated blocks are still alive (will be + * referenced by exit-handlers. + */ + fini_helper(0); } /* Initialize the allocation, return non-zero on success, 0 otherwise. */ int -lm_init(int auto_fini) { +lm_init(void) { + int res; ENTER_MUTEX; - int res = lm_init_helper(auto_fini, NULL); + res = lm_init_page_alloc(lm_alloc_chunk(), NULL); + finalized = 0; LEAVE_MUTEX; - return res; } int -lm_init2(int auto_fini, lj_mm_opt_t* mm_opt) { +lm_init2(lj_mm_opt_t* mm_opt) { + int res; ENTER_MUTEX; - int res = lm_init_helper(auto_fini, mm_opt); + res = lm_init_page_alloc(lm_alloc_chunk(), mm_opt); + finalized = 0; LEAVE_MUTEX; return res; } diff --git a/page_alloc.c b/page_alloc.c index c705f98..de9a0da 100644 --- a/page_alloc.c +++ b/page_alloc.c @@ -42,7 +42,7 @@ lm_init_page_alloc(lm_chunk_t* chunk, lj_mm_opt_t* mm_opt) { int alloc_sz = sizeof(lm_alloc_t) + sizeof(lm_page_t) * (page_num + 1); - alloc_info = (lm_alloc_t*) malloc(alloc_sz); + alloc_info = (lm_alloc_t*) MYMALLOC(alloc_sz); if (!alloc_info) { errno = ENOMEM; return 0; @@ -125,7 +125,7 @@ lm_fini_page_alloc(void) { rbt_fini(&alloc_info->alloc_blks); - free(alloc_info); + MYFREE(alloc_info); alloc_info = 0; } @@ -240,7 +240,7 @@ lm_get_status(void) { if (!alloc_info) return NULL; - lm_status_t* s = (lm_status_t *)malloc(sizeof(lm_status_t)); + lm_status_t* s = (lm_status_t *)MYMALLOC(sizeof(lm_status_t)); s->first_page = alloc_info->first_page; s->page_num = alloc_info->page_num; s->idx_to_id = alloc_info->idx_2_id_adj; @@ -254,7 +254,7 @@ lm_get_status(void) { /* Populate allocated block info */ if (alloc_blk_num) { block_info_t* ai; - ai = (block_info_t*)malloc(sizeof(block_info_t) * alloc_blk_num); + ai = (block_info_t*)MYMALLOC(sizeof(block_info_t) * alloc_blk_num); rb_iter_t iter, iter_e; int idx = 0; @@ -280,7 +280,7 @@ lm_get_status(void) { } if (free_blk_num) { block_info_t* fi; - fi = (block_info_t*)malloc(sizeof(block_info_t) * free_blk_num); + fi = (block_info_t*)MYMALLOC(sizeof(block_info_t) * free_blk_num); int idx = 0; int page_size_log2 = alloc_info->page_size_log2; @@ -313,12 +313,12 @@ lm_free_status(lm_status_t* status) { return; if (status->free_blk_info) - free(status->free_blk_info); + MYFREE(status->free_blk_info); if (status->alloc_blk_info) - free(status->alloc_blk_info); + MYFREE(status->alloc_blk_info); - free(status); + MYFREE(status); } #ifdef DEBUG diff --git a/page_alloc.h b/page_alloc.h index a1a3e17..a9c8b9d 100644 --- a/page_alloc.h +++ b/page_alloc.h @@ -174,6 +174,11 @@ remove_alloc_block(page_idx_t block) { return res; } +static inline int +no_alloc_blocks(void) { + return rbt_is_empty(&alloc_info->alloc_blks); +} + static inline void migrade_alloc_block(page_idx_t block, int ord_was, int ord_is, size_t new_map_sz) { rb_tree_t* rbt = &alloc_info->alloc_blks; diff --git a/rbtree.c b/rbtree.c index 977f05e..d8d9c9b 100644 --- a/rbtree.c +++ b/rbtree.c @@ -2,6 +2,7 @@ #include #include "rbtree.h" +#include "util.h" #define INVALID_IDX (-1) #define SENTINEL_IDX 0 @@ -19,7 +20,7 @@ int rbt_init(rb_tree_t* rbt) { rbt->capacity = 16; - rbt->tree = (rb_node_t*)malloc(rbt->capacity * sizeof(rb_node_t)); + rbt->tree = (rb_node_t*)MYMALLOC(rbt->capacity * sizeof(rb_node_t)); if (rbt->tree == 0) return 0; @@ -37,7 +38,7 @@ rbt_init(rb_tree_t* rbt) { void rbt_fini(rb_tree_t* rbt) { if (rbt && rbt->tree) { - free((void*)rbt->tree); + MYFREE((void*)rbt->tree); rbt->tree = 0; rbt->capacity = rbt->node_num = 0; rbt->root = INVALID_IDX; @@ -46,7 +47,7 @@ rbt_fini(rb_tree_t* rbt) { rb_tree_t* rbt_create(void) { - rb_tree_t* rbt = (rb_tree_t*)malloc(sizeof(rb_tree_t)); + rb_tree_t* rbt = (rb_tree_t*)MYMALLOC(sizeof(rb_tree_t)); if (rbt && rbt_init(rbt)) return rbt; @@ -57,7 +58,7 @@ void rbt_destroy(rb_tree_t* rbt) { if (rbt) { rbt_fini(rbt); - free((void*)rbt); + MYFREE((void*)rbt); } } @@ -159,7 +160,7 @@ rbt_try_shrink(rb_tree_t* rbt) { return 1; int cap = rbt->node_num * 3 / 2; - rbt->tree = (rb_node_t*)realloc(rbt->tree, cap * sizeof(rb_node_t)); + rbt->tree = (rb_node_t*)MYREALLOC(rbt->tree, cap * sizeof(rb_node_t)); if (rbt->tree == 0) return 0; @@ -212,7 +213,7 @@ bst_insert(rb_tree_t* t, int key, intptr_t value) { if (cap <= 16) cap = 16; - nodes = t->tree = (rb_node_t*)realloc(nodes, 100* sizeof(rb_node_t)); + nodes = t->tree = (rb_node_t*)MYREALLOC(nodes, cap * sizeof(rb_node_t)); t->capacity = cap; if (!nodes) @@ -705,7 +706,7 @@ rbt_verify(rb_tree_t* rbt) { int node_num = rbt->node_num; rb_node_t* nd_vect = rbt->tree; - int* cnt = (int*)malloc(sizeof(int) * node_num); + int* cnt = (int*)MYMALLOC(sizeof(int) * node_num); int i; for (i = 0; i < node_num; i++) cnt[i] = 0; @@ -745,12 +746,12 @@ rbt_verify(rb_tree_t* rbt) { /* we either have multiple roots, or rbt->root is not set * properly. */ - free(cnt); + MYFREE(cnt); return 0; } } } - free(cnt); + MYFREE(cnt); cnt = 0; if (root_cnt != 1) { if (root_cnt == 0 && node_num != 1) diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..0946918 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,67 @@ +.PHONY = default all clean + +default : all + +OPT_FLAGS := -O3 -g -march=native -DENABLE_TESTING #-DDEBUG +CFLAGS := -I.. -fvisibility=hidden -MMD -Wall $(OPT_FLAGS) +CXXFLAGS = $(CFLAGS) + +CC = gcc +CXX = g++ + +# Targets to be built. +UNIT_TEST := unit_test +ADAPTOR := libadaptor.so +RBTREE_TEST := rbt_test +MYMALLOC := libmymalloc.so + +# Source codes +UNIT_TEST_SRCS = unit_test.cxx +ADAPTOR_SRCS = adaptor.c mymalloc.c +RB_TEST_SRCS = rb_test.cxx +MYMALLOC_SRCS = mymalloc.c + +-include adaptor_dep.txt +-include mymalloc_dep.txt + + +all : $(UNIT_TEST) $(ADAPTOR) $(RBTREE_TEST) $(MYMALLOC) + ./$(RBTREE_TEST) + ./$(UNIT_TEST) + +# Building unit-test +${UNIT_TEST_SRCS:%.cxx=%.o} : %.o : %.cxx + $(CXX) $(CXXFLAGS) $< -c + +$(UNIT_TEST) : ${UNIT_TEST_SRCS:%.cxx=%.o} ../libljmm.so + $(CXX) $(filter %.o, $^) -Wl,-rpath=.. -L.. -lljmm -o $@ + +# Building libadpator.so. +# +${ADAPTOR_SRCS:%.c=%.o} : %.o : %.c + $(CC) $(CFLAGS) -fvisibility=default -DFOR_ADAPTOR -fPIC -I.. -I. -c $< + +$(ADAPTOR) : ${ADAPTOR_SRCS:%.c=%.o} ../libljmm4adaptor.so + ln -fs ../libljmm4adaptor.so . + $(CC) $(CFLAGS) -fvisibility=default -shared $(filter %.o, $^) -L.. -lljmm4adaptor \ + -Wl,-rpath=.. -o $@ + cat ${ADAPTOR_SRCS:%.c=%.d} > adaptor_dep.txt + +# Building RB-tree unit-test + +${RB_TEST_SRCS:%.cxx=%.o} : %.o : %.cxx + $(CXX) $(CXXFLAGS) -I.. $< -c + +${RBTREE_TEST} : ${RB_TEST_SRCS:%.cxx=%.o} ../rbtree.o + $(CXX) $(CXXFLAGS) $^ -o $@ + +# Building mymalloc.so +${MYMALLOC_SRCS:%.c=my_%.o} : my_%.o : %.c + $(CC) $(CFLAGS) -fvisibility=default -fPIC -c $< -o $@ + +$(MYMALLOC) : ${MYMALLOC_SRCS:%.c=my_%.o} + $(CC) $+ $(CFLAGS) -fvisibility=default -shared -o $@ + cat ${MYMALLOC_SRCS:%.c=%.d} > mymalloc_dep.txt + +clean: + rm -rf *.o *.d *_dep.txt $(UNIT_TEST) $(ADAPTOR) $(RBTREE_TEST) $(MYMALLOC) *.so diff --git a/tests/README b/tests/README new file mode 100644 index 0000000..c3b097b --- /dev/null +++ b/tests/README @@ -0,0 +1 @@ +TODO: Describe the testing methordology, which is bit involved. diff --git a/adaptor.c b/tests/adaptor.c similarity index 96% rename from adaptor.c rename to tests/adaptor.c index db8cda0..698e233 100644 --- a/adaptor.c +++ b/tests/adaptor.c @@ -57,7 +57,8 @@ * * 3. build the ptmalloc3 with: * make -C src/dir linux-shared \ - * OPT_FLAGS='-O3 -march=native -pthread -Wl,--wrap=mmap -Wl,--wrap=munmap -Wl,--wrap=mremap' + * OPT_FLAGS='-O3 -march=native -pthread -Wl,--wrap=mmap \ + * -Wl,--wrap=munmap -Wl,--wrap=mremap' * * it will successfuly build libptmalloc3.so, but fail to build t-test1, * which we don't need. The failure is due to undefined symbol of _wrap_xxx() @@ -68,7 +69,8 @@ * the libptmalloc3.so, and will be automatically loaded by the system * dynamic loader. * - * o. the -Wl,--wrap=xxx is to let linker the replace symbol xxx with __wrap_xxx. + * o. the -Wl,--wrap=xxx is to let linker the replace symbol xxx with + * __wrap_xxx. * * 4. set LD_LIBRARY_PATH properly to include the path to libljmm.so. * @@ -78,7 +80,8 @@ * * Miscellaneous * ------------- - * Some functionalities can be turned on/off via following environment variables. + * Some functionalities can be turned on/off via following environment + * variables: * - ENABLE_LJMM = {0|1} * - ENABLE_LJMM_TRACE = {0|1} */ @@ -112,7 +115,7 @@ static int init_done = 0; static int __attribute__((noinline)) init_adaptor(void) { const char* func_name = __FUNCTION__; - if (!lm_init(1)) { + if (!lm_init()) { fprintf(stderr, "%s: fail to call lm_init()\n", func_name); return 0; } @@ -152,7 +155,6 @@ __wrap_mmap64(void *addr, size_t length, int prot, int flags, const char* func = __FUNCTION__; void* blk = NULL; - fprintf(stderr, "init_done = %d, addr = %p, flags = %d\n", init_done, addr, flags); if (init_done && !addr && (flags & (MAP_ANONYMOUS|MAP_ANON))) { blk = lm_mmap(addr, length, prot, flags|MAP_32BIT, fd, offset); if (unlikely(enable_trace)) { @@ -204,6 +206,7 @@ __wrap_munmap(void *addr, size_t length) { void* __wrap_mremap(void *old_addr, size_t old_size, size_t new_size, int flags, ...) { + fprintf(stderr, "WTF!, remap is called\n"); if (!init_done || old_addr > (void*)LJMM_AS_UPBOUND) { void* p = NULL; if (!(flags & MREMAP_FIXED)) { diff --git a/tests/mymalloc.c b/tests/mymalloc.c new file mode 100644 index 0000000..0970cc5 --- /dev/null +++ b/tests/mymalloc.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include +#include /* for bzero() */ +#include /* for memcpy() */ + +#ifdef DEBUG + // Usage examples: ASSERT(a > b), ASSERT(foo() && "Opps, foo() reutrn 0"); + #define ASSERT(c) if (!(c))\ + { fprintf(stderr, "%s:%d Assert: %s\n", __FILE__, __LINE__, #c); abort(); } +#else + #define ASSERT(c) ((void)0) +#endif + +#ifndef FOR_ADAPTOR + #define MYMALLOC __wrap_malloc + #define MYFREE __wrap_free + #define MYCALLOC __wrap_calloc + #define MYREALLOC __wrap_realloc +#else + #define MYMALLOC __adaptor_malloc + #define MYFREE __adaptor_free + #define MYCALLOC __adaptor_calloc + #define MYREALLOC __adaptor_realloc +#endif + +#define MYMALLOC_EXPORT __attribute__ ((visibility ("default"))) +void* MYMALLOC(size_t) MYMALLOC_EXPORT; +void MYFREE(void*) MYMALLOC_EXPORT; +void* MYCALLOC(size_t, size_t) MYMALLOC_EXPORT; +void* MYREALLOC(void*, size_t) MYMALLOC_EXPORT; + +typedef int v4si __attribute__ ((vector_size (16))); + +static inline int +log2_int32(unsigned num) { + return 31 - __builtin_clz(num); +} + +static inline int +ceil_log2_int32 (unsigned num) { + int res = 31 - __builtin_clz(num); + res += (num & (num - 1)) ? 1 : 0; + return res; +} + +#define ENABLE_TRACE 0 + +#define EXT_SZ (4096 * 2) +#define MIN_ORDER 5 +#define MAX_ORDER 31 +#define BIN_NUM (MAX_ORDER - MIN_ORDER + 1) +#define CHUNK_ALIGN (__alignof__(v4si)) + +typedef struct my_malloc_chunk my_chunk_t; +struct my_malloc_chunk { + unsigned prev_size; + unsigned this_size; +#ifdef DEBUG + int magic_number; +#endif + union { + struct { + my_chunk_t* prev_free; + my_chunk_t* next_free; + }; + v4si align_data; + }; +}; + +#ifdef DEBUG +#define MAGIC_NUM 0x5a5a5a + #define SET_MAGCI_NUM(c) {(c)->magic_number = MAGIC_NUM; } + #define VERIFY_MAGIC_NUM(c) ASSERT((c)->magic_number == MAGIC_NUM) +#else + #define SET_MAGCI_NUM(c) ((void)0) + #define VERIFY_MAGIC_NUM(c) ((void)0) +#endif + +#define IS_CHUNK_FREE(c) ((c)->this_size & 1) +#define SET_CHUNK_FREE(c) {(c)->this_size |= 1;} +#define RESET_CHUNK_FREE(c) {(c)->this_size &= ~1;} + +#define IS_CHUNK_MMAP(c) ((c)->this_size & 2) +#define SET_CHUNK_MMAP(c) {(c)->this_size |= 2;} +#define RESET_CHUNK_MMAP(c) {(c)->this_size &= ~2;} + +#define IS_LAST_CHUNK(c) ((c)->this_size & 4) +#define SET_LAST_CHUNK(c) {(c)->this_size |= 4; } +#define RESET_LAST_CHUNK(c) {(c)->this_size &= ~4; } + +#define CHUNK_SIZE(c) ((c)->this_size & ~7) +#define SET_CHUNK_SIZE(c, s) { typeof(c) t = (c);\ + t->this_size = (t->this_size & 7) + s; } + +#define offsetof(st, m) ((size_t)(&((st *)0)->m)) +#define CHUNK_OVERHEAD offsetof(my_chunk_t, align_data) + +typedef struct { + my_chunk_t list; + int min_size; +} bin_t; + +typedef struct my_malloc_info my_malloc_info_t; +struct my_malloc_info { + int initialized; + bin_t bins[BIN_NUM]; +}; + +static my_malloc_info_t malloc_info; + +#define MMAP_THRESHOLD (EXT_SZ - sizeof(malloc_info) - CHUNK_ALIGN) + +/* Return cur's previous adjacent chunk. If the chunk dose not have previous + * adjacent chunk, chunk itself is returned. + */ +static inline my_chunk_t* +get_prev_adj_chunk(my_chunk_t* cur) { + if (cur->prev_size != 0) { + char* p = ((char*)(void*)cur) - cur->prev_size; + return (my_chunk_t*)(void*)p; + } + return NULL; +} + +static inline my_chunk_t* +get_next_adj_chunk(my_chunk_t* chunk) { + if (!IS_LAST_CHUNK(chunk)) + return (my_chunk_t*)(void*)(CHUNK_SIZE(chunk) + (void*)chunk); + return NULL; +} + +static inline int +is_bin_empty(bin_t* bin) { + my_chunk_t* list = &bin->list; + return list->next_free == list && list->prev_free == list; +} + +/* Return the min bin index, such that all chunk c in that bin have + * CHUNK_SIZE(c) > bin->min_size + */ +static inline int +get_bin_idx_for_chunk(int chunk_size) { + int idx = log2_int32(chunk_size) - MIN_ORDER; + ASSERT(idx >= 0); + if (idx >= BIN_NUM) + idx = BIN_NUM - 1; + + return idx; +} + +/* Return the min bin index, such that all chunk c in the bin have + * CHUNK_SIZE(c) > alloc_sz + */ +static inline int +get_bin_idx_for_alloc(int alloc_sz) { + int idx = ceil_log2_int32(alloc_sz) - MIN_ORDER; + ASSERT(idx >= 0); + if (idx >= BIN_NUM) + idx = BIN_NUM - 1; + + return idx; +} + +static inline void +append_to_bin(bin_t* bin, my_chunk_t* chunk) { + ASSERT((CHUNK_SIZE(chunk) & (CHUNK_ALIGN - 1)) == 0 && + (CHUNK_SIZE(chunk) >= bin->min_size)); + + my_chunk_t* insert_after = bin->list.prev_free; + my_chunk_t* insert_before = &bin->list; + + chunk->prev_free = insert_after; + chunk->next_free = insert_before; + + insert_before->prev_free = chunk; + insert_after->next_free = chunk; +} + +static inline my_chunk_t* +pop_from_bin(bin_t* bin) { + my_chunk_t* first = bin->list.next_free; + + if (first != &bin->list) { + my_chunk_t* before_1st = first->prev_free; + my_chunk_t* after_1st = first->next_free; + before_1st->next_free = after_1st; + after_1st->prev_free = before_1st; + first->prev_free = first->next_free = NULL; + return first; + } + + return NULL; +} + +static inline void +append_free_chunk(my_chunk_t* chunk) { + ASSERT(IS_CHUNK_FREE(chunk)); + /* look for the right bin for this chunk */ + int chunk_size = CHUNK_SIZE(chunk); + int bin_idx = get_bin_idx_for_chunk(chunk_size); + append_to_bin(malloc_info.bins + bin_idx, chunk); +} + +/* Remove the free chunk from bin */ +static inline void +remove_free_chunk(my_chunk_t* chunk) { + ASSERT(IS_CHUNK_FREE(chunk)); +#ifdef DEBUG + { + int chunk_size = CHUNK_SIZE(chunk); + bin_t* bin = malloc_info.bins + get_bin_idx_for_chunk(chunk_size); + int found = 0; + my_chunk_t* iter, *iter_e = &bin->list; + for (iter = bin->list.next_free; + iter != iter_e; iter = iter->next_free) { + if (iter == chunk) { + found = 1; break; + } + } + ASSERT(found); + } +#endif + + my_chunk_t* prev = chunk->prev_free; + my_chunk_t* next = chunk->next_free; + prev->next_free = next; + next->prev_free = prev; + + chunk->prev_free = chunk->next_free = NULL; +} + +static void +malloc_init(void) { + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + my_chunk_t* list = &bin->list; + list->prev_free = list->next_free = list; + bin->min_size = 1 << (i + MIN_ORDER); + } + malloc_info.initialized = 1; +} + +/* Split the given chunk into two at the specified splitting point, return + * the second one. + */ +static my_chunk_t* +split_chunk(my_chunk_t* chunk, int split_point) { + ASSERT((split_point & (CHUNK_ALIGN - 1)) == 0); + ASSERT(split_point + CHUNK_OVERHEAD <= CHUNK_SIZE(chunk)); + + int chunk_sz = CHUNK_SIZE(chunk); + int chunk2_sz = chunk_sz - split_point; + ASSERT(chunk2_sz >= sizeof(my_chunk_t)); + + my_chunk_t* chunk2; + chunk2 = (my_chunk_t*)(void*)(split_point + (char*)(void*)chunk); + chunk2->prev_size = chunk_sz - chunk2_sz; + SET_CHUNK_SIZE(chunk2, chunk2_sz); + SET_CHUNK_SIZE(chunk, split_point); + + /* Only the 1st chunk is marked with mapped*/ + RESET_CHUNK_MMAP(chunk2); + SET_MAGCI_NUM(chunk2); + + if (!IS_LAST_CHUNK(chunk)) { + my_chunk_t* follow; + follow = (my_chunk_t*)(void*)(chunk_sz + (char*)(void*)chunk); + follow->prev_size = chunk2_sz; + RESET_LAST_CHUNK(chunk2); + } else { + RESET_LAST_CHUNK(chunk); + SET_LAST_CHUNK(chunk2); + } + + if (IS_CHUNK_FREE(chunk)) + SET_CHUNK_FREE(chunk2); + + return chunk2; +} + +static my_chunk_t* +find_big_enough_chunk(size_t alloc_size, int* bin_idx) { + int bin_idx_tmp = get_bin_idx_for_alloc(alloc_size); + int i; + for (i = bin_idx_tmp; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + break; + } + + *bin_idx = i; + if (i < BIN_NUM - 1) + return pop_from_bin(malloc_info.bins + i); + + if (i == BIN_NUM -1) { + my_chunk_t* iter, *iter_e; + for (iter = malloc_info.bins[i].list.next_free, + iter_e = malloc_info.bins[i].list.prev_free; + iter != iter_e; iter = iter->next_free) { + if (CHUNK_SIZE(iter) >= alloc_size) { + remove_free_chunk(iter); + return iter; + } + } + } + + return NULL; +} + +/* The alloc_sz already take into account the chunk-overhead, and is + * properly aligned. + * + * NOTE: before calling this function, chunk should already be removed from bin. + */ +static void* +malloc_helper(my_chunk_t* chunk, size_t alloc_sz) { + RESET_CHUNK_FREE(chunk); + + /* Try to split the chunk. */ + unsigned chunk_size = CHUNK_SIZE(chunk); + ASSERT(((alloc_sz & (CHUNK_ALIGN - 1)) == 0) && chunk_size >= alloc_sz); + + unsigned remain_sz = chunk_size - alloc_sz; + if (remain_sz > sizeof(my_chunk_t)) { + my_chunk_t* split = split_chunk(chunk, alloc_sz); + SET_CHUNK_FREE(split); + append_free_chunk(split); + } + + return CHUNK_OVERHEAD + ((char*)(void*)chunk); +} + +void* +MYMALLOC(size_t size) { + if (ENABLE_TRACE) + fprintf(stderr, "\nmalloc(%lu)\n", size); + + if (!malloc_info.initialized) + malloc_init(); + + size_t norm_size = + (size + CHUNK_OVERHEAD + CHUNK_ALIGN - 1) & ~(CHUNK_ALIGN - 1); + + void* result = NULL; + + int bin_idx; + my_chunk_t* chunk = find_big_enough_chunk(norm_size, &bin_idx); + if (chunk) { + result = malloc_helper(chunk, norm_size); + goto malloc_exit; + } + + /* case 2: no free chunk big enough. Create one via mmap() */ + size_t mmap_sz = EXT_SZ; + if (mmap_sz < norm_size) + mmap_sz = norm_size; + + long page_sz = sysconf(_SC_PAGESIZE); + mmap_sz = (mmap_sz + page_sz - 1) & ~(page_sz - 1); + result = mmap(NULL, mmap_sz, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ENABLE_TRACE) + fprintf(stderr, " > %p = mmap(%ld)\n", result, mmap_sz); + + if (result == MAP_FAILED) + goto malloc_exit; + + chunk = (my_chunk_t*)result; + chunk->prev_size = 0; + chunk->this_size = mmap_sz; + SET_LAST_CHUNK(chunk); + SET_CHUNK_MMAP(chunk); + SET_CHUNK_FREE(chunk); + SET_MAGCI_NUM(chunk); + + result = malloc_helper(chunk, norm_size); + +malloc_exit: + if (ENABLE_TRACE) + fprintf(stderr, "%p = malloc(%ld)\n", result, size); + + return result; +} + +void +MYFREE(void* ptr) { + if (ENABLE_TRACE) + fprintf(stderr, "\nfree(%p)\n", ptr); + + my_chunk_t* chunk = (my_chunk_t*)(void*)(((char*)ptr) - CHUNK_OVERHEAD); + ASSERT(!IS_CHUNK_FREE(chunk)); + VERIFY_MAGIC_NUM(chunk); + + while (1) { + my_chunk_t* prev_adj = get_prev_adj_chunk(chunk); + my_chunk_t* next_adj = get_next_adj_chunk(chunk); + int change = 0; + + /* Consolidate with the adjacent following chunk */ + if (next_adj && IS_CHUNK_FREE(next_adj)) { + remove_free_chunk(next_adj); + if (IS_LAST_CHUNK(next_adj)) + SET_LAST_CHUNK(chunk); + + int new_sz = CHUNK_SIZE(chunk) + CHUNK_SIZE(next_adj); + SET_CHUNK_SIZE(chunk, new_sz); + change = 1; + } + + /* Consolidate with the previous adjacent chunk */ + if (prev_adj && IS_CHUNK_FREE(prev_adj)) { + remove_free_chunk(prev_adj); + + if (IS_LAST_CHUNK(chunk)) + SET_LAST_CHUNK(prev_adj); + + int new_sz = CHUNK_SIZE(chunk) + CHUNK_SIZE(prev_adj); + SET_CHUNK_SIZE(prev_adj, new_sz); + chunk = prev_adj; + change = 1; + } + + if (!change) + break; + } + + if (IS_CHUNK_MMAP(chunk) && IS_LAST_CHUNK(chunk)) { + if (ENABLE_TRACE) + fprintf(stderr, " > munmap(%p, %u)\n", chunk, CHUNK_SIZE(chunk)); + munmap((void*)chunk, CHUNK_SIZE(chunk)); + return; + } + SET_CHUNK_FREE(chunk); + if (!IS_LAST_CHUNK(chunk)) { + my_chunk_t* next_adj = get_next_adj_chunk(chunk); + next_adj->prev_size = CHUNK_SIZE(chunk); + } + + append_free_chunk(chunk); +} + +void* +MYREALLOC(void* ptr, size_t size) { + if (ENABLE_TRACE) + fprintf(stderr, "\nrealloc(%p, %lu)\n", ptr, size); + + void* result = ptr; + + /* normalize the size */ + size_t norm_size = (size + CHUNK_ALIGN - 1) & ~(CHUNK_ALIGN - 1); + norm_size += CHUNK_OVERHEAD; + + my_chunk_t* chunk = (my_chunk_t*)(void*)(((char*)ptr) - CHUNK_OVERHEAD); + size_t chunk_sz = CHUNK_SIZE(chunk); + if (norm_size > chunk_sz) { + result = (my_chunk_t*)MYMALLOC(norm_size); + if (result) { + memcpy(result, (void*)&chunk->align_data, + chunk_sz - CHUNK_OVERHEAD); + SET_CHUNK_FREE(chunk); + append_free_chunk(chunk); + } + goto realloc_exit; + } + + if (chunk_sz - norm_size >= sizeof(my_chunk_t)) { + /* shrink the allocated block */ + my_chunk_t* another = split_chunk(chunk, norm_size); + SET_CHUNK_FREE(another); + append_free_chunk(another); + } + +realloc_exit: + if (ENABLE_TRACE) + fprintf(stderr, "%p = realloc(%p, %lu)\n", result, ptr, size); + return result; +} + +void* +MYCALLOC(size_t nmemb, size_t size) { + size_t t = nmemb * size; + void* p = MYMALLOC(t); + if (p) + bzero(p, t); + + return p; +} + +#if 0 +static void +my_malloc_verify(void) { + if (!malloc_info.initialized) + return; + + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + + my_chunk_t* iter, *iter_e; + for (iter = bin->list.next_free, iter_e = &bin->list; + iter != iter_e; iter = iter->next_free) { + ASSERT(IS_CHUNK_FREE(iter)); + ASSERT(iter->next_free && iter->prev_free); + } + } +} +#endif + +void +my_malloc_dump(FILE* f) { + if (!malloc_info.initialized) { + return; + } + + int i; + for (i = 0; i < BIN_NUM; i++) { + bin_t* bin = malloc_info.bins + i; + if (is_bin_empty(bin)) + continue; + + fprintf(f, "BIN:%3d, min_size:%d :", i, bin->min_size); + my_chunk_t* iter, *iter_e; + for (iter = bin->list.next_free, iter_e = &bin->list; + iter != iter_e; iter = iter->next_free) { + fprintf(f, "\n\t[chunk %p, size:%d, prev_size:%d, ", + iter, CHUNK_SIZE(iter), iter->prev_size); + + fprintf(f, "prev_free:%p, next_free:%p", + iter->prev_free, iter->next_free); + + if (IS_CHUNK_FREE(iter)) + fprintf(f, ", free"); + + if (IS_CHUNK_MMAP(iter)) + fprintf(f, ", mmap"); + + if (IS_LAST_CHUNK(iter)) + fprintf(f, ", last"); + fprintf(f, "] "); + } + + fprintf(f, "\n"); + } +} diff --git a/rb_test.cxx b/tests/rb_test.cxx similarity index 100% rename from rb_test.cxx rename to tests/rb_test.cxx diff --git a/unit_test.cxx b/tests/unit_test.cxx similarity index 98% rename from unit_test.cxx rename to tests/unit_test.cxx index 1350e51..a5a2bdf 100644 --- a/unit_test.cxx +++ b/tests/unit_test.cxx @@ -96,11 +96,11 @@ UNIT_TEST::UNIT_TEST(int test_id, int page_num) mm_opt.chunk_sz_in_page = _page_num = page_num; mm_opt.enable_block_cache = 0; - _init_succ = lm_init2(0, &mm_opt); + _init_succ = lm_init2(&mm_opt); _test_succ = _init_succ ? true : false; _page_size = sysconf(_SC_PAGESIZE); if (_init_succ) { - const lm_status_t* status = lm_get_status(); + const lm_status_t* status = ljmm_get_status(); _chunk_base = status->first_page; lm_free_status(const_cast(status)); } else { @@ -222,7 +222,7 @@ UNIT_TEST::VerifyStatus(blk_info2_t* alloc_blk_v, int alloc_blk_v_len, if (!_test_succ) return; - const lm_status_t* status = lm_get_status(); + const lm_status_t* status = ljmm_get_status(); if (free_blk_v_len != status->free_blk_num || alloc_blk_v_len != status->alloc_blk_num) { _test_succ = false; @@ -261,6 +261,7 @@ UNIT_TEST::VerifyStatus(blk_info2_t* alloc_blk_v, int alloc_blk_v_len, int main(int argc, char** argv) { fprintf(stdout, "\n>>Mmap unit testing\n"); +#if 0 // test1 // { @@ -287,7 +288,7 @@ main(int argc, char** argv) { ut.VerifyStatus(alloc_blk, ARRAY_SIZE(alloc_blk), free_blk, ARRAY_SIZE(free_blk)); } - +#endif fprintf(stdout, "\n>>Munmap unit testing\n"); // Notation for address. diff --git a/util.h b/util.h index 969acd4..2404b1a 100644 --- a/util.h +++ b/util.h @@ -32,6 +32,22 @@ log2_int32(unsigned num) { return 31 - __builtin_clz(num); } +#ifdef FOR_ADAPTOR + #define MYMALLOC __adaptor_malloc + #define MYFREE __adaptor_free + #define MYCALLOC __adaptor_calloc + #define MYREALLOC __adaptor_realloc + void* MYMALLOC(size_t); + void MYFREE(void*); + void* MYCALLOC(size_t, size_t); + void* MYREALLOC(void*, size_t); +#else + #define MYMALLOC malloc + #define MYFREE free + #define MYCALLOC calloc + #define MYREALLOC realloc +#endif + #ifdef DEBUG // Usage examples: ASSERT(a > b), ASSERT(foo() && "Opps, foo() reutrn 0"); #define ASSERT(c) if (!(c))\