Rewrote memory pool to use arenas per power of two.

Using per-size arenas behaves like a memory leak when the user creates many VoxelBuffers of random sizes repeatedly. Now memory blocks of the next power of two are used instead. VoxelBuffers with power-of-two size will fit best, while also being the most common. Non-power-of-two will use a bit more memory, but such buffers are often temporary and less numerous.
2021-11-12 23:39:50 +00:00 · 2021-11-12 23:39:50 +00:00 · 2fd4c22c54
parent 148d5e4116
commit 2fd4c22c54
3 changed files with 122 additions and 73 deletions
--- a/storage/voxel_memory_pool.cpp
+++ b/storage/voxel_memory_pool.cpp
@ -41,17 +41,29 @@ VoxelMemoryPool::~VoxelMemoryPool() {

 uint8_t *VoxelMemoryPool::allocate(size_t size) {
 	VOXEL_PROFILE_SCOPE();
-	MutexLock lock(_mutex);
-	Pool *pool = get_or_create_pool(size);
-	uint8_t *block;
-	if (pool->blocks.size() > 0) {
-		block = pool->blocks.back();
-		pool->blocks.pop_back();
-	} else {
+	CRASH_COND(size == 0);
+	uint8_t *block = nullptr;
+	// Not calculating `pot` immediately because the function we use to calculate it uses 32 bits,
+	// while `size_t` can be larger than that.
+	if (size > get_highest_supported_size()) {
+		// Sorry, memory is not pooled past this size
 		block = (uint8_t *)memalloc(size * sizeof(uint8_t));
-		ERR_FAIL_COND_V(block == nullptr, nullptr);
-		_total_memory += size;
+	} else {
+		const unsigned int pot = get_pool_index_from_size(size);
+		Pool &pool = _pot_pools[pot];
+		pool.mutex.lock();
+		if (pool.blocks.size() > 0) {
+			block = pool.blocks.back();
+			pool.blocks.pop_back();
+			pool.mutex.unlock();
+		} else {
+			pool.mutex.unlock();
+			block = (uint8_t *)memalloc(size * sizeof(uint8_t));
+		}
 	}
+#ifdef DEBUG_ENABLED
+	debug_add_allock(block);
+#endif
 	++_used_blocks;
 	_used_memory += size;
 	return block;
@ -60,64 +72,60 @@ uint8_t *VoxelMemoryPool::allocate(size_t size) {
 void VoxelMemoryPool::recycle(uint8_t *block, size_t size) {
 	CRASH_COND(size == 0);
 	CRASH_COND(block == nullptr);
-	MutexLock lock(_mutex);
-	Pool *pool = _pools[size]; // If not found, entry will be created! It would be an error
-	// Check recycling before having allocated
-	CRASH_COND(pool == nullptr);
-	pool->blocks.push_back(block);
+#ifdef DEBUG_ENABLED
+	debug_remove_alloc(block);
+#endif
+	// Not calculating `pot` immediately because the function we use to calculate it uses 32 bits,
+	// while `size_t` can be larger than that.
+	if (size > get_highest_supported_size()) {
+		memfree(block);
+	} else {
+		const unsigned int pot = get_pool_index_from_size(size);
+		Pool &pool = _pot_pools[pot];
+		MutexLock lock(pool.mutex);
+		pool.blocks.push_back(block);
+	}
 	--_used_blocks;
 	_used_memory -= size;
 }

 void VoxelMemoryPool::clear_unused_blocks() {
-	MutexLock lock(_mutex);
-	const size_t *key = nullptr;
-	while ((key = _pools.next(key))) {
-		Pool *pool = _pools.get(*key);
-		CRASH_COND(pool == nullptr);
-		for (auto it = pool->blocks.begin(); it != pool->blocks.end(); ++it) {
-			uint8_t *ptr = *it;
-			CRASH_COND(ptr == nullptr);
-			memfree(ptr);
+	for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
+		Pool &pool = _pot_pools[pot];
+		MutexLock lock(pool.mutex);
+		for (unsigned int i = 0; i < pool.blocks.size(); ++i) {
+			void *block = pool.blocks[i];
+			memfree(block);
 		}
-		_total_memory -= (*key) * pool->blocks.size();
-		pool->blocks.clear();
+		_total_memory -= get_size_from_pool_index(pot) * pool.blocks.size();
+		pool.blocks.clear();
 	}
 }

 void VoxelMemoryPool::clear() {
-	MutexLock lock(_mutex);
-	const size_t *key = nullptr;
-	while ((key = _pools.next(key))) {
-		Pool *pool = _pools.get(*key);
-		CRASH_COND(pool == nullptr);
-		for (auto it = pool->blocks.begin(); it != pool->blocks.end(); ++it) {
-			uint8_t *ptr = *it;
-			CRASH_COND(ptr == nullptr);
-			memfree(ptr);
+	for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
+		Pool &pool = _pot_pools[pot];
+		MutexLock lock(pool.mutex);
+		for (unsigned int i = 0; i < pool.blocks.size(); ++i) {
+			void *block = pool.blocks[i];
+			memfree(block);
 		}
-		memdelete(pool);
+		pool.blocks.clear();
 	}
-	_pools.clear();
 	_used_memory = 0;
 	_total_memory = 0;
 	_used_blocks = 0;
 }

 void VoxelMemoryPool::debug_print() {
-	MutexLock lock(_mutex);
 	print_line("-------- VoxelMemoryPool ----------");
-	if (_pools.size() == 0) {
-		print_line("No pools created");
-	} else {
-		const size_t *key = nullptr;
-		int i = 0;
-		while ((key = _pools.next(key))) {
-			Pool *pool = _pools.get(*key);
-			print_line(String("Pool {0} for size {1}: {2} blocks")
-							   .format(varray(i, SIZE_T_TO_VARIANT(*key), SIZE_T_TO_VARIANT(pool->blocks.size()))));
-			++i;
-		}
+	for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
+		Pool &pool = _pot_pools[pot];
+		MutexLock lock(pool.mutex);
+		print_line(String("Pool {0}: {1} blocks (capacity {2})")
+						   .format(varray(pot,
+								   SIZE_T_TO_VARIANT(pool.blocks.size()),
+								   SIZE_T_TO_VARIANT(pool.blocks.capacity()))));
 	}
 }

@ -135,17 +143,3 @@ size_t VoxelMemoryPool::debug_get_total_memory() const {
 	//MutexLock lock(_mutex);
 	return _total_memory;
 }
-
-VoxelMemoryPool::Pool *VoxelMemoryPool::get_or_create_pool(size_t size) {
-	Pool *pool;
-	Pool **ppool = _pools.getptr(size);
-	if (ppool == nullptr) {
-		pool = memnew(Pool);
-		CRASH_COND(pool == nullptr);
-		_pools.set(size, pool);
-	} else {
-		pool = *ppool;
-		CRASH_COND(pool == nullptr);
-	}
-	return pool;
-}
--- a/storage/voxel_memory_pool.h
+++ b/storage/voxel_memory_pool.h
@ -1,16 +1,24 @@
 #ifndef VOXEL_MEMORY_POOL_H
 #define VOXEL_MEMORY_POOL_H

-#include "core/hash_map.h"
+#include "../util/fixed_array.h"
+#include "../util/math/funcs.h"
 #include "core/os/mutex.h"

+#include <limits>
+#include <unordered_set>
 #include <vector>

 // Pool based on a scenario where allocated blocks are often the same size.
-// A pool of blocks is assigned for each size.
+// A pool of blocks is assigned for each power of two.
+// The majority of VoxelBuffers use powers of two so most of the time
+// we won't waste memory. Sometimes non-power-of-two buffers are created,
+// but they are often temporary and less numerous.
 class VoxelMemoryPool {
 private:
 	struct Pool {
+		Mutex mutex;
+		// Would a linked list be better?
 		std::vector<uint8_t *> blocks;
 	};

@ -33,20 +41,54 @@ public:
 	size_t debug_get_total_memory() const;

 private:
-	Pool *get_or_create_pool(size_t size);
 	void clear();

-	struct SizeTHasher {
-		static _FORCE_INLINE_ uint32_t hash(const size_t p_int) {
-			return HashMapHasherDefault::hash(uint64_t(p_int));
-		}
-	};
+#ifdef DEBUG_ENABLED
+	void debug_add_allock(void *block) {
+		MutexLock lock(_debug_allocs_mutex);
+		auto it = _debug_allocs.find(block);
+		CRASH_COND(it != _debug_allocs.end());
+		_debug_allocs.insert(block);
+	}

-	HashMap<size_t, Pool *, SizeTHasher> _pools;
-	unsigned int _used_blocks = 0;
+	void debug_remove_alloc(void *block) {
+		MutexLock lock(_debug_allocs_mutex);
+		auto it = _debug_allocs.find(block);
+		CRASH_COND(it == _debug_allocs.end());
+		_debug_allocs.erase(it);
+	}
+#endif
+
+	inline size_t get_highest_supported_size() const {
+		return size_t(1) << (_pot_pools.size() - 1);
+	}
+
+	inline unsigned int get_pool_index_from_size(size_t size) const {
+#ifdef DEBUG_ENABLED
+		// `next_power_of_2` takes unsigned int
+		CRASH_COND(size > std::numeric_limits<unsigned int>::max());
+#endif
+		return get_shift_from_power_of_two(next_power_of_2(size));
+	}
+
+	inline size_t get_size_from_pool_index(unsigned int i) const {
+		return size_t(1) << i;
+	}
+
+	// We handle allocations with up to 2^20 = 1,048,576 bytes.
+	// This is chosen based on practical needs.
+	// Each slot in this array corresponds to allocations
+	// that contain 2^index bytes in them.
+	FixedArray<Pool, 21> _pot_pools;
+
+#ifdef DEBUG_ENABLED
+	std::unordered_set<void *> _debug_allocs;
+	Mutex _debug_allocs_mutex;
+#endif
+
+	unsigned int _used_blocks = 0; // TODO Make atomic?
 	size_t _used_memory = 0;
 	size_t _total_memory = 0;
-	Mutex _mutex;
 };

 #endif // VOXEL_MEMORY_POOL_H
--- a/util/math/funcs.h
+++ b/util/math/funcs.h
@ -162,6 +162,19 @@ inline bool is_power_of_two(size_t x) {
 	return x != 0 && (x & (x - 1)) == 0;
 }

+inline unsigned int get_shift_from_power_of_two(unsigned int pot) {
+#ifdef DEBUG_ENABLED
+	CRASH_COND(!is_power_of_two(pot));
+#endif
+	for (unsigned int i = 0; i < 32; ++i) {
+		if (pot == (1 << i)) {
+			return i;
+		}
+	}
+	// Input was not a valid power of two
+	CRASH_COND(true);
+}
+
 // If the provided address `a` is not aligned to the number of bytes specified in `align`,
 // returns the next aligned address. `align` must be a power of two.
 inline size_t alignup(size_t a, size_t align) {