Rewrote memory pool to use arenas per power of two.

Using per-size arenas behaves like a memory leak when
the user creates many VoxelBuffers of random sizes repeatedly.
Now memory blocks of the next power of two are used instead.
VoxelBuffers with power-of-two size will fit best, while also being
the most common. Non-power-of-two will use a bit more memory,
but such buffers are often temporary and less numerous.
master
Marc Gilleron 2021-11-12 23:39:50 +00:00
parent 148d5e4116
commit 2fd4c22c54
3 changed files with 122 additions and 73 deletions

View File

@ -41,17 +41,29 @@ VoxelMemoryPool::~VoxelMemoryPool() {
uint8_t *VoxelMemoryPool::allocate(size_t size) {
VOXEL_PROFILE_SCOPE();
MutexLock lock(_mutex);
Pool *pool = get_or_create_pool(size);
uint8_t *block;
if (pool->blocks.size() > 0) {
block = pool->blocks.back();
pool->blocks.pop_back();
} else {
CRASH_COND(size == 0);
uint8_t *block = nullptr;
// Not calculating `pot` immediately because the function we use to calculate it uses 32 bits,
// while `size_t` can be larger than that.
if (size > get_highest_supported_size()) {
// Sorry, memory is not pooled past this size
block = (uint8_t *)memalloc(size * sizeof(uint8_t));
ERR_FAIL_COND_V(block == nullptr, nullptr);
_total_memory += size;
} else {
const unsigned int pot = get_pool_index_from_size(size);
Pool &pool = _pot_pools[pot];
pool.mutex.lock();
if (pool.blocks.size() > 0) {
block = pool.blocks.back();
pool.blocks.pop_back();
pool.mutex.unlock();
} else {
pool.mutex.unlock();
block = (uint8_t *)memalloc(size * sizeof(uint8_t));
}
}
#ifdef DEBUG_ENABLED
debug_add_allock(block);
#endif
++_used_blocks;
_used_memory += size;
return block;
@ -60,64 +72,60 @@ uint8_t *VoxelMemoryPool::allocate(size_t size) {
void VoxelMemoryPool::recycle(uint8_t *block, size_t size) {
CRASH_COND(size == 0);
CRASH_COND(block == nullptr);
MutexLock lock(_mutex);
Pool *pool = _pools[size]; // If not found, entry will be created! It would be an error
// Check recycling before having allocated
CRASH_COND(pool == nullptr);
pool->blocks.push_back(block);
#ifdef DEBUG_ENABLED
debug_remove_alloc(block);
#endif
// Not calculating `pot` immediately because the function we use to calculate it uses 32 bits,
// while `size_t` can be larger than that.
if (size > get_highest_supported_size()) {
memfree(block);
} else {
const unsigned int pot = get_pool_index_from_size(size);
Pool &pool = _pot_pools[pot];
MutexLock lock(pool.mutex);
pool.blocks.push_back(block);
}
--_used_blocks;
_used_memory -= size;
}
void VoxelMemoryPool::clear_unused_blocks() {
MutexLock lock(_mutex);
const size_t *key = nullptr;
while ((key = _pools.next(key))) {
Pool *pool = _pools.get(*key);
CRASH_COND(pool == nullptr);
for (auto it = pool->blocks.begin(); it != pool->blocks.end(); ++it) {
uint8_t *ptr = *it;
CRASH_COND(ptr == nullptr);
memfree(ptr);
for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
Pool &pool = _pot_pools[pot];
MutexLock lock(pool.mutex);
for (unsigned int i = 0; i < pool.blocks.size(); ++i) {
void *block = pool.blocks[i];
memfree(block);
}
_total_memory -= (*key) * pool->blocks.size();
pool->blocks.clear();
_total_memory -= get_size_from_pool_index(pot) * pool.blocks.size();
pool.blocks.clear();
}
}
void VoxelMemoryPool::clear() {
MutexLock lock(_mutex);
const size_t *key = nullptr;
while ((key = _pools.next(key))) {
Pool *pool = _pools.get(*key);
CRASH_COND(pool == nullptr);
for (auto it = pool->blocks.begin(); it != pool->blocks.end(); ++it) {
uint8_t *ptr = *it;
CRASH_COND(ptr == nullptr);
memfree(ptr);
for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
Pool &pool = _pot_pools[pot];
MutexLock lock(pool.mutex);
for (unsigned int i = 0; i < pool.blocks.size(); ++i) {
void *block = pool.blocks[i];
memfree(block);
}
memdelete(pool);
pool.blocks.clear();
}
_pools.clear();
_used_memory = 0;
_total_memory = 0;
_used_blocks = 0;
}
void VoxelMemoryPool::debug_print() {
MutexLock lock(_mutex);
print_line("-------- VoxelMemoryPool ----------");
if (_pools.size() == 0) {
print_line("No pools created");
} else {
const size_t *key = nullptr;
int i = 0;
while ((key = _pools.next(key))) {
Pool *pool = _pools.get(*key);
print_line(String("Pool {0} for size {1}: {2} blocks")
.format(varray(i, SIZE_T_TO_VARIANT(*key), SIZE_T_TO_VARIANT(pool->blocks.size()))));
++i;
}
for (unsigned int pot = 0; pot < _pot_pools.size(); ++pot) {
Pool &pool = _pot_pools[pot];
MutexLock lock(pool.mutex);
print_line(String("Pool {0}: {1} blocks (capacity {2})")
.format(varray(pot,
SIZE_T_TO_VARIANT(pool.blocks.size()),
SIZE_T_TO_VARIANT(pool.blocks.capacity()))));
}
}
@ -135,17 +143,3 @@ size_t VoxelMemoryPool::debug_get_total_memory() const {
//MutexLock lock(_mutex);
return _total_memory;
}
VoxelMemoryPool::Pool *VoxelMemoryPool::get_or_create_pool(size_t size) {
Pool *pool;
Pool **ppool = _pools.getptr(size);
if (ppool == nullptr) {
pool = memnew(Pool);
CRASH_COND(pool == nullptr);
_pools.set(size, pool);
} else {
pool = *ppool;
CRASH_COND(pool == nullptr);
}
return pool;
}

View File

@ -1,16 +1,24 @@
#ifndef VOXEL_MEMORY_POOL_H
#define VOXEL_MEMORY_POOL_H
#include "core/hash_map.h"
#include "../util/fixed_array.h"
#include "../util/math/funcs.h"
#include "core/os/mutex.h"
#include <limits>
#include <unordered_set>
#include <vector>
// Pool based on a scenario where allocated blocks are often the same size.
// A pool of blocks is assigned for each size.
// A pool of blocks is assigned for each power of two.
// The majority of VoxelBuffers use powers of two so most of the time
// we won't waste memory. Sometimes non-power-of-two buffers are created,
// but they are often temporary and less numerous.
class VoxelMemoryPool {
private:
struct Pool {
Mutex mutex;
// Would a linked list be better?
std::vector<uint8_t *> blocks;
};
@ -33,20 +41,54 @@ public:
size_t debug_get_total_memory() const;
private:
Pool *get_or_create_pool(size_t size);
void clear();
struct SizeTHasher {
static _FORCE_INLINE_ uint32_t hash(const size_t p_int) {
return HashMapHasherDefault::hash(uint64_t(p_int));
}
};
#ifdef DEBUG_ENABLED
void debug_add_allock(void *block) {
MutexLock lock(_debug_allocs_mutex);
auto it = _debug_allocs.find(block);
CRASH_COND(it != _debug_allocs.end());
_debug_allocs.insert(block);
}
HashMap<size_t, Pool *, SizeTHasher> _pools;
unsigned int _used_blocks = 0;
void debug_remove_alloc(void *block) {
MutexLock lock(_debug_allocs_mutex);
auto it = _debug_allocs.find(block);
CRASH_COND(it == _debug_allocs.end());
_debug_allocs.erase(it);
}
#endif
inline size_t get_highest_supported_size() const {
return size_t(1) << (_pot_pools.size() - 1);
}
inline unsigned int get_pool_index_from_size(size_t size) const {
#ifdef DEBUG_ENABLED
// `next_power_of_2` takes unsigned int
CRASH_COND(size > std::numeric_limits<unsigned int>::max());
#endif
return get_shift_from_power_of_two(next_power_of_2(size));
}
inline size_t get_size_from_pool_index(unsigned int i) const {
return size_t(1) << i;
}
// We handle allocations with up to 2^20 = 1,048,576 bytes.
// This is chosen based on practical needs.
// Each slot in this array corresponds to allocations
// that contain 2^index bytes in them.
FixedArray<Pool, 21> _pot_pools;
#ifdef DEBUG_ENABLED
std::unordered_set<void *> _debug_allocs;
Mutex _debug_allocs_mutex;
#endif
unsigned int _used_blocks = 0; // TODO Make atomic?
size_t _used_memory = 0;
size_t _total_memory = 0;
Mutex _mutex;
};
#endif // VOXEL_MEMORY_POOL_H

View File

@ -162,6 +162,19 @@ inline bool is_power_of_two(size_t x) {
return x != 0 && (x & (x - 1)) == 0;
}
inline unsigned int get_shift_from_power_of_two(unsigned int pot) {
#ifdef DEBUG_ENABLED
CRASH_COND(!is_power_of_two(pot));
#endif
for (unsigned int i = 0; i < 32; ++i) {
if (pot == (1 << i)) {
return i;
}
}
// Input was not a valid power of two
CRASH_COND(true);
}
// If the provided address `a` is not aligned to the number of bytes specified in `align`,
// returns the next aligned address. `align` must be a power of two.
inline size_t alignup(size_t a, size_t align) {