Spread out mesh destruction in an attempt to work around slow Vulkan buffer deallocation
parent ea74c1f05f
commit 541af4a4ad
server/progressive_task_runner.cpp (Normal file, 55 lines added)
@@ -0,0 +1,55 @@
#include "progressive_task_runner.h"

namespace zylann {

ProgressiveTaskRunner::~ProgressiveTaskRunner() {
    flush();
    ERR_FAIL_COND_MSG(_tasks.size() > 0, "Tasks got created in destructors?");
}

void ProgressiveTaskRunner::push(IProgressiveTask *task) {
    ERR_FAIL_COND(task == nullptr);
    _tasks.push(task);
}

void ProgressiveTaskRunner::process() {
    const int64_t now_msec = Time::get_singleton()->get_ticks_msec();
    const int64_t delta_msec = now_msec - _last_process_time_msec;
    _last_process_time_msec = now_msec;
    ERR_FAIL_COND(delta_msec < 0);

    // The goal is to dequeue everything in S seconds.
    // So if we have N tasks and `process` is called F times per second, we must dequeue N / (S * F) tasks.
    // Put another way, if we call `process` every D seconds, we must dequeue (D * N) / S tasks.
    // We make sure a minimum amount is run so it cannot get stuck at 0.
    // As the number of pending tasks decreases, we keep running the highest amount we calculated,
    // and we reset when we are done.
    _dequeue_count = max(int64_t(_dequeue_count), (int64_t(_tasks.size()) * delta_msec) / COMPLETION_TIME_MSEC);
    _dequeue_count = min(_dequeue_count, max(MIN_COUNT, static_cast<unsigned int>(_tasks.size())));

    unsigned int count = _dequeue_count;
    while (_tasks.size() > 0 && count > 0) {
        IProgressiveTask *task = _tasks.front();
        _tasks.pop();
        task->run();
        // TODO Call recycling function instead?
        memdelete(task);
        --count;
    }
}

void ProgressiveTaskRunner::flush() {
    while (!_tasks.empty()) {
        IProgressiveTask *task = _tasks.front();
        _tasks.pop();
        task->run();
        memdelete(task);
    }
}

unsigned int ProgressiveTaskRunner::get_pending_count() const {
    return _tasks.size();
}

} // namespace zylann
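A hypothetical worked example of the dequeue formula above (the numbers are illustrative and not part of the commit), using the constants COMPLETION_TIME_MSEC = 500 and MIN_COUNT = 4 declared in the header:

    // Frame with 1000 pending tasks, 16 ms since the previous process() call:
    //   candidate = (1000 * 16) / 500 = 32  -> 32 tasks run this frame.
    // Later frame with 12 pending tasks, 16 ms delta:
    //   candidate = (12 * 16) / 500 = 0, so the previous high of 32 is kept,
    //   then clamped to max(MIN_COUNT, 12) = 12 -> the queue empties.
    // Once the queue is empty, the clamp resets _dequeue_count to MIN_COUNT = 4.

At roughly 60 FPS this drains any backlog in about COMPLETION_TIME_MSEC rather than all at once.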
server/progressive_task_runner.h (Normal file, 50 lines added)
@@ -0,0 +1,50 @@
#ifndef PROGRESSIVE_TASK_RUNNER_H
#define PROGRESSIVE_TASK_RUNNER_H

#include "../util/math/funcs.h"
#include <core/os/time.h>
#include <queue>

namespace zylann {

// TODO It would be really nice if Godot4 Vulkan buffer deallocation were better optimized.
// This originally exists to work around the terribly slow Vulkan buffer deallocation in Godot4.
// It happens on the main thread and causes deferred stutters when a terrain contains a lot of chunks
// and the camera moves fast.
// I hate this workaround because it feels like we are almost not in control of a stable framerate.
// "Make fewer meshes" is not enough if the problem can't be addressed dynamically.

class IProgressiveTask {
public:
    virtual ~IProgressiveTask() {}
    virtual void run() = 0;
};

// Runs a certain number of tasks per frame such that all pending tasks should be completed within N seconds.
// This spreads the load over time and avoids CPU spikes.
// It can be used in place of a time-slicing runner when the duration of tasks cannot be used as a cost metric.
// This is the case for tasks that defer their workload to another system to run later. It is far from perfect though,
// and is a last-resort solution when optimization and threading are not possible.
// Tasks pushed here should preferably not require low latency in the game,
// because they will likely run a bit later than a time-sliced task.
class ProgressiveTaskRunner {
public:
    ~ProgressiveTaskRunner();

    void push(IProgressiveTask *task);
    void process();
    void flush();
    unsigned int get_pending_count() const;

private:
    static const unsigned int MIN_COUNT = 4;
    static const unsigned int COMPLETION_TIME_MSEC = 500;

    std::queue<IProgressiveTask *> _tasks;
    unsigned int _dequeue_count = MIN_COUNT;
    int64_t _last_process_time_msec = 0;
};

} // namespace zylann

#endif // PROGRESSIVE_TASK_RUNNER_H
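A minimal usage sketch of the class described above (hypothetical, not part of the commit): a task type implementing IProgressiveTask and a runner driven once per frame. The names ExampleTask and run_one_frame are illustrative only; ownership of pushed tasks passes to the runner, which calls memdelete() on them after run().

    class ExampleTask : public zylann::IProgressiveTask {
    public:
        void run() override {
            // Deferred work goes here; in this commit it is unreferencing a Mesh.
        }
    };

    void run_one_frame(zylann::ProgressiveTaskRunner &runner) {
        runner.push(memnew(ExampleTask)); // queued, not run immediately
        runner.process(); // dequeues a frame-sized batch, aiming to drain in ~COMPLETION_TIME_MSEC
    }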
@@ -586,6 +586,10 @@ void VoxelServer::push_time_spread_task(IVoxelTimeSpreadTask *task) {
    _time_spread_task_runner.push(task);
}

void VoxelServer::push_progressive_task(zylann::IProgressiveTask *task) {
    _progressive_task_runner.push(task);
}

int VoxelServer::get_main_thread_time_budget_usec() const {
    return _main_thread_time_budget_usec;
}

@@ -605,6 +609,8 @@ void VoxelServer::process() {
    //VOXEL_PROFILE_MARK_FRAME();
    VOXEL_PROFILE_SCOPE();
    VOXEL_PROFILE_PLOT("Static memory usage", int64_t(OS::get_singleton()->get_static_memory_usage()));
    VOXEL_PROFILE_PLOT("TimeSpread tasks", int64_t(_time_spread_task_runner.get_pending_count()));
    VOXEL_PROFILE_PLOT("Progressive tasks", int64_t(_progressive_task_runner.get_pending_count()));

    // Receive data updates
    _streaming_thread_pool.dequeue_completed_tasks([](IVoxelTask *task) {

@@ -622,6 +628,8 @@ void VoxelServer::process() {
    // which could in turn complete right away (we avoid 1-frame delays this way).
    _time_spread_task_runner.process(_main_thread_time_budget_usec);

    _progressive_task_runner.process();

    // Update viewer dependencies
    {
        const size_t viewer_count = _world.viewers.count();

@@ -696,7 +704,7 @@ VoxelServer::Stats VoxelServer::get_stats() const {
    s.generation_tasks = g_debug_generate_tasks_count;
    s.meshing_tasks = g_debug_mesh_tasks_count;
    s.streaming_tasks = g_debug_stream_tasks_count;
    s.main_thread_tasks = _time_spread_task_runner.get_pending_count();
    s.main_thread_tasks = _time_spread_task_runner.get_pending_count() + _progressive_task_runner.get_pending_count();
    return s;
}

@@ -5,8 +5,10 @@
#include "../meshers/blocky/voxel_mesher_blocky.h"
#include "../streams/voxel_stream.h"
#include "../util/file_locker.h"
#include "progressive_task_runner.h"
#include "struct_db.h"
#include "voxel_thread_pool.h"

#include <scene/main/node.h>

#include <memory>

@@ -155,6 +157,8 @@ public:
    void push_time_spread_task(IVoxelTimeSpreadTask *task);
    int get_main_thread_time_budget_usec() const;

    void push_progressive_task(zylann::IProgressiveTask *task);

    void push_async_task(IVoxelTask *task);
    void push_async_tasks(Span<IVoxelTask *> tasks);

@@ -384,6 +388,7 @@ private:
    // For tasks that can only run on the main thread and be spread out over frames
    VoxelTimeSpreadTaskRunner _time_spread_task_runner;
    int _main_thread_time_budget_usec = 8000;
    zylann::ProgressiveTaskRunner _progressive_task_runner;

    VoxelFileLocker _file_locker;
};

@@ -1,5 +1,7 @@
#include "voxel_mesh_block.h"
#include "../constants/voxel_string_names.h"
#include "../server/progressive_task_runner.h"
#include "../server/voxel_server.h"
#include "../util/godot/funcs.h"
#include "../util/macros.h"
#include "../util/profiling.h"

@@ -34,7 +36,43 @@ VoxelMeshBlock *VoxelMeshBlock::create(Vector3i bpos, unsigned int size, unsigne

VoxelMeshBlock::VoxelMeshBlock() {}

VoxelMeshBlock::~VoxelMeshBlock() {}
VoxelMeshBlock::~VoxelMeshBlock() {
    // Had to resort to this in Godot4 because deleting meshes is particularly expensive,
    // due to the Vulkan allocator used by the renderer.
    class FreeMeshTask : public zylann::IProgressiveTask {
    public:
        static inline void try_add_and_destroy(DirectMeshInstance &mi) {
            if (mi.get_mesh().is_valid()) {
                add(mi.get_mesh());
            }
            mi.destroy();
        }

        static void add(Ref<Mesh> mesh) {
            CRASH_COND(mesh.is_null());
            FreeMeshTask *task = memnew(FreeMeshTask());
            task->mesh = mesh;
            VoxelServer::get_singleton()->push_progressive_task(task);
        }

        void run() override {
#ifdef DEBUG_ENABLED
            if (mesh->reference_get_count() > 1) {
                WARN_PRINT("Mesh has more than one ref left, task spreading will not be effective at smoothing "
                           "destruction cost");
            }
#endif
            mesh.unref();
        }

        Ref<Mesh> mesh;
    };

    FreeMeshTask::try_add_and_destroy(_mesh_instance);
    for (unsigned int i = 0; i < _transition_mesh_instances.size(); ++i) {
        FreeMeshTask::try_add_and_destroy(_transition_mesh_instances[i]);
    }
}

void VoxelMeshBlock::set_world(Ref<World3D> p_world) {
    if (_world != p_world) {

@@ -24,8 +24,8 @@ void DirectMeshInstance::destroy() {
        RenderingServer &vs = *RenderingServer::get_singleton();
        vs.free(_mesh_instance);
        _mesh_instance = RID();
        _mesh.unref();
    }
    _mesh.unref();
}

void DirectMeshInstance::set_world(World3D *world) {