Build mesh resource on the task thread now the Vulkan renderer supports it

master
Marc Gilleron 2022-07-02 23:01:49 +01:00
parent 8bb8bfccab
commit dac0af3a04
7 changed files with 144 additions and 45 deletions

View File

@ -129,6 +129,9 @@ void initialize_voxel_module(ModuleInitializationLevel p_level) {
const VoxelServer::ThreadsConfig threads_config = get_config_from_godot(main_thread_budget_usec);
VoxelServer::create_singleton(threads_config);
VoxelServer::get_singleton().set_main_thread_time_budget_usec(main_thread_budget_usec);
// TODO Pick this from the current renderer + user option (at time of writing, Godot 4 has only one renderer and
// has not figured out how such option would be exposed)
VoxelServer::get_singleton().set_threaded_mesh_resource_building_enabled(true);
gd::VoxelServer::create_singleton();
Engine::get_singleton()->add_singleton(Engine::Singleton("VoxelServer", gd::VoxelServer::get_singleton()));

View File

@ -1,6 +1,8 @@
#include "mesh_block_task.h"
#include "../storage/voxel_data_map.h"
#include "../terrain/voxel_mesh_block.h"
#include "../util/dstack.h"
#include "../util/godot/funcs.h"
#include "../util/log.h"
#include "../util/profiling.h"
#include "voxel_server.h"
@ -170,6 +172,61 @@ static void copy_block_and_neighbors(Span<std::shared_ptr<VoxelBufferInternal>>
}
}
// Builds an ArrayMesh out of the surfaces produced by a mesher.
//
// Surfaces whose arrays are empty, or for which `is_surface_triangulated` returns false, are skipped and do
// not appear in the resulting mesh.
//
// `surfaces`: mesher output surfaces, in mesher order.
// `primitive`, `flags`: forwarded as-is to `add_surface_from_arrays` for every surface added.
// `mesh_material_indices`: receives one entry per surface actually added to the mesh, in mesh surface order.
//     Each entry is the index of that surface within `surfaces`, which lets the caller find the material it
//     uses (if a surface uses a material but is empty, it won't be added to the mesh). Entries are appended;
//     the vector is not cleared here.
//
// Returns a null reference if no surface was added, or if `is_mesh_empty` reports the built mesh as empty.
// Materials are not assigned to the mesh by this function.
Ref<ArrayMesh> build_mesh(Span<const VoxelMesher::Output::Surface> surfaces, Mesh::PrimitiveType primitive, int flags,
		std::vector<uint8_t> &mesh_material_indices) {
	ZN_PROFILE_SCOPE();

	Ref<ArrayMesh> mesh;

	for (unsigned int i = 0; i < surfaces.size(); ++i) {
		const VoxelMesher::Output::Surface &surface = surfaces[i];
		Array arrays = surface.arrays;

		if (arrays.is_empty()) {
			continue;
		}

		CRASH_COND(arrays.size() != Mesh::ARRAY_MAX);
		if (!is_surface_triangulated(arrays)) {
			continue;
		}

		// Only create the mesh once there is at least one surface to put in it
		if (mesh.is_null()) {
			mesh.instantiate();
		}

		// TODO Use `add_surface`, it's about 20% faster after measuring in Tracy (though we may see if Godot 4 expects
		// the same)
		mesh->add_surface_from_arrays(primitive, arrays, Array(), Dictionary(), flags);

		// Remember which mesher surface this mesh surface came from.
		// NOTE(review): indices are stored as 8-bit, so this assumes there are never more than 256 surfaces;
		// larger indices would wrap around. Confirm against the meshers' maximum material count.
		mesh_material_indices.push_back(static_cast<uint8_t>(i));
	}

	// Debug code to highlight vertex sharing
	/*if (mesh->get_surface_count() > 0) {
		Array wireframe_surface = generate_debug_seams_wireframe_surface(mesh, 0);
		if (wireframe_surface.size() > 0) {
			const int wireframe_surface_index = mesh->get_surface_count();
			mesh->add_surface_from_arrays(Mesh::PRIMITIVE_LINES, wireframe_surface);
			Ref<SpatialMaterial> line_material;
			line_material.instance();
			line_material->set_flag(SpatialMaterial::FLAG_UNSHADED, true);
			line_material->set_albedo(Color(1.0, 0.0, 1.0));
			mesh->surface_set_material(wireframe_surface_index, line_material);
		}
	}*/

	if (mesh.is_valid() && is_mesh_empty(**mesh)) {
		// Hand back a null reference rather than a mesh with nothing to draw
		mesh = Ref<ArrayMesh>();
	}

	return mesh;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace {
@ -244,6 +301,15 @@ void MeshBlockTask::run(zylann::ThreadedTaskContext ctx) {
lod_index, collision_hint, lod_hint };
mesher->build(_surfaces_output, input);
if (VoxelServer::get_singleton().is_threaded_mesh_resource_building_enabled()) {
// This shall only run if Godot supports building meshes from multiple threads
_mesh = build_mesh(to_span(_surfaces_output.surfaces), _surfaces_output.primitive_type,
_surfaces_output.mesh_flags, _mesh_material_indices);
_has_mesh_resource = true;
} else {
_has_mesh_resource = false;
}
_has_run = true;
}
@ -276,6 +342,9 @@ void MeshBlockTask::apply_result() {
o.position = position;
o.lod = lod_index;
o.surfaces = std::move(_surfaces_output);
o.mesh = _mesh;
o.mesh_material_indices = std::move(_mesh_material_indices);
o.has_mesh_resource = _has_mesh_resource;
VoxelServer::VolumeCallbacks callbacks = VoxelServer::get_singleton().get_volume_callbacks(volume_id);
ERR_FAIL_COND(callbacks.mesh_output_callback == nullptr);

View File

@ -40,9 +40,15 @@ public:
private:
bool _has_run = false;
bool _too_far = false;
bool _has_mesh_resource = false;
VoxelMesher::Output _surfaces_output;
Ref<Mesh> _mesh;
std::vector<uint8_t> _mesh_material_indices; // Indexed by mesh surface
};
// Builds an ArrayMesh from mesher output surfaces, using the given primitive type and mesh flags.
// `mesh_material_indices` is filled with, for each surface of the returned mesh, the index of the mesher
// surface it was built from.
Ref<ArrayMesh> build_mesh(Span<const VoxelMesher::Output::Surface> surfaces, Mesh::PrimitiveType primitive, int flags,
		std::vector<uint8_t> &mesh_material_indices);
} // namespace zylann::voxel
#endif // VOXEL_MESH_BLOCK_TASK_H

View File

@ -220,6 +220,14 @@ void VoxelServer::set_main_thread_time_budget_usec(unsigned int usec) {
_main_thread_time_budget_usec = usec;
}
void VoxelServer::set_threaded_mesh_resource_building_enabled(bool enable) {
_threaded_mesh_resource_building_enabled = enable;
}
bool VoxelServer::is_threaded_mesh_resource_building_enabled() const {
return _threaded_mesh_resource_building_enabled;
}
void VoxelServer::push_async_task(zylann::IThreadedTask *task) {
_general_thread_pool.enqueue(task);
}

View File

@ -24,8 +24,17 @@ public:
Type type;
VoxelMesher::Output surfaces;
// Only used if `has_mesh_resource` is true.
Ref<Mesh> mesh;
// Remaps Mesh surface indices to Mesher material indices. Only used if `has_mesh_resource` is true.
// TODO Optimize: candidate for small vector optimization. A big majority of meshes will have a handful of
// surfaces, which would fit here without allocating.
std::vector<uint8_t> mesh_material_indices;
// In mesh block coordinates
Vector3i position;
uint8_t lod;
// Tells if the mesh resource was built as part of the task. If not, you need to build it on the main thread.
bool has_mesh_resource;
};
struct BlockDataOutput {
@ -124,6 +133,12 @@ public:
int get_main_thread_time_budget_usec() const;
void set_main_thread_time_budget_usec(unsigned int usec);
// Allows/disallows building Mesh resources from inside threads. Depends on Godot's efficiency at doing so, and
// which renderer is used. For example, the OpenGL renderer does not support this well, but the Vulkan one should.
void set_threaded_mesh_resource_building_enabled(bool enable);
// This should be fast and safe to access from multiple threads.
bool is_threaded_mesh_resource_building_enabled() const;
void push_main_thread_progressive_task(IProgressiveTask *task);
// Thread-safe.
@ -209,6 +224,8 @@ private:
ProgressiveTaskRunner _progressive_task_runner;
FileLocker _file_locker;
bool _threaded_mesh_resource_building_enabled = false;
};
struct VoxelFileLockerRead {

View File

@ -1551,43 +1551,25 @@ void VoxelTerrain::apply_mesh_update(const VoxelServer::BlockMeshOutput &ob) {
}
Ref<ArrayMesh> mesh;
const bool gen_collisions = _generate_collisions && block->collision_viewers.get() > 0;
const bool use_render_mesh_as_collider = gen_collisions && !_mesher->is_generating_collision_surface();
std::vector<Array> render_surfaces;
int gd_surface_index = 0;
for (unsigned int surface_index = 0; surface_index < ob.surfaces.surfaces.size(); ++surface_index) {
const VoxelMesher::Output::Surface &surface = ob.surfaces.surfaces[surface_index];
Array arrays = surface.arrays;
if (arrays.is_empty()) {
continue;
}
CRASH_COND(arrays.size() != Mesh::ARRAY_MAX);
if (!is_surface_triangulated(arrays)) {
continue;
}
if (use_render_mesh_as_collider) {
render_surfaces.push_back(arrays);
}
if (mesh.is_null()) {
mesh.instantiate();
}
mesh->add_surface_from_arrays(
ob.surfaces.primitive_type, arrays, Array(), Dictionary(), ob.surfaces.mesh_flags);
Ref<Material> material = _mesher->get_material_by_index(surface_index);
mesh->surface_set_material(gd_surface_index, material);
++gd_surface_index;
std::vector<uint8_t> material_indices;
if (ob.has_mesh_resource) {
// The mesh was already built as part of the threaded task
mesh = ob.mesh;
// It can be empty
material_indices = std::move(ob.mesh_material_indices);
} else {
// Can't build meshes in threads, do it here
material_indices.clear();
mesh = build_mesh(to_span_const(ob.surfaces.surfaces), ob.surfaces.primitive_type, ob.surfaces.mesh_flags,
material_indices);
}
if (mesh.is_valid() && is_mesh_empty(**mesh)) {
mesh = Ref<Mesh>();
render_surfaces.clear();
if (mesh.is_valid()) {
const unsigned int surface_count = mesh->get_surface_count();
for (unsigned int surface_index = 0; surface_index < surface_count; ++surface_index) {
const unsigned int material_index = material_indices[surface_index];
Ref<Material> material = _mesher->get_material_by_index(material_index);
mesh->surface_set_material(surface_index, material);
}
}
if (_instancer != nullptr) {
@ -1609,6 +1591,7 @@ void VoxelTerrain::apply_mesh_update(const VoxelServer::BlockMeshOutput &ob) {
block->set_material_override(_material_override);
}
const bool gen_collisions = _generate_collisions && block->collision_viewers.get() > 0;
if (gen_collisions) {
Ref<Shape3D> collision_shape = make_collision_shape_from_mesher_output(ob.surfaces, **_mesher);
block->set_collision_shape(

View File

@ -1424,9 +1424,10 @@ void VoxelLodTerrain::apply_data_block_response(VoxelServer::BlockDataOutput &ob
}
void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) {
// The following is done on the main thread because Godot doesn't really support multithreaded Mesh allocation.
// This also proved to be very slow compared to the meshing process itself...
// hopefully Vulkan will allow us to upload graphical resources without stalling rendering as they upload?
// The following is done on the main thread because Godot doesn't really support everything done here.
// Building meshes can be done in the threaded task when using Vulkan, but not OpenGL.
// Setting up mesh instances might not be well threaded?
// Building collision shapes in threads efficiently is not supported.
ZN_PROFILE_SCOPE();
ERR_FAIL_COND(!is_inside_tree());
@ -1472,19 +1473,31 @@ void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) {
}
// -------- Part where we invoke Godot functions ---------
// As far as I know, this is not yet threadable efficiently, for the most part.
// By that, I mean being able to call into RenderingServer and PhysicsServer,
// without inducing a stall of the main thread.
// This part is not fully threadable.
VoxelMeshMap<VoxelMeshBlockVLT> &mesh_map = _mesh_maps_per_lod[ob.lod];
VoxelMeshBlockVLT *block = mesh_map.get_block(ob.position);
VoxelMesher::Output &mesh_data = ob.surfaces;
Ref<ArrayMesh> mesh =
build_mesh(to_span_const(mesh_data.surfaces), mesh_data.primitive_type, mesh_data.mesh_flags, _material);
Ref<ArrayMesh> mesh;
if (ob.has_mesh_resource) {
// The mesh was already built as part of the threaded task
mesh = ob.mesh;
// It can be empty
if (mesh.is_valid()) {
const unsigned int surface_count = mesh->get_surface_count();
for (unsigned int surface_index = 0; surface_index < surface_count; ++surface_index) {
mesh->surface_set_material(surface_index, _material);
}
}
} else {
// Can't build meshes in threads, do it here
build_mesh(to_span_const(mesh_data.surfaces), mesh_data.primitive_type, mesh_data.mesh_flags, _material);
}
if (mesh.is_null()) {
// The mesh is empty
if (block != nullptr) {
// No surface anymore in this block, destroy it
// TODO Factor removal in a function, it's done in a few places
@ -1562,7 +1575,7 @@ void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) {
block->set_mesh(mesh, DirectMeshInstance::GIMode(get_gi_mode()));
{
if (!ob.has_mesh_resource) {
// Profiling has shown Godot takes as much time to build a transition mesh as the main mesh of a block, so
// because there are 6 transition meshes per block, we would spend about 80% of the time on these if we build
// them all. Which is counter-intuitive because transition meshes are tiny in comparison... (collision meshes