diff --git a/register_types.cpp b/register_types.cpp index 3fe54784..41349947 100644 --- a/register_types.cpp +++ b/register_types.cpp @@ -129,6 +129,9 @@ void initialize_voxel_module(ModuleInitializationLevel p_level) { const VoxelServer::ThreadsConfig threads_config = get_config_from_godot(main_thread_budget_usec); VoxelServer::create_singleton(threads_config); VoxelServer::get_singleton().set_main_thread_time_budget_usec(main_thread_budget_usec); + // TODO Pick this from the current renderer + user option (at time of writing, Godot 4 has only one renderer and + // has not figured out how such option would be exposed) + VoxelServer::get_singleton().set_threaded_mesh_resource_building_enabled(true); gd::VoxelServer::create_singleton(); Engine::get_singleton()->add_singleton(Engine::Singleton("VoxelServer", gd::VoxelServer::get_singleton())); diff --git a/server/mesh_block_task.cpp b/server/mesh_block_task.cpp index e1d2253b..5ab58366 100644 --- a/server/mesh_block_task.cpp +++ b/server/mesh_block_task.cpp @@ -1,6 +1,8 @@ #include "mesh_block_task.h" #include "../storage/voxel_data_map.h" +#include "../terrain/voxel_mesh_block.h" #include "../util/dstack.h" +#include "../util/godot/funcs.h" #include "../util/log.h" #include "../util/profiling.h" #include "voxel_server.h" @@ -170,6 +172,61 @@ static void copy_block_and_neighbors(Span> } } +Ref build_mesh(Span surfaces, Mesh::PrimitiveType primitive, int flags, + // This vector indexes surfaces to the material they use (if a surface uses a material but is empty, it + // won't be added to the mesh) + std::vector &mesh_material_indices) { + ZN_PROFILE_SCOPE(); + Ref mesh; + + unsigned int gd_surface_index = 0; + for (unsigned int i = 0; i < surfaces.size(); ++i) { + const VoxelMesher::Output::Surface &surface = surfaces[i]; + Array arrays = surface.arrays; + + if (arrays.is_empty()) { + continue; + } + + CRASH_COND(arrays.size() != Mesh::ARRAY_MAX); + if (!is_surface_triangulated(arrays)) { + continue; + } + + if 
(mesh.is_null()) { + mesh.instantiate(); + } + + // TODO Use `add_surface`, it's about 20% faster after measuring in Tracy (though we may see if Godot 4 expects + // the same) + mesh->add_surface_from_arrays(primitive, arrays, Array(), Dictionary(), flags); + // No multi-material supported yet + ++gd_surface_index; + + mesh_material_indices.push_back(i); + } + + // Debug code to highlight vertex sharing + /*if (mesh->get_surface_count() > 0) { + Array wireframe_surface = generate_debug_seams_wireframe_surface(mesh, 0); + if (wireframe_surface.size() > 0) { + const int wireframe_surface_index = mesh->get_surface_count(); + mesh->add_surface_from_arrays(Mesh::PRIMITIVE_LINES, wireframe_surface); + Ref line_material; + line_material.instance(); + line_material->set_flag(SpatialMaterial::FLAG_UNSHADED, true); + line_material->set_albedo(Color(1.0, 0.0, 1.0)); + mesh->surface_set_material(wireframe_surface_index, line_material); + } + }*/ + + if (mesh.is_valid() && is_mesh_empty(**mesh)) { + mesh = Ref(); + } + + return mesh; +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// namespace { @@ -244,6 +301,15 @@ void MeshBlockTask::run(zylann::ThreadedTaskContext ctx) { lod_index, collision_hint, lod_hint }; mesher->build(_surfaces_output, input); + if (VoxelServer::get_singleton().is_threaded_mesh_resource_building_enabled()) { + // This shall only run if Godot supports building meshes from multiple threads + _mesh = build_mesh(to_span(_surfaces_output.surfaces), _surfaces_output.primitive_type, + _surfaces_output.mesh_flags, _mesh_material_indices); + _has_mesh_resource = true; + } else { + _has_mesh_resource = false; + } + _has_run = true; } @@ -276,6 +342,9 @@ void MeshBlockTask::apply_result() { o.position = position; o.lod = lod_index; o.surfaces = std::move(_surfaces_output); + o.mesh = _mesh; + o.mesh_material_indices = std::move(_mesh_material_indices); + o.has_mesh_resource = 
_has_mesh_resource; VoxelServer::VolumeCallbacks callbacks = VoxelServer::get_singleton().get_volume_callbacks(volume_id); ERR_FAIL_COND(callbacks.mesh_output_callback == nullptr); diff --git a/server/mesh_block_task.h b/server/mesh_block_task.h index 53dfc58c..90235500 100644 --- a/server/mesh_block_task.h +++ b/server/mesh_block_task.h @@ -40,9 +40,15 @@ public: private: bool _has_run = false; bool _too_far = false; + bool _has_mesh_resource = false; VoxelMesher::Output _surfaces_output; + Ref _mesh; + std::vector _mesh_material_indices; // Indexed by mesh surface }; +Ref build_mesh(Span surfaces, Mesh::PrimitiveType primitive, int flags, + std::vector &surface_indices); + } // namespace zylann::voxel #endif // VOXEL_MESH_BLOCK_TASK_H diff --git a/server/voxel_server.cpp b/server/voxel_server.cpp index e678a8a8..a6f9e994 100644 --- a/server/voxel_server.cpp +++ b/server/voxel_server.cpp @@ -220,6 +220,14 @@ void VoxelServer::set_main_thread_time_budget_usec(unsigned int usec) { _main_thread_time_budget_usec = usec; } +void VoxelServer::set_threaded_mesh_resource_building_enabled(bool enable) { + _threaded_mesh_resource_building_enabled = enable; +} + +bool VoxelServer::is_threaded_mesh_resource_building_enabled() const { + return _threaded_mesh_resource_building_enabled; +} + void VoxelServer::push_async_task(zylann::IThreadedTask *task) { _general_thread_pool.enqueue(task); } diff --git a/server/voxel_server.h b/server/voxel_server.h index 263ed439..81660225 100644 --- a/server/voxel_server.h +++ b/server/voxel_server.h @@ -24,8 +24,17 @@ public: Type type; VoxelMesher::Output surfaces; + // Only used if `has_mesh_resource` is true. + Ref mesh; + // Remaps Mesh surface indices to Mesher material indices. Only used if `has_mesh_resource` is true. + // TODO Optimize: candidate for small vector optimization. A big majority of meshes will have a handful of + // surfaces, which would fit here without allocating. 
+ std::vector mesh_material_indices; + // In mesh block coordinates Vector3i position; uint8_t lod; + // Tells if the mesh resource was built as part of the task. If not, you need to build it on the main thread. Defaults to false so an output that was never filled in falls back to main-thread building. + bool has_mesh_resource = false; }; struct BlockDataOutput { @@ -124,6 +133,12 @@ public: int get_main_thread_time_budget_usec() const; void set_main_thread_time_budget_usec(unsigned int usec); + // Allows/disallows building Mesh resources from inside threads. Depends on Godot's efficiency at doing so, and + // which renderer is used. For example, the OpenGL renderer does not support this well, but the Vulkan one should. + void set_threaded_mesh_resource_building_enabled(bool enable); + // This should be fast and safe to access from multiple threads. + bool is_threaded_mesh_resource_building_enabled() const; + void push_main_thread_progressive_task(IProgressiveTask *task); // Thread-safe. @@ -209,6 +224,8 @@ private: ProgressiveTaskRunner _progressive_task_runner; FileLocker _file_locker; + + bool _threaded_mesh_resource_building_enabled = false; }; struct VoxelFileLockerRead { diff --git a/terrain/fixed_lod/voxel_terrain.cpp b/terrain/fixed_lod/voxel_terrain.cpp index 5a20dde6..bf8b586d 100644 --- a/terrain/fixed_lod/voxel_terrain.cpp +++ b/terrain/fixed_lod/voxel_terrain.cpp @@ -1551,43 +1551,25 @@ void VoxelTerrain::apply_mesh_update(const VoxelServer::BlockMeshOutput &ob) { } Ref mesh; - - const bool gen_collisions = _generate_collisions && block->collision_viewers.get() > 0; - const bool use_render_mesh_as_collider = gen_collisions && !_mesher->is_generating_collision_surface(); - std::vector render_surfaces; - - int gd_surface_index = 0; - for (unsigned int surface_index = 0; surface_index < ob.surfaces.surfaces.size(); ++surface_index) { - const VoxelMesher::Output::Surface &surface = ob.surfaces.surfaces[surface_index]; - Array arrays = surface.arrays; - if (arrays.is_empty()) { - continue; - } - - CRASH_COND(arrays.size() != Mesh::ARRAY_MAX); - if 
(!is_surface_triangulated(arrays)) { - continue; - } - - if (use_render_mesh_as_collider) { - render_surfaces.push_back(arrays); - } - - if (mesh.is_null()) { - mesh.instantiate(); - } - - mesh->add_surface_from_arrays( - ob.surfaces.primitive_type, arrays, Array(), Dictionary(), ob.surfaces.mesh_flags); - - Ref material = _mesher->get_material_by_index(surface_index); - mesh->surface_set_material(gd_surface_index, material); - ++gd_surface_index; + std::vector material_indices; + if (ob.has_mesh_resource) { + // The mesh was already built as part of the threaded task + mesh = ob.mesh; + // It can be empty. `ob` is const here, so the indices are copied (std::move would not move from it) + material_indices = ob.mesh_material_indices; + } else { + // Can't build meshes in threads, do it here + material_indices.clear(); + mesh = build_mesh(to_span_const(ob.surfaces.surfaces), ob.surfaces.primitive_type, ob.surfaces.mesh_flags, + material_indices); } - - if (mesh.is_valid() && is_mesh_empty(**mesh)) { - mesh = Ref(); - render_surfaces.clear(); + if (mesh.is_valid()) { + const unsigned int surface_count = mesh->get_surface_count(); + for (unsigned int surface_index = 0; surface_index < surface_count; ++surface_index) { + const unsigned int material_index = material_indices[surface_index]; + Ref material = _mesher->get_material_by_index(material_index); + mesh->surface_set_material(surface_index, material); + } } if (_instancer != nullptr) { @@ -1609,6 +1591,7 @@ void VoxelTerrain::apply_mesh_update(const VoxelServer::BlockMeshOutput &ob) { block->set_material_override(_material_override); } + const bool gen_collisions = _generate_collisions && block->collision_viewers.get() > 0; if (gen_collisions) { Ref collision_shape = make_collision_shape_from_mesher_output(ob.surfaces, **_mesher); block->set_collision_shape( diff --git a/terrain/variable_lod/voxel_lod_terrain.cpp b/terrain/variable_lod/voxel_lod_terrain.cpp index 466954d0..117431d2 100644 --- a/terrain/variable_lod/voxel_lod_terrain.cpp +++ b/terrain/variable_lod/voxel_lod_terrain.cpp 
@@ -1424,9 +1424,10 @@ void VoxelLodTerrain::apply_data_block_response(VoxelServer::BlockDataOutput &ob } void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) { - // The following is done on the main thread because Godot doesn't really support multithreaded Mesh allocation. - // This also proved to be very slow compared to the meshing process itself... - // hopefully Vulkan will allow us to upload graphical resources without stalling rendering as they upload? + // The following is done on the main thread because Godot doesn't really support everything done here. + // Building meshes can be done in the threaded task when using Vulkan, but not OpenGL. + // Setting up mesh instances might not be well threaded? + // Building collision shapes in threads efficiently is not supported. ZN_PROFILE_SCOPE(); ERR_FAIL_COND(!is_inside_tree()); @@ -1472,19 +1473,31 @@ void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) { } // -------- Part where we invoke Godot functions --------- - // As far as I know, this is not yet threadable efficiently, for the most part. - // By that, I mean being able to call into RenderingServer and PhysicsServer, - // without inducing a stall of the main thread. + // This part is not fully threadable. 
VoxelMeshMap &mesh_map = _mesh_maps_per_lod[ob.lod]; VoxelMeshBlockVLT *block = mesh_map.get_block(ob.position); VoxelMesher::Output &mesh_data = ob.surfaces; - Ref mesh = - build_mesh(to_span_const(mesh_data.surfaces), mesh_data.primitive_type, mesh_data.mesh_flags, _material); + Ref mesh; + if (ob.has_mesh_resource) { + // The mesh was already built as part of the threaded task + mesh = ob.mesh; + // It can be empty + if (mesh.is_valid()) { + const unsigned int surface_count = mesh->get_surface_count(); + for (unsigned int surface_index = 0; surface_index < surface_count; ++surface_index) { + mesh->surface_set_material(surface_index, _material); + } + } + } else { + // Can't build meshes in threads, do it here (the result must be kept: `mesh` is null-checked right after) + mesh = build_mesh(to_span_const(mesh_data.surfaces), mesh_data.primitive_type, mesh_data.mesh_flags, _material); + } if (mesh.is_null()) { + // The mesh is empty if (block != nullptr) { // No surface anymore in this block, destroy it // TODO Factor removal in a function, it's done in a few places @@ -1562,7 +1575,7 @@ void VoxelLodTerrain::apply_mesh_update(VoxelServer::BlockMeshOutput &ob) { block->set_mesh(mesh, DirectMeshInstance::GIMode(get_gi_mode())); - { + if (!ob.has_mesh_resource) { // Profiling has shown Godot takes as much time to build a transition mesh as the main mesh of a block, so // because there are 6 transition meshes per block, we would spend about 80% of the time on these if we build // them all. Which is counter-intuitive because transition meshes are tiny in comparison... (collision meshes