godot_voxel/engine/mesh_block_task.cpp

#include "mesh_block_task.h"
#include "../meshers/transvoxel/voxel_mesher_transvoxel.h"
#include "../storage/voxel_data.h"
#include "../terrain/voxel_mesh_block.h"
#include "../util/dstack.h"
#include "../util/godot/mesh.h"
#include "../util/log.h"
#include "../util/profiling.h"
#include "generate_distance_normalmap_task.h"
//#include "../util/string_funcs.h" // Debug
#include "../meshers/transvoxel/transvoxel_cell_iterator.h"
#include "voxel_engine.h"

namespace zylann::voxel {

struct CubicAreaInfo {
	int edge_size; // In data blocks
	int mesh_block_size_factor;
	unsigned int anchor_buffer_index;

	inline bool is_valid() const {
		return edge_size != 0;
	}
};

CubicAreaInfo get_cubic_area_info_from_size(unsigned int size) {
	// Determine size of the cube of blocks
	int edge_size;
	int mesh_block_size_factor;
	switch (size) {
		case 3 * 3 * 3:
			edge_size = 3;
			mesh_block_size_factor = 1;
			break;
		case 4 * 4 * 4:
			edge_size = 4;
			mesh_block_size_factor = 2;
			break;
		default:
			ZN_PRINT_ERROR("Unsupported block count");
			return CubicAreaInfo{ 0, 0, 0 };
	}

	// Pick anchor block, usually within the central part of the cube (that block must be valid)
	const unsigned int anchor_buffer_index = edge_size * edge_size + edge_size + 1;

	return { edge_size, mesh_block_size_factor, anchor_buffer_index };
}

// Takes a list of blocks and interprets it as a cube of blocks centered around the area we want to create a mesh from.
// Voxels from central blocks are copied, and part of side blocks are also copied so we get a temporary buffer
// which includes enough neighbors for the mesher to avoid doing bound checks.
static void copy_block_and_neighbors(Span<std::shared_ptr<VoxelBufferInternal>> blocks, VoxelBufferInternal &dst,
		int min_padding, int max_padding, int channels_mask, Ref<VoxelGenerator> generator,
		const VoxelModifierStack *modifiers, int data_block_size, uint8_t lod_index, Vector3i mesh_block_pos) {
	ZN_DSTACK();
	ZN_PROFILE_SCOPE();

	// Extract wanted channels in a list
	unsigned int channels_count = 0;
	FixedArray<uint8_t, VoxelBufferInternal::MAX_CHANNELS> channels =
			VoxelBufferInternal::mask_to_channels_list(channels_mask, channels_count);

	// Determine size of the cube of blocks
	const CubicAreaInfo area_info = get_cubic_area_info_from_size(blocks.size());
	ERR_FAIL_COND(!area_info.is_valid());

	std::shared_ptr<VoxelBufferInternal> &central_buffer = blocks[area_info.anchor_buffer_index];
	ERR_FAIL_COND_MSG(central_buffer == nullptr && generator.is_null(), "Central buffer must be valid");
	if (central_buffer != nullptr) {
		ERR_FAIL_COND_MSG(
				Vector3iUtil::all_members_equal(central_buffer->get_size()) == false, "Central buffer must be cubic");
	}
	const int mesh_block_size = data_block_size * area_info.mesh_block_size_factor;
	const int padded_mesh_block_size = mesh_block_size + min_padding + max_padding;

	dst.create(padded_mesh_block_size, padded_mesh_block_size, padded_mesh_block_size);

	// TODO Need to provide format differently, this won't work in full load mode where areas are generated on the fly
	// for (unsigned int ci = 0; ci < channels.size(); ++ci) {
	// 	dst.set_channel_depth(ci, central_buffer->get_channel_depth(ci));
	// }
	// This is a hack
	for (unsigned int i = 0; i < blocks.size(); ++i) {
		const std::shared_ptr<VoxelBufferInternal> &buffer = blocks[i];
		if (buffer != nullptr) {
			// Initialize channel depths from the first non-null block found
			dst.copy_format(*buffer);
			break;
		}
	}

	const Vector3i min_pos = -Vector3iUtil::create(min_padding);
	const Vector3i max_pos = Vector3iUtil::create(mesh_block_size + max_padding);

	std::vector<Box3i> boxes_to_generate;
	const Box3i mesh_data_box = Box3i::from_min_max(min_pos, max_pos);
	const bool has_generator = generator.is_valid() || modifiers != nullptr;
	if (has_generator) {
		boxes_to_generate.push_back(mesh_data_box);
	}

	// Using ZXY as convention to reconstruct positions with thread locking consistency
	unsigned int block_index = 0;
	for (int z = -1; z < area_info.edge_size - 1; ++z) {
		for (int x = -1; x < area_info.edge_size - 1; ++x) {
			for (int y = -1; y < area_info.edge_size - 1; ++y) {
				const Vector3i offset = data_block_size * Vector3i(x, y, z);
				const std::shared_ptr<VoxelBufferInternal> &src = blocks[block_index];
				++block_index;

				if (src == nullptr) {
					continue;
				}

				const Vector3i src_min = min_pos - offset;
				const Vector3i src_max = max_pos - offset;

				{
					RWLockRead read(src->get_lock());
					for (unsigned int ci = 0; ci < channels_count; ++ci) {
						dst.copy_from(*src, src_min, src_max, Vector3i(), channels[ci]);
					}
				}

				if (has_generator) {
					// Subtract edited box from the area to generate
					// TODO This approach allows to batch boxes if necessary,
					// but is it just better to do it anyways for every clipped box?
					ZN_PROFILE_SCOPE_NAMED("Box subtract");
					const unsigned int count = boxes_to_generate.size();
					const Box3i block_box = Box3i(offset, Vector3iUtil::create(data_block_size)).clipped(mesh_data_box);

					for (unsigned int box_index = 0; box_index < count; ++box_index) {
						Box3i box = boxes_to_generate[box_index];
						box.difference_to_vec(block_box, boxes_to_generate);
#ifdef DEBUG_ENABLED
						CRASH_COND(box_index >= boxes_to_generate.size());
#endif
						boxes_to_generate[box_index] = boxes_to_generate.back();
						boxes_to_generate.pop_back();
					}
				}
			}
		}
	}

	if (has_generator) {
		// Complete data with generated voxels
		ZN_PROFILE_SCOPE_NAMED("Generate");
		VoxelBufferInternal generated_voxels;

		const Vector3i origin_in_voxels =
				mesh_block_pos * (area_info.mesh_block_size_factor * data_block_size << lod_index);

		for (unsigned int i = 0; i < boxes_to_generate.size(); ++i) {
			const Box3i &box = boxes_to_generate[i];
			//print_line(String("size={0}").format(varray(box.size.to_vec3())));
			generated_voxels.create(box.size);
			//generated_voxels.set_voxel_f(2.0f, box.size.x / 2, box.size.y / 2, box.size.z / 2,
			//VoxelBufferInternal::CHANNEL_SDF);
			VoxelGenerator::VoxelQueryData q{ generated_voxels, (box.pos << lod_index) + origin_in_voxels, lod_index };

			if (generator.is_valid()) {
				generator->generate_block(q);
			}
			if (modifiers != nullptr) {
				modifiers->apply(q.voxel_buffer, AABB(q.origin_in_voxels, q.voxel_buffer.get_size() << lod_index));
			}

			for (unsigned int ci = 0; ci < channels_count; ++ci) {
				dst.copy_from(generated_voxels, Vector3i(), generated_voxels.get_size(),
						box.pos + Vector3iUtil::create(min_padding), channels[ci]);
			}
		}
	}
}

Ref<ArrayMesh> build_mesh(Span<const VoxelMesher::Output::Surface> surfaces, Mesh::PrimitiveType primitive, int flags,
		// This vector indexes surfaces to the material they use (if a surface uses a material but is empty, it
		// won't be added to the mesh)
		std::vector<uint8_t> &mesh_material_indices) {
	ZN_PROFILE_SCOPE();
	ZN_ASSERT(mesh_material_indices.size() == 0);

	Ref<ArrayMesh> mesh;

	for (unsigned int i = 0; i < surfaces.size(); ++i) {
		const VoxelMesher::Output::Surface &surface = surfaces[i];
		Array arrays = surface.arrays;

		if (arrays.is_empty()) {
			continue;
		}

		CRASH_COND(arrays.size() != Mesh::ARRAY_MAX);
		if (!is_surface_triangulated(arrays)) {
			continue;
		}

		if (mesh.is_null()) {
			mesh.instantiate();
		}

		// TODO Use `add_surface`, it's about 20% faster after measuring in Tracy (though we may see if Godot 4 expects
		// the same)
		mesh->add_surface_from_arrays(primitive, arrays, Array(), Dictionary(), flags);

		mesh_material_indices.push_back(surface.material_index);
	}

	// Debug code to highlight vertex sharing
	/*if (mesh->get_surface_count() > 0) {
		Array wireframe_surface = generate_debug_seams_wireframe_surface(mesh, 0);
		if (wireframe_surface.size() > 0) {
			const int wireframe_surface_index = mesh->get_surface_count();
			mesh->add_surface_from_arrays(Mesh::PRIMITIVE_LINES, wireframe_surface);
			Ref<SpatialMaterial> line_material;
			line_material.instance();
			line_material->set_flag(SpatialMaterial::FLAG_UNSHADED, true);
			line_material->set_albedo(Color(1.0, 0.0, 1.0));
			mesh->surface_set_material(wireframe_surface_index, line_material);
		}
	}*/

	if (mesh.is_valid() && is_mesh_empty(**mesh)) {
		mesh = Ref<Mesh>();
	}

	return mesh;
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace {
std::atomic_int g_debug_mesh_tasks_count;
} //namespace

MeshBlockTask::MeshBlockTask() {
	++g_debug_mesh_tasks_count;
}

MeshBlockTask::~MeshBlockTask() {
	--g_debug_mesh_tasks_count;
}

int MeshBlockTask::debug_get_running_count() {
	return g_debug_mesh_tasks_count;
}

void MeshBlockTask::run(zylann::ThreadedTaskContext ctx) {
	ZN_DSTACK();
	ZN_PROFILE_SCOPE();
	ZN_ASSERT(meshing_dependency != nullptr);

	Ref<VoxelMesher> mesher = meshing_dependency->mesher;
	ZN_ASSERT_RETURN_MSG(
			mesher.is_valid(), "Meshing task started without a mesher. Maybe missing on the terrain node?");
	const unsigned int min_padding = mesher->get_minimum_padding();
	const unsigned int max_padding = mesher->get_maximum_padding();

	const VoxelModifierStack *modifiers = data != nullptr ? &data->get_modifiers() : nullptr;

	VoxelBufferInternal voxels;
	copy_block_and_neighbors(to_span(blocks, blocks_count), voxels, min_padding, max_padding,
			mesher->get_used_channels_mask(), meshing_dependency->generator, modifiers, data_block_size, lod_index,
			mesh_block_position);

	// Could cache generator data from here if it was safe to write into the map
	/*if (data != nullptr && cache_generated_blocks) {
		const CubicAreaInfo area_info = get_cubic_area_info_from_size(blocks.size());
		ERR_FAIL_COND(!area_info.is_valid());

		VoxelDataLodMap::Lod &lod = data->lods[lod_index];

		// Note, this box does not include neighbors!
		const Vector3i min_bpos = position * area_info.mesh_block_size_factor;
		const Vector3i max_bpos = min_bpos + Vector3iUtil::create(area_info.edge_size - 2);

		Vector3i bpos;
		for (bpos.z = min_bpos.z; bpos.z < max_bpos.z; ++bpos.z) {
			for (bpos.x = min_bpos.x; bpos.x < max_bpos.x; ++bpos.x) {
				for (bpos.y = min_bpos.y; bpos.y < max_bpos.y; ++bpos.y) {
					// {
					// 	RWLockRead rlock(lod.map_lock);
					// 	VoxelDataBlock *block = lod.map.get_block(bpos);
					// 	if (block != nullptr && (block->is_edited() || block->is_modified())) {
					// 		continue;
					// 	}
					// }
					std::shared_ptr<VoxelBufferInternal> &cache_buffer = make_shared_instance<VoxelBufferInternal>();
					cache_buffer->copy_format(voxels);
					const Vector3i min_src_pos =
							(bpos - min_bpos) * data_block_size + Vector3iUtil::create(min_padding);
					cache_buffer->copy_from(voxels, min_src_pos, min_src_pos + cache_buffer->get_size(), Vector3i());
					// TODO Where to put voxels? Can't safely write to data at the moment.
				}
			}
		}
	}*/

	const Vector3i mesh_block_size =
			voxels.get_size() - Vector3iUtil::create(mesher->get_minimum_padding() + mesher->get_maximum_padding());

	const Vector3i origin_in_voxels = mesh_block_position * (mesh_block_size << lod_index);

	const VoxelMesher::Input input = { voxels, meshing_dependency->generator.ptr(), data.get(), origin_in_voxels,
		lod_index, collision_hint, lod_hint, true };
	mesher->build(_surfaces_output, input);

	const bool mesh_is_empty = VoxelMesher::is_mesh_empty(_surfaces_output.surfaces);

	// Currently, Transvoxel only is supported in combination with virtual normalmap texturing, because the algorithm
	// provides a cheap source for cells subdividing the mesh. It should be possible to obtain cells from any mesh, but
	// it is more expensive to find them from scratch, and for now Transvoxel is the most viable algorithm for smooth
	// terrain.
	Ref<VoxelMesherTransvoxel> transvoxel_mesher = mesher;

	if (transvoxel_mesher.is_valid() && virtual_texture_settings.enabled && !mesh_is_empty &&
			lod_index >= virtual_texture_settings.begin_lod_index && require_virtual_texture) {
		ZN_PROFILE_SCOPE_NAMED("Schedule virtual render");

		const transvoxel::MeshArrays &mesh_arrays = VoxelMesherTransvoxel::get_mesh_cache_from_current_thread();
		Span<const transvoxel::CellInfo> cell_infos = VoxelMesherTransvoxel::get_cell_info_from_current_thread();
		ZN_ASSERT(cell_infos.size() > 0 && mesh_arrays.vertices.size() > 0);

		UniquePtr<TransvoxelCellIterator> cell_iterator = make_unique_instance<TransvoxelCellIterator>(cell_infos);

		std::shared_ptr<VirtualTextureOutput> virtual_textures = make_shared_instance<VirtualTextureOutput>();
		virtual_textures->valid = false;
		// This is stored here in case virtual texture rendering completes before the output of the current task gets
		// dequeued in the main thread, since it runs in a separate asynchronous task
		_virtual_textures = virtual_textures;

		GenerateDistanceNormalmapTask *nm_task = ZN_NEW(GenerateDistanceNormalmapTask);
		nm_task->cell_iterator = std::move(cell_iterator);
		nm_task->mesh_vertices = mesh_arrays.vertices;
		nm_task->mesh_normals = mesh_arrays.normals;
		nm_task->mesh_indices = mesh_arrays.indices;
		nm_task->generator = meshing_dependency->generator;
		nm_task->voxel_data = data;
		nm_task->mesh_block_size = mesh_block_size;
		nm_task->lod_index = lod_index;
		nm_task->mesh_block_position = mesh_block_position;
		nm_task->volume_id = volume_id;
		nm_task->virtual_textures = virtual_textures;
		nm_task->virtual_texture_settings = virtual_texture_settings;
		nm_task->priority_dependency = priority_dependency;

		VoxelEngine::get_singleton().push_async_task(nm_task);
	}

	if (VoxelEngine::get_singleton().is_threaded_graphics_resource_building_enabled()) {
		// This shall only run if Godot supports building meshes from multiple threads
		_mesh = build_mesh(to_span(_surfaces_output.surfaces), _surfaces_output.primitive_type,
				_surfaces_output.mesh_flags, _mesh_material_indices);
		_has_mesh_resource = true;

	} else {
		_has_mesh_resource = false;
	}

	_has_run = true;
}

TaskPriority MeshBlockTask::get_priority() {
	float closest_viewer_distance_sq;
	const TaskPriority p =
			priority_dependency.evaluate(lod_index, constants::TASK_PRIORITY_MESH_BAND2, &closest_viewer_distance_sq);
	_too_far = closest_viewer_distance_sq > priority_dependency.drop_distance_squared;
	return p;
}

bool MeshBlockTask::is_cancelled() {
	return !meshing_dependency->valid || _too_far;
}

void MeshBlockTask::apply_result() {
	if (VoxelEngine::get_singleton().is_volume_valid(volume_id)) {
		// The request response must match the dependency it would have been requested with.
		// If it doesn't match, we are no longer interested in the result.
		// It is assumed that if a dependency is changed, a new copy of it is made and the old one is marked invalid.
		if (meshing_dependency->valid) {
			VoxelEngine::BlockMeshOutput o;
			// TODO Check for invalidation due to property changes

			if (_has_run) {
				o.type = VoxelEngine::BlockMeshOutput::TYPE_MESHED;
			} else {
				o.type = VoxelEngine::BlockMeshOutput::TYPE_DROPPED;
			}

			o.position = mesh_block_position;
			o.lod = lod_index;
			o.surfaces = std::move(_surfaces_output);
			o.mesh = _mesh;
			o.mesh_material_indices = std::move(_mesh_material_indices);
			o.has_mesh_resource = _has_mesh_resource;
			o.virtual_textures = _virtual_textures;

			VoxelEngine::VolumeCallbacks callbacks = VoxelEngine::get_singleton().get_volume_callbacks(volume_id);
			ERR_FAIL_COND(callbacks.mesh_output_callback == nullptr);
			ERR_FAIL_COND(callbacks.data == nullptr);
			callbacks.mesh_output_callback(callbacks.data, o);
		}

	} else {
		// This can happen if the user removes the volume while requests are still about to return
		ZN_PRINT_VERBOSE("Mesh request response came back but volume wasn't found");
	}
}

} // namespace zylann::voxel