godot_voxel/generators/graph/voxel_generator_graph.cpp

#include "voxel_generator_graph.h"
#include "../../util/macros.h"
#include "../../util/profiling.h"
#include "../../util/profiling_clock.h"
#include "voxel_graph_node_db.h"
#include <core/core_string_names.h>
const char *VoxelGeneratorGraph::SIGNAL_NODE_NAME_CHANGED = "node_name_changed";
thread_local VoxelGeneratorGraph::Cache VoxelGeneratorGraph::_cache;
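// The cache is thread-local so several threads can generate blocks concurrently;
// only the compiled program, guarded by `_runtime_lock`, is shared between them.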
VoxelGeneratorGraph::VoxelGeneratorGraph() {}
VoxelGeneratorGraph::~VoxelGeneratorGraph() {
clear();
}
void VoxelGeneratorGraph::clear() {
unregister_subresources();
_graph.clear();
{
RWLockWrite wlock(_runtime_lock);
_runtime.reset();
}
}
static ProgramGraph::Node *create_node_internal(
ProgramGraph &graph, VoxelGeneratorGraph::NodeTypeID type_id, Vector2 position, uint32_t id) {
const VoxelGraphNodeDB::NodeType &type = VoxelGraphNodeDB::get_singleton()->get_type(type_id);
ProgramGraph::Node *node = graph.create_node(type_id, id);
ERR_FAIL_COND_V(node == nullptr, nullptr);
node->inputs.resize(type.inputs.size());
node->outputs.resize(type.outputs.size());
node->default_inputs.resize(type.inputs.size());
node->gui_position = position;
node->params.resize(type.params.size());
for (size_t i = 0; i < type.params.size(); ++i) {
node->params[i] = type.params[i].default_value;
}
for (size_t i = 0; i < type.inputs.size(); ++i) {
node->default_inputs[i] = type.inputs[i].default_value;
}
return node;
}
uint32_t VoxelGeneratorGraph::create_node(NodeTypeID type_id, Vector2 position, uint32_t id) {
ERR_FAIL_COND_V(!VoxelGraphNodeDB::get_singleton()->is_valid_type_id(type_id), ProgramGraph::NULL_ID);
const ProgramGraph::Node *node = create_node_internal(_graph, type_id, position, id);
ERR_FAIL_COND_V(node == nullptr, ProgramGraph::NULL_ID);
return node->id;
}
void VoxelGeneratorGraph::remove_node(uint32_t node_id) {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND(node == nullptr);
for (size_t i = 0; i < node->params.size(); ++i) {
Ref<Resource> resource = node->params[i];
if (resource.is_valid()) {
unregister_subresource(**resource);
}
}
_graph.remove_node(node_id);
emit_changed();
}
bool VoxelGeneratorGraph::can_connect(
uint32_t src_node_id, uint32_t src_port_index, uint32_t dst_node_id, uint32_t dst_port_index) const {
const ProgramGraph::PortLocation src_port{ src_node_id, src_port_index };
const ProgramGraph::PortLocation dst_port{ dst_node_id, dst_port_index };
ERR_FAIL_COND_V(!_graph.is_output_port_valid(src_port), false);
ERR_FAIL_COND_V(!_graph.is_input_port_valid(dst_port), false);
return _graph.can_connect(src_port, dst_port);
}
void VoxelGeneratorGraph::add_connection(
uint32_t src_node_id, uint32_t src_port_index, uint32_t dst_node_id, uint32_t dst_port_index) {
const ProgramGraph::PortLocation src_port{ src_node_id, src_port_index };
const ProgramGraph::PortLocation dst_port{ dst_node_id, dst_port_index };
ERR_FAIL_COND(!_graph.is_output_port_valid(src_port));
ERR_FAIL_COND(!_graph.is_input_port_valid(dst_port));
_graph.connect(src_port, dst_port);
emit_changed();
}
void VoxelGeneratorGraph::remove_connection(
uint32_t src_node_id, uint32_t src_port_index, uint32_t dst_node_id, uint32_t dst_port_index) {
const ProgramGraph::PortLocation src_port{ src_node_id, src_port_index };
const ProgramGraph::PortLocation dst_port{ dst_node_id, dst_port_index };
ERR_FAIL_COND(!_graph.is_output_port_valid(src_port));
ERR_FAIL_COND(!_graph.is_input_port_valid(dst_port));
_graph.disconnect(src_port, dst_port);
emit_changed();
}
void VoxelGeneratorGraph::get_connections(std::vector<ProgramGraph::Connection> &connections) const {
_graph.get_connections(connections);
}
bool VoxelGeneratorGraph::try_get_connection_to(
ProgramGraph::PortLocation dst, ProgramGraph::PortLocation &out_src) const {
const ProgramGraph::Node *node = _graph.get_node(dst.node_id);
CRASH_COND(node == nullptr);
CRASH_COND(dst.port_index >= node->inputs.size());
const ProgramGraph::Port &port = node->inputs[dst.port_index];
if (port.connections.size() == 0) {
return false;
}
out_src = port.connections[0];
return true;
}
bool VoxelGeneratorGraph::has_node(uint32_t node_id) const {
return _graph.try_get_node(node_id) != nullptr;
}
void VoxelGeneratorGraph::set_node_name(uint32_t node_id, StringName name) {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_MSG(node == nullptr, "No node was found with the specified ID");
if (node->name == name) {
return;
}
if (name != StringName()) {
const uint32_t existing_node_id = _graph.find_node_by_name(name);
if (existing_node_id != ProgramGraph::NULL_ID && node_id != existing_node_id) {
ERR_PRINT(String("More than one graph node has the name \"{0}\"").format(varray(name)));
}
}
node->name = name;
emit_signal(SIGNAL_NODE_NAME_CHANGED, node_id);
}
StringName VoxelGeneratorGraph::get_node_name(uint32_t node_id) const {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_V(node == nullptr, StringName());
return node->name;
}
uint32_t VoxelGeneratorGraph::find_node_by_name(StringName name) const {
return _graph.find_node_by_name(name);
}
void VoxelGeneratorGraph::set_node_param(uint32_t node_id, uint32_t param_index, Variant value) {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND(node == nullptr);
ERR_FAIL_INDEX(param_index, node->params.size());
if (node->params[param_index] != value) {
Ref<Resource> prev_resource = node->params[param_index];
if (prev_resource.is_valid()) {
unregister_subresource(**prev_resource);
}
node->params[param_index] = value;
Ref<Resource> resource = value;
if (resource.is_valid()) {
register_subresource(**resource);
}
emit_changed();
}
}
Variant VoxelGeneratorGraph::get_node_param(uint32_t node_id, uint32_t param_index) const {
const ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_V(node == nullptr, Variant());
ERR_FAIL_INDEX_V(param_index, node->params.size(), Variant());
return node->params[param_index];
}
Variant VoxelGeneratorGraph::get_node_default_input(uint32_t node_id, uint32_t input_index) const {
const ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_V(node == nullptr, Variant());
ERR_FAIL_INDEX_V(input_index, node->default_inputs.size(), Variant());
return node->default_inputs[input_index];
}
void VoxelGeneratorGraph::set_node_default_input(uint32_t node_id, uint32_t input_index, Variant value) {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND(node == nullptr);
ERR_FAIL_INDEX(input_index, node->default_inputs.size());
if (node->default_inputs[input_index] != value) {
node->default_inputs[input_index] = value;
emit_changed();
}
}
Vector2 VoxelGeneratorGraph::get_node_gui_position(uint32_t node_id) const {
const ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_V(node == nullptr, Vector2());
return node->gui_position;
}
void VoxelGeneratorGraph::set_node_gui_position(uint32_t node_id, Vector2 pos) {
ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND(node == nullptr);
if (node->gui_position != pos) {
node->gui_position = pos;
}
}
VoxelGeneratorGraph::NodeTypeID VoxelGeneratorGraph::get_node_type_id(uint32_t node_id) const {
const ProgramGraph::Node *node = _graph.try_get_node(node_id);
ERR_FAIL_COND_V(node == nullptr, NODE_TYPE_COUNT);
CRASH_COND(node->type_id >= NODE_TYPE_COUNT);
return (NodeTypeID)node->type_id;
}
PackedInt32Array VoxelGeneratorGraph::get_node_ids() const {
PackedInt32Array ids;
ids.resize(_graph.get_nodes_count());
{
int i = 0;
_graph.for_each_node_id([&ids, &i](int id) {
ids.write[i] = id;
++i;
});
}
return ids;
}
int VoxelGeneratorGraph::get_nodes_count() const {
return _graph.get_nodes_count();
}
bool VoxelGeneratorGraph::is_using_optimized_execution_map() const {
return _use_optimized_execution_map;
}
void VoxelGeneratorGraph::set_use_optimized_execution_map(bool use) {
_use_optimized_execution_map = use;
}
float VoxelGeneratorGraph::get_sdf_clip_threshold() const {
return _sdf_clip_threshold;
}
void VoxelGeneratorGraph::set_sdf_clip_threshold(float t) {
_sdf_clip_threshold = max(t, 0.f);
}
int VoxelGeneratorGraph::get_used_channels_mask() const {
return 1 << VoxelBufferInternal::CHANNEL_SDF;
}
void VoxelGeneratorGraph::set_use_subdivision(bool use) {
_use_subdivision = use;
}
bool VoxelGeneratorGraph::is_using_subdivision() const {
return _use_subdivision;
}
void VoxelGeneratorGraph::set_subdivision_size(int size) {
_subdivision_size = size;
}
int VoxelGeneratorGraph::get_subdivision_size() const {
return _subdivision_size;
}
void VoxelGeneratorGraph::set_debug_clipped_blocks(bool enabled) {
_debug_clipped_blocks = enabled;
}
bool VoxelGeneratorGraph::is_debug_clipped_blocks() const {
return _debug_clipped_blocks;
}
void VoxelGeneratorGraph::set_use_xz_caching(bool enabled) {
_use_xz_caching = enabled;
}
bool VoxelGeneratorGraph::is_using_xz_caching() const {
return _use_xz_caching;
}
// TODO Optimization: generating indices and weights on every voxel of a block might be avoidable
// Instead, we could only generate them near zero-crossings, because this is where materials will be seen.
// The problem is that it's harder to manage at the moment, to support edited blocks and LOD...
void VoxelGeneratorGraph::gather_indices_and_weights(Span<const WeightOutput> weight_outputs,
const VoxelGraphRuntime::State &state, Vector3i rmin, Vector3i rmax, int ry,
VoxelBufferInternal &out_voxel_buffer, FixedArray<uint8_t, 4> spare_indices) {
VOXEL_PROFILE_SCOPE();
// TODO Optimization: exclude up-front outputs that are known to be zero?
// So we choose the cases below based on non-zero outputs instead of total output count
// TODO Could maybe put this part outside?
FixedArray<Span<const float>, 16> buffers;
const unsigned int buffers_count = weight_outputs.size();
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const WeightOutput &info = weight_outputs[oi];
const VoxelGraphRuntime::Buffer &buffer = state.get_buffer(info.output_buffer_index);
buffers[oi] = Span<const float>(buffer.data, buffer.size);
}
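// Three cases below: fewer weight outputs than the 4 texture slots (pad with spare indices),
// exactly 4 (use them all), or more than 4 (keep only the most represented weights).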
if (buffers_count < 4) {
// Pick all results and fill the remaining slots with spare indices to keep the semantics
unsigned int value_index = 0;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
FixedArray<uint8_t, 4> weights;
FixedArray<uint8_t, 4> indices = spare_indices;
weights.fill(0);
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const float weight = buffers[oi][value_index];
// TODO Optimization: weight output nodes could already multiply by 255 and clamp afterward
// so we would not need to do it here
weights[oi] = clamp(weight * 255.f, 0.f, 255.f);
indices[oi] = weight_outputs[oi].layer_index;
}
debug_check_texture_indices(indices);
const uint16_t encoded_indices =
encode_indices_to_packed_u16(indices[0], indices[1], indices[2], indices[3]);
const uint16_t encoded_weights =
encode_weights_to_packed_u16(weights[0], weights[1], weights[2], weights[3]);
// TODO Flatten this further?
out_voxel_buffer.set_voxel(encoded_indices, rx, ry, rz, VoxelBufferInternal::CHANNEL_INDICES);
out_voxel_buffer.set_voxel(encoded_weights, rx, ry, rz, VoxelBufferInternal::CHANNEL_WEIGHTS);
++value_index;
}
}
} else if (buffers_count == 4) {
// Pick all results
unsigned int value_index = 0;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
FixedArray<uint8_t, 4> weights;
FixedArray<uint8_t, 4> indices;
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const float weight = buffers[oi][value_index];
weights[oi] = clamp(weight * 255.f, 0.f, 255.f);
indices[oi] = weight_outputs[oi].layer_index;
}
const uint16_t encoded_indices =
encode_indices_to_packed_u16(indices[0], indices[1], indices[2], indices[3]);
const uint16_t encoded_weights =
encode_weights_to_packed_u16(weights[0], weights[1], weights[2], weights[3]);
// TODO Flatten this further?
out_voxel_buffer.set_voxel(encoded_indices, rx, ry, rz, VoxelBufferInternal::CHANNEL_INDICES);
out_voxel_buffer.set_voxel(encoded_weights, rx, ry, rz, VoxelBufferInternal::CHANNEL_WEIGHTS);
++value_index;
}
}
} else {
// More weights than we can have per voxel. Will need to pick most represented weights
const float pivot = 1.f / 5.f;
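// Rationale for 1/5: assuming weights roughly sum to 1, at most 4 of them can exceed
// this pivot at the same time, which is exactly how many slots a voxel has.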
unsigned int value_index = 0;
FixedArray<uint8_t, 16> skipped_outputs;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
FixedArray<uint8_t, 4> weights;
FixedArray<uint8_t, 4> indices;
unsigned int skipped_outputs_count = 0;
indices.fill(0);
weights[0] = 255;
weights[1] = 0;
weights[2] = 0;
weights[3] = 0;
unsigned int recorded_weights = 0;
// Pick up weights above pivot (this is not as correct as a sort but faster)
for (unsigned int oi = 0; oi < buffers_count && recorded_weights < indices.size(); ++oi) {
const float weight = buffers[oi][value_index];
if (weight > pivot) {
weights[recorded_weights] = clamp(weight * 255.f, 0.f, 255.f);
indices[recorded_weights] = weight_outputs[oi].layer_index;
++recorded_weights;
} else {
skipped_outputs[skipped_outputs_count] = oi;
++skipped_outputs_count;
}
}
// If we found fewer outputs above the pivot than expected, fill with some skipped outputs.
// We have to do this because if an index appears twice with a different corresponding weight,
// then the latest weight will take precedence, which would be unwanted
for (unsigned int oi = recorded_weights; oi < indices.size(); ++oi) {
indices[oi] = skipped_outputs[oi - recorded_weights];
}
const uint16_t encoded_indices =
encode_indices_to_packed_u16(indices[0], indices[1], indices[2], indices[3]);
const uint16_t encoded_weights =
encode_weights_to_packed_u16(weights[0], weights[1], weights[2], weights[3]);
// TODO Flatten this further?
out_voxel_buffer.set_voxel(encoded_indices, rx, ry, rz, VoxelBufferInternal::CHANNEL_INDICES);
out_voxel_buffer.set_voxel(encoded_weights, rx, ry, rz, VoxelBufferInternal::CHANNEL_WEIGHTS);
++value_index;
}
}
}
}
VoxelGenerator::Result VoxelGeneratorGraph::generate_block(VoxelBlockRequest &input) {
std::shared_ptr<Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
Result result;
if (runtime_ptr == nullptr) {
return result;
}
VoxelBufferInternal &out_buffer = input.voxel_buffer;
const Vector3i bs = out_buffer.get_size();
const VoxelBufferInternal::ChannelId channel = VoxelBufferInternal::CHANNEL_SDF;
const Vector3i origin = input.origin_in_voxels;
// TODO This may be shared across the module
// Storing voxels is lossy on some depth configurations. They use normalized SDF,
// so we must scale the values to make better use of the offered resolution
const float sdf_scale = VoxelBufferInternal::get_sdf_quantization_scale(out_buffer.get_channel_depth(channel));
const int stride = 1 << input.lod;
// Clip threshold must be higher for higher lod indexes because distances for one sampled voxel are also larger
const float clip_threshold = sdf_scale * _sdf_clip_threshold * stride;
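// For example, at lod 2 the stride is 4, so the effective threshold is 4x the configured
// value, on top of the channel's SDF quantization scale.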
// Block size must be a multiple of section size, as all sections must have the same size
const bool can_use_subdivision =
(bs.x % _subdivision_size == 0) && (bs.y % _subdivision_size == 0) && (bs.z % _subdivision_size == 0);
const Vector3i section_size =
_use_subdivision && can_use_subdivision ? Vector3iUtil::create(_subdivision_size) : bs;
// ERR_FAIL_COND_V(bs.x % section_size != 0, result);
// ERR_FAIL_COND_V(bs.y % section_size != 0, result);
// ERR_FAIL_COND_V(bs.z % section_size != 0, result);
Cache &cache = _cache;
// Slice is on the Y axis
const unsigned int slice_buffer_size = section_size.x * section_size.z;
VoxelGraphRuntime &runtime = runtime_ptr->runtime;
runtime.prepare_state(cache.state, slice_buffer_size);
cache.x_cache.resize(slice_buffer_size);
cache.y_cache.resize(slice_buffer_size);
cache.z_cache.resize(slice_buffer_size);
Span<float> x_cache(cache.x_cache, 0, cache.x_cache.size());
Span<float> y_cache(cache.y_cache, 0, cache.y_cache.size());
Span<float> z_cache(cache.z_cache, 0, cache.z_cache.size());
const float air_sdf = _debug_clipped_blocks ? -1.f : 1.f;
const float matter_sdf = _debug_clipped_blocks ? 1.f : -1.f;
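// With block clipping debug enabled, the polarity is inverted so sections clipped as air
// are filled with matter instead, which makes them visible in the world.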
FixedArray<uint8_t, 4> spare_texture_indices = runtime_ptr->spare_texture_indices;
const unsigned int sdf_output_buffer_index = runtime_ptr->sdf_output_buffer_index;
bool all_sdf_is_uniform = true;
// For each subdivision of the block
for (int sz = 0; sz < bs.z; sz += section_size.z) {
for (int sy = 0; sy < bs.y; sy += section_size.y) {
for (int sx = 0; sx < bs.x; sx += section_size.x) {
VOXEL_PROFILE_SCOPE_NAMED("Section");
const Vector3i rmin(sx, sy, sz);
const Vector3i rmax = rmin + Vector3i(section_size);
const Vector3i gmin = origin + (rmin << input.lod);
const Vector3i gmax = origin + (rmax << input.lod);
runtime.analyze_range(cache.state, gmin, gmax);
const Interval sdf_range = cache.state.get_range(sdf_output_buffer_index) * sdf_scale;
bool sdf_is_uniform = false;
if (sdf_range.min > clip_threshold && sdf_range.max > clip_threshold) {
out_buffer.fill_area_f(air_sdf, rmin, rmax, channel);
// In case of air, we skip weights because there is nothing to texture anyway
continue;
} else if (sdf_range.min < -clip_threshold && sdf_range.max < -clip_threshold) {
out_buffer.fill_area_f(matter_sdf, rmin, rmax, channel);
sdf_is_uniform = true;
} else if (sdf_range.is_single_value()) {
out_buffer.fill_area_f(sdf_range.min, rmin, rmax, channel);
if (sdf_range.min > 0.f) {
continue;
}
sdf_is_uniform = true;
}
// The section may have the surface in it, we have to calculate it
if (!sdf_is_uniform) {
// SDF is not uniform, we may do a full query
all_sdf_is_uniform = false;
if (_use_optimized_execution_map) {
// Optimize out branches of the graph that won't contribute to the result
runtime.generate_optimized_execution_map(cache.state, cache.optimized_execution_map, false);
}
{
unsigned int i = 0;
for (int rz = rmin.z, gz = gmin.z; rz < rmax.z; ++rz, gz += stride) {
for (int rx = rmin.x, gx = gmin.x; rx < rmax.x; ++rx, gx += stride) {
x_cache[i] = gx;
z_cache[i] = gz;
++i;
}
}
}
for (int ry = rmin.y, gy = gmin.y; ry < rmax.y; ++ry, gy += stride) {
VOXEL_PROFILE_SCOPE_NAMED("Full slice");
y_cache.fill(gy);
// Full query
runtime.generate_set(cache.state, x_cache, y_cache, z_cache, _use_xz_caching && ry != rmin.y,
_use_optimized_execution_map ? &cache.optimized_execution_map : nullptr);
{
VOXEL_PROFILE_SCOPE_NAMED("Copy SDF to block");
unsigned int i = 0;
const VoxelGraphRuntime::Buffer &sdf_buffer =
cache.state.get_buffer(sdf_output_buffer_index);
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
// TODO Flatten this further, this may run checks we don't need
out_buffer.set_voxel_f(sdf_scale * sdf_buffer.data[i], rx, ry, rz, channel);
++i;
}
}
}
if (runtime_ptr->weight_outputs_count > 0) {
gather_indices_and_weights(
to_span_const(runtime_ptr->weight_outputs, runtime_ptr->weight_outputs_count),
cache.state, rmin, rmax, ry, out_buffer, spare_texture_indices);
}
}
} else if (runtime_ptr->weight_outputs_count > 0) {
// SDF is uniform and full of matter, but we may want to query weights
if (_use_optimized_execution_map) {
// Optimize out branches of the graph that won't contribute to the result
runtime.generate_optimized_execution_map(cache.state, cache.optimized_execution_map,
to_span_const(runtime_ptr->weight_output_indices, runtime_ptr->weight_outputs_count),
false);
}
unsigned int i = 0;
for (int rz = rmin.z, gz = gmin.z; rz < rmax.z; ++rz, gz += stride) {
for (int rx = rmin.x, gx = gmin.x; rx < rmax.x; ++rx, gx += stride) {
x_cache[i] = gx;
z_cache[i] = gz;
++i;
}
}
for (int ry = rmin.y, gy = gmin.y; ry < rmax.y; ++ry, gy += stride) {
VOXEL_PROFILE_SCOPE_NAMED("Weights slice");
y_cache.fill(gy);
runtime.generate_set(cache.state, x_cache, y_cache, z_cache, _use_xz_caching && ry != rmin.y,
_use_optimized_execution_map ? &cache.optimized_execution_map : nullptr);
gather_indices_and_weights(
to_span_const(runtime_ptr->weight_outputs, runtime_ptr->weight_outputs_count),
cache.state, rmin, rmax, ry, out_buffer, spare_texture_indices);
}
}
}
}
}
out_buffer.compress_uniform_channels();
// This is different from finding out that the buffer is uniform.
// This really means we predicted SDF will never cross zero in this area, no matter how precise we get.
// Relying on the block's uniform channels would bring up false positives due to LOD aliasing.
if (all_sdf_is_uniform) {
// TODO If voxel texture weights are used, octree compression might be a bit more complicated.
// For now we only look at SDF but if texture weights are used and the player digs a bit inside terrain,
// they will find it's all default weights.
// Possible workarounds:
// - Only do it for air
// - Also take indices and weights into account, but may lead to way less compression, or none, for stuff that
essentially isn't showing up until dug out
// - Invoke generator to produce LOD0 blocks somehow, but main thread could stall
result.max_lod_hint = true;
}
return result;
}
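// Compiles the graph into a runtime program, then runs extra validation on the outputs:
// exactly one SDF output, and weight outputs with unique, in-range layer indices.
// Weight outputs are then sorted by layer index and spare texture indices are computed.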
VoxelGraphRuntime::CompilationResult VoxelGeneratorGraph::compile() {
const int64_t time_before = OS::get_singleton()->get_ticks_usec();
std::shared_ptr<Runtime> r = std::make_shared<Runtime>();
VoxelGraphRuntime &runtime = r->runtime;
// Core compilation
const VoxelGraphRuntime::CompilationResult result =
runtime.compile(_graph, Engine::get_singleton()->is_editor_hint());
if (!result.success) {
return result;
}
// Extra steps
for (unsigned int output_index = 0; output_index < runtime.get_output_count(); ++output_index) {
const VoxelGraphRuntime::OutputInfo output = runtime.get_output_info(output_index);
const ProgramGraph::Node *node = _graph.get_node(output.node_id);
ERR_FAIL_COND_V(node == nullptr, VoxelGraphRuntime::CompilationResult());
switch (node->type_id) {
case NODE_OUTPUT_SDF:
if (r->sdf_output_buffer_index != -1) {
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message = TTR("Multiple SDF outputs are not supported");
error.node_id = output.node_id;
return error;
} else {
r->sdf_output_buffer_index = output.buffer_address;
}
break;
case NODE_OUTPUT_WEIGHT: {
if (r->weight_outputs_count >= r->weight_outputs.size()) {
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message = String(TTR("Cannot use more than {0} weight outputs"))
.format(varray(r->weight_outputs.size()));
error.node_id = output.node_id;
return error;
}
CRASH_COND(node->params.size() == 0);
const int layer_index = node->params[0];
if (layer_index < 0) {
// Should not be allowed by the UI, but who knows
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message = String(TTR("Cannot use negative layer index in weight output"));
error.node_id = output.node_id;
return error;
}
if (layer_index >= static_cast<int>(r->weight_outputs.size())) {
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message =
String(TTR("Weight layer index must be lower than {0}")).format(varray(r->weight_outputs.size()));
error.node_id = output.node_id;
return error;
}
for (unsigned int i = 0; i < r->weight_outputs_count; ++i) {
const WeightOutput &wo = r->weight_outputs[i];
if (static_cast<int>(wo.layer_index) == layer_index) {
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message =
String(TTR("Only one weight output node can use layer index {0}, found duplicate"))
.format(varray(layer_index));
error.node_id = output.node_id;
return error;
}
}
WeightOutput &new_weight_output = r->weight_outputs[r->weight_outputs_count];
new_weight_output.layer_index = layer_index;
new_weight_output.output_buffer_index = output.buffer_address;
r->weight_output_indices[r->weight_outputs_count] = output_index;
++r->weight_outputs_count;
} break;
default:
break;
}
}
if (r->sdf_output_buffer_index == -1) {
VoxelGraphRuntime::CompilationResult error;
error.success = false;
error.message = String(TTR("An SDF output is required for the graph to be valid."));
return error;
}
// Sort output weights by layer index, for determinism. Could be exploited for optimization too?
{
struct WeightOutputComparer {
inline bool operator()(const WeightOutput &a, const WeightOutput &b) const {
return a.layer_index < b.layer_index;
}
};
SortArray<WeightOutput, WeightOutputComparer> sorter;
CRASH_COND(r->weight_outputs_count > r->weight_outputs.size());
sorter.sort(r->weight_outputs.data(), r->weight_outputs_count);
}
// Calculate spare indices
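// Spare indices are the first texture layer indices not used by any weight output.
// gather_indices_and_weights uses them to pad unused index slots, so a used layer never
// appears twice in one voxel with conflicting weights.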
{
FixedArray<bool, 16> used_indices_map;
FixedArray<uint8_t, 4> spare_indices;
used_indices_map.fill(false);
for (unsigned int i = 0; i < r->weight_outputs_count; ++i) {
used_indices_map[r->weight_outputs[i].layer_index] = true;
}
unsigned int spare_indices_count = 0;
for (unsigned int i = 0; i < used_indices_map.size() && spare_indices_count < 4; ++i) {
if (used_indices_map[i] == false) {
spare_indices[spare_indices_count] = i;
++spare_indices_count;
}
}
//debug_check_texture_indices(spare_indices);
ERR_FAIL_COND_V(spare_indices_count != 4, VoxelGraphRuntime::CompilationResult());
r->spare_texture_indices = spare_indices;
}
// Store valid result
RWLockWrite wlock(_runtime_lock);
_runtime = r;
const int64_t time_spent = OS::get_singleton()->get_ticks_usec() - time_before;
PRINT_VERBOSE(String("Voxel graph compiled in {0} us").format(varray(time_spent)));
return result;
}
// This is an external API which involves locking, so it's better not to use it internally
bool VoxelGeneratorGraph::is_good() const {
RWLockRead rlock(_runtime_lock);
return _runtime != nullptr;
}
void VoxelGeneratorGraph::generate_set(Span<float> in_x, Span<float> in_y, Span<float> in_z) {
RWLockRead rlock(_runtime_lock);
ERR_FAIL_COND(_runtime == nullptr);
Cache &cache = _cache;
VoxelGraphRuntime &runtime = _runtime->runtime;
runtime.prepare_state(cache.state, in_x.size());
runtime.generate_set(cache.state, in_x, in_y, in_z, false, nullptr);
}
const VoxelGraphRuntime::State &VoxelGeneratorGraph::get_last_state_from_current_thread() {
return _cache.state;
}
Span<const int> VoxelGeneratorGraph::get_last_execution_map_debug_from_current_thread() {
return to_span_const(_cache.optimized_execution_map.debug_nodes);
}
bool VoxelGeneratorGraph::try_get_output_port_address(ProgramGraph::PortLocation port, uint32_t &out_address) const {
RWLockRead rlock(_runtime_lock);
ERR_FAIL_COND_V(_runtime == nullptr, false);
uint16_t addr;
const bool res = _runtime->runtime.try_get_output_port_address(port, addr);
out_address = addr;
return res;
}
void VoxelGeneratorGraph::find_dependencies(uint32_t node_id, std::vector<uint32_t> &out_dependencies) const {
std::vector<uint32_t> dst;
dst.push_back(node_id);
_graph.find_dependencies(dst, out_dependencies);
}
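// Maps equirectangular (panorama) UV coordinates to a point on the unit sphere:
// `uv.x` spans the full longitude range (TAU), `uv.y` spans latitude from pole to pole (PI).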
inline Vector3 get_3d_pos_from_panorama_uv(Vector2 uv) {
const float xa = -Math_TAU * uv.x - Math_PI;
const float ya = -Math_PI * (uv.y - 0.5f);
const float y = Math::sin(ya);
const float ca = Math::cos(ya);
const float x = Math::cos(xa) * ca;
const float z = Math::sin(xa) * ca;
return Vector3(x, y, z);
}
// Subdivides a rectangle in square chunks and runs a function on each of them.
// The ref is important to allow re-using functors.
template <typename F> inline void for_chunks_2d(int w, int h, int chunk_size, F &f) {
const int chunks_x = w / chunk_size;
const int chunks_y = h / chunk_size;
const int last_chunk_width = w % chunk_size;
const int last_chunk_height = h % chunk_size;
for (int cy = 0; cy < chunks_y; ++cy) {
int ry = cy * chunk_size;
int rh = ry + chunk_size > h ? last_chunk_height : chunk_size;
for (int cx = 0; cx < chunks_x; ++cx) {
int rx = cx * chunk_size;
int rw = rx + chunk_size > w ? last_chunk_width : chunk_size;
f(rx, ry, rw, rh);
}
}
}
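// Note: because `chunks_x` and `chunks_y` use integer division, trailing partial chunks are
// never visited and the last_chunk_* values are effectively unused; callers are expected to
// pass dimensions that are multiples of `chunk_size`.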
void VoxelGeneratorGraph::bake_sphere_bumpmap(Ref<Image> im, float ref_radius, float sdf_min, float sdf_max) {
ERR_FAIL_COND(im.is_null());
std::shared_ptr<const Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
ERR_FAIL_COND(runtime_ptr == nullptr);
// This process would use too much memory if run over the entire image at once,
// so we'll subdivide the load into smaller chunks
struct ProcessChunk {
std::vector<float> x_coords;
std::vector<float> y_coords;
std::vector<float> z_coords;
Ref<Image> im;
const VoxelGraphRuntime &runtime;
VoxelGraphRuntime::State &state;
const unsigned int sdf_buffer_index;
const float ref_radius;
const float sdf_min;
const float sdf_max;
ProcessChunk(VoxelGraphRuntime::State &p_state, unsigned int p_sdf_buffer_index,
const VoxelGraphRuntime &p_runtime, float p_ref_radius, float p_sdf_min, float p_sdf_max) :
runtime(p_runtime),
state(p_state),
sdf_buffer_index(p_sdf_buffer_index),
ref_radius(p_ref_radius),
sdf_min(p_sdf_min),
sdf_max(p_sdf_max) {}
void operator()(int x0, int y0, int width, int height) {
VOXEL_PROFILE_SCOPE();
const unsigned int area = width * height;
x_coords.resize(area);
y_coords.resize(area);
z_coords.resize(area);
runtime.prepare_state(state, area);
const Vector2 suv =
Vector2(1.f / static_cast<float>(im->get_width()), 1.f / static_cast<float>(im->get_height()));
const float nr = 1.f / (sdf_max - sdf_min);
const int xmax = x0 + width;
const int ymax = y0 + height;
unsigned int i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const Vector2 uv = suv * Vector2(ix, iy);
const Vector3 p = get_3d_pos_from_panorama_uv(uv) * ref_radius;
x_coords[i] = p.x;
y_coords[i] = p.y;
z_coords[i] = p.z;
++i;
}
}
runtime.generate_set(state, to_span(x_coords), to_span(y_coords), to_span(z_coords), false, nullptr);
const VoxelGraphRuntime::Buffer &buffer = state.get_buffer(sdf_buffer_index);
// Calculate final pixels
// TODO Optimize: could convert to buffer directly?
i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const float sdf = buffer.data[i];
const float nh = (-sdf - sdf_min) * nr;
im->set_pixel(ix, iy, Color(nh, nh, nh));
++i;
}
}
}
};
Cache &cache = _cache;
ProcessChunk pc(
cache.state, runtime_ptr->sdf_output_buffer_index, runtime_ptr->runtime, ref_radius, sdf_min, sdf_max);
pc.im = im;
for_chunks_2d(im->get_width(), im->get_height(), 32, pc);
}
// If this generator is used to produce a planet, specifically using a spherical heightmap approach,
// then this function can be used to bake a map of the surface.
// Such maps can be used by shaders to sharpen the details of the planet when seen from far away.
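// The normal is estimated with finite differences: the SDF is sampled at each pixel's position
// and at two neighbor positions offset along the panorama's U and V axes, and the differences
// are combined as a height gradient (see the distortion TODO inside the chunk processor).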
void VoxelGeneratorGraph::bake_sphere_normalmap(Ref<Image> im, float ref_radius, float strength) {
VOXEL_PROFILE_SCOPE();
ERR_FAIL_COND(im.is_null());
std::shared_ptr<const Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
ERR_FAIL_COND(runtime_ptr == nullptr);
// This process would use too much memory if run over the entire image at once,
// so we'll subdivide the load into smaller chunks
struct ProcessChunk {
std::vector<float> x_coords;
std::vector<float> y_coords;
std::vector<float> z_coords;
std::vector<float> sdf_values_p; // TODO Could be used at the same time to get bump?
std::vector<float> sdf_values_px;
std::vector<float> sdf_values_py;
unsigned int sdf_buffer_index;
Ref<Image> im;
const VoxelGraphRuntime &runtime;
VoxelGraphRuntime::State &state;
const float strength;
const float ref_radius;
ProcessChunk(VoxelGraphRuntime::State &p_state, unsigned int p_sdf_buffer_index, Ref<Image> p_im,
const VoxelGraphRuntime &p_runtime, float p_strength, float p_ref_radius) :
sdf_buffer_index(p_sdf_buffer_index),
im(p_im),
runtime(p_runtime),
state(p_state),
strength(p_strength),
ref_radius(p_ref_radius) {}
void operator()(int x0, int y0, int width, int height) {
VOXEL_PROFILE_SCOPE();
const unsigned int area = width * height;
x_coords.resize(area);
y_coords.resize(area);
z_coords.resize(area);
sdf_values_p.resize(area);
sdf_values_px.resize(area);
sdf_values_py.resize(area);
runtime.prepare_state(state, area);
const float ns = 2.f / strength;
const Vector2 suv =
Vector2(1.f / static_cast<float>(im->get_width()), 1.f / static_cast<float>(im->get_height()));
const Vector2 normal_step = 0.5f * Vector2(1.f, 1.f) / im->get_size();
const Vector2 normal_step_x = Vector2(normal_step.x, 0.f);
const Vector2 normal_step_y = Vector2(0.f, normal_step.y);
const int xmax = x0 + width;
const int ymax = y0 + height;
const VoxelGraphRuntime::Buffer &sdf_buffer = state.get_buffer(sdf_buffer_index);
// TODO instead of using 3 separate queries, interleave triplets of positions into a single array?
// Get heights
unsigned int i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const Vector2 uv = suv * Vector2(ix, iy);
const Vector3 p = get_3d_pos_from_panorama_uv(uv) * ref_radius;
x_coords[i] = p.x;
y_coords[i] = p.y;
z_coords[i] = p.z;
++i;
}
}
// TODO Perform range analysis on the range of coordinates, it might still yield performance benefits
runtime.generate_set(state, to_span(x_coords), to_span(y_coords), to_span(z_coords), false, nullptr);
CRASH_COND(sdf_values_p.size() != sdf_buffer.size);
memcpy(sdf_values_p.data(), sdf_buffer.data, sdf_values_p.size() * sizeof(float));
// Get neighbors along X
i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const Vector2 uv = suv * Vector2(ix, iy);
const Vector3 p = get_3d_pos_from_panorama_uv(uv + normal_step_x) * ref_radius;
x_coords[i] = p.x;
y_coords[i] = p.y;
z_coords[i] = p.z;
++i;
}
}
runtime.generate_set(state, to_span(x_coords), to_span(y_coords), to_span(z_coords), false, nullptr);
CRASH_COND(sdf_values_px.size() != sdf_buffer.size);
memcpy(sdf_values_px.data(), sdf_buffer.data, sdf_values_px.size() * sizeof(float));
// Get neighbors along Y
i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const Vector2 uv = suv * Vector2(ix, iy);
const Vector3 p = get_3d_pos_from_panorama_uv(uv + normal_step_y) * ref_radius;
x_coords[i] = p.x;
y_coords[i] = p.y;
z_coords[i] = p.z;
++i;
}
}
runtime.generate_set(state, to_span(x_coords), to_span(y_coords), to_span(z_coords), false, nullptr);
CRASH_COND(sdf_values_py.size() != sdf_buffer.size);
memcpy(sdf_values_py.data(), sdf_buffer.data, sdf_values_py.size() * sizeof(float));
// TODO This is probably invalid due to the distortion, may need to use another approach.
// Compute the 3D normal from gradient, then project it?
// Calculate final pixels
// TODO Optimize: convert into buffer directly?
i = 0;
for (int iy = y0; iy < ymax; ++iy) {
for (int ix = x0; ix < xmax; ++ix) {
const float h = sdf_values_p[i];
const float h_px = sdf_values_px[i];
const float h_py = sdf_values_py[i];
++i;
const Vector3 normal = Vector3(h_px - h, ns, h_py - h).normalized();
const Color en(0.5f * normal.x + 0.5f, -0.5f * normal.z + 0.5f, 0.5f * normal.y + 0.5f);
im->set_pixel(ix, iy, en);
}
}
}
};
Cache &cache = _cache;
// Avoid a division by zero in the chunk processor when strength is ~0 (its default is 1.f)
const float e = 0.001f;
if (strength > -e && strength < e) {
if (strength > 0.f) {
strength = e;
} else {
strength = -e;
}
}
ProcessChunk pc(cache.state, runtime_ptr->sdf_output_buffer_index, im, runtime_ptr->runtime, strength, ref_radius);
for_chunks_2d(im->get_width(), im->get_height(), 32, pc);
}
VoxelSingleValue VoxelGeneratorGraph::generate_single(Vector3i position, unsigned int channel) {
// TODO Support other channels
VoxelSingleValue v;
v.i = 0;
if (channel != VoxelBufferInternal::CHANNEL_SDF) {
return v;
}
std::shared_ptr<const Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
ERR_FAIL_COND_V(runtime_ptr == nullptr, v);
Cache &cache = _cache;
const VoxelGraphRuntime &runtime = runtime_ptr->runtime;
runtime.prepare_state(cache.state, 1);
runtime.generate_single(cache.state, position, nullptr);
const VoxelGraphRuntime::Buffer &buffer = cache.state.get_buffer(runtime_ptr->sdf_output_buffer_index);
ERR_FAIL_COND_V(buffer.size == 0, v);
ERR_FAIL_COND_V(buffer.data == nullptr, v);
v.f = buffer.data[0];
return v;
}
// Note: this wrapper is not meant for main generation tasks.
// It is mostly used as a debug tool.
Interval VoxelGeneratorGraph::debug_analyze_range(
Vector3i min_pos, Vector3i max_pos, bool optimize_execution_map) const {
std::shared_ptr<const Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
ERR_FAIL_COND_V(runtime_ptr == nullptr, Interval::from_single_value(0.f));
Cache &cache = _cache;
const VoxelGraphRuntime &runtime = runtime_ptr->runtime;
// Note, buffer size is irrelevant here, because range analysis doesn't use buffers
runtime.prepare_state(cache.state, 1);
runtime.analyze_range(cache.state, min_pos, max_pos);
if (optimize_execution_map) {
runtime.generate_optimized_execution_map(cache.state, cache.optimized_execution_map, true);
}
return cache.state.get_range(runtime_ptr->sdf_output_buffer_index);
}
Ref<Resource> VoxelGeneratorGraph::duplicate(bool p_subresources) const {
Ref<VoxelGeneratorGraph> d;
d.instantiate();
d->_graph.copy_from(_graph, p_subresources);
d->register_subresources();
// Program not copied, as it may contain pointers to the resources we are duplicating
return d;
}
static Dictionary get_graph_as_variant_data(const ProgramGraph &graph) {
Dictionary nodes_data;
graph.for_each_node_id([&graph, &nodes_data](uint32_t node_id) {
const ProgramGraph::Node *node = graph.get_node(node_id);
ERR_FAIL_COND(node == nullptr);
Dictionary node_data;
const VoxelGraphNodeDB::NodeType &type = VoxelGraphNodeDB::get_singleton()->get_type(node->type_id);
node_data["type"] = type.name;
node_data["gui_position"] = node->gui_position;
if (node->name != StringName()) {
node_data["name"] = node->name;
}
for (size_t j = 0; j < type.params.size(); ++j) {
const VoxelGraphNodeDB::Param &param = type.params[j];
node_data[param.name] = node->params[j];
}
for (size_t j = 0; j < type.inputs.size(); ++j) {
if (node->inputs[j].connections.size() == 0) {
const VoxelGraphNodeDB::Port &port = type.inputs[j];
node_data[port.name] = node->default_inputs[j];
}
}
String key = String::num_uint64(node_id);
nodes_data[key] = node_data;
});
Array connections_data;
std::vector<ProgramGraph::Connection> connections;
graph.get_connections(connections);
connections_data.resize(connections.size());
for (size_t i = 0; i < connections.size(); ++i) {
const ProgramGraph::Connection &con = connections[i];
Array con_data;
con_data.resize(4);
con_data[0] = con.src.node_id;
con_data[1] = con.src.port_index;
con_data[2] = con.dst.node_id;
con_data[3] = con.dst.port_index;
connections_data[i] = con_data;
}
Dictionary data;
data["nodes"] = nodes_data;
data["connections"] = connections_data;
return data;
}
Dictionary VoxelGeneratorGraph::get_graph_as_variant_data() const {
return ::get_graph_as_variant_data(_graph);
}
static bool var_to_id(Variant v, uint32_t &out_id, uint32_t min = 0) {
ERR_FAIL_COND_V(v.get_type() != Variant::INT, false);
int i = v;
ERR_FAIL_COND_V(i < 0 || (unsigned int)i < min, false);
out_id = i;
return true;
}
static bool load_graph_from_variant_data(ProgramGraph &graph, Dictionary data) {
Dictionary nodes_data = data["nodes"];
Array connections_data = data["connections"];
const VoxelGraphNodeDB &type_db = *VoxelGraphNodeDB::get_singleton();
const Variant *id_key = nullptr;
while ((id_key = nodes_data.next(id_key))) {
const String id_str = *id_key;
ERR_FAIL_COND_V(!id_str.is_valid_int(), false);
const int sid = id_str.to_int();
ERR_FAIL_COND_V(sid < static_cast<int>(ProgramGraph::NULL_ID), false);
const uint32_t id = sid;
Dictionary node_data = nodes_data[*id_key];
const String type_name = node_data["type"];
const Vector2 gui_position = node_data["gui_position"];
VoxelGeneratorGraph::NodeTypeID type_id;
ERR_FAIL_COND_V(!type_db.try_get_type_id_from_name(type_name, type_id), false);
ProgramGraph::Node *node = create_node_internal(graph, type_id, gui_position, id);
ERR_FAIL_COND_V(node == nullptr, false);
const Variant *param_key = nullptr;
while ((param_key = node_data.next(param_key))) {
const String param_name = *param_key;
if (param_name == "type") {
continue;
}
if (param_name == "gui_position") {
continue;
}
uint32_t param_index;
if (type_db.try_get_param_index_from_name(type_id, param_name, param_index)) {
node->params[param_index] = node_data[*param_key];
}
if (type_db.try_get_input_index_from_name(type_id, param_name, param_index)) {
node->default_inputs[param_index] = node_data[*param_key];
}
}
const Variant *vname = node_data.getptr("name");
if (vname != nullptr) {
node->name = *vname;
}
}
for (int i = 0; i < connections_data.size(); ++i) {
Array con_data = connections_data[i];
ERR_FAIL_COND_V(con_data.size() != 4, false);
ProgramGraph::PortLocation src;
ProgramGraph::PortLocation dst;
ERR_FAIL_COND_V(!var_to_id(con_data[0], src.node_id, ProgramGraph::NULL_ID), false);
ERR_FAIL_COND_V(!var_to_id(con_data[1], src.port_index), false);
ERR_FAIL_COND_V(!var_to_id(con_data[2], dst.node_id, ProgramGraph::NULL_ID), false);
ERR_FAIL_COND_V(!var_to_id(con_data[3], dst.port_index), false);
graph.connect(src, dst);
}
return true;
}
void VoxelGeneratorGraph::load_graph_from_variant_data(Dictionary data) {
clear();
if (::load_graph_from_variant_data(_graph, data)) {
register_subresources();
// It's possible to auto-compile on load because `graph_data` is the only property set by the loader,
// which is enough to have all information we need
compile();
} else {
_graph.clear();
}
}
void VoxelGeneratorGraph::register_subresource(Resource &resource) {
//print_line(String("{0}: Registering subresource {1}").format(varray(int64_t(this), int64_t(&resource))));
resource.connect(CoreStringNames::get_singleton()->changed,
callable_mp(this, &VoxelGeneratorGraph::_on_subresource_changed));
}
void VoxelGeneratorGraph::unregister_subresource(Resource &resource) {
//print_line(String("{0}: Unregistering subresource {1}").format(varray(int64_t(this), int64_t(&resource))));
resource.disconnect(CoreStringNames::get_singleton()->changed,
callable_mp(this, &VoxelGeneratorGraph::_on_subresource_changed));
}
void VoxelGeneratorGraph::register_subresources() {
_graph.for_each_node([this](ProgramGraph::Node &node) {
for (size_t i = 0; i < node.params.size(); ++i) {
Ref<Resource> resource = node.params[i];
if (resource.is_valid()) {
register_subresource(**resource);
}
}
});
}
void VoxelGeneratorGraph::unregister_subresources() {
_graph.for_each_node([this](ProgramGraph::Node &node) {
for (size_t i = 0; i < node.params.size(); ++i) {
Ref<Resource> resource = node.params[i];
if (resource.is_valid()) {
unregister_subresource(**resource);
}
}
});
}
// Debug land
float VoxelGeneratorGraph::debug_measure_microseconds_per_voxel(bool singular) {
std::shared_ptr<const Runtime> runtime_ptr;
{
RWLockRead rlock(_runtime_lock);
runtime_ptr = _runtime;
}
ERR_FAIL_COND_V(runtime_ptr == nullptr, 0.f);
const VoxelGraphRuntime &runtime = runtime_ptr->runtime;
const uint32_t cube_size = 16;
const uint32_t cube_count = 250;
// const uint32_t cube_size = 100;
// const uint32_t cube_count = 1;
const uint32_t voxel_count = cube_size * cube_size * cube_size * cube_count;
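// 250 cubes of 16^3 voxels = 1,024,000 voxels measured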
ProfilingClock profiling_clock;
uint64_t elapsed_us = 0;
Cache &cache = _cache;
if (singular) {
runtime.prepare_state(cache.state, 1);
for (uint32_t i = 0; i < cube_count; ++i) {
profiling_clock.restart();
for (uint32_t z = 0; z < cube_size; ++z) {
for (uint32_t y = 0; y < cube_size; ++y) {
for (uint32_t x = 0; x < cube_size; ++x) {
runtime.generate_single(cache.state, Vector3i(x, y, z), nullptr);
}
}
}
elapsed_us += profiling_clock.restart();
}
} else {
const unsigned int cube_volume = cube_size * cube_size * cube_size;
std::vector<float> src_x;
std::vector<float> src_y;
std::vector<float> src_z;
src_x.resize(cube_volume);
src_y.resize(cube_volume);
src_z.resize(cube_volume);
Span<float> sx(src_x, 0, src_x.size());
Span<float> sy(src_y, 0, src_y.size());
Span<float> sz(src_z, 0, src_z.size());
runtime.prepare_state(cache.state, sx.size());
for (uint32_t i = 0; i < cube_count; ++i) {
profiling_clock.restart();
// The buffers hold a whole cube of positions, so a single query generates `cube_volume`
// voxels, matching the `voxel_count` used to average at the end
runtime.generate_set(cache.state, sx, sy, sz, false, nullptr);
elapsed_us += profiling_clock.restart();
}
}
float us = static_cast<double>(elapsed_us) / voxel_count;
return us;
}
// This may be used as template when creating new graphs
void VoxelGeneratorGraph::load_plane_preset() {
clear();
/*
* X
*
* Y --- SdfPlane --- OutputSDF
*
* Z
*/
const Vector2 k(40, 50);
/*const uint32_t n_x = */ create_node(NODE_INPUT_X, Vector2(11, 1) * k); // 1
const uint32_t n_y = create_node(NODE_INPUT_Y, Vector2(11, 3) * k); // 2
/*const uint32_t n_z = */ create_node(NODE_INPUT_Z, Vector2(11, 5) * k); // 3
const uint32_t n_o = create_node(NODE_OUTPUT_SDF, Vector2(18, 3) * k); // 4
const uint32_t n_plane = create_node(NODE_SDF_PLANE, Vector2(14, 3) * k); // 5
add_connection(n_y, 0, n_plane, 0);
add_connection(n_plane, 0, n_o, 0);
}
void VoxelGeneratorGraph::debug_load_waves_preset() {
clear();
// This is mostly for testing
const Vector2 k(35, 50);
const uint32_t n_x = create_node(NODE_INPUT_X, Vector2(11, 1) * k); // 1
const uint32_t n_y = create_node(NODE_INPUT_Y, Vector2(37, 1) * k); // 2
const uint32_t n_z = create_node(NODE_INPUT_Z, Vector2(11, 5) * k); // 3
const uint32_t n_o = create_node(NODE_OUTPUT_SDF, Vector2(45, 3) * k); // 4
const uint32_t n_sin0 = create_node(NODE_SIN, Vector2(23, 1) * k); // 5
const uint32_t n_sin1 = create_node(NODE_SIN, Vector2(23, 5) * k); // 6
const uint32_t n_add = create_node(NODE_ADD, Vector2(27, 3) * k); // 7
const uint32_t n_mul0 = create_node(NODE_MULTIPLY, Vector2(17, 1) * k); // 8
const uint32_t n_mul1 = create_node(NODE_MULTIPLY, Vector2(17, 5) * k); // 9
const uint32_t n_mul2 = create_node(NODE_MULTIPLY, Vector2(33, 3) * k); // 10
const uint32_t n_c0 = create_node(NODE_CONSTANT, Vector2(14, 3) * k); // 11
const uint32_t n_c1 = create_node(NODE_CONSTANT, Vector2(30, 5) * k); // 12
const uint32_t n_sub = create_node(NODE_SUBTRACT, Vector2(39, 3) * k); // 13
set_node_param(n_c0, 0, 1.f / 20.f);
set_node_param(n_c1, 0, 10.f);
/*
* X --- * --- sin Y
* / \ \
* 1/20 + --- * --- - --- O
* \ / /
* Z --- * --- sin 10.0
*/
add_connection(n_x, 0, n_mul0, 0);
add_connection(n_z, 0, n_mul1, 0);
add_connection(n_c0, 0, n_mul0, 1);
add_connection(n_c0, 0, n_mul1, 1);
add_connection(n_mul0, 0, n_sin0, 0);
add_connection(n_mul1, 0, n_sin1, 0);
add_connection(n_sin0, 0, n_add, 0);
add_connection(n_sin1, 0, n_add, 1);
add_connection(n_add, 0, n_mul2, 0);
add_connection(n_c1, 0, n_mul2, 1);
add_connection(n_y, 0, n_sub, 0);
add_connection(n_mul2, 0, n_sub, 1);
add_connection(n_sub, 0, n_o, 0);
}
// Binding land
int VoxelGeneratorGraph::_b_get_node_type_count() const {
return VoxelGraphNodeDB::get_singleton()->get_type_count();
}
Dictionary VoxelGeneratorGraph::_b_get_node_type_info(int type_id) const {
ERR_FAIL_COND_V(!VoxelGraphNodeDB::get_singleton()->is_valid_type_id(type_id), Dictionary());
return VoxelGraphNodeDB::get_singleton()->get_type_info_dict(type_id);
}
Array VoxelGeneratorGraph::_b_get_connections() const {
Array con_array;
std::vector<ProgramGraph::Connection> cons;
_graph.get_connections(cons);
con_array.resize(cons.size());
for (size_t i = 0; i < cons.size(); ++i) {
const ProgramGraph::Connection &con = cons[i];
Dictionary d;
d["src_node_id"] = con.src.node_id;
d["src_port_index"] = con.src.port_index;
d["dst_node_id"] = con.dst.node_id;
d["dst_port_index"] = con.dst.port_index;
con_array[i] = d;
}
return con_array;
}
void VoxelGeneratorGraph::_b_set_node_param_null(int node_id, int param_index) {
set_node_param(node_id, param_index, Variant());
}
float VoxelGeneratorGraph::_b_generate_single(Vector3 pos) {
return generate_single(Vector3iUtil::from_floored(pos), VoxelBufferInternal::CHANNEL_SDF).f;
}
Vector2 VoxelGeneratorGraph::_b_debug_analyze_range(Vector3 min_pos, Vector3 max_pos) const {
ERR_FAIL_COND_V(min_pos.x > max_pos.x, Vector2());
ERR_FAIL_COND_V(min_pos.y > max_pos.y, Vector2());
ERR_FAIL_COND_V(min_pos.z > max_pos.z, Vector2());
const Interval r =
debug_analyze_range(Vector3iUtil::from_floored(min_pos), Vector3iUtil::from_floored(max_pos), false);
return Vector2(r.min, r.max);
}
Dictionary VoxelGeneratorGraph::_b_compile() {
VoxelGraphRuntime::CompilationResult res = compile();
Dictionary d;
d["success"] = res.success;
if (!res.success) {
d["message"] = res.message;
d["node_id"] = res.node_id;
}
return d;
}
void VoxelGeneratorGraph::_on_subresource_changed() {
emit_changed();
}
void VoxelGeneratorGraph::_bind_methods() {
ClassDB::bind_method(D_METHOD("clear"), &VoxelGeneratorGraph::clear);
ClassDB::bind_method(D_METHOD("create_node", "type_id", "position", "id"), &VoxelGeneratorGraph::create_node,
DEFVAL(ProgramGraph::NULL_ID));
ClassDB::bind_method(D_METHOD("remove_node", "node_id"), &VoxelGeneratorGraph::remove_node);
ClassDB::bind_method(D_METHOD("can_connect", "src_node_id", "src_port_index", "dst_node_id", "dst_port_index"),
&VoxelGeneratorGraph::can_connect);
ClassDB::bind_method(D_METHOD("add_connection", "src_node_id", "src_port_index", "dst_node_id", "dst_port_index"),
&VoxelGeneratorGraph::add_connection);
ClassDB::bind_method(
D_METHOD("remove_connection", "src_node_id", "src_port_index", "dst_node_id", "dst_port_index"),
&VoxelGeneratorGraph::remove_connection);
ClassDB::bind_method(D_METHOD("get_connections"), &VoxelGeneratorGraph::_b_get_connections);
ClassDB::bind_method(D_METHOD("get_node_ids"), &VoxelGeneratorGraph::get_node_ids);
ClassDB::bind_method(D_METHOD("find_node_by_name", "name"), &VoxelGeneratorGraph::find_node_by_name);
ClassDB::bind_method(D_METHOD("get_node_type_id", "node_id"), &VoxelGeneratorGraph::get_node_type_id);
ClassDB::bind_method(D_METHOD("get_node_param", "node_id", "param_index"), &VoxelGeneratorGraph::get_node_param);
ClassDB::bind_method(
D_METHOD("set_node_param", "node_id", "param_index", "value"), &VoxelGeneratorGraph::set_node_param);
ClassDB::bind_method(
D_METHOD("get_node_default_input", "node_id", "input_index"), &VoxelGeneratorGraph::get_node_default_input);
ClassDB::bind_method(D_METHOD("set_node_default_input", "node_id", "input_index", "value"),
&VoxelGeneratorGraph::set_node_default_input);
ClassDB::bind_method(
D_METHOD("set_node_param_null", "node_id", "param_index"), &VoxelGeneratorGraph::_b_set_node_param_null);
ClassDB::bind_method(D_METHOD("get_node_gui_position", "node_id"), &VoxelGeneratorGraph::get_node_gui_position);
ClassDB::bind_method(
D_METHOD("set_node_gui_position", "node_id", "position"), &VoxelGeneratorGraph::set_node_gui_position);
ClassDB::bind_method(D_METHOD("set_sdf_clip_threshold", "threshold"), &VoxelGeneratorGraph::set_sdf_clip_threshold);
ClassDB::bind_method(D_METHOD("get_sdf_clip_threshold"), &VoxelGeneratorGraph::get_sdf_clip_threshold);
ClassDB::bind_method(
D_METHOD("is_using_optimized_execution_map"), &VoxelGeneratorGraph::is_using_optimized_execution_map);
ClassDB::bind_method(
D_METHOD("set_use_optimized_execution_map", "use"), &VoxelGeneratorGraph::set_use_optimized_execution_map);
ClassDB::bind_method(D_METHOD("set_use_subdivision", "use"), &VoxelGeneratorGraph::set_use_subdivision);
ClassDB::bind_method(D_METHOD("is_using_subdivision"), &VoxelGeneratorGraph::is_using_subdivision);
ClassDB::bind_method(D_METHOD("set_subdivision_size", "size"), &VoxelGeneratorGraph::set_subdivision_size);
ClassDB::bind_method(D_METHOD("get_subdivision_size"), &VoxelGeneratorGraph::get_subdivision_size);
ClassDB::bind_method(
D_METHOD("set_debug_clipped_blocks", "enabled"), &VoxelGeneratorGraph::set_debug_clipped_blocks);
ClassDB::bind_method(D_METHOD("is_debug_clipped_blocks"), &VoxelGeneratorGraph::is_debug_clipped_blocks);
ClassDB::bind_method(D_METHOD("set_use_xz_caching", "enabled"), &VoxelGeneratorGraph::set_use_xz_caching);
ClassDB::bind_method(D_METHOD("is_using_xz_caching"), &VoxelGeneratorGraph::is_using_xz_caching);
ClassDB::bind_method(D_METHOD("compile"), &VoxelGeneratorGraph::_b_compile);
ClassDB::bind_method(D_METHOD("get_node_type_count"), &VoxelGeneratorGraph::_b_get_node_type_count);
ClassDB::bind_method(D_METHOD("get_node_type_info", "type_id"), &VoxelGeneratorGraph::_b_get_node_type_info);
//ClassDB::bind_method(D_METHOD("generate_single"), &VoxelGeneratorGraph::_b_generate_single);
ClassDB::bind_method(
D_METHOD("debug_analyze_range", "min_pos", "max_pos"), &VoxelGeneratorGraph::_b_debug_analyze_range);
ClassDB::bind_method(D_METHOD("bake_sphere_bumpmap", "im", "ref_radius", "sdf_min", "sdf_max"),
&VoxelGeneratorGraph::bake_sphere_bumpmap);
ClassDB::bind_method(D_METHOD("bake_sphere_normalmap", "im", "ref_radius", "strength"),
&VoxelGeneratorGraph::bake_sphere_normalmap);
ClassDB::bind_method(D_METHOD("debug_load_waves_preset"), &VoxelGeneratorGraph::debug_load_waves_preset);
ClassDB::bind_method(D_METHOD("debug_measure_microseconds_per_voxel", "use_singular_queries"),
&VoxelGeneratorGraph::debug_measure_microseconds_per_voxel);
ClassDB::bind_method(D_METHOD("_set_graph_data", "data"), &VoxelGeneratorGraph::load_graph_from_variant_data);
ClassDB::bind_method(D_METHOD("_get_graph_data"), &VoxelGeneratorGraph::get_graph_as_variant_data);
ClassDB::bind_method(D_METHOD("_on_subresource_changed"), &VoxelGeneratorGraph::_on_subresource_changed);
ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "graph_data", PROPERTY_HINT_NONE, "",
PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL),
"_set_graph_data", "_get_graph_data");
ADD_GROUP("Performance Tuning", "");
ADD_PROPERTY(
PropertyInfo(Variant::FLOAT, "sdf_clip_threshold"), "set_sdf_clip_threshold", "get_sdf_clip_threshold");
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_optimized_execution_map"), "set_use_optimized_execution_map",
"is_using_optimized_execution_map");
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_subdivision"), "set_use_subdivision", "is_using_subdivision");
ADD_PROPERTY(PropertyInfo(Variant::INT, "subdivision_size"), "set_subdivision_size", "get_subdivision_size");
ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_xz_caching"), "set_use_xz_caching", "is_using_xz_caching");
ADD_PROPERTY(
PropertyInfo(Variant::BOOL, "debug_block_clipping"), "set_debug_clipped_blocks", "is_debug_clipped_blocks");
ADD_SIGNAL(MethodInfo(SIGNAL_NODE_NAME_CHANGED, PropertyInfo(Variant::INT, "node_id")));
BIND_ENUM_CONSTANT(NODE_CONSTANT);
BIND_ENUM_CONSTANT(NODE_INPUT_X);
BIND_ENUM_CONSTANT(NODE_INPUT_Y);
BIND_ENUM_CONSTANT(NODE_INPUT_Z);
BIND_ENUM_CONSTANT(NODE_OUTPUT_SDF);
BIND_ENUM_CONSTANT(NODE_ADD);
BIND_ENUM_CONSTANT(NODE_SUBTRACT);
BIND_ENUM_CONSTANT(NODE_MULTIPLY);
BIND_ENUM_CONSTANT(NODE_DIVIDE);
BIND_ENUM_CONSTANT(NODE_SIN);
BIND_ENUM_CONSTANT(NODE_FLOOR);
BIND_ENUM_CONSTANT(NODE_ABS);
BIND_ENUM_CONSTANT(NODE_SQRT);
BIND_ENUM_CONSTANT(NODE_FRACT);
BIND_ENUM_CONSTANT(NODE_STEPIFY);
BIND_ENUM_CONSTANT(NODE_WRAP);
BIND_ENUM_CONSTANT(NODE_MIN);
BIND_ENUM_CONSTANT(NODE_MAX);
BIND_ENUM_CONSTANT(NODE_DISTANCE_2D);
BIND_ENUM_CONSTANT(NODE_DISTANCE_3D);
BIND_ENUM_CONSTANT(NODE_CLAMP);
BIND_ENUM_CONSTANT(NODE_MIX);
BIND_ENUM_CONSTANT(NODE_REMAP);
BIND_ENUM_CONSTANT(NODE_SMOOTHSTEP);
BIND_ENUM_CONSTANT(NODE_CURVE);
BIND_ENUM_CONSTANT(NODE_SELECT);
BIND_ENUM_CONSTANT(NODE_NOISE_2D);
BIND_ENUM_CONSTANT(NODE_NOISE_3D);
BIND_ENUM_CONSTANT(NODE_IMAGE_2D);
BIND_ENUM_CONSTANT(NODE_SDF_PLANE);
BIND_ENUM_CONSTANT(NODE_SDF_BOX);
BIND_ENUM_CONSTANT(NODE_SDF_SPHERE);
BIND_ENUM_CONSTANT(NODE_SDF_TORUS);
BIND_ENUM_CONSTANT(NODE_SDF_PREVIEW);
BIND_ENUM_CONSTANT(NODE_SDF_SPHERE_HEIGHTMAP);
BIND_ENUM_CONSTANT(NODE_SDF_SMOOTH_UNION);
BIND_ENUM_CONSTANT(NODE_SDF_SMOOTH_SUBTRACT);
BIND_ENUM_CONSTANT(NODE_NORMALIZE_3D);
BIND_ENUM_CONSTANT(NODE_FAST_NOISE_2D);
BIND_ENUM_CONSTANT(NODE_FAST_NOISE_3D);
BIND_ENUM_CONSTANT(NODE_FAST_NOISE_GRADIENT_2D);
BIND_ENUM_CONSTANT(NODE_FAST_NOISE_GRADIENT_3D);
BIND_ENUM_CONSTANT(NODE_OUTPUT_WEIGHT);
BIND_ENUM_CONSTANT(NODE_TYPE_COUNT);
}