446 lines
16 KiB
C++
446 lines
16 KiB
C++
#ifndef VOXEL_GRAPH_RUNTIME_H
|
|
#define VOXEL_GRAPH_RUNTIME_H
|
|
|
|
#include "../../util/math/interval.h"
|
|
#include "../../util/math/vector3f.h"
|
|
#include "../../util/math/vector3i.h"
|
|
#include "../../util/span.h"
|
|
#include "program_graph.h"
|
|
|
|
#include <core/object/ref_counted.h>
|
|
|
|
namespace zylann::voxel {
|
|
|
|
class VoxelGraphNodeDB;
|
|
|
|
// CPU VM to execute a voxel graph generator.
|
|
// This is a more generic class implementing the core of a 3D expression processing system.
|
|
// Some of the logic dedicated to voxel data is moved in other classes.
|
|
class VoxelGraphRuntime {
|
|
public:
|
|
static const unsigned int MAX_OUTPUTS = 24;
|
|
|
|
struct CompilationResult {
|
|
bool success = false;
|
|
int node_id = -1;
|
|
String message;
|
|
|
|
static CompilationResult make_error(const char *p_message, int p_node_id = -1) {
|
|
VoxelGraphRuntime::CompilationResult res;
|
|
res.success = false;
|
|
res.node_id = p_node_id;
|
|
res.message = p_message;
|
|
return res;
|
|
}
|
|
};
|
|
|
|
// Contains values of a node output.
// Every member has a default initializer so a default-constructed Buffer never holds indeterminate values.
struct Buffer {
	// Values of the buffer. Must contain at least `size` values.
	// TODO Consider wrapping this in debug mode. It is one of the rare cases I didn't do it.
	// I spent an hour debugging memory corruption which originated from an overrun while accessing this data.
	float *data = nullptr;
	// This size is not the allocated count, it's an available count below capacity.
	// All buffers have the same available count, size is here only for convenience.
	unsigned int size = 0;
	unsigned int capacity = 0;
	// Constant value of the buffer, if it is a compile-time constant
	float constant_value = 0.f;
	// Is the buffer holding a compile-time constant
	bool is_constant = false;
	// Is the buffer a user input/output
	bool is_binding = false;
	// How many operations are using this buffer as input.
	// This value is only relevant when using optimized execution mapping.
	unsigned int local_users_count = 0;
};
|
|
|
|
// Contains a list of addresses to the operations to execute for a given query.
// If no local optimization is done, this can remain the same for any position lists.
// If local optimization is used, it may be recomputed before each query.
struct ExecutionMap {
	// Addresses of the operations to run.
	// TODO Typo? (the field name is kept as-is because other code refers to it)
	std::vector<uint16_t> operation_adresses;
	// Stores node IDs referring to the user-facing graph.
	// Each index corresponds to operation indices.
	// The same node can appear twice, because sometimes a user-facing node compiles as multiple nodes.
	std::vector<uint32_t> debug_nodes;
	// From which index in the address list operations will start depending on Y
	unsigned int xzy_start_index = 0;

	// Empties both lists and resets the start index to 0.
	void clear() {
		xzy_start_index = 0;
		debug_nodes.clear();
		operation_adresses.clear();
	}
};
|
|
|
|
// Contains the data the program will modify while it runs.
|
|
// The same state can be re-used with multiple programs, but it should be prepared before doing that.
|
|
class State {
|
|
public:
|
|
~State() {
|
|
clear();
|
|
}
|
|
|
|
inline const Buffer &get_buffer(uint16_t address) const {
|
|
// TODO Just for convenience because STL bound checks aren't working in Godot 3
|
|
CRASH_COND(address >= buffers.size());
|
|
return buffers[address];
|
|
}
|
|
|
|
inline const math::Interval get_range(uint16_t address) const {
|
|
// TODO Just for convenience because STL bound checks aren't working in Godot 3
|
|
CRASH_COND(address >= buffers.size());
|
|
return ranges[address];
|
|
}
|
|
|
|
void clear() {
|
|
buffer_size = 0;
|
|
buffer_capacity = 0;
|
|
for (auto it = buffers.begin(); it != buffers.end(); ++it) {
|
|
Buffer &b = *it;
|
|
if (b.data != nullptr && !b.is_binding) {
|
|
memfree(b.data);
|
|
}
|
|
}
|
|
buffers.clear();
|
|
ranges.clear();
|
|
debug_profiler_times.clear();
|
|
}
|
|
|
|
inline void add_execution_time(uint32_t execution_map_index, uint32_t time) {
|
|
#if DEBUG_ENABLED
|
|
CRASH_COND(execution_map_index >= debug_profiler_times.size());
|
|
#endif
|
|
debug_profiler_times[execution_map_index] += time;
|
|
}
|
|
|
|
inline uint32_t get_execution_time(uint32_t execution_map_index) const {
|
|
#if DEBUG_ENABLED
|
|
CRASH_COND(execution_map_index >= debug_profiler_times.size());
|
|
#endif
|
|
return debug_profiler_times[execution_map_index];
|
|
}
|
|
|
|
private:
|
|
friend class VoxelGraphRuntime;
|
|
|
|
std::vector<math::Interval> ranges;
|
|
std::vector<Buffer> buffers;
|
|
// [execution_map_index] => microseconds
|
|
std::vector<uint32_t> debug_profiler_times;
|
|
|
|
unsigned int buffer_size = 0;
|
|
unsigned int buffer_capacity = 0;
|
|
};
|
|
|
|
// Info about a terminal node of the graph.
// Members are zero-initialized so unassigned entries never hold indeterminate values.
struct OutputInfo {
	unsigned int buffer_address = 0;
	unsigned int dependency_graph_node_index = 0;
	unsigned int node_id = 0;
};
|
|
|
|
VoxelGraphRuntime();
|
|
~VoxelGraphRuntime();
|
|
|
|
void clear();
|
|
CompilationResult compile(const ProgramGraph &p_graph, bool debug);
|
|
|
|
// Call this before you use a state with generation functions.
|
|
// You need to call it once, until you want to use a different graph, buffer size or buffer count.
|
|
// If none of these change, you can keep re-using it.
|
|
void prepare_state(State &state, unsigned int buffer_size, bool with_profiling) const;
|
|
|
|
// Convenience for set generation with only one value
|
|
// TODO Evaluate needs for double-precision in VoxelGraphRuntime
|
|
void generate_single(State &state, Vector3f position_f, const ExecutionMap *execution_map) const;
|
|
|
|
void generate_set(State &state, Span<float> in_x, Span<float> in_y, Span<float> in_z, bool skip_xz,
|
|
const ExecutionMap *execution_map) const;
|
|
|
|
inline unsigned int get_output_count() const {
|
|
return _program.outputs_count;
|
|
}
|
|
|
|
// Returns the info stored for output `i`.
// No check is done here against `get_output_count()`.
inline const OutputInfo &get_output_info(unsigned int i) const {
	const OutputInfo &info = _program.outputs[i];
	return info;
}
|
|
|
|
// Analyzes a specific region of inputs to find out what ranges of outputs we can expect.
|
|
// It can be used to speed up calls to `generate_set` thanks to execution mapping,
|
|
// so that operations can be optimized out if they don't contribute to the result.
|
|
void analyze_range(State &state, Vector3i min_pos, Vector3i max_pos) const;
|
|
|
|
// Call this after `analyze_range` if you intend to actually generate a set or single values in the area.
|
|
// This allows using the execution map optimization, until you choose another area.
|
|
// (i.e when using this, querying values outside of the analyzed area may be invalid)
|
|
void generate_optimized_execution_map(const State &state, ExecutionMap &execution_map,
|
|
Span<const unsigned int> required_outputs, bool debug) const;
|
|
|
|
// Convenience function to require all outputs
|
|
void generate_optimized_execution_map(const State &state, ExecutionMap &execution_map, bool debug) const;
|
|
|
|
const ExecutionMap &get_default_execution_map() const;
|
|
|
|
// Gets the buffer address of a specific output port
|
|
bool try_get_output_port_address(ProgramGraph::PortLocation port, uint16_t &out_address) const;
|
|
|
|
uint64_t get_program_hash() const;
|
|
|
|
// Pairs a raw pointer with the function used to release it.
// Both members default to null so an entry that was never filled in is detectable.
struct HeapResource {
	// Pointer passed to `deleter`.
	void *ptr = nullptr;
	// Function called with `ptr` to release it.
	void (*deleter)(void *p) = nullptr;
};
|
|
|
|
class _ProcessContext {
|
|
public:
|
|
inline _ProcessContext(const Span<const uint16_t> inputs, const Span<const uint16_t> outputs,
|
|
const Span<const uint8_t> params) :
|
|
_inputs(inputs), _outputs(outputs), _params(params) {}
|
|
|
|
template <typename T>
|
|
inline const T &get_params() const {
|
|
#ifdef DEBUG_ENABLED
|
|
CRASH_COND(sizeof(T) > _params.size());
|
|
#endif
|
|
return *reinterpret_cast<const T *>(_params.data());
|
|
}
|
|
|
|
inline uint32_t get_input_address(uint32_t i) const {
|
|
return _inputs[i];
|
|
}
|
|
|
|
protected:
|
|
inline uint32_t get_output_address(uint32_t i) const {
|
|
return _outputs[i];
|
|
}
|
|
|
|
private:
|
|
const Span<const uint16_t> _inputs;
|
|
const Span<const uint16_t> _outputs;
|
|
const Span<const uint8_t> _params;
|
|
};
|
|
|
|
// Functions usable by node implementations during execution
|
|
class ProcessBufferContext : public _ProcessContext {
|
|
public:
|
|
inline ProcessBufferContext(const Span<const uint16_t> inputs, const Span<const uint16_t> outputs,
|
|
const Span<const uint8_t> params, Span<Buffer> buffers, bool using_execution_map) :
|
|
_ProcessContext(inputs, outputs, params),
|
|
_buffers(buffers),
|
|
_using_execution_map(using_execution_map) {}
|
|
|
|
inline const Buffer &get_input(uint32_t i) const {
|
|
const uint32_t address = get_input_address(i);
|
|
#ifdef DEBUG_ENABLED
|
|
// When using optimized execution mapping,
|
|
// If a buffer is marked as having no users during range analysis, then it should really not be used,
|
|
// because it won't be filled with relevant data. If it is still used,
|
|
// then the result can be completely different from what the range analysis predicted.
|
|
const Buffer &b = _buffers[address];
|
|
ERR_FAIL_COND_V_MSG(_using_execution_map && !b.is_binding && b.local_users_count == 0, b,
|
|
"buffer marked as 'ignored' is still being used");
|
|
#endif
|
|
return _buffers[address];
|
|
}
|
|
|
|
inline Buffer &get_output(uint32_t i) {
|
|
const uint32_t address = get_output_address(i);
|
|
return _buffers[address];
|
|
}
|
|
|
|
// Different signature to force the coder to acknowledge the condition
|
|
inline const Buffer &try_get_input(uint32_t i, bool &ignored) {
|
|
const uint32_t address = get_input_address(i);
|
|
const Buffer &b = _buffers[address];
|
|
ignored = _using_execution_map && !b.is_binding && b.local_users_count == 0;
|
|
return b;
|
|
}
|
|
|
|
private:
|
|
Span<Buffer> _buffers;
|
|
bool _using_execution_map;
|
|
};
|
|
|
|
// Functions usable by node implementations during range analysis
|
|
class RangeAnalysisContext : public _ProcessContext {
|
|
public:
|
|
inline RangeAnalysisContext(const Span<const uint16_t> inputs, const Span<const uint16_t> outputs,
|
|
const Span<const uint8_t> params, Span<math::Interval> ranges, Span<Buffer> buffers) :
|
|
_ProcessContext(inputs, outputs, params), _ranges(ranges), _buffers(buffers) {}
|
|
|
|
inline const math::Interval get_input(uint32_t i) const {
|
|
const uint32_t address = get_input_address(i);
|
|
return _ranges[address];
|
|
}
|
|
|
|
inline void set_output(uint32_t i, const math::Interval r) {
|
|
const uint32_t address = get_output_address(i);
|
|
_ranges[address] = r;
|
|
}
|
|
|
|
inline void ignore_input(uint32_t i) {
|
|
const uint32_t address = get_input_address(i);
|
|
Buffer &b = _buffers[address];
|
|
--b.local_users_count;
|
|
}
|
|
|
|
private:
|
|
Span<math::Interval> _ranges;
|
|
Span<Buffer> _buffers;
|
|
};
|
|
|
|
typedef void (*ProcessBufferFunc)(ProcessBufferContext &);
|
|
typedef void (*RangeAnalysisFunc)(RangeAnalysisContext &);
|
|
|
|
private:
|
|
CompilationResult _compile(const ProgramGraph &graph, bool debug, const VoxelGraphNodeDB &type_db);
|
|
|
|
bool is_operation_constant(const State &state, uint16_t op_address) const;
|
|
|
|
// Describes one buffer to set up.
// Members are value-initialized so a default-constructed spec never holds indeterminate values.
struct BufferSpec {
	// Index the buffer should be stored at
	uint16_t address = 0;
	// How many nodes use this buffer as input
	uint16_t users_count = 0;
	// Value of the compile-time constant, if any
	float constant_value = 0.f;
	// Is the buffer constant at compile time
	bool is_constant = false;
	// Is the buffer a user input/output
	bool is_binding = false;
};
|
|
|
|
// Pre-processed, read-only graph used for runtime optimizations.
struct DependencyGraph {
	// Members are value-initialized so a default-constructed node never holds indeterminate values.
	struct Node {
		uint16_t first_dependency = 0;
		uint16_t end_dependency = 0;
		uint16_t op_address = 0;
		bool is_input = false;
		// Node ID from the expanded ProgramGraph (non user-provided, so may need remap)
		uint32_t debug_node_id = 0;
	};

	// Indexes to the `nodes` array
	std::vector<uint16_t> dependencies;
	// Nodes in the same order they would be in the default execution map (but indexes may not match)
	std::vector<Node> nodes;

	// Empties both lists.
	inline void clear() {
		dependencies.clear();
		nodes.clear();
	}
};
|
|
|
|
// Precalculated program data.
|
|
// Remains constant and read-only after compilation.
|
|
struct Program {
|
|
// Serialized operations and arguments, aligned at minimum with uint16.
|
|
// They come up as series of:
|
|
//
|
|
// - uint16 opid
|
|
// - uint16 inputs[0..*]
|
|
// - uint16 outputs[0..*]
|
|
// - uint16 parameters_size
|
|
// - uint16 parameters_offset // how much to advance from here to reach the beginning of `parameters`
|
|
// - <optional padding>
|
|
// - T parameters, where T could be any struct
|
|
// - <optional padding to keep alignment with uint16>
|
|
//
|
|
// They should be laid out in the same order they will be run in, although it's not absolutely required.
|
|
// It's better to have it ordered because memory access will be more predictable.
|
|
std::vector<uint16_t> operations;
|
|
|
|
// Describes dependencies between operations. It is generated at compile time.
|
|
// It is used to perform dynamic optimization in case some operations can be predicted as constant.
|
|
DependencyGraph dependency_graph;
|
|
|
|
// List of indexes within `operations` describing which order they should be run into by default.
|
|
// It's used because sometimes we may want to override with a simplified execution map dynamically.
|
|
// When we don't, we use the default one so the code doesn't have to change.
|
|
ExecutionMap default_execution_map;
|
|
|
|
// Heap-allocated parameters data, when too large to fit in `operations`.
|
|
// We keep a reference to them so they can be freed when the program is cleared.
|
|
std::vector<HeapResource> heap_resources;
|
|
|
|
// Heap-allocated parameters data, when too large to fit in `operations`.
|
|
// We keep a reference to them so they won't be freed until the program is cleared.
|
|
std::vector<Ref<RefCounted>> ref_resources;
|
|
|
|
// Describes the list of buffers to prepare in `State` before the program can be run
|
|
std::vector<BufferSpec> buffer_specs;
|
|
|
|
// Address in `operations` from which operations will depend on Y. Operations before never depend on it.
|
|
// It is used to optimize away calculations that would otherwise be the same in planar terrain use cases.
|
|
uint32_t xzy_start_op_address;
|
|
|
|
// Note: the following buffers are allocated by the user.
|
|
// They are mapped temporarily into the same array of buffers inside `State`,
|
|
// so we won't need specific code to handle them. This requires knowing at which index they are reserved.
|
|
// They must be all assigned for the program to run correctly.
|
|
//
|
|
// Address within the State's array of buffers where the X input may be.
|
|
int x_input_address = -1;
|
|
// Address within the State's array of buffers where the Y input may be.
|
|
int y_input_address = -1;
|
|
// Address within the State's array of buffers where the Z input may be.
|
|
int z_input_address = -1;
|
|
|
|
FixedArray<OutputInfo, MAX_OUTPUTS> outputs;
|
|
unsigned int outputs_count = 0;
|
|
|
|
// Maximum amount of buffers this program will need to do a full run.
|
|
// Buffers are needed to hold values of arguments and outputs for each operation.
|
|
unsigned int buffer_count = 0;
|
|
|
|
// Associates a port from the input graph to its corresponding address within the compiled program.
|
|
// This is used for debugging intermediate values.
|
|
std::unordered_map<ProgramGraph::PortLocation, uint16_t> output_port_addresses;
|
|
|
|
// If you have a port location from the original user graph, before querying `output_port_addresses`, remap
|
|
// it first, in case it got expanded to different nodes during compilation.
|
|
std::unordered_map<ProgramGraph::PortLocation, ProgramGraph::PortLocation> user_port_to_expanded_port;
|
|
|
|
// Associates expanded graph ID to user graph node IDs.
|
|
std::unordered_map<uint32_t, uint32_t> expanded_node_id_to_user_node_id;
|
|
|
|
// Result of the last compilation attempt. The program should not be run if it failed.
|
|
CompilationResult compilation_result;
|
|
|
|
void clear() {
|
|
operations.clear();
|
|
buffer_specs.clear();
|
|
xzy_start_op_address = 0;
|
|
default_execution_map.clear();
|
|
output_port_addresses.clear();
|
|
user_port_to_expanded_port.clear();
|
|
expanded_node_id_to_user_node_id.clear();
|
|
dependency_graph.clear();
|
|
x_input_address = -1;
|
|
y_input_address = -1;
|
|
z_input_address = -1;
|
|
outputs_count = 0;
|
|
compilation_result = CompilationResult();
|
|
for (auto it = heap_resources.begin(); it != heap_resources.end(); ++it) {
|
|
HeapResource &r = *it;
|
|
CRASH_COND(r.deleter == nullptr);
|
|
CRASH_COND(r.ptr == nullptr);
|
|
r.deleter(r.ptr);
|
|
}
|
|
heap_resources.clear();
|
|
ref_resources.clear();
|
|
buffer_count = 0;
|
|
}
|
|
};
|
|
|
|
Program _program;
|
|
};
|
|
|
|
} // namespace zylann::voxel
|
|
|
|
#endif // VOXEL_GRAPH_RUNTIME_H
|