godot_voxel/generators/graph/voxel_graph_runtime.h
2021-04-07 00:05:00 +01:00

440 lines
14 KiB
C++

#ifndef VOXEL_GRAPH_RUNTIME_H
#define VOXEL_GRAPH_RUNTIME_H
#include "../../util/array_slice.h"
#include "../../util/math/interval.h"
#include "../../util/math/vector3i.h"
#include "program_graph.h"
#include <core/reference.h>
class ImageRangeGrid;
// CPU VM to execute a voxel graph generator
class VoxelGraphRuntime {
public:
// Outcome of a compilation attempt.
struct CompilationResult {
	// Whether compilation succeeded. Starts out as a failure until set otherwise.
	bool success{ false };
	// Node ID attached to the result, -1 by default.
	// NOTE(review): presumably the ID of the graph node that caused the failure — confirm against `compile`.
	int node_id{ -1 };
	// Message attached to the result (empty by default).
	String message;
};
// Describes one buffer of floats the program reads from or writes to.
// Every field has a default value so a freshly constructed buffer is in a known, empty state.
struct Buffer {
	// Values of the buffer. Must contain at least `size` values.
	// TODO Consider wrapping this in debug mode. It is one of the rare cases I didn't do it.
	// I spent an hour debugging memory corruption which originated from an overrun while accessing this data.
	float *data = nullptr;
	// This size is not the allocated count, it's an available count below capacity.
	// All buffers have the same available count, size is here only for convenience.
	unsigned int size = 0;
	unsigned int capacity = 0;
	// Constant value of the buffer, if it is a compile-time constant
	float constant_value = 0.f;
	// Is the buffer holding a compile-time constant
	bool is_constant = false;
	// Is the buffer a user input/output
	bool is_binding = false;
	// How many operations are using this buffer as input.
	// This value is only relevant when using optimized execution mapping.
	unsigned int local_users_count = 0;
};
// Contains the data the program will modify while it runs.
// The same state can be re-used with multiple programs, but it should be prepared before doing that.
class State {
public:
inline const Buffer &get_buffer(uint16_t address) const {
// TODO Just for convenience because STL bound checks aren't working in Godot 3
CRASH_COND(address >= buffers.size());
return buffers[address];
}
inline const Interval get_range(uint16_t address) const {
// TODO Just for convenience because STL bound checks aren't working in Godot 3
CRASH_COND(address >= buffers.size());
return ranges[address];
}
void clear() {
buffer_size = 0;
buffer_capacity = 0;
for (auto it = buffers.begin(); it != buffers.end(); ++it) {
Buffer &b = *it;
if (b.data != nullptr && !b.is_binding) {
memfree(b.data);
}
}
buffers.clear();
ranges.clear();
}
ArraySlice<const int> get_debug_execution_map() const {
return to_slice_const(debug_execution_map);
}
private:
friend class VoxelGraphRuntime;
std::vector<Interval> ranges;
std::vector<Buffer> buffers;
// Stores operation addresses
std::vector<uint16_t> execution_map;
// Stores node IDs referring to the user-facing graph
std::vector<int> debug_execution_map;
unsigned int execution_map_xzy_start_index;
unsigned int buffer_size = 0;
unsigned int buffer_capacity = 0;
};
// NOTE(review): constructor, destructor and `clear` are defined outside this header;
// `clear` presumably resets the compiled program (see `Program::clear`) — confirm in the implementation.
VoxelGraphRuntime();
~VoxelGraphRuntime();
void clear();
// Attempts to compile `graph`. The outcome is reported through the returned `CompilationResult`.
// NOTE(review): `debug` presumably enables collection of extra debugging data — confirm in the implementation.
CompilationResult compile(const ProgramGraph &graph, bool debug);
// Call this before you use a state with generation functions.
// You only need to call it once, unless you later want to use a different graph, buffer size or buffer count.
// If none of these change, you can keep re-using it.
void prepare_state(State &state, unsigned int buffer_size) const;
// Runs the program for a single position, using `state` as working memory.
// NOTE(review): presumably returns the SDF value at `position` — confirm in the implementation.
float generate_single(State &state, Vector3 position, bool use_execution_map) const;
// Runs the program for a set of positions, using `state` as working memory.
// NOTE(review): presumably `in_x`, `in_y`, `in_z` and `out_sdf` are parallel arrays of the same length,
// and `skip_xz` skips operations that do not depend on Y — confirm in the implementation.
void generate_set(State &state, ArraySlice<float> in_x, ArraySlice<float> in_y, ArraySlice<float> in_z,
ArraySlice<float> out_sdf, bool skip_xz, bool use_execution_map) const;
// Analyzes a specific region of inputs to find out what ranges of outputs we can expect.
// It can be used to speed up calls to `generate_set` thanks to execution mapping,
// so that operations can be optimized out if they don't contribute to the result.
Interval analyze_range(State &state, Vector3i min_pos, Vector3i max_pos) const;
// Call this after `analyze_range` if you intend to actually generate a set or single values in the area.
// This allows to use the execution map optimization, until you choose another area.
// (i.e when using this, querying values outside of the analyzed area may be invalid)
inline void generate_optimized_execution_map(State &state, bool debug) const {
	// The debug map is only filled when requested
	std::vector<int> *debug_map = nullptr;
	if (debug) {
		debug_map = &state.debug_execution_map;
	}
	generate_execution_map(state, state.execution_map, state.execution_map_xzy_start_index, debug_map);
}
// Tells whether the program has an SDF output address assigned (-1 means unassigned).
inline bool has_output() const {
	const int output_address = _program.sdf_output_address;
	return output_address != -1;
}
// Tries to get the buffer address associated with a port of the user-facing graph.
// NOTE(review): presumably returns true and writes `out_address` when the port is found in
// `Program::output_port_addresses`, and returns false otherwise — confirm in the implementation.
bool try_get_output_port_address(ProgramGraph::PortLocation port, uint16_t &out_address) const;
// A type-erased pointer along with the function to call in order to delete it.
// Both fields default to null so that a resource which was never filled in is caught by the
// null checks done before deletion, instead of carrying indeterminate pointers.
struct HeapResource {
	// Pointer to the resource
	void *ptr = nullptr;
	// Function which deletes `ptr`
	void (*deleter)(void *p) = nullptr;
};
// Context given to the compile function of a node.
// It gives access to the node's parameters, and allows to append compile-time arguments to the program,
// register resources to delete later, or report an error.
class CompileContext {
public:
	CompileContext(const ProgramGraph::Node &node, std::vector<uint8_t> &program,
			std::vector<HeapResource> &heap_resources,
			std::vector<Variant> &params) :
			_node(node),
			_offset(program.size()),
			_program(program),
			_heap_resources(heap_resources),
			_params(params) {}

	// Returns a copy of the parameter at index `i`. Crashes if `i` is out of bounds.
	Variant get_param(size_t i) const {
		// `i == size()` is out of bounds too
		CRASH_COND(i >= _params.size());
		return _params[i];
	}

	// Typical use is to pass a struct containing all compile-time arguments the operation will need.
	// The struct is stored as raw bytes at the end of the program.
	template <typename T>
	void set_params(T params) {
		// Can be called only once per node
		CRASH_COND(_offset != _program.size());
		_program.resize(_program.size() + sizeof(T));
		// NOTE(review): `_offset` is an arbitrary position in a byte array,
		// so nothing here guarantees the destination is suitably aligned for `T`.
		T &p = *reinterpret_cast<T *>(&_program[_offset]);
		p = params;
	}

	// In case the compilation step produces a resource to be deleted
	template <typename T>
	void add_memdelete_cleanup(T *ptr) {
		HeapResource hr;
		hr.ptr = ptr;
		hr.deleter = [](void *p) {
			// TODO We have no guarantee it was allocated with memnew :|
			T *tp = static_cast<T *>(p);
			memdelete(tp);
		};
		_heap_resources.push_back(hr);
	}

	// Flags the context as failed and stores the message describing why
	void make_error(String message) {
		_error_message = message;
		_has_error = true;
	}

	bool has_error() const {
		return _has_error;
	}

	const String &get_error_message() const {
		return _error_message;
	}

private:
	const ProgramGraph::Node &_node;
	// Size of the program at the time the context was created
	const size_t _offset;
	std::vector<uint8_t> &_program;
	std::vector<HeapResource> &_heap_resources;
	std::vector<Variant> &_params;
	String _error_message;
	bool _has_error = false;
};
// Base of the contexts given to operations.
// Holds views over the input addresses, output addresses and raw parameter bytes of one operation.
class _ProcessContext {
public:
	inline _ProcessContext(
			const ArraySlice<const uint16_t> inputs,
			const ArraySlice<const uint16_t> outputs,
			const ArraySlice<const uint8_t> params) :
			_inputs(inputs), _outputs(outputs), _params(params) {}

	// Reinterprets the parameter bytes as a `T`
	template <typename T>
	inline const T &get_params() const {
		const T *typed_params = reinterpret_cast<const T *>(_params.data());
		return *typed_params;
	}

	// Address of the i-th input
	inline uint32_t get_input_address(uint32_t i) const {
		const uint32_t input_address = _inputs[i];
		return input_address;
	}

protected:
	// Address of the i-th output
	inline uint32_t get_output_address(uint32_t i) const {
		const uint32_t output_address = _outputs[i];
		return output_address;
	}

private:
	const ArraySlice<const uint16_t> _inputs;
	const ArraySlice<const uint16_t> _outputs;
	const ArraySlice<const uint8_t> _params;
};
// Context given to operations when they process buffers of values.
class ProcessBufferContext : public _ProcessContext {
public:
	inline ProcessBufferContext(
			const ArraySlice<const uint16_t> inputs,
			const ArraySlice<const uint16_t> outputs,
			const ArraySlice<const uint8_t> params,
			ArraySlice<Buffer> buffers,
			bool using_execution_map) :
			_ProcessContext(inputs, outputs, params),
			_buffers(buffers),
			_using_execution_map(using_execution_map) {}

	// Gets the buffer of the i-th input
	inline const Buffer &get_input(uint32_t i) const {
		const Buffer &b = _buffers[get_input_address(i)];
#ifdef DEBUG_ENABLED
		// With optimized execution mapping, a buffer found to have no users during range analysis
		// is not filled with relevant data, so it must not be read.
		// Reading it anyways could give results completely different from what range analysis predicted.
		ERR_FAIL_COND_V_MSG(_using_execution_map && !b.is_binding && b.local_users_count == 0, b,
				"buffer marked as 'ignored' is still being used");
#endif
		return b;
	}

	// Gets the buffer of the i-th output
	inline Buffer &get_output(uint32_t i) {
		return _buffers[get_output_address(i)];
	}

	// Different signature to force the coder to acknowledge the condition.
	// `ignored` is set to true when the execution map is in use and the buffer
	// is neither a binding nor used by any operation.
	inline const Buffer &try_get_input(uint32_t i, bool &ignored) {
		const Buffer &b = _buffers[get_input_address(i)];
		const bool has_no_users = !b.is_binding && b.local_users_count == 0;
		ignored = _using_execution_map && has_no_users;
		return b;
	}

private:
	ArraySlice<Buffer> _buffers;
	bool _using_execution_map;
};
class RangeAnalysisContext : public _ProcessContext {
public:
inline RangeAnalysisContext(
const ArraySlice<const uint16_t> inputs,
const ArraySlice<const uint16_t> outputs,
const ArraySlice<const uint8_t> params,
ArraySlice<Interval> ranges,
ArraySlice<Buffer> buffers) :
_ProcessContext(inputs, outputs, params),
_ranges(ranges),
_buffers(buffers) {}
inline const Interval get_input(uint32_t i) const {
const uint32_t address = get_input_address(i);
return _ranges[address];
}
inline void set_output(uint32_t i, const Interval r) {
const uint32_t address = get_output_address(i);
_ranges[address] = r;
}
inline void ignore_input(uint32_t i) {
const uint32_t address = get_input_address(i);
Buffer &b = _buffers[address];
--b.local_users_count;
}
private:
ArraySlice<Interval> _ranges;
ArraySlice<Buffer> _buffers;
};
typedef void (*CompileFunc)(CompileContext &);
typedef void (*ProcessBufferFunc)(ProcessBufferContext &);
typedef void (*RangeAnalysisFunc)(RangeAnalysisContext &);
private:
// NOTE(review): presumably the actual compilation routine wrapped by `compile` — confirm in the implementation.
CompilationResult _compile(const ProgramGraph &graph, bool debug);
// Writes an execution map into `execution_map` and the associated start index into `out_mapped_xzy_start`.
// `debug_execution_map` is optional: `generate_optimized_execution_map` passes null when debug is off.
void generate_execution_map(const State &state,
std::vector<uint16_t> &execution_map, unsigned int &out_mapped_xzy_start,
std::vector<int> *debug_execution_map) const;
// NOTE(review): presumably tells whether the operation at `op_address` only produces constants
// given the ranges stored in `state` — confirm in the implementation.
bool is_operation_constant(const State &state, uint16_t op_address) const;
// Describes a buffer the program needs.
// Every field has a default value so a spec which is only partially filled in never carries indeterminate values.
struct BufferSpec {
	// Index the buffer should be stored at
	uint16_t address = 0;
	// How many nodes use this buffer as input
	uint16_t users_count = 0;
	// Value of the compile-time constant, if any
	float constant_value = 0.f;
	// Is the buffer constant at compile time
	bool is_constant = false;
	// Is the buffer a user input/output
	bool is_binding = false;
};
// Dependencies between operations, stored as flat arrays.
struct DependencyGraph {
	struct Node {
		// Range of this node's entries within `dependencies`, as suggested by the first/end naming.
		// NOTE(review): assumed to be a half-open [first, end) range — confirm against the code filling it.
		uint16_t first_dependency;
		uint16_t end_dependency;
		uint16_t op_address;
		bool is_output;
		int debug_node_id;
	};

	// Indexes to the `nodes` array
	std::vector<uint16_t> dependencies;
	// Nodes in the same order they would be in the default execution map
	std::vector<Node> nodes;

	// Empties both arrays
	inline void clear() {
		nodes.clear();
		dependencies.clear();
	}
};
// Precalculated program data.
// Remains constant and read-only after compilation.
struct Program {
// Serialized operations and arguments.
// They come up as series of <opid><inputs><outputs><parameters_size><parameters>.
// They should be laid out in the same order they will be run in, although it's not absolutely required.
// It's better to have it ordered because memory access will be more predictable.
std::vector<uint8_t> operations;
// Describes dependencies between operations. It is generated at compile time.
// It is used to perform dynamic optimization in case some operations can be predicted as constant.
DependencyGraph dependency_graph;
// List of indexes within `operations` describing which order they should be run into by default.
// It's used because sometimes we may want to override with a simplified execution map dynamically.
// When we don't, we use the default one so the code doesn't have to change.
std::vector<uint16_t> default_execution_map;
// Heap-allocated parameters data, when too large to fit in `operations`.
// We keep a reference to them so they can be freed when the program is cleared.
std::vector<HeapResource> heap_resources;
// Heap-allocated parameters data, when too large to fit in `operations`.
// We keep a reference to them so they won't be freed until the program is cleared.
std::vector<Ref<Reference> > ref_resources;
// Describes the list of buffers to prepare in `State` before the program can be run
std::vector<BufferSpec> buffer_specs;
// Address in `operations` from which operations will depend on Y. Operations before never depend on it.
// It is used to optimize away calculations that would otherwise be the same in planar terrain use cases.
uint32_t xzy_start_op_address;
uint32_t xzy_start_execution_map_index;
// Note: the following buffers are allocated by the user.
// They are mapped temporarily into the same array of buffers inside `State`,
// so we won't need specific code to handle them. This requires knowing at which index they are reserved.
// They must be all assigned for the program to run correctly.
//
// Address within the State's array of buffers where the X input may be.
int x_input_address = -1;
// Address within the State's array of buffers where the Y input may be.
int y_input_address = -1;
// Address within the State's array of buffers where the Z input may be.
int z_input_address = -1;
// Address within the State's array of buffers where the SDF output may be.
int sdf_output_address = -1;
int sdf_output_node_index = -1;
// Maximum amount of buffers this program will need to do a full run.
// Buffers are needed to hold values of arguments and outputs for each operation.
unsigned int buffer_count = 0;
// Associates a high-level port to its corresponding address within the compiled program.
// This is used for debugging intermediate values.
HashMap<ProgramGraph::PortLocation, uint16_t, ProgramGraph::PortLocationHasher> output_port_addresses;
// Result of the last compilation attempt. The program should not be run if it failed.
CompilationResult compilation_result;
void clear() {
operations.clear();
buffer_specs.clear();
xzy_start_execution_map_index = 0;
xzy_start_op_address = 0;
default_execution_map.clear();
output_port_addresses.clear();
dependency_graph.clear();
sdf_output_address = -1;
x_input_address = -1;
y_input_address = -1;
z_input_address = -1;
sdf_output_node_index = -1;
compilation_result = CompilationResult();
for (auto it = heap_resources.begin(); it != heap_resources.end(); ++it) {
HeapResource &r = *it;
CRASH_COND(r.deleter == nullptr);
CRASH_COND(r.ptr == nullptr);
r.deleter(r.ptr);
}
heap_resources.clear();
unlock_images();
ref_resources.clear();
buffer_count = 0;
}
void lock_images();
void unlock_images();
};
Program _program;
};
#endif // VOXEL_GRAPH_RUNTIME_H