Optimized several range analysis functions

- square, cube and polynomials are more precise
- lerp() is more precise
- smooth union and subtract are more precise
- fallback on simple min/max if smoothness is zero
This commit is contained in:
Marc Gilleron 2021-03-17 18:11:00 +00:00
parent 0720618295
commit 9188ae619b
9 changed files with 244 additions and 44 deletions

View File

@ -1,10 +1,14 @@
#include "range_utility.h"
#include "../../util/math/sdf.h"
#include "../../util/noise/fast_noise_lite.h"
#include <core/image.h>
#include <modules/opensimplex/open_simplex_noise.h>
#include <scene/resources/curve.h>
// TODO We could skew max derivative estimation if the anchor is on a bump or a dip
// because in these cases, it becomes impossible for noise to go further up or further down
template <typename Noise_F>
inline Interval get_noise_range_2d(Noise_F noise_func, const Interval &x, const Interval &y, float max_derivative) {
// Any unit vector away from a given evaluation point, the maximum difference is a fixed number.
@ -172,6 +176,71 @@ Interval get_heightmap_range(Image &im, Rect2i rect) {
return Interval();
}
template <typename F>
inline Interval sdf_smooth_op(Interval b, Interval a, float s, F smooth_op_func) {
// Smooth union and subtract are a generalization of `min(a, b)` and `max(-a, b)`, with a smooth junction.
// That junction runs in a diagonal crossing zero (with equation `y = -x`).
// Areas on the two sides of the junction are monotonic, i.e their derivatives should never cross zero,
// because they are linear functions modified by a "smoothing" polynomial for which the tip is on diagonal.
// So to find the output range, we can evaluate and sort the 4 corners,
// and diagonal intersections if it crosses the area.
// | \ |
// ---1---x-------------3--- b.max
// | \ |
// | \ | (b)
// | \ | y
// | \ | |
// ---0--------x--------2--- b.min o---x (a)
// | \ |
// a.min a.max
const float v0 = smooth_op_func(b.min, a.min, s);
const float v1 = smooth_op_func(b.max, a.min, s);
const float v2 = smooth_op_func(b.min, a.max, s);
const float v3 = smooth_op_func(b.max, a.max, s);
const Vector2 diag_b_min(-b.min, b.min);
const Vector2 diag_b_max(-b.max, b.max);
const Vector2 diag_a_min(a.min, -a.min);
const Vector2 diag_a_max(a.max, -a.max);
const bool crossing_top = (diag_b_max.x > a.min && diag_b_max.x < a.max);
const bool crossing_left = (diag_a_min.y > b.min && diag_a_min.y < b.max);
if (crossing_left || crossing_top) {
const bool crossing_right = (diag_a_max.y > b.min && diag_a_max.y < b.max);
float v4;
if (crossing_left) {
v4 = smooth_op_func(diag_a_min.y, diag_a_min.x, s);
} else {
v4 = smooth_op_func(diag_b_max.y, diag_b_max.x, s);
}
float v5;
if (crossing_right) {
v5 = smooth_op_func(diag_a_max.y, diag_a_max.x, s);
} else {
v5 = smooth_op_func(diag_b_min.y, diag_b_min.x, s);
}
return Interval(min(v0, v1, v2, v3, v4, v5), max(v0, v1, v2, v3, v4, v5));
}
// The diagonal does not cross the area
return Interval(min(v0, v1, v2, v3), max(v0, v1, v2, v3));
}
Interval sdf_smooth_union(Interval b, Interval a, float s) {
// Had to use a lambda because otherwise it's ambiguous
return sdf_smooth_op(b, a, s, [](float b, float a, float s) { return sdf_smooth_union(b, a, s); });
}
Interval sdf_smooth_subtract(Interval b, Interval a, float s) {
return sdf_smooth_op(b, a, s, [](float b, float a, float s) { return sdf_smooth_subtract(b, a, s); });
}
static Interval get_fnl_cellular_value_range_2d(const FastNoiseLite *noise, Interval x, Interval y) {
const float c0 = noise->get_noise_2d(x.min, y.min);
const float c1 = noise->get_noise_2d(x.max, y.min);
@ -323,7 +392,7 @@ Interval fnl_single_opensimplex2(const fast_noise_lite::FastNoiseLite &fn, int s
// According to OpenSimplex2 author, the 3D version is supposed to have a max derivative around 4.23718
// https://www.wolframalpha.com/input/?i=max+d%2Fdx+32.69428253173828125+*+x+*+%28%280.6-x%5E2%29%5E4%29+from+-0.6+to+0.6
// But empiric measures have shown it around 8. Discontinuities do exist in this noise though,
// which makes this measuring harder (and the reason why multiple step sizes are used)
// which makes this measuring harder
return get_noise_range_3d(
[&fn, seed](float x, float y, float z) { return fn.SingleOpenSimplex2(seed, x, y, z); },
x, y, z, 4.23718f);
@ -333,6 +402,7 @@ Interval fnl_single_opensimplex2s(const fast_noise_lite::FastNoiseLite &fn, int
Interval z) {
return get_noise_range_3d(
[&fn, seed](float x, float y, float z) { return fn.SingleOpenSimplex2(seed, x, y, z); },
// Max derivative found from empiric tests
x, y, z, 2.5f);
}
@ -347,18 +417,21 @@ Interval fnl_single_cellular(const FastNoiseLite *noise, Interval x, Interval y,
Interval fnl_single_perlin(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
return get_noise_range_3d(
[&fn, seed](float x, float y, float z) { return fn.SinglePerlin(seed, x, y, z); },
x, y, z, 3.5f);
// Max derivative found from empiric tests
x, y, z, 3.2f);
}
Interval fnl_single_value_cubic(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
return get_noise_range_3d(
[&fn, seed](float x, float y, float z) { return fn.SingleValueCubic(seed, x, y, z); },
// Max derivative found from empiric tests
x, y, z, 1.2f);
}
Interval fnl_single_value(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
return get_noise_range_3d(
[&fn, seed](float x, float y, float z) { return fn.SingleValue(seed, x, y, z); },
// Max derivative found from empiric tests
x, y, z, 3.0f);
}

View File

@ -18,6 +18,17 @@ Interval get_curve_range(Curve &curve, bool &is_monotonic_increasing);
Interval get_heightmap_range(Image &im);
Interval get_heightmap_range(Image &im, Rect2i rect);
inline Interval sdf_union(Interval a, Interval b) {
return min_interval(a, b);
}
inline Interval sdf_subtract(Interval a, Interval b) {
return max_interval(a, b);
}
Interval sdf_smooth_union(Interval b, Interval a, float s);
Interval sdf_smooth_subtract(Interval b, Interval a, float s);
struct Interval2 {
Interval x;
Interval y;

View File

@ -215,7 +215,7 @@ void VoxelGeneratorGraph::generate_block(VoxelBlockRequest &input) {
const float sdf_scale = VoxelBuffer::get_sdf_quantization_scale(
out_buffer.get_channel_depth(out_buffer.get_channel_depth(channel)));
const float clip_threshold = sdf_scale * 0.05f;
const float clip_threshold = sdf_scale * 0.2f;
const int stride = 1 << input.lod;
@ -252,18 +252,15 @@ void VoxelGeneratorGraph::generate_block(VoxelBlockRequest &input) {
const Interval range = runtime->analyze_range(cache.state, gmin, gmax) * sdf_scale;
if (range.min > clip_threshold && range.max > clip_threshold) {
// TODO fill_area_f has an issue with max values
out_buffer.fill_area_f(1.f, rmin, rmax, channel);
//out_buffer.clear_channel_f(channel, 1.f);
// DEBUG: use this instead to fill optimized-out blocks with matter, making them stand out
//out_buffer.clear_channel_f(channel, -1.f);
//out_buffer.fill_area_f(-1.f, rmin, rmax, channel);
continue;
} else if (range.min < -clip_threshold && range.max < -clip_threshold) {
out_buffer.fill_area_f(-1.f, rmin, rmax, channel);
//out_buffer.clear_channel_f(channel, -1.f);
// DEBUG: use this instead to fill optimized-out blocks with matter, making them stand out
//out_buffer.fill_area_f(1.f, rmin, rmax, channel);
continue;
} else if (range.is_single_value()) {

View File

@ -136,11 +136,14 @@ inline Interval select(const Interval &a, const Interval &b, const Interval &thr
return Interval(min(a.min, b.min), max(a.max, b.max));
}
template <typename T>
inline T skew3(T x) {
inline float skew3(float x) {
return (x * x * x + x) * 0.5f;
}
inline Interval skew3(Interval x) {
return (cubed(x) + x) * 0.5f;
}
// This is mostly useful for generating planets from an existing heightmap
inline float sdf_sphere_heightmap(float x, float y, float z, float r, float m, const Image &im,
float min_h, float max_h, float norm_x, float norm_y) {
@ -328,7 +331,12 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
t.range_analysis_func = [](RangeAnalysisContext &ctx) {
const Interval a = ctx.get_input(0);
const Interval b = ctx.get_input(1);
ctx.set_output(0, a * b);
if (ctx.get_input_address(0) == ctx.get_input_address(1)) {
// The two operands have the same source, we can optimize to a square function
ctx.set_output(0, squared(a));
} else {
ctx.set_output(0, a * b);
}
};
}
{
@ -507,7 +515,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
const Interval y1 = ctx.get_input(3);
const Interval dx = x1 - x0;
const Interval dy = y1 - y0;
const Interval r = sqrt(dx * dx + dy * dy);
const Interval r = sqrt(squared(dx) + squared(dy));
ctx.set_output(0, r);
};
}
@ -547,7 +555,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
const Interval dx = x1 - x0;
const Interval dy = y1 - y0;
const Interval dz = z1 - z0;
Interval r = sqrt(dx * dx + dy * dy + dz * dz);
Interval r = get_length(dx, dy, dz);
ctx.set_output(0, r);
};
}
@ -1022,15 +1030,26 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
const VoxelGraphRuntime::Buffer &b = ctx.get_input(1);
VoxelGraphRuntime::Buffer &out = ctx.get_output(0);
const Params params = ctx.get_params<Params>();
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_smooth_union(a.data[i], b.data[i], params.smoothness);
if (params.smoothness > 0.0001f) {
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_smooth_union(a.data[i], b.data[i], params.smoothness);
}
} else {
// Fallback on hard-union, smooth union does not support zero smoothness
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_union(a.data[i], b.data[i]);
}
}
};
t.range_analysis_func = [](RangeAnalysisContext &ctx) {
const Interval a = ctx.get_input(0);
const Interval b = ctx.get_input(1);
const Params params = ctx.get_params<Params>();
ctx.set_output(0, sdf_smooth_union(a, b, Interval::from_single_value(params.smoothness)));
if (params.smoothness > 0.0001f) {
ctx.set_output(0, sdf_smooth_union(a, b, params.smoothness));
} else {
ctx.set_output(0, sdf_union(a, b));
}
};
}
{
@ -1055,15 +1074,26 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
const VoxelGraphRuntime::Buffer &b = ctx.get_input(1);
VoxelGraphRuntime::Buffer &out = ctx.get_output(0);
const Params params = ctx.get_params<Params>();
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_smooth_subtract(a.data[i], b.data[i], params.smoothness);
if (params.smoothness > 0.0001f) {
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_smooth_subtract(a.data[i], b.data[i], params.smoothness);
}
} else {
// Fallback on hard-subtract, smooth subtract does not support zero smoothness
for (uint32_t i = 0; i < out.size; ++i) {
out.data[i] = sdf_subtract(a.data[i], b.data[i]);
}
}
};
t.range_analysis_func = [](RangeAnalysisContext &ctx) {
const Interval a = ctx.get_input(0);
const Interval b = ctx.get_input(1);
const Params params = ctx.get_params<Params>();
ctx.set_output(0, sdf_smooth_subtract(a, b, Interval::from_single_value(params.smoothness)));
if (params.smoothness > 0.0001f) {
ctx.set_output(0, sdf_smooth_subtract(a, b, params.smoothness));
} else {
ctx.set_output(0, sdf_subtract(a, b));
}
};
}
{
@ -1222,7 +1252,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
const Interval x = ctx.get_input(0);
const Interval y = ctx.get_input(1);
const Interval z = ctx.get_input(2);
const Interval len = sqrt(x * x + y * y + z * z);
const Interval len = get_length(x, y, z);
const Interval nx = x / len;
const Interval ny = y / len;
const Interval nz = z / len;

View File

@ -562,6 +562,7 @@ void VoxelGraphRuntime::generate_set(State &state,
pc += params_size;
}
// TODO May be replaced by execution mapping?
// Skip node if all its outputs are constant
bool all_outputs_constant = true;
for (uint32_t i = 0; i < outputs.size(); ++i) {

View File

@ -175,10 +175,11 @@ public:
return *reinterpret_cast<const T *>(_params.data());
}
protected:
inline uint32_t get_input_address(uint32_t i) const {
return _inputs[i];
}
protected:
inline uint32_t get_output_address(uint32_t i) const {
return _outputs[i];
}

View File

@ -54,8 +54,29 @@ inline T max(const T a, const T b, const T c, const T d) {
return max(max(a, b), max(c, d));
}
template <typename T>
inline T min(const T a, const T b, const T c, const T d, const T e, const T f) {
return min(min(min(a, b), min(c, d)), min(e, f));
}
template <typename T>
inline T max(const T a, const T b, const T c, const T d, const T e, const T f) {
return max(max(max(a, b), max(c, d)), max(e, f));
}
template <typename T>
inline T min(const T a, const T b, const T c, const T d, const T e, const T f, const T g, const T h) {
return min(min(a, b, c, d), min(e, f, g, h));
}
template <typename T>
inline T max(const T a, const T b, const T c, const T d, const T e, const T f, const T g, const T h) {
return max(max(a, b, c, d), max(e, f, g, h));
}
template <typename T>
inline T clamp(const T x, const T min_value, const T max_value) {
// TODO Clang can optimize a min/max implementation. Worth changing to that?
if (x < min_value) {
return min_value;
}

View File

@ -96,6 +96,8 @@ struct Interval {
}
inline Interval operator*(const Interval &other) const {
// Note, if the two operands have the same source (i.e you are doing x^2), this may lead to suboptimal results.
// You may then prefer using a more dedicated function.
const float a = min * other.min;
const float b = min * other.max;
const float c = max * other.min;
@ -113,6 +115,7 @@ struct Interval {
inline Interval operator/(const Interval &other) const {
if (other.is_single_value() && other.min == 0.f) {
// Division by zero. In Voxel graph, we return 0.
return Interval::from_single_value(0.f);
}
if (other.contains(0.f)) {
@ -165,10 +168,6 @@ inline Interval sqrt(const Interval &i) {
};
}
inline Interval squared(const Interval a) {
return a * a;
}
inline Interval abs(const Interval &i) {
return Interval{
i.contains(0) ? 0 : ::min(Math::abs(i.min), Math::abs(i.max)),
@ -198,9 +197,20 @@ inline Interval clamp(const Interval &i, const Interval &p_min, const Interval &
inline Interval lerp(const Interval &a, const Interval &b, const Interval &t) {
if (t.is_single_value()) {
return Interval(Math::lerp(a.min, b.min, t.min), Math::lerp(a.max, b.max, t.min));
} else {
return a + t * (b - a);
}
const float v0 = a.min + t.min * (b.min - a.min);
const float v1 = a.max + t.min * (b.min - a.max);
const float v2 = a.min + t.max * (b.min - a.min);
const float v3 = a.max + t.max * (b.min - a.max);
const float v4 = a.min + t.min * (b.max - a.min);
const float v5 = a.max + t.min * (b.max - a.max);
const float v6 = a.min + t.max * (b.max - a.min);
const float v7 = a.max + t.max * (b.max - a.max);
return Interval(
min(v0, v1, v2, v3, v4, v5, v6, v7),
max(v0, v1, v2, v3, v4, v5, v6, v7));
}
inline Interval sin(const Interval &i) {
@ -337,12 +347,72 @@ inline Interval smoothstep(float p_from, float p_to, Interval p_weight) {
}
}
// Prefer this over x*x, this will provide a more optimal result
inline Interval squared(const Interval &x) {
if (x.min < 0.f && x.max > 0.f) {
// The interval includes 0
return Interval{ 0.f, max(x.min * x.min, x.max * x.max) };
}
// The interval is only on one side of the parabola
if (x.max <= 0.f) {
// Negative side: monotonic descending
return Interval{ x.max * x.max, x.min * x.min };
} else {
// Positive side: monotonic ascending
return Interval{ x.min * x.min, x.max * x.max };
}
}
// Prefer this instead of doing polynomials with a single interval, this will provide a more optimal result
inline Interval polynomial_second_degree(const Interval x, float a, float b, float c) {
// a*x*x + b*x + c
if (a == 0.f) {
if (b == 0.f) {
return Interval::from_single_value(c);
} else {
return b * x + c;
}
}
const float parabola_x = -b / (2.f * a);
const float y0 = a * x.min * x.min + b * x.min + c;
const float y1 = a * x.max * x.max + b * x.max + c;
if (x.min < parabola_x && x.max > parabola_x) {
// The interval includes the tip
const float parabola_y = a * parabola_x * parabola_x + b * parabola_x + c;
if (a < 0) {
return Interval(min(y0, y1), parabola_y);
} else {
return Interval(parabola_y, max(y0, y1));
}
}
// The interval is only on one side of the parabola
if ((a >= 0 && x.min >= parabola_x) || (a < 0 && x.max < parabola_x)) {
// Monotonic increasing
return Interval(y0, y1);
} else {
// Monotonic decreasing
return Interval(y1, y0);
}
}
// Prefer this over x*x*x, this will provide a more optimal result
inline Interval cubed(const Interval &x) {
// x^3 is monotonic ascending
const float minv = x.min * x.min * x.min;
const float maxv = x.max * x.max * x.max;
return Interval{ minv, maxv };
}
inline Interval get_length(const Interval &x, const Interval &y) {
return sqrt(x * x + y * y);
return sqrt(squared(x) + squared(y));
}
inline Interval get_length(const Interval &x, const Interval &y, const Interval &z) {
return sqrt(x * x + y * y + z * z);
return sqrt(squared(x) + squared(y) + squared(z));
}
#endif // INTERVAL_H

View File

@ -6,7 +6,6 @@
// Signed-distance-field functions.
// For more, see https://www.iquilezles.org/www/articles/distfunctions/distfunctions.htm
// TODO Move these to VoxelMath once we have a proper namespace, so they can be used in VoxelTool too
inline float sdf_box(const Vector3 pos, const Vector3 extents) {
Vector3 d = pos.abs() - extents;
@ -35,27 +34,24 @@ inline Interval sdf_torus(
return get_length(qx, y) - r1;
}
inline float sdf_union(float a, float b) {
return min(a, b);
}
// Subtracts SDF a from SDF b
inline float sdf_subtract(float a, float b) {
return max(-a, b);
}
inline float sdf_smooth_union(float a, float b, float s) {
float h = clamp(0.5f + 0.5f * (b - a) / s, 0.0f, 1.0f);
return Math::lerp(b, a, h) - s * h * (1.0f - h);
}
inline Interval sdf_smooth_union(Interval a, Interval b, Interval s) {
Interval h = clamp(Interval::from_single_value(0.5f) + Interval::from_single_value(0.5f) * (b - a) / s,
Interval::from_single_value(0.0f), Interval::from_single_value(1.0f));
return lerp(b, a, h) - s * h * (Interval::from_single_value(1.0f) - h);
}
// Inverted a and b because it does b - a
// Inverted a and b because it subtracts SDF b from SDF a
inline float sdf_smooth_subtract(float b, float a, float s) {
float h = clamp(0.5f - 0.5f * (b + a) / s, 0.0f, 1.0f);
return Math::lerp(b, -a, h) + s * h * (1.0f - h);
}
inline Interval sdf_smooth_subtract(Interval b, Interval a, Interval s) {
Interval h = clamp(Interval::from_single_value(0.5f) - Interval::from_single_value(0.5f) * (b + a) / s,
Interval::from_single_value(0.0f), Interval::from_single_value(1.0f));
return lerp(b, -a, h) + s * h * (Interval::from_single_value(1.0f) - h);
}
#endif // VOXEL_MATH_SDF_H