Optimized several range analysis functions

- square, cube and polynomials are more precise - lerp() is more precise - smooth union and subtract are more precise - fallback on simple min/max if smoothness is zero
2021-03-17 18:11:00 +00:00 · 2021-03-17 18:11:00 +00:00 · 9188ae619b
commit 9188ae619b
parent 0720618295
9 changed files with 244 additions and 44 deletions
--- a/generators/graph/range_utility.cpp
+++ b/generators/graph/range_utility.cpp
@ -1,10 +1,14 @@
 #include "range_utility.h"
+#include "../../util/math/sdf.h"
 #include "../../util/noise/fast_noise_lite.h"

 #include <core/image.h>
 #include <modules/opensimplex/open_simplex_noise.h>
 #include <scene/resources/curve.h>

+// TODO We could skew max derivative estimation if the anchor is on a bump or a dip
+// because in these cases, it becomes impossible for noise to go further up or further down
+
 template <typename Noise_F>
 inline Interval get_noise_range_2d(Noise_F noise_func, const Interval &x, const Interval &y, float max_derivative) {
 	// Any unit vector away from a given evaluation point, the maximum difference is a fixed number.
@ -172,6 +176,71 @@ Interval get_heightmap_range(Image &im, Rect2i rect) {
 	return Interval();
 }

+template <typename F>
+inline Interval sdf_smooth_op(Interval b, Interval a, float s, F smooth_op_func) {
+	// Smooth union and subtract are a generalization of `min(a, b)` and `max(-a, b)`, with a smooth junction.
+	// That junction runs in a diagonal crossing zero (with equation `y = -x`).
+	// Areas on the two sides of the junction are monotonic, i.e their derivatives should never cross zero,
+	// because they are linear functions modified by a "smoothing" polynomial for which the tip is on diagonal.
+	// So to find the output range, we can evaluate and sort the 4 corners,
+	// and diagonal intersections if it crosses the area.
+
+	//     |  \              |
+	//  ---1---x-------------3--- b.max
+	//     |    \            |
+	//     |     \           |              (b)
+	//     |      \          |               y
+	//     |       \         |               |
+	//  ---0--------x--------2--- b.min      o---x (a)
+	//     |         \       |
+	//    a.min             a.max
+
+	const float v0 = smooth_op_func(b.min, a.min, s);
+	const float v1 = smooth_op_func(b.max, a.min, s);
+	const float v2 = smooth_op_func(b.min, a.max, s);
+	const float v3 = smooth_op_func(b.max, a.max, s);
+
+	const Vector2 diag_b_min(-b.min, b.min);
+	const Vector2 diag_b_max(-b.max, b.max);
+	const Vector2 diag_a_min(a.min, -a.min);
+	const Vector2 diag_a_max(a.max, -a.max);
+
+	const bool crossing_top = (diag_b_max.x > a.min && diag_b_max.x < a.max);
+	const bool crossing_left = (diag_a_min.y > b.min && diag_a_min.y < b.max);
+
+	if (crossing_left || crossing_top) {
+		const bool crossing_right = (diag_a_max.y > b.min && diag_a_max.y < b.max);
+
+		float v4;
+		if (crossing_left) {
+			v4 = smooth_op_func(diag_a_min.y, diag_a_min.x, s);
+		} else {
+			v4 = smooth_op_func(diag_b_max.y, diag_b_max.x, s);
+		}
+
+		float v5;
+		if (crossing_right) {
+			v5 = smooth_op_func(diag_a_max.y, diag_a_max.x, s);
+		} else {
+			v5 = smooth_op_func(diag_b_min.y, diag_b_min.x, s);
+		}
+
+		return Interval(min(v0, v1, v2, v3, v4, v5), max(v0, v1, v2, v3, v4, v5));
+	}
+
+	// The diagonal does not cross the area
+	return Interval(min(v0, v1, v2, v3), max(v0, v1, v2, v3));
+}
+
+Interval sdf_smooth_union(Interval b, Interval a, float s) {
+	// Had to use a lambda because otherwise it's ambiguous
+	return sdf_smooth_op(b, a, s, [](float b, float a, float s) { return sdf_smooth_union(b, a, s); });
+}
+
+Interval sdf_smooth_subtract(Interval b, Interval a, float s) {
+	return sdf_smooth_op(b, a, s, [](float b, float a, float s) { return sdf_smooth_subtract(b, a, s); });
+}
+
 static Interval get_fnl_cellular_value_range_2d(const FastNoiseLite *noise, Interval x, Interval y) {
 	const float c0 = noise->get_noise_2d(x.min, y.min);
 	const float c1 = noise->get_noise_2d(x.max, y.min);
@ -323,7 +392,7 @@ Interval fnl_single_opensimplex2(const fast_noise_lite::FastNoiseLite &fn, int s
 	// According to OpenSimplex2 author, the 3D version is supposed to have a max derivative around 4.23718
 	// https://www.wolframalpha.com/input/?i=max+d%2Fdx+32.69428253173828125+*+x+*+%28%280.6-x%5E2%29%5E4%29+from+-0.6+to+0.6
 	// But empiric measures have shown it around 8. Discontinuities do exist in this noise though,
-	// which makes this measuring harder (and the reason why multiple step sizes are used)
+	// which makes this measuring harder
 	return get_noise_range_3d(
 			[&fn, seed](float x, float y, float z) { return fn.SingleOpenSimplex2(seed, x, y, z); },
 			x, y, z, 4.23718f);
@ -333,6 +402,7 @@ Interval fnl_single_opensimplex2s(const fast_noise_lite::FastNoiseLite &fn, int
 		Interval z) {
 	return get_noise_range_3d(
 			[&fn, seed](float x, float y, float z) { return fn.SingleOpenSimplex2(seed, x, y, z); },
+			// Max derivative found from empiric tests
 			x, y, z, 2.5f);
 }

@ -347,18 +417,21 @@ Interval fnl_single_cellular(const FastNoiseLite *noise, Interval x, Interval y,
 Interval fnl_single_perlin(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
 	return get_noise_range_3d(
 			[&fn, seed](float x, float y, float z) { return fn.SinglePerlin(seed, x, y, z); },
-			x, y, z, 3.5f);
+			// Max derivative found from empiric tests
+			x, y, z, 3.2f);
 }

 Interval fnl_single_value_cubic(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
 	return get_noise_range_3d(
 			[&fn, seed](float x, float y, float z) { return fn.SingleValueCubic(seed, x, y, z); },
+			// Max derivative found from empiric tests
 			x, y, z, 1.2f);
 }

 Interval fnl_single_value(const fast_noise_lite::FastNoiseLite &fn, int seed, Interval x, Interval y, Interval z) {
 	return get_noise_range_3d(
 			[&fn, seed](float x, float y, float z) { return fn.SingleValue(seed, x, y, z); },
+			// Max derivative found from empiric tests
 			x, y, z, 3.0f);
 }

--- a/generators/graph/range_utility.h
+++ b/generators/graph/range_utility.h
@ -18,6 +18,17 @@ Interval get_curve_range(Curve &curve, bool &is_monotonic_increasing);
 Interval get_heightmap_range(Image &im);
 Interval get_heightmap_range(Image &im, Rect2i rect);

+inline Interval sdf_union(Interval a, Interval b) {
+	return min_interval(a, b);
+}
+
+inline Interval sdf_subtract(Interval a, Interval b) {
+	return max_interval(a, b);
+}
+
+Interval sdf_smooth_union(Interval b, Interval a, float s);
+Interval sdf_smooth_subtract(Interval b, Interval a, float s);
+
 struct Interval2 {
 	Interval x;
 	Interval y;
--- a/generators/graph/voxel_generator_graph.cpp
+++ b/generators/graph/voxel_generator_graph.cpp
@ -215,7 +215,7 @@ void VoxelGeneratorGraph::generate_block(VoxelBlockRequest &input) {
 	const float sdf_scale = VoxelBuffer::get_sdf_quantization_scale(
 			out_buffer.get_channel_depth(out_buffer.get_channel_depth(channel)));

-	const float clip_threshold = sdf_scale * 0.05f;
+	const float clip_threshold = sdf_scale * 0.2f;

 	const int stride = 1 << input.lod;

@ -252,18 +252,15 @@ void VoxelGeneratorGraph::generate_block(VoxelBlockRequest &input) {

 				const Interval range = runtime->analyze_range(cache.state, gmin, gmax) * sdf_scale;
 				if (range.min > clip_threshold && range.max > clip_threshold) {
-					// TODO fill_area_f has an issue with max values
 					out_buffer.fill_area_f(1.f, rmin, rmax, channel);
-
-					//out_buffer.clear_channel_f(channel, 1.f);
-
 					// DEBUG: use this instead to fill optimized-out blocks with matter, making them stand out
-					//out_buffer.clear_channel_f(channel, -1.f);
+					//out_buffer.fill_area_f(-1.f, rmin, rmax, channel);
 					continue;

 				} else if (range.min < -clip_threshold && range.max < -clip_threshold) {
 					out_buffer.fill_area_f(-1.f, rmin, rmax, channel);
-					//out_buffer.clear_channel_f(channel, -1.f);
+					// DEBUG: use this instead to fill optimized-out blocks with matter, making them stand out
+					//out_buffer.fill_area_f(1.f, rmin, rmax, channel);
 					continue;

 				} else if (range.is_single_value()) {
--- a/generators/graph/voxel_graph_node_db.cpp
+++ b/generators/graph/voxel_graph_node_db.cpp
@ -136,11 +136,14 @@ inline Interval select(const Interval &a, const Interval &b, const Interval &thr
 	return Interval(min(a.min, b.min), max(a.max, b.max));
 }

-template <typename T>
-inline T skew3(T x) {
+inline float skew3(float x) {
 	return (x * x * x + x) * 0.5f;
 }

+inline Interval skew3(Interval x) {
+	return (cubed(x) + x) * 0.5f;
+}
+
 // This is mostly useful for generating planets from an existing heightmap
 inline float sdf_sphere_heightmap(float x, float y, float z, float r, float m, const Image &im,
 		float min_h, float max_h, float norm_x, float norm_y) {
@ -328,7 +331,12 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 		t.range_analysis_func = [](RangeAnalysisContext &ctx) {
 			const Interval a = ctx.get_input(0);
 			const Interval b = ctx.get_input(1);
-			ctx.set_output(0, a * b);
+			if (ctx.get_input_address(0) == ctx.get_input_address(1)) {
+				// The two operands have the same source, we can optimize to a square function
+				ctx.set_output(0, squared(a));
+			} else {
+				ctx.set_output(0, a * b);
+			}
 		};
 	}
 	{
@ -507,7 +515,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 			const Interval y1 = ctx.get_input(3);
 			const Interval dx = x1 - x0;
 			const Interval dy = y1 - y0;
-			const Interval r = sqrt(dx * dx + dy * dy);
+			const Interval r = sqrt(squared(dx) + squared(dy));
 			ctx.set_output(0, r);
 		};
 	}
@ -547,7 +555,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 			const Interval dx = x1 - x0;
 			const Interval dy = y1 - y0;
 			const Interval dz = z1 - z0;
-			Interval r = sqrt(dx * dx + dy * dy + dz * dz);
+			Interval r = get_length(dx, dy, dz);
 			ctx.set_output(0, r);
 		};
 	}
@ -1022,15 +1030,26 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 			const VoxelGraphRuntime::Buffer &b = ctx.get_input(1);
 			VoxelGraphRuntime::Buffer &out = ctx.get_output(0);
 			const Params params = ctx.get_params<Params>();
-			for (uint32_t i = 0; i < out.size; ++i) {
-				out.data[i] = sdf_smooth_union(a.data[i], b.data[i], params.smoothness);
+			if (params.smoothness > 0.0001f) {
+				for (uint32_t i = 0; i < out.size; ++i) {
+					out.data[i] = sdf_smooth_union(a.data[i], b.data[i], params.smoothness);
+				}
+			} else {
+				// Fallback on hard-union, smooth union does not support zero smoothness
+				for (uint32_t i = 0; i < out.size; ++i) {
+					out.data[i] = sdf_union(a.data[i], b.data[i]);
+				}
 			}
 		};
 		t.range_analysis_func = [](RangeAnalysisContext &ctx) {
 			const Interval a = ctx.get_input(0);
 			const Interval b = ctx.get_input(1);
 			const Params params = ctx.get_params<Params>();
-			ctx.set_output(0, sdf_smooth_union(a, b, Interval::from_single_value(params.smoothness)));
+			if (params.smoothness > 0.0001f) {
+				ctx.set_output(0, sdf_smooth_union(a, b, params.smoothness));
+			} else {
+				ctx.set_output(0, sdf_union(a, b));
+			}
 		};
 	}
 	{
@ -1055,15 +1074,26 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 			const VoxelGraphRuntime::Buffer &b = ctx.get_input(1);
 			VoxelGraphRuntime::Buffer &out = ctx.get_output(0);
 			const Params params = ctx.get_params<Params>();
-			for (uint32_t i = 0; i < out.size; ++i) {
-				out.data[i] = sdf_smooth_subtract(a.data[i], b.data[i], params.smoothness);
+			if (params.smoothness > 0.0001f) {
+				for (uint32_t i = 0; i < out.size; ++i) {
+					out.data[i] = sdf_smooth_subtract(a.data[i], b.data[i], params.smoothness);
+				}
+			} else {
+				// Fallback on hard-subtract, smooth subtract does not support zero smoothness
+				for (uint32_t i = 0; i < out.size; ++i) {
+					out.data[i] = sdf_subtract(a.data[i], b.data[i]);
+				}
 			}
 		};
 		t.range_analysis_func = [](RangeAnalysisContext &ctx) {
 			const Interval a = ctx.get_input(0);
 			const Interval b = ctx.get_input(1);
 			const Params params = ctx.get_params<Params>();
-			ctx.set_output(0, sdf_smooth_subtract(a, b, Interval::from_single_value(params.smoothness)));
+			if (params.smoothness > 0.0001f) {
+				ctx.set_output(0, sdf_smooth_subtract(a, b, params.smoothness));
+			} else {
+				ctx.set_output(0, sdf_subtract(a, b));
+			}
 		};
 	}
 	{
@ -1222,7 +1252,7 @@ VoxelGraphNodeDB::VoxelGraphNodeDB() {
 			const Interval x = ctx.get_input(0);
 			const Interval y = ctx.get_input(1);
 			const Interval z = ctx.get_input(2);
-			const Interval len = sqrt(x * x + y * y + z * z);
+			const Interval len = get_length(x, y, z);
 			const Interval nx = x / len;
 			const Interval ny = y / len;
 			const Interval nz = z / len;
--- a/generators/graph/voxel_graph_runtime.cpp
+++ b/generators/graph/voxel_graph_runtime.cpp
@ -562,6 +562,7 @@ void VoxelGraphRuntime::generate_set(State &state,
 			pc += params_size;
 		}

+		// TODO May be replaced by execution mapping?
 		// Skip node if all its outputs are constant
 		bool all_outputs_constant = true;
 		for (uint32_t i = 0; i < outputs.size(); ++i) {
--- a/generators/graph/voxel_graph_runtime.h
+++ b/generators/graph/voxel_graph_runtime.h
@ -175,10 +175,11 @@ public:
 			return *reinterpret_cast<const T *>(_params.data());
 		}

-	protected:
 		inline uint32_t get_input_address(uint32_t i) const {
 			return _inputs[i];
 		}
+
+	protected:
 		inline uint32_t get_output_address(uint32_t i) const {
 			return _outputs[i];
 		}
--- a/util/math/funcs.h
+++ b/util/math/funcs.h
@ -54,8 +54,29 @@ inline T max(const T a, const T b, const T c, const T d) {
 	return max(max(a, b), max(c, d));
 }

+template <typename T>
+inline T min(const T a, const T b, const T c, const T d, const T e, const T f) {
+	return min(min(min(a, b), min(c, d)), min(e, f));
+}
+
+template <typename T>
+inline T max(const T a, const T b, const T c, const T d, const T e, const T f) {
+	return max(max(max(a, b), max(c, d)), max(e, f));
+}
+
+template <typename T>
+inline T min(const T a, const T b, const T c, const T d, const T e, const T f, const T g, const T h) {
+	return min(min(a, b, c, d), min(e, f, g, h));
+}
+
+template <typename T>
+inline T max(const T a, const T b, const T c, const T d, const T e, const T f, const T g, const T h) {
+	return max(max(a, b, c, d), max(e, f, g, h));
+}
+
 template <typename T>
 inline T clamp(const T x, const T min_value, const T max_value) {
+	// TODO Clang can optimize a min/max implementation. Worth changing to that?
 	if (x < min_value) {
 		return min_value;
 	}
--- a/util/math/interval.h
+++ b/util/math/interval.h
@ -96,6 +96,8 @@ struct Interval {
 	}

 	inline Interval operator*(const Interval &other) const {
+		// Note, if the two operands have the same source (i.e you are doing x^2), this may lead to suboptimal results.
+		// You may then prefer using a more dedicated function.
 		const float a = min * other.min;
 		const float b = min * other.max;
 		const float c = max * other.min;
@ -113,6 +115,7 @@ struct Interval {

 	inline Interval operator/(const Interval &other) const {
 		if (other.is_single_value() && other.min == 0.f) {
+			// Division by zero. In Voxel graph, we return 0.
 			return Interval::from_single_value(0.f);
 		}
 		if (other.contains(0.f)) {
@ -165,10 +168,6 @@ inline Interval sqrt(const Interval &i) {
 	};
 }

-inline Interval squared(const Interval a) {
-	return a * a;
-}
-
 inline Interval abs(const Interval &i) {
 	return Interval{
 		i.contains(0) ? 0 : ::min(Math::abs(i.min), Math::abs(i.max)),
@ -198,9 +197,20 @@ inline Interval clamp(const Interval &i, const Interval &p_min, const Interval &
 inline Interval lerp(const Interval &a, const Interval &b, const Interval &t) {
 	if (t.is_single_value()) {
 		return Interval(Math::lerp(a.min, b.min, t.min), Math::lerp(a.max, b.max, t.min));
-	} else {
-		return a + t * (b - a);
 	}
+
+	const float v0 = a.min + t.min * (b.min - a.min);
+	const float v1 = a.max + t.min * (b.min - a.max);
+	const float v2 = a.min + t.max * (b.min - a.min);
+	const float v3 = a.max + t.max * (b.min - a.max);
+	const float v4 = a.min + t.min * (b.max - a.min);
+	const float v5 = a.max + t.min * (b.max - a.max);
+	const float v6 = a.min + t.max * (b.max - a.min);
+	const float v7 = a.max + t.max * (b.max - a.max);
+
+	return Interval(
+			min(v0, v1, v2, v3, v4, v5, v6, v7),
+			max(v0, v1, v2, v3, v4, v5, v6, v7));
 }

 inline Interval sin(const Interval &i) {
@ -337,12 +347,72 @@ inline Interval smoothstep(float p_from, float p_to, Interval p_weight) {
 	}
 }

+// Prefer this over x*x, this will provide a more optimal result
+inline Interval squared(const Interval &x) {
+	if (x.min < 0.f && x.max > 0.f) {
+		// The interval includes 0
+		return Interval{ 0.f, max(x.min * x.min, x.max * x.max) };
+	}
+	// The interval is only on one side of the parabola
+	if (x.max <= 0.f) {
+		// Negative side: monotonic descending
+		return Interval{ x.max * x.max, x.min * x.min };
+	} else {
+		// Positive side: monotonic ascending
+		return Interval{ x.min * x.min, x.max * x.max };
+	}
+}
+
+// Prefer this instead of doing polynomials with a single interval, this will provide a more optimal result
+inline Interval polynomial_second_degree(const Interval x, float a, float b, float c) {
+	// a*x*x + b*x + c
+
+	if (a == 0.f) {
+		if (b == 0.f) {
+			return Interval::from_single_value(c);
+		} else {
+			return b * x + c;
+		}
+	}
+
+	const float parabola_x = -b / (2.f * a);
+
+	const float y0 = a * x.min * x.min + b * x.min + c;
+	const float y1 = a * x.max * x.max + b * x.max + c;
+
+	if (x.min < parabola_x && x.max > parabola_x) {
+		// The interval includes the tip
+		const float parabola_y = a * parabola_x * parabola_x + b * parabola_x + c;
+		if (a < 0) {
+			return Interval(min(y0, y1), parabola_y);
+		} else {
+			return Interval(parabola_y, max(y0, y1));
+		}
+	}
+	// The interval is only on one side of the parabola
+	if ((a >= 0 && x.min >= parabola_x) || (a < 0 && x.max < parabola_x)) {
+		// Monotonic increasing
+		return Interval(y0, y1);
+	} else {
+		// Monotonic decreasing
+		return Interval(y1, y0);
+	}
+}
+
+// Prefer this over x*x*x, this will provide a more optimal result
+inline Interval cubed(const Interval &x) {
+	// x^3 is monotonic ascending
+	const float minv = x.min * x.min * x.min;
+	const float maxv = x.max * x.max * x.max;
+	return Interval{ minv, maxv };
+}
+
 inline Interval get_length(const Interval &x, const Interval &y) {
-	return sqrt(x * x + y * y);
+	return sqrt(squared(x) + squared(y));
 }

 inline Interval get_length(const Interval &x, const Interval &y, const Interval &z) {
-	return sqrt(x * x + y * y + z * z);
+	return sqrt(squared(x) + squared(y) + squared(z));
 }

 #endif // INTERVAL_H
--- a/util/math/sdf.h
+++ b/util/math/sdf.h
@ -6,7 +6,6 @@

 // Signed-distance-field functions.
 // For more, see https://www.iquilezles.org/www/articles/distfunctions/distfunctions.htm
-// TODO Move these to VoxelMath once we have a proper namespace, so they can be used in VoxelTool too

 inline float sdf_box(const Vector3 pos, const Vector3 extents) {
 	Vector3 d = pos.abs() - extents;
@ -35,27 +34,24 @@ inline Interval sdf_torus(
 	return get_length(qx, y) - r1;
 }

+inline float sdf_union(float a, float b) {
+	return min(a, b);
+}
+
+// Subtracts SDF a from SDF b
+inline float sdf_subtract(float a, float b) {
+	return max(-a, b);
+}
+
 inline float sdf_smooth_union(float a, float b, float s) {
 	float h = clamp(0.5f + 0.5f * (b - a) / s, 0.0f, 1.0f);
 	return Math::lerp(b, a, h) - s * h * (1.0f - h);
 }

-inline Interval sdf_smooth_union(Interval a, Interval b, Interval s) {
-	Interval h = clamp(Interval::from_single_value(0.5f) + Interval::from_single_value(0.5f) * (b - a) / s,
-			Interval::from_single_value(0.0f), Interval::from_single_value(1.0f));
-	return lerp(b, a, h) - s * h * (Interval::from_single_value(1.0f) - h);
-}
-
-// Inverted a and b because it does b - a
+// Inverted a and b because it subtracts SDF b from SDF a
 inline float sdf_smooth_subtract(float b, float a, float s) {
 	float h = clamp(0.5f - 0.5f * (b + a) / s, 0.0f, 1.0f);
 	return Math::lerp(b, -a, h) + s * h * (1.0f - h);
 }

-inline Interval sdf_smooth_subtract(Interval b, Interval a, Interval s) {
-	Interval h = clamp(Interval::from_single_value(0.5f) - Interval::from_single_value(0.5f) * (b + a) / s,
-			Interval::from_single_value(0.0f), Interval::from_single_value(1.0f));
-	return lerp(b, -a, h) + s * h * (Interval::from_single_value(1.0f) - h);
-}
-
 #endif // VOXEL_MATH_SDF_H