1
0

Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)

Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
This commit is contained in:
paradust7 2022-06-11 11:01:30 -07:00 committed by luk3yx
parent e66d7fd516
commit e621d5b02c

View File

@ -37,15 +37,6 @@
#define NOISE_MAGIC_Z 52591 #define NOISE_MAGIC_Z 52591
#define NOISE_MAGIC_SEED 1013 #define NOISE_MAGIC_SEED 1013
typedef float (*Interp2dFxn)(
float v00, float v10, float v01, float v11,
float x, float y);
typedef float (*Interp3dFxn)(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z);
FlagDesc flagdesc_noiseparams[] = { FlagDesc flagdesc_noiseparams[] = {
{"defaults", NOISE_FLAG_DEFAULTS}, {"defaults", NOISE_FLAG_DEFAULTS},
{"eased", NOISE_FLAG_EASED}, {"eased", NOISE_FLAG_EASED},
@ -197,47 +188,34 @@ inline float linearInterpolation(float v0, float v1, float t)
inline float biLinearInterpolation( inline float biLinearInterpolation(
float v00, float v10, float v00, float v10,
float v01, float v11, float v01, float v11,
float x, float y) float x, float y,
{ bool eased)
float tx = easeCurve(x);
float ty = easeCurve(y);
float u = linearInterpolation(v00, v10, tx);
float v = linearInterpolation(v01, v11, tx);
return linearInterpolation(u, v, ty);
}
inline float biLinearInterpolationNoEase(
float v00, float v10,
float v01, float v11,
float x, float y)
{ {
// Inlining will optimize this branch out when possible
if (eased) {
x = easeCurve(x);
y = easeCurve(y);
}
float u = linearInterpolation(v00, v10, x); float u = linearInterpolation(v00, v10, x);
float v = linearInterpolation(v01, v11, x); float v = linearInterpolation(v01, v11, x);
return linearInterpolation(u, v, y); return linearInterpolation(u, v, y);
} }
float triLinearInterpolation( inline float triLinearInterpolation(
float v000, float v100, float v010, float v110, float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111, float v001, float v101, float v011, float v111,
float x, float y, float z) float x, float y, float z,
bool eased)
{ {
float tx = easeCurve(x); // Inlining will optimize this branch out when possible
float ty = easeCurve(y); if (eased) {
float tz = easeCurve(z); x = easeCurve(x);
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty); y = easeCurve(y);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty); z = easeCurve(z);
return linearInterpolation(u, v, tz); }
} float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
float triLinearInterpolationNoEase(
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111,
float x, float y, float z)
{
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
return linearInterpolation(u, v, z); return linearInterpolation(u, v, z);
} }
@ -255,10 +233,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
float v01 = noise2d(x0, y0+1, seed); float v01 = noise2d(x0, y0+1, seed);
float v11 = noise2d(x0+1, y0+1, seed); float v11 = noise2d(x0+1, y0+1, seed);
// Interpolate // Interpolate
if (eased) return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
} }
@ -282,17 +257,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed); float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed); float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
// Interpolate // Interpolate
if (eased) { return triLinearInterpolation(
return triLinearInterpolation(
v000, v100, v010, v110,
v001, v101, v011, v111,
xl, yl, zl);
}
return triLinearInterpolationNoEase(
v000, v100, v010, v110, v000, v100, v010, v110,
v001, v101, v011, v111, v001, v101, v011, v111,
xl, yl, zl); xl, yl, zl,
eased);
} }
@ -562,9 +531,6 @@ void Noise::gradientMap2D(
s32 x0, y0; s32 x0, y0;
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED); bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
Interp2dFxn interpolate = eased ?
biLinearInterpolation : biLinearInterpolationNoEase;
x0 = std::floor(x); x0 = std::floor(x);
y0 = std::floor(y); y0 = std::floor(y);
u = x - (float)x0; u = x - (float)x0;
@ -591,7 +557,8 @@ void Noise::gradientMap2D(
u = orig_u; u = orig_u;
noisex = 0; noisex = 0;
for (i = 0; i != sx; i++) { for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v); gradient_buf[index++] =
biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
u += step_x; u += step_x;
if (u >= 1.0) { if (u >= 1.0) {
@ -627,8 +594,7 @@ void Noise::gradientMap3D(
u32 nlx, nly, nlz; u32 nlx, nly, nlz;
s32 x0, y0, z0; s32 x0, y0, z0;
Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ? bool eased = np.flags & NOISE_FLAG_EASED;
triLinearInterpolation : triLinearInterpolationNoEase;
x0 = std::floor(x); x0 = std::floor(x);
y0 = std::floor(y); y0 = std::floor(y);
@ -669,10 +635,11 @@ void Noise::gradientMap3D(
u = orig_u; u = orig_u;
noisex = 0; noisex = 0;
for (i = 0; i != sx; i++) { for (i = 0; i != sx; i++) {
gradient_buf[index++] = interpolate( gradient_buf[index++] = triLinearInterpolation(
v000, v100, v010, v110, v000, v100, v010, v110,
v001, v101, v011, v111, v001, v101, v011, v111,
u, v, w); u, v, w,
eased);
u += step_x; u += step_x;
if (u >= 1.0) { if (u >= 1.0) {