Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)
Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
This commit is contained in:
parent
e7d4ec6834
commit
7ffc0268df
@ -38,15 +38,6 @@
|
||||
// Unsigned magic seed prevents undefined behavior.
|
||||
#define NOISE_MAGIC_SEED 1013U
|
||||
|
||||
typedef float (*Interp2dFxn)(
|
||||
float v00, float v10, float v01, float v11,
|
||||
float x, float y);
|
||||
|
||||
typedef float (*Interp3dFxn)(
|
||||
float v000, float v100, float v010, float v110,
|
||||
float v001, float v101, float v011, float v111,
|
||||
float x, float y, float z);
|
||||
|
||||
FlagDesc flagdesc_noiseparams[] = {
|
||||
{"defaults", NOISE_FLAG_DEFAULTS},
|
||||
{"eased", NOISE_FLAG_EASED},
|
||||
@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t)
|
||||
inline float biLinearInterpolation(
|
||||
float v00, float v10,
|
||||
float v01, float v11,
|
||||
float x, float y)
|
||||
{
|
||||
float tx = easeCurve(x);
|
||||
float ty = easeCurve(y);
|
||||
float u = linearInterpolation(v00, v10, tx);
|
||||
float v = linearInterpolation(v01, v11, tx);
|
||||
return linearInterpolation(u, v, ty);
|
||||
}
|
||||
|
||||
|
||||
inline float biLinearInterpolationNoEase(
|
||||
float v00, float v10,
|
||||
float v01, float v11,
|
||||
float x, float y)
|
||||
float x, float y,
|
||||
bool eased)
|
||||
{
|
||||
// Inlining will optimize this branch out when possible
|
||||
if (eased) {
|
||||
x = easeCurve(x);
|
||||
y = easeCurve(y);
|
||||
}
|
||||
float u = linearInterpolation(v00, v10, x);
|
||||
float v = linearInterpolation(v01, v11, x);
|
||||
return linearInterpolation(u, v, y);
|
||||
}
|
||||
|
||||
|
||||
float triLinearInterpolation(
|
||||
inline float triLinearInterpolation(
|
||||
float v000, float v100, float v010, float v110,
|
||||
float v001, float v101, float v011, float v111,
|
||||
float x, float y, float z)
|
||||
float x, float y, float z,
|
||||
bool eased)
|
||||
{
|
||||
float tx = easeCurve(x);
|
||||
float ty = easeCurve(y);
|
||||
float tz = easeCurve(z);
|
||||
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty);
|
||||
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty);
|
||||
return linearInterpolation(u, v, tz);
|
||||
}
|
||||
|
||||
float triLinearInterpolationNoEase(
|
||||
float v000, float v100, float v010, float v110,
|
||||
float v001, float v101, float v011, float v111,
|
||||
float x, float y, float z)
|
||||
{
|
||||
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
|
||||
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
|
||||
// Inlining will optimize this branch out when possible
|
||||
if (eased) {
|
||||
x = easeCurve(x);
|
||||
y = easeCurve(y);
|
||||
z = easeCurve(z);
|
||||
}
|
||||
float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
|
||||
float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
|
||||
return linearInterpolation(u, v, z);
|
||||
}
|
||||
|
||||
@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
|
||||
float v01 = noise2d(x0, y0+1, seed);
|
||||
float v11 = noise2d(x0+1, y0+1, seed);
|
||||
// Interpolate
|
||||
if (eased)
|
||||
return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
|
||||
|
||||
return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
|
||||
return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
|
||||
}
|
||||
|
||||
|
||||
@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
|
||||
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
|
||||
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
|
||||
// Interpolate
|
||||
if (eased) {
|
||||
return triLinearInterpolation(
|
||||
v000, v100, v010, v110,
|
||||
v001, v101, v011, v111,
|
||||
xl, yl, zl);
|
||||
}
|
||||
|
||||
return triLinearInterpolationNoEase(
|
||||
return triLinearInterpolation(
|
||||
v000, v100, v010, v110,
|
||||
v001, v101, v011, v111,
|
||||
xl, yl, zl);
|
||||
xl, yl, zl,
|
||||
eased);
|
||||
}
|
||||
|
||||
|
||||
@ -518,9 +487,6 @@ void Noise::gradientMap2D(
|
||||
s32 x0, y0;
|
||||
|
||||
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
|
||||
Interp2dFxn interpolate = eased ?
|
||||
biLinearInterpolation : biLinearInterpolationNoEase;
|
||||
|
||||
x0 = std::floor(x);
|
||||
y0 = std::floor(y);
|
||||
u = x - (float)x0;
|
||||
@ -547,7 +513,8 @@ void Noise::gradientMap2D(
|
||||
u = orig_u;
|
||||
noisex = 0;
|
||||
for (i = 0; i != sx; i++) {
|
||||
gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v);
|
||||
gradient_buf[index++] =
|
||||
biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
|
||||
|
||||
u += step_x;
|
||||
if (u >= 1.0) {
|
||||
@ -583,8 +550,7 @@ void Noise::gradientMap3D(
|
||||
u32 nlx, nly, nlz;
|
||||
s32 x0, y0, z0;
|
||||
|
||||
Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ?
|
||||
triLinearInterpolation : triLinearInterpolationNoEase;
|
||||
bool eased = np.flags & NOISE_FLAG_EASED;
|
||||
|
||||
x0 = std::floor(x);
|
||||
y0 = std::floor(y);
|
||||
@ -625,10 +591,11 @@ void Noise::gradientMap3D(
|
||||
u = orig_u;
|
||||
noisex = 0;
|
||||
for (i = 0; i != sx; i++) {
|
||||
gradient_buf[index++] = interpolate(
|
||||
gradient_buf[index++] = triLinearInterpolation(
|
||||
v000, v100, v010, v110,
|
||||
v001, v101, v011, v111,
|
||||
u, v, w);
|
||||
u, v, w,
|
||||
eased);
|
||||
|
||||
u += step_x;
|
||||
if (u >= 1.0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user