Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)
Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
This commit is contained in:
parent
e66d7fd516
commit
e621d5b02c
@ -37,15 +37,6 @@
|
|||||||
#define NOISE_MAGIC_Z 52591
|
#define NOISE_MAGIC_Z 52591
|
||||||
#define NOISE_MAGIC_SEED 1013
|
#define NOISE_MAGIC_SEED 1013
|
||||||
|
|
||||||
typedef float (*Interp2dFxn)(
|
|
||||||
float v00, float v10, float v01, float v11,
|
|
||||||
float x, float y);
|
|
||||||
|
|
||||||
typedef float (*Interp3dFxn)(
|
|
||||||
float v000, float v100, float v010, float v110,
|
|
||||||
float v001, float v101, float v011, float v111,
|
|
||||||
float x, float y, float z);
|
|
||||||
|
|
||||||
FlagDesc flagdesc_noiseparams[] = {
|
FlagDesc flagdesc_noiseparams[] = {
|
||||||
{"defaults", NOISE_FLAG_DEFAULTS},
|
{"defaults", NOISE_FLAG_DEFAULTS},
|
||||||
{"eased", NOISE_FLAG_EASED},
|
{"eased", NOISE_FLAG_EASED},
|
||||||
@ -197,47 +188,34 @@ inline float linearInterpolation(float v0, float v1, float t)
|
|||||||
inline float biLinearInterpolation(
|
inline float biLinearInterpolation(
|
||||||
float v00, float v10,
|
float v00, float v10,
|
||||||
float v01, float v11,
|
float v01, float v11,
|
||||||
float x, float y)
|
float x, float y,
|
||||||
{
|
bool eased)
|
||||||
float tx = easeCurve(x);
|
|
||||||
float ty = easeCurve(y);
|
|
||||||
float u = linearInterpolation(v00, v10, tx);
|
|
||||||
float v = linearInterpolation(v01, v11, tx);
|
|
||||||
return linearInterpolation(u, v, ty);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
inline float biLinearInterpolationNoEase(
|
|
||||||
float v00, float v10,
|
|
||||||
float v01, float v11,
|
|
||||||
float x, float y)
|
|
||||||
{
|
{
|
||||||
|
// Inlining will optimize this branch out when possible
|
||||||
|
if (eased) {
|
||||||
|
x = easeCurve(x);
|
||||||
|
y = easeCurve(y);
|
||||||
|
}
|
||||||
float u = linearInterpolation(v00, v10, x);
|
float u = linearInterpolation(v00, v10, x);
|
||||||
float v = linearInterpolation(v01, v11, x);
|
float v = linearInterpolation(v01, v11, x);
|
||||||
return linearInterpolation(u, v, y);
|
return linearInterpolation(u, v, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float triLinearInterpolation(
|
inline float triLinearInterpolation(
|
||||||
float v000, float v100, float v010, float v110,
|
float v000, float v100, float v010, float v110,
|
||||||
float v001, float v101, float v011, float v111,
|
float v001, float v101, float v011, float v111,
|
||||||
float x, float y, float z)
|
float x, float y, float z,
|
||||||
|
bool eased)
|
||||||
{
|
{
|
||||||
float tx = easeCurve(x);
|
// Inlining will optimize this branch out when possible
|
||||||
float ty = easeCurve(y);
|
if (eased) {
|
||||||
float tz = easeCurve(z);
|
x = easeCurve(x);
|
||||||
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty);
|
y = easeCurve(y);
|
||||||
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty);
|
z = easeCurve(z);
|
||||||
return linearInterpolation(u, v, tz);
|
}
|
||||||
}
|
float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
|
||||||
|
float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
|
||||||
float triLinearInterpolationNoEase(
|
|
||||||
float v000, float v100, float v010, float v110,
|
|
||||||
float v001, float v101, float v011, float v111,
|
|
||||||
float x, float y, float z)
|
|
||||||
{
|
|
||||||
float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
|
|
||||||
float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
|
|
||||||
return linearInterpolation(u, v, z);
|
return linearInterpolation(u, v, z);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -255,10 +233,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
|
|||||||
float v01 = noise2d(x0, y0+1, seed);
|
float v01 = noise2d(x0, y0+1, seed);
|
||||||
float v11 = noise2d(x0+1, y0+1, seed);
|
float v11 = noise2d(x0+1, y0+1, seed);
|
||||||
// Interpolate
|
// Interpolate
|
||||||
if (eased)
|
return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
|
||||||
return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
|
|
||||||
|
|
||||||
return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -282,17 +257,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
|
|||||||
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
|
float v011 = noise3d(x0, y0 + 1, z0 + 1, seed);
|
||||||
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
|
float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
|
||||||
// Interpolate
|
// Interpolate
|
||||||
if (eased) {
|
return triLinearInterpolation(
|
||||||
return triLinearInterpolation(
|
|
||||||
v000, v100, v010, v110,
|
|
||||||
v001, v101, v011, v111,
|
|
||||||
xl, yl, zl);
|
|
||||||
}
|
|
||||||
|
|
||||||
return triLinearInterpolationNoEase(
|
|
||||||
v000, v100, v010, v110,
|
v000, v100, v010, v110,
|
||||||
v001, v101, v011, v111,
|
v001, v101, v011, v111,
|
||||||
xl, yl, zl);
|
xl, yl, zl,
|
||||||
|
eased);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -562,9 +531,6 @@ void Noise::gradientMap2D(
|
|||||||
s32 x0, y0;
|
s32 x0, y0;
|
||||||
|
|
||||||
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
|
bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
|
||||||
Interp2dFxn interpolate = eased ?
|
|
||||||
biLinearInterpolation : biLinearInterpolationNoEase;
|
|
||||||
|
|
||||||
x0 = std::floor(x);
|
x0 = std::floor(x);
|
||||||
y0 = std::floor(y);
|
y0 = std::floor(y);
|
||||||
u = x - (float)x0;
|
u = x - (float)x0;
|
||||||
@ -591,7 +557,8 @@ void Noise::gradientMap2D(
|
|||||||
u = orig_u;
|
u = orig_u;
|
||||||
noisex = 0;
|
noisex = 0;
|
||||||
for (i = 0; i != sx; i++) {
|
for (i = 0; i != sx; i++) {
|
||||||
gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v);
|
gradient_buf[index++] =
|
||||||
|
biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
|
||||||
|
|
||||||
u += step_x;
|
u += step_x;
|
||||||
if (u >= 1.0) {
|
if (u >= 1.0) {
|
||||||
@ -627,8 +594,7 @@ void Noise::gradientMap3D(
|
|||||||
u32 nlx, nly, nlz;
|
u32 nlx, nly, nlz;
|
||||||
s32 x0, y0, z0;
|
s32 x0, y0, z0;
|
||||||
|
|
||||||
Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ?
|
bool eased = np.flags & NOISE_FLAG_EASED;
|
||||||
triLinearInterpolation : triLinearInterpolationNoEase;
|
|
||||||
|
|
||||||
x0 = std::floor(x);
|
x0 = std::floor(x);
|
||||||
y0 = std::floor(y);
|
y0 = std::floor(y);
|
||||||
@ -669,10 +635,11 @@ void Noise::gradientMap3D(
|
|||||||
u = orig_u;
|
u = orig_u;
|
||||||
noisex = 0;
|
noisex = 0;
|
||||||
for (i = 0; i != sx; i++) {
|
for (i = 0; i != sx; i++) {
|
||||||
gradient_buf[index++] = interpolate(
|
gradient_buf[index++] = triLinearInterpolation(
|
||||||
v000, v100, v010, v110,
|
v000, v100, v010, v110,
|
||||||
v001, v101, v011, v111,
|
v001, v101, v011, v111,
|
||||||
u, v, w);
|
u, v, w,
|
||||||
|
eased);
|
||||||
|
|
||||||
u += step_x;
|
u += step_x;
|
||||||
if (u >= 1.0) {
|
if (u >= 1.0) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user