Switch the for loops to do/while because the loop condition is always true on the first iteration. Replace int math with float math to play nicely with more GPUs. Add variables imagesize/targetsize to avoid redundant reciprocals. Intel GPA results: 1166 -> 836 us
72 lines
1.8 KiB
Plaintext
72 lines
1.8 KiB
Plaintext
// Matrix the vertex shader multiplies each input position by.
uniform float4x4 ViewProj;

// The pixel shader uses 1.0 / base_dimension_i as the image size in texels,
// so this is presumably the per-axis reciprocal of the source image
// dimensions -- confirm against the code that sets it.
uniform float2 base_dimension_i;

// Source texture; read texel-by-texel with Load() in the pixel shader.
uniform texture2d image;
|
|
|
|
// Vertex layout shared by the vertex shader input, the vertex shader output
// and the pixel shader input.
struct VertInOut {
    float4 pos : POSITION;  // vertex position
    float2 uv  : TEXCOORD0; // texture coordinate
};
|
|
|
|
// Vertex shader: transforms the position by ViewProj and forwards the
// texture coordinate unchanged.
//
// vert_in : incoming vertex (position + uv).
// Returns : vertex with transformed position and the same uv.
VertInOut VSDefault(VertInOut vert_in)
{
    // Force w to 1.0 before the transform; only xyz of the input is used.
    float4 position = float4(vert_in.pos.xyz, 1.0);

    VertInOut result;
    result.pos = mul(position, ViewProj);
    result.uv = vert_in.uv;
    return result;
}
|
|
|
|
// Area-weighted resampling pixel shader.
//
// Treats the output pixel as a 1x1 rectangle in target-pixel space, visits
// every source texel that rectangle touches, and accumulates each texel's
// color weighted by the area of its overlap with the rectangle.
//
// vert_in.uv : interpolated texture coordinate at the pixel center.
// Returns    : sum over visited texels of (overlap area * texel color).
float4 PSDrawAreaRGBA(VertInOut vert_in) : TARGET
{
    float4 totalcolor = float4(0.0, 0.0, 0.0, 0.0);

    float2 uv = vert_in.uv;
    // Change in uv per output pixel along each screen axis.
    float2 uvdelta = float2(ddx(uv.x), ddy(uv.y));

    // Handle potential OpenGL flip.
    uvdelta.y = abs(uvdelta.y);

    // uv-space extent of this output pixel around its center.
    float2 uvhalfdelta = 0.5 * uvdelta;
    float2 uvmin = uv - uvhalfdelta;
    float2 uvmax = uv + uvhalfdelta;

    // Half-open texel index range [begin, end) covered by the pixel.
    // Using ceil() with a strict '<' (rather than floor() with '<=') skips
    // the texel that merely touches the far edge when the footprint ends
    // exactly on a texel boundary: its overlap is zero, and at the
    // right/bottom image edge its index would lie outside the texture.
    float2 imagesize = 1.0 / base_dimension_i;
    float2 loadindexbegin = floor(uvmin * imagesize);
    float2 loadindexend = ceil(uvmax * imagesize);

    // Work in target-pixel units: the pixel spans targetpos +/- 0.5 and one
    // source texel spans 'scale' target pixels.
    float2 targetsize = 1.0 / uvdelta;
    float2 targetpos = uv * targetsize;
    float2 targetposmin = targetpos - 0.5;
    float2 targetposmax = targetpos + 0.5;
    float2 scale = base_dimension_i * targetsize;

    // do/while on purpose: the first row and column always overlap the pixel.
    float loadindexy = loadindexbegin.y;
    do {
        // The vertical overlap depends only on the row, so compute it once
        // per row instead of once per texel.
        float rowmin = loadindexy * scale.y;
        float rowmax = rowmin + scale.y;
        float height = min(rowmax, targetposmax.y) - max(rowmin, targetposmin.y);

        float loadindexx = loadindexbegin.x;
        do {
            float colmin = loadindexx * scale.x;
            float colmax = colmin + scale.x;
            float width = min(colmax, targetposmax.x) - max(colmin, targetposmin.x);

            float2 loadindex = float2(loadindexx, loadindexy);
            float4 texel = image.Load(int3(loadindex, 0));
            totalcolor += (width * height) * texel;

            loadindexx += 1.0;
        } while (loadindexx < loadindexend.x);

        loadindexy += 1.0;
    } while (loadindexy < loadindexend.y);

    return totalcolor;
}
|
|
|
|
// Single-pass technique pairing the vertex shader with the area-weighted
// pixel shader.
technique Draw
{
    pass
    {
        vertex_shader = VSDefault(vert_in);
        pixel_shader = PSDrawAreaRGBA(vert_in);
    }
}
|