2656bf0a90
RGB to YUV conversion was previously baked into every scale shader, but this work has been moved to the YUV packing shaders. The scale shaders now write RGBA instead. In the case where base and output resolutions are identical, the render texture is forwarded directly to the YUV pack step, skipping an entire fullscreen pass. Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12 1920x1080, Before: RGBA -> UYVX: ~321 us UYVX -> Y: ~480 us UYVX -> UV: ~127 us 1920x1080, After: [forward render texture] RGBA -> Y: ~487 us RGBA -> UV: ~131 us 1920x1080 -> 1280x720, Before: RGBA -> UYVX: ~268 us UYVX -> Y: ~209 us UYVX -> UV: ~57 us 1920x1080 -> 1280x720, After: RGBA -> RGBA (rescale): ~268 us RGBA -> Y: ~210 us RGBA -> UV: ~58 us
53 lines
958 B
Plaintext
53 lines
958 B
Plaintext
// Matrix applied to every vertex position in VSDefault.
uniform float4x4 ViewProj;

// Source texture sampled by both pixel shaders in this file.
uniform texture2d image;
|
|
|
|
// Sampler used for every read of `image`: linear filtering, with
// texture coordinates clamped on both the U and V axes.
sampler_state def_sampler {
	AddressU  = Clamp;
	AddressV  = Clamp;
	Filter    = Linear;
};
|
|
|
|
// Vertex record used both as the vertex shader's input and output and
// as the pixel shaders' input.
struct VertInOut {
	float4 pos : POSITION;  // vertex position
	float2 uv  : TEXCOORD0; // texture coordinate used to sample `image`
};
|
|
|
|
// Vertex shader shared by every technique in this file.
//
// Multiplies the incoming position by ViewProj and forwards the texture
// coordinate unchanged. Only the xyz components of the incoming position
// are used; w is always replaced with 1.0 before the multiply.
VertInOut VSDefault(VertInOut vert_in)
{
	float4 homogeneous_pos = float4(vert_in.pos.xyz, 1.0);

	VertInOut result;
	result.uv = vert_in.uv;
	result.pos = mul(homogeneous_pos, ViewProj);
	return result;
}
|
|
|
|
// Pixel shader that outputs the sampled texel of `image` as-is, with no
// further processing of its colour or alpha.
float4 PSDrawBare(VertInOut vert_in) : TARGET
{
	float4 texel = image.Sample(def_sampler, vert_in.uv);
	return texel;
}
|
|
|
|
// Pixel shader that samples `image` and divides the colour channels by
// the sampled alpha; the alpha channel itself is passed through.
//
// When alpha is not greater than zero the colour is multiplied by 0.0
// instead, which avoids a division by zero and yields black.
// NOTE(review): presumably this undoes premultiplied alpha -- confirm
// against the callers that select the DrawAlphaDivide technique.
float4 PSDrawAlphaDivide(VertInOut vert_in) : TARGET
{
	float4 texel = image.Sample(def_sampler, vert_in.uv);

	// Default covers the alpha <= 0 case; overwritten only when the
	// reciprocal is well defined.
	float inv_alpha = 0.0;
	if (texel.a > 0.0)
		inv_alpha = 1.0 / texel.a;

	return float4(texel.rgb * inv_alpha, texel.a);
}
|
|
|
|
// Single-pass technique: VSDefault for vertices, PSDrawBare for pixels
// (plain textured draw with no alpha adjustment).
technique Draw
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSDrawBare(vert_in);
	}
}
|
|
|
|
// Single-pass technique: VSDefault for vertices, PSDrawAlphaDivide for
// pixels (colour channels divided by alpha where alpha > 0).
technique DrawAlphaDivide
{
	pass
	{
		vertex_shader = VSDefault(vert_in);
		pixel_shader  = PSDrawAlphaDivide(vert_in);
	}
}
|