Merge pull request #1995 from jpark37/yuv-simplify

libobs: Separate textures for YUV output, fix chroma
This commit is contained in:
Jim 2019-08-09 21:11:45 -07:00 committed by GitHub
commit 164f731320
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 443 additions and 593 deletions

View File

@ -15,25 +15,12 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
//#define DEBUGGING
uniform float u_plane_offset;
uniform float v_plane_offset;
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;
uniform int int_width;
uniform int int_input_width;
@ -65,8 +52,17 @@ struct VertTexPos {
float4 pos : POSITION;
};
/* Vertex shader output with a three-component texture coordinate.
 * VSTexPosLeft fills uuv as (u_left, u_right, v): two horizontal
 * coordinates sharing one vertical coordinate. */
struct VertTexPosWide {
float3 uuv : TEXCOORD0;
float4 pos : POSITION;
};
struct FragTex {
float2 uv : TEXCOORD0;
float2 uv : TEXCOORD0;
};
/* Pixel shader input carrying the three-component TEXCOORD0. The *_Wide
 * pixel shaders read it as two sample positions: uuv.xz and uuv.yz. */
struct FragTexWide {
float3 uuv : TEXCOORD0;
};
FragPos VSPos(uint id : VERTEXID)
@ -82,7 +78,7 @@ FragPos VSPos(uint id : VERTEXID)
return vert_out;
}
VertTexPos VSPosTex(uint id : VERTEXID)
VertTexPos VSTexPos(uint id : VERTEXID)
{
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
@ -99,225 +95,76 @@ VertTexPos VSPosTex(uint id : VERTEXID)
return vert_out;
}
/* Vertex shader that derives position and texture coordinates from the
 * vertex id alone (no vertex buffer input).
 *
 * For ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1): one triangle
 * that covers the whole [-1,1] clip-space square.
 *
 * Output uuv = (u_left, u_right, v), where u_left is u_right shifted left
 * by the width_i uniform, so the pixel shader can sample a horizontal pair.
 * The vertical coordinate is flipped unless obs_glsl_compile is set. */
VertTexPosWide VSTexPosLeft(uint id : VERTEXID)
{
/* split the id into its high bit (drives x/u) and low bit (drives y/v) */
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
float u_right = idHigh * 2.0;
float u_left = u_right - width_i;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
VertTexPosWide vert_out;
vert_out.uuv.x = u_left;
vert_out.uuv.y = u_right;
vert_out.uuv.z = v;
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
/* used to prevent internal GPU precision issues with fmod in particular */
#define PRECISION_OFFSET 0.2
/* Pixel shader that packs four converted values into each float4 output.
 *
 * A linear offset is computed from the fragment's texture coordinate
 * (row index from uv.y * input_height, plus uv.x, scaled by width and 4).
 * - Offsets below u_plane_offset: samples four texels spaced width_i apart
 *   on one row and returns their four color_vec_y results.
 * - Otherwise: samples two positions spaced 2 * width_i apart and returns
 *   (U0, V0, U1, V1) using color_vec_u and color_vec_v.
 *
 * When DEBUGGING is defined, each branch returns a constant color instead. */
float4 PSNV12(FragTex frag_in) : TARGET
{
float v_mul = floor(frag_in.uv.y * input_height);
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
/* nudge off the exact integer so the fmod/floor below stay stable */
byte_offset += PRECISION_OFFSET;
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
/* convert the linear offset back into a normalized (u, v) position */
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
/* note: lum_u is advanced in place by the += inside the constructors */
float2 sample_pos0 = float2(lum_u, lum_v);
float2 sample_pos1 = float2(lum_u += width_i, lum_v);
float2 sample_pos2 = float2(lum_u += width_i, lum_v);
float2 sample_pos3 = float2(lum_u + width_i, lum_v);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 out_val = float4(
dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
dot(color_vec_y.xyz, rgb3) + color_vec_y.w
);
return out_val;
} else {
#ifdef DEBUGGING
return float4(0.5, 0.2, 0.5, 0.2);
#endif
/* offset relative to the start of the chroma region */
float new_offset = byte_offset - u_plane_offset;
float ch_u = floor(fmod(new_offset, width)) * width_i;
float ch_v = floor(new_offset * width_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
float2 sample_pos0 = float2(ch_u, ch_v);
float2 sample_pos1 = float2(ch_u + width_i2, ch_v);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
/* interleaved output: U and V for each of the two samples */
return float4(
dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
dot(color_vec_v.xyz, rgb1) + color_vec_v.w
);
}
}
float PSNV12_Y(FragPos frag_in) : TARGET
float PS_Y(FragPos frag_in) : TARGET
{
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
return y;
}
float2 PSNV12_UV(FragTex frag_in) : TARGET
float2 PS_UV_Wide(FragTexWide frag_in) : TARGET
{
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
return float2(u, v);
}
float4 PSPlanar420(FragTex frag_in) : TARGET
float PS_U(FragTex frag_in) : TARGET
{
float v_mul = floor(frag_in.uv.y * input_height);
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos0 = float2(lum_u, lum_v);
sample_pos1 = float2(lum_u += width_i, lum_v);
sample_pos2 = float2(lum_u += width_i, lum_v);
sample_pos3 = float2(lum_u + width_i, lum_v);
} else {
#ifdef DEBUGGING
return ((byte_offset < v_plane_offset) ?
float4(0.5, 0.5, 0.5, 0.5) :
float4(0.2, 0.2, 0.2, 0.2));
#endif
float new_offset = byte_offset -
((byte_offset < v_plane_offset) ?
u_plane_offset : v_plane_offset);
float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
/* set up coordinates for next chroma line, in case
* (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
* between the current and the next chroma line; do note that the next
* chroma line is two source lines below the current source line */
float ch_u_n = 0. + width_i;
float ch_v_n = ch_v + height_i * 3;
sample_pos0 = float2(ch_u, ch_v);
sample_pos1 = float2(ch_u += width_i2, ch_v);
ch_u += width_i2;
// check if ch_u overflowed the current source and chroma line
if (ch_u > 1.0) {
sample_pos2 = float2(ch_u_n, ch_v_n);
sample_pos2 = float2(ch_u_n + width_i2, ch_v_n);
} else {
sample_pos2 = float2(ch_u, ch_v);
sample_pos3 = float2(ch_u + width_i2, ch_v);
}
}
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 color_vec;
if (byte_offset < u_plane_offset)
color_vec = color_vec_y;
else if (byte_offset < v_plane_offset)
color_vec = color_vec_u;
else
color_vec = color_vec_v;
return float4(
dot(color_vec.xyz, rgb0) + color_vec.w,
dot(color_vec.xyz, rgb1) + color_vec.w,
dot(color_vec.xyz, rgb2) + color_vec.w,
dot(color_vec.xyz, rgb3) + color_vec.w
);
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
return u;
}
float4 PSPlanar444(FragTex frag_in) : TARGET
float PS_V(FragTex frag_in) : TARGET
{
float v_mul = floor(frag_in.uv.y * input_height);
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
return v;
}
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
/* Pixel shader returning a single U value from two horizontal samples.
 * Samples image at (uuv.x, uuv.z) and (uuv.y, uuv.z), averages the two
 * RGB results, then applies color_vec_u (dot with .xyz plus .w offset). */
float PS_U_Wide(FragTexWide frag_in) : TARGET
{
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
return u;
}
float new_byte_offset = byte_offset;
if (byte_offset >= v_plane_offset)
new_byte_offset -= v_plane_offset;
else if (byte_offset >= u_plane_offset)
new_byte_offset -= u_plane_offset;
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
float v_val = floor(new_byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
u_val += width_i * 0.5;
v_val += height_i * 0.5;
float2 sample_pos0 = float2(u_val, v_val);
float2 sample_pos1 = float2(u_val += width_i, v_val);
float2 sample_pos2 = float2(u_val += width_i, v_val);
float2 sample_pos3 = float2(u_val + width_i, v_val);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 color_vec;
if (byte_offset < u_plane_offset)
color_vec = color_vec_y;
else if (byte_offset < v_plane_offset)
color_vec = color_vec_u;
else
color_vec = color_vec_v;
return float4(
dot(color_vec.xyz, rgb0) + color_vec.w,
dot(color_vec.xyz, rgb1) + color_vec.w,
dot(color_vec.xyz, rgb2) + color_vec.w,
dot(color_vec.xyz, rgb3) + color_vec.w
);
/* Pixel shader returning a single V value from two horizontal samples.
 * Samples image at (uuv.x, uuv.z) and (uuv.y, uuv.z), averages the two
 * RGB results, then applies color_vec_v (dot with .xyz plus .w offset). */
float PS_V_Wide(FragTexWide frag_in) : TARGET
{
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
float3 rgb = (rgb_left + rgb_right) * 0.5;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
return v;
}
float GetIntOffsetColor(int offset)
@ -473,30 +320,48 @@ float4 PSBGR3_Full(FragTex frag_in) : TARGET
return float4(rgb, 1.0);
}
technique Planar420
technique Planar_Y
{
pass
{
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar420(frag_in);
vertex_shader = VSPos(id);
pixel_shader = PS_Y(frag_in);
}
}
technique Planar444
technique Planar_U
{
pass
{
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar444(frag_in);
vertex_shader = VSTexPos(id);
pixel_shader = PS_U(frag_in);
}
}
technique NV12
technique Planar_V
{
pass
{
vertex_shader = VSPosTex(id);
pixel_shader = PSNV12(frag_in);
vertex_shader = VSTexPos(id);
pixel_shader = PS_V(frag_in);
}
}
/* Pairs VSTexPosLeft with PS_U_Wide: single-channel U output computed from
 * the average of two horizontally offset samples. */
technique Planar_U_Left
{
pass
{
vertex_shader = VSTexPosLeft(id);
pixel_shader = PS_U_Wide(frag_in);
}
}
/* Pairs VSTexPosLeft with PS_V_Wide: single-channel V output computed from
 * the average of two horizontally offset samples. */
technique Planar_V_Left
{
pass
{
vertex_shader = VSTexPosLeft(id);
pixel_shader = PS_V_Wide(frag_in);
}
}
@ -505,7 +370,7 @@ technique NV12_Y
pass
{
vertex_shader = VSPos(id);
pixel_shader = PSNV12_Y(frag_in);
pixel_shader = PS_Y(frag_in);
}
}
@ -513,8 +378,8 @@ technique NV12_UV
{
pass
{
vertex_shader = VSPosTex(id);
pixel_shader = PSNV12_UV(frag_in);
vertex_shader = VSTexPosLeft(id);
pixel_shader = PS_UV_Wide(frag_in);
}
}
@ -522,7 +387,7 @@ technique UYVY_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
}
}
@ -531,7 +396,7 @@ technique YUY2_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
}
}
@ -540,7 +405,7 @@ technique YVYU_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
}
}
@ -549,7 +414,7 @@ technique I420_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPlanar420_Reverse(frag_in);
}
}
@ -558,7 +423,7 @@ technique I422_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPlanar422_Reverse(frag_in);
}
}
@ -567,7 +432,7 @@ technique I444_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSPlanar444_Reverse(frag_in);
}
}
@ -576,7 +441,7 @@ technique NV12_Reverse
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSNV12_Reverse(frag_in);
}
}
@ -585,7 +450,7 @@ technique Y800_Limited
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSY800_Limited(frag_in);
}
}
@ -594,7 +459,7 @@ technique Y800_Full
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSY800_Full(frag_in);
}
}
@ -603,7 +468,7 @@ technique RGB_Limited
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSRGB_Limited(frag_in);
}
}
@ -612,7 +477,7 @@ technique BGR3_Limited
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSBGR3_Limited(frag_in);
}
}
@ -621,7 +486,7 @@ technique BGR3_Full
{
pass
{
vertex_shader = VSPosTex(id);
vertex_shader = VSTexPos(id);
pixel_shader = PSBGR3_Full(frag_in);
}
}

View File

@ -37,6 +37,7 @@
#include "obs.h"
#define NUM_TEXTURES 2
#define NUM_CHANNELS 3
#define MICROSECOND_DEN 1000000
#define NUM_ENCODE_TEXTURES 3
#define NUM_ENCODE_TEXTURE_FRAMES_TO_WAIT 1
@ -235,11 +236,10 @@ struct obs_tex_frame {
struct obs_core_video {
graphics_t *graphics;
gs_stagesurf_t *copy_surfaces[NUM_TEXTURES];
gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
gs_texture_t *render_texture;
gs_texture_t *output_texture;
gs_texture_t *convert_texture;
gs_texture_t *convert_uv_texture;
gs_texture_t *convert_textures[NUM_CHANNELS];
bool texture_rendered;
bool textures_copied[NUM_TEXTURES];
bool texture_converted;
@ -258,7 +258,7 @@ struct obs_core_video {
gs_effect_t *bilinear_lowres_effect;
gs_effect_t *premultiplied_alpha_effect;
gs_samplerstate_t *point_sampler;
gs_stagesurf_t *mapped_surface;
gs_stagesurf_t *mapped_surfaces[NUM_CHANNELS];
int cur_texture;
long raw_active;
long gpu_encoder_active;
@ -283,11 +283,9 @@ struct obs_core_video {
bool thread_initialized;
bool gpu_conversion;
const char *conversion_tech;
uint32_t conversion_height;
uint32_t plane_offsets[3];
uint32_t plane_sizes[3];
uint32_t plane_linewidth[3];
const char *conversion_techs[NUM_CHANNELS];
bool conversion_needed;
float conversion_width_i;
uint32_t output_width;
uint32_t output_height;

View File

@ -109,9 +109,11 @@ static inline void set_render_size(uint32_t width, uint32_t height)
static inline void unmap_last_surface(struct obs_core_video *video)
{
if (video->mapped_surface) {
gs_stagesurface_unmap(video->mapped_surface);
video->mapped_surface = NULL;
for (int c = 0; c < NUM_CHANNELS; ++c) {
if (video->mapped_surfaces[c]) {
gs_stagesurface_unmap(video->mapped_surfaces[c]);
video->mapped_surfaces[c] = NULL;
}
}
}
@ -264,10 +266,24 @@ static inline gs_texture_t *render_output_texture(struct obs_core_video *video)
return target;
}
static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
static void render_convert_plane(gs_effect_t *effect, gs_texture_t *texture,
gs_texture_t *target, const char *tech_name)
{
gs_eparam_t *param = gs_effect_get_param_by_name(effect, name);
gs_effect_set_float(param, val);
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
const uint32_t width = gs_texture_get_width(target);
const uint32_t height = gs_texture_get_height(target);
gs_set_render_target(target, NULL);
set_render_size(width, height);
size_t passes = gs_technique_begin(tech);
for (size_t i = 0; i < passes; i++) {
gs_technique_begin_pass(tech, i);
gs_draw(GS_TRIS, 0, 3);
gs_technique_end_pass(tech);
}
gs_technique_end(tech);
}
static const char *render_convert_texture_name = "render_convert_texture";
@ -276,11 +292,6 @@ static void render_convert_texture(struct obs_core_video *video,
{
profile_start(render_convert_texture_name);
gs_texture_t *target = video->convert_texture;
float fwidth = (float)video->output_width;
float fheight = (float)video->output_height;
size_t passes, i;
gs_effect_t *effect = video->conversion_effect;
gs_eparam_t *color_vec_y =
gs_effect_get_param_by_name(effect, "color_vec_y");
@ -289,20 +300,7 @@ static void render_convert_texture(struct obs_core_video *video,
gs_eparam_t *color_vec_v =
gs_effect_get_param_by_name(effect, "color_vec_v");
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_technique_t *tech =
gs_effect_get_technique(effect, video->conversion_tech);
set_eparam(effect, "u_plane_offset", (float)video->plane_offsets[1]);
set_eparam(effect, "v_plane_offset", (float)video->plane_offsets[2]);
set_eparam(effect, "width", fwidth);
set_eparam(effect, "height", fheight);
set_eparam(effect, "width_i", 1.0f / fwidth);
set_eparam(effect, "height_i", 1.0f / fheight);
set_eparam(effect, "width_d2", fwidth * 0.5f);
set_eparam(effect, "height_d2", fheight * 0.5f);
set_eparam(effect, "width_d2_i", 1.0f / (fwidth * 0.5f));
set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f));
set_eparam(effect, "input_height", (float)video->conversion_height);
gs_eparam_t *width_i = gs_effect_get_param_by_name(effect, "width_i");
struct vec4 vec_y, vec_u, vec_v;
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
@ -311,23 +309,39 @@ static void render_convert_texture(struct obs_core_video *video,
video->color_matrix[2], video->color_matrix[3]);
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
video->color_matrix[10], video->color_matrix[11]);
gs_effect_set_vec4(color_vec_y, &vec_y);
gs_effect_set_vec4(color_vec_u, &vec_u);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_texture(image, texture);
gs_set_render_target(target, NULL);
set_render_size(video->output_width, video->conversion_height);
gs_enable_blending(false);
passes = gs_technique_begin(tech);
for (i = 0; i < passes; i++) {
gs_technique_begin_pass(tech, i);
gs_draw(GS_TRIS, 0, 3);
gs_technique_end_pass(tech);
if (video->convert_textures[0]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_y, &vec_y);
render_convert_plane(effect, texture,
video->convert_textures[0],
video->conversion_techs[0]);
if (video->convert_textures[1]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_u, &vec_u);
if (!video->convert_textures[2])
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_float(width_i, video->conversion_width_i);
render_convert_plane(effect, texture,
video->convert_textures[1],
video->conversion_techs[1]);
if (video->convert_textures[2]) {
gs_effect_set_texture(image, texture);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_float(width_i,
video->conversion_width_i);
render_convert_plane(
effect, texture,
video->convert_textures[2],
video->conversion_techs[2]);
}
}
}
gs_technique_end(tech);
gs_enable_blending(true);
video->texture_converted = true;
@ -335,90 +349,32 @@ static void render_convert_texture(struct obs_core_video *video,
profile_end(render_convert_texture_name);
}
/* Renders `texture` into `target` with the conversion-effect technique
 * named `tech_name`.
 *
 * Loads the Y/U/V coefficient vectors from video->color_matrix into the
 * effect, binds `texture` as the effect's "image" parameter, sets `target`
 * as the render target with a width x height render size, and draws three
 * vertices per technique pass. Blending is disabled for the draw and
 * re-enabled before returning. */
static void render_nv12(struct obs_core_video *video, gs_texture_t *texture,
gs_texture_t *target, const char *tech_name,
uint32_t width, uint32_t height)
{
gs_effect_t *effect = video->conversion_effect;
gs_eparam_t *color_vec_y =
gs_effect_get_param_by_name(effect, "color_vec_y");
gs_eparam_t *color_vec_u =
gs_effect_get_param_by_name(effect, "color_vec_u");
gs_eparam_t *color_vec_v =
gs_effect_get_param_by_name(effect, "color_vec_v");
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
size_t passes, i;
struct vec4 vec_y, vec_u, vec_v;
/* color_matrix entries 4..7 feed Y, 0..3 feed U, 8..11 feed V */
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
video->color_matrix[6], video->color_matrix[7]);
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
video->color_matrix[2], video->color_matrix[3]);
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
video->color_matrix[10], video->color_matrix[11]);
gs_effect_set_vec4(color_vec_y, &vec_y);
gs_effect_set_vec4(color_vec_u, &vec_u);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_texture(image, texture);
gs_set_render_target(target, NULL);
set_render_size(width, height);
gs_enable_blending(false);
passes = gs_technique_begin(tech);
for (i = 0; i < passes; i++) {
gs_technique_begin_pass(tech, i);
/* three vertices, no vertex buffer bound here */
gs_draw(GS_TRIS, 0, 3);
gs_technique_end_pass(tech);
}
gs_technique_end(tech);
gs_enable_blending(true);
}
static const char *render_convert_nv12_name = "render_convert_texture_nv12";
/* Converts `texture` to NV12 in two render_nv12 draws: the "NV12_Y"
 * technique into video->convert_texture at the full output size, then the
 * "NV12_UV" technique into video->convert_uv_texture at half the output
 * width and height. Sets video->texture_converted afterwards; the whole
 * call is bracketed by profile_start/profile_end. */
static void render_convert_texture_nv12(struct obs_core_video *video,
gs_texture_t *texture)
{
profile_start(render_convert_nv12_name);
render_nv12(video, texture, video->convert_texture, "NV12_Y",
video->output_width, video->output_height);
render_nv12(video, texture, video->convert_uv_texture, "NV12_UV",
video->output_width / 2, video->output_height / 2);
video->texture_converted = true;
profile_end(render_convert_nv12_name);
}
static const char *stage_output_texture_name = "stage_output_texture";
static inline void stage_output_texture(struct obs_core_video *video,
gs_texture_t *texture, int cur_texture)
int cur_texture)
{
profile_start(stage_output_texture_name);
bool texture_ready;
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];
if (video->gpu_conversion) {
texture = video->convert_texture;
texture_ready = video->texture_converted;
} else {
texture_ready = true;
}
unmap_last_surface(video);
if (!texture_ready)
goto end;
if (!video->gpu_conversion) {
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0];
if (copy)
gs_stage_texture(copy, video->output_texture);
gs_stage_texture(copy, texture);
video->textures_copied[cur_texture] = true;
} else if (video->texture_converted) {
for (int i = 0; i < NUM_CHANNELS; i++) {
gs_stagesurf_t *copy =
video->copy_surfaces[cur_texture][i];
if (copy)
gs_stage_texture(copy,
video->convert_textures[i]);
}
video->textures_copied[cur_texture] = true;
video->textures_copied[cur_texture] = true;
}
end:
profile_end(stage_output_texture_name);
}
@ -458,13 +414,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
* reason. otherwise, it goes to the 'duplicate' case above, which
* will ensure better performance. */
if (raw_active || vframe_info->count > 1) {
gs_copy_texture(tf.tex, video->convert_texture);
gs_copy_texture(tf.tex, video->convert_textures[0]);
} else {
gs_texture_t *tex = video->convert_texture;
gs_texture_t *tex_uv = video->convert_uv_texture;
gs_texture_t *tex = video->convert_textures[0];
gs_texture_t *tex_uv = video->convert_textures[1];
video->convert_texture = tf.tex;
video->convert_uv_texture = tf.tex_uv;
video->convert_textures[0] = tf.tex;
video->convert_textures[1] = tf.tex_uv;
tf.tex = tex;
tf.tex_uv = tex_uv;
@ -529,17 +485,12 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
gs_texture_t *texture = render_output_texture(video);
#ifdef _WIN32
if (gpu_active) {
if (gpu_active)
gs_flush();
}
#endif
if (video->gpu_conversion) {
if (video->using_nv12_tex)
render_convert_texture_nv12(video, texture);
else
render_convert_texture(video, texture);
}
if (video->gpu_conversion)
render_convert_texture(video, texture);
#ifdef _WIN32
if (gpu_active) {
@ -547,8 +498,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
output_gpu_encoders(video, raw_active);
}
#endif
if (raw_active)
stage_output_texture(video, texture, cur_texture);
stage_output_texture(video, cur_texture);
}
gs_set_render_target(NULL, NULL);
@ -560,73 +512,41 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
static inline bool download_frame(struct obs_core_video *video,
int prev_texture, struct video_data *frame)
{
gs_stagesurf_t *surface = video->copy_surfaces[prev_texture];
if (!video->textures_copied[prev_texture])
return false;
if (!gs_stagesurface_map(surface, &frame->data[0], &frame->linesize[0]))
return false;
for (int channel = 0; channel < NUM_CHANNELS; ++channel) {
gs_stagesurf_t *surface =
video->copy_surfaces[prev_texture][channel];
if (surface) {
if (!gs_stagesurface_map(surface, &frame->data[channel],
&frame->linesize[channel]))
return false;
video->mapped_surface = surface;
video->mapped_surfaces[channel] = surface;
}
}
return true;
}
static inline uint32_t calc_linesize(uint32_t pos, uint32_t linesize)
static const uint8_t *set_gpu_converted_plane(uint32_t width, uint32_t height,
uint32_t linesize_input,
uint32_t linesize_output,
const uint8_t *in, uint8_t *out)
{
uint32_t size = pos % linesize;
return size ? size : linesize;
}
static void copy_dealign(uint8_t *dst, uint32_t dst_pos, uint32_t dst_linesize,
const uint8_t *src, uint32_t src_pos,
uint32_t src_linesize, uint32_t remaining)
{
while (remaining) {
uint32_t src_remainder = src_pos % src_linesize;
uint32_t dst_offset = dst_linesize - src_remainder;
uint32_t src_offset = src_linesize - src_remainder;
if (remaining < dst_offset) {
memcpy(dst + dst_pos, src + src_pos, remaining);
src_pos += remaining;
dst_pos += remaining;
remaining = 0;
} else {
memcpy(dst + dst_pos, src + src_pos, dst_offset);
src_pos += src_offset;
dst_pos += dst_offset;
remaining -= dst_offset;
if ((width == linesize_input) && (width == linesize_output)) {
size_t total = width * height;
memcpy(out, in, total);
in += total;
} else {
for (size_t y = 0; y < height; y++) {
memcpy(out, in, width);
out += linesize_output;
in += linesize_input;
}
}
}
static inline uint32_t make_aligned_linesize_offset(uint32_t offset,
uint32_t dst_linesize,
uint32_t src_linesize)
{
uint32_t remainder = offset % dst_linesize;
return (offset / dst_linesize) * src_linesize + remainder;
}
static void fix_gpu_converted_alignment(struct obs_core_video *video,
struct video_frame *output,
const struct video_data *input)
{
uint32_t src_linesize = input->linesize[0];
uint32_t dst_linesize = output->linesize[0] * 4;
uint32_t src_pos = 0;
for (size_t i = 0; i < 3; i++) {
if (video->plane_linewidth[i] == 0)
break;
src_pos = make_aligned_linesize_offset(
video->plane_offsets[i], dst_linesize, src_linesize);
copy_dealign(output->data[i], 0, dst_linesize, input->data[0],
src_pos, src_linesize, video->plane_sizes[i]);
}
return in;
}
static void set_gpu_converted_data(struct obs_core_video *video,
@ -634,41 +554,91 @@ static void set_gpu_converted_data(struct obs_core_video *video,
const struct video_data *input,
const struct video_output_info *info)
{
if (input->linesize[0] == video->output_width * 4) {
struct video_frame frame;
if (video->using_nv12_tex) {
const uint32_t width = info->width;
const uint32_t height = info->height;
for (size_t i = 0; i < 3; i++) {
if (video->plane_linewidth[i] == 0)
break;
frame.linesize[i] = video->plane_linewidth[i];
frame.data[i] =
input->data[0] + video->plane_offsets[i];
}
video_frame_copy(output, &frame, info->format, info->height);
} else if (video->using_nv12_tex) {
size_t width = info->width;
size_t height = info->height;
size_t height_d2 = height / 2;
uint8_t *out_y = output->data[0];
uint8_t *out_uv = output->data[1];
uint8_t *in = input->data[0];
for (size_t y = 0; y < height; y++) {
memcpy(out_y, in, width);
out_y += output->linesize[0];
in += input->linesize[0];
}
for (size_t y = 0; y < height_d2; y++) {
memcpy(out_uv, in, width);
out_uv += output->linesize[0];
in += input->linesize[0];
}
const uint8_t *const in_uv = set_gpu_converted_plane(
width, height, input->linesize[0], output->linesize[0],
input->data[0], output->data[0]);
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width, height_d2, input->linesize[0],
output->linesize[1], in_uv,
output->data[1]);
} else {
fix_gpu_converted_alignment(video, output, input);
switch (info->format) {
case VIDEO_FORMAT_I420: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
const uint32_t width_d2 = width / 2;
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width_d2, height_d2,
input->linesize[1],
output->linesize[1],
input->data[1],
output->data[1]);
set_gpu_converted_plane(width_d2, height_d2,
input->linesize[2],
output->linesize[2],
input->data[2],
output->data[2]);
break;
}
case VIDEO_FORMAT_NV12: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
const uint32_t height_d2 = height / 2;
set_gpu_converted_plane(width, height_d2,
input->linesize[1],
output->linesize[1],
input->data[1],
output->data[1]);
break;
}
case VIDEO_FORMAT_I444: {
const uint32_t width = info->width;
const uint32_t height = info->height;
set_gpu_converted_plane(width, height,
input->linesize[0],
output->linesize[0],
input->data[0],
output->data[0]);
set_gpu_converted_plane(width, height,
input->linesize[1],
output->linesize[1],
input->data[1],
output->data[1]);
set_gpu_converted_plane(width, height,
input->linesize[2],
output->linesize[2],
input->data[2],
output->data[2]);
break;
}
}
}
}

View File

@ -42,117 +42,35 @@ static inline void make_video_info(struct video_output_info *vi,
vi->cache_size = 6;
}
#define PIXEL_SIZE 4
/* Rounds val up to the next multiple of align. The mask arithmetic only
 * yields a multiple when align is a power of two. Both arguments are fully
 * parenthesized so that expression arguments (e.g. `1 << 2`) expand with
 * the intended precedence. Note that align is evaluated twice. */
#define GET_ALIGN(val, align) (((val) + ((align) - 1)) & ~((align) - 1))
/* Fills in the packed-plane layout for 4:2:0 planar GPU conversion:
 * plane offsets, sizes and line widths, the height of the conversion
 * target, and the effect technique name ("Planar420"). */
static inline void set_420p_sizes(const struct obs_video_info *ovi)
{
	struct obs_core_video *core = &obs->video;

	/* one chroma plane, padded up to a whole PIXEL_SIZE unit */
	uint32_t chroma_bytes = (ovi->output_width * ovi->output_height / 4);
	chroma_bytes = GET_ALIGN(chroma_bytes, PIXEL_SIZE);

	core->plane_offsets[0] = 0;
	core->plane_offsets[1] = ovi->output_width * ovi->output_height;
	core->plane_offsets[2] = core->plane_offsets[1] + chroma_bytes;

	core->plane_sizes[0] = core->plane_offsets[1];
	core->plane_sizes[1] = core->plane_sizes[0] / 4;
	core->plane_sizes[2] = core->plane_sizes[1];

	core->plane_linewidth[0] = ovi->output_width;
	core->plane_linewidth[1] = ovi->output_width / 2;
	core->plane_linewidth[2] = core->plane_linewidth[1];

	/* rows needed to hold every plane, rounded up, then made even */
	uint32_t packed_bytes = core->plane_offsets[2] + chroma_bytes;
	uint32_t rows = (packed_bytes / PIXEL_SIZE + ovi->output_width - 1) /
			ovi->output_width;
	core->conversion_height = GET_ALIGN(rows, 2);

	core->conversion_tech = "Planar420";
}
/* Fills in the packed-plane layout for NV12 GPU conversion: offsets, sizes
 * and line widths for the first two planes, the height of the conversion
 * target, and the effect technique name ("NV12"). */
static inline void set_nv12_sizes(const struct obs_video_info *ovi)
{
	struct obs_core_video *core = &obs->video;

	/* second plane, padded up to a whole PIXEL_SIZE unit */
	uint32_t chroma_bytes = (ovi->output_width * ovi->output_height / 2);
	chroma_bytes = GET_ALIGN(chroma_bytes, PIXEL_SIZE);

	core->plane_offsets[0] = 0;
	core->plane_offsets[1] = ovi->output_width * ovi->output_height;

	core->plane_sizes[0] = core->plane_offsets[1];
	core->plane_sizes[1] = core->plane_sizes[0] / 2;

	core->plane_linewidth[0] = ovi->output_width;
	core->plane_linewidth[1] = core->plane_linewidth[0];

	/* rows needed to hold both planes, rounded up, then made even */
	uint32_t packed_bytes = core->plane_offsets[1] + chroma_bytes;
	uint32_t rows = (packed_bytes / PIXEL_SIZE + ovi->output_width - 1) /
			ovi->output_width;
	core->conversion_height = GET_ALIGN(rows, 2);

	core->conversion_tech = "NV12";
}
/* Fills in the packed-plane layout for 4:4:4 planar GPU conversion: three
 * equally sized planes laid out back to back, the height of the conversion
 * target, and the effect technique name ("Planar444"). */
static inline void set_444p_sizes(const struct obs_video_info *ovi)
{
	struct obs_core_video *core = &obs->video;

	/* every plane has the same size, padded to a whole PIXEL_SIZE unit */
	uint32_t plane_bytes = (ovi->output_width * ovi->output_height);
	plane_bytes = GET_ALIGN(plane_bytes, PIXEL_SIZE);

	for (uint32_t plane = 0; plane < 3; plane++) {
		core->plane_offsets[plane] = plane_bytes * plane;
		core->plane_linewidth[plane] = ovi->output_width;
		core->plane_sizes[plane] = plane_bytes;
	}

	/* rows needed to hold all three planes, rounded up, then made even */
	uint32_t packed_bytes = core->plane_offsets[2] + plane_bytes;
	uint32_t rows = (packed_bytes / PIXEL_SIZE + ovi->output_width - 1) /
			ovi->output_width;
	core->conversion_height = GET_ALIGN(rows, 2);

	core->conversion_tech = "Planar444";
}
/*
 * Resets the GPU conversion state in obs->video and then, for the
 * output formats handled here (I420, NV12, I444), fills in the plane
 * layout via the matching set_*_sizes() helper, flags conversion as
 * needed and selects the per-plane conversion technique names.
 *
 * For any other output format the state is left reset
 * (conversion_needed == false, all technique names NULL).
 */
static inline void calc_gpu_conversion_sizes(const struct obs_video_info *ovi)
{
	struct obs_core_video *video = &obs->video;
	const uint32_t format = (uint32_t)ovi->output_format;

	/* Clear everything first so unsupported formats leave no stale
	 * layout behind. */
	video->conversion_height = 0;
	memset(video->plane_offsets, 0, sizeof(video->plane_offsets));
	memset(video->plane_sizes, 0, sizeof(video->plane_sizes));
	memset(video->plane_linewidth, 0, sizeof(video->plane_linewidth));

	video->conversion_needed = false;
	video->conversion_techs[0] = NULL;
	video->conversion_techs[1] = NULL;
	video->conversion_techs[2] = NULL;
	video->conversion_width_i = 0.f;

	if (format == VIDEO_FORMAT_I420) {
		set_420p_sizes(ovi);
		video->conversion_needed = true;
		video->conversion_techs[0] = "Planar_Y";
		video->conversion_techs[1] = "Planar_U_Left";
		video->conversion_techs[2] = "Planar_V_Left";
		video->conversion_width_i = 1.f / (float)ovi->output_width;
	} else if (format == VIDEO_FORMAT_NV12) {
		set_nv12_sizes(ovi);
		video->conversion_needed = true;
		video->conversion_techs[0] = "NV12_Y";
		video->conversion_techs[1] = "NV12_UV";
		video->conversion_width_i = 1.f / (float)ovi->output_width;
	} else if (format == VIDEO_FORMAT_I444) {
		/* conversion_width_i is left at 0 for this format. */
		set_444p_sizes(ovi);
		video->conversion_needed = true;
		video->conversion_techs[0] = "Planar_Y";
		video->conversion_techs[1] = "Planar_U";
		video->conversion_techs[2] = "Planar_V";
	}
}
@ -167,7 +85,7 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
? gs_nv12_available()
: false;
if (!video->conversion_height) {
if (!video->conversion_needed) {
blog(LOG_INFO, "GPU conversion not available for format: %u",
(unsigned int)ovi->output_format);
video->gpu_conversion = false;
@ -183,23 +101,96 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
#ifdef _WIN32
if (video->using_nv12_tex) {
gs_texture_create_nv12(&video->convert_texture,
&video->convert_uv_texture,
gs_texture_create_nv12(&video->convert_textures[0],
&video->convert_textures[1],
ovi->output_width, ovi->output_height,
GS_RENDER_TARGET | GS_SHARED_KM_TEX);
if (!video->convert_uv_texture)
return false;
} else {
#endif
video->convert_texture = gs_texture_create(
ovi->output_width, video->conversion_height, GS_RGBA, 1,
NULL, GS_RENDER_TARGET);
video->convert_textures[0] =
gs_texture_create(ovi->output_width, ovi->output_height,
GS_R8, 1, NULL, GS_RENDER_TARGET);
const struct video_output_info *info =
video_output_get_info(video->video);
switch (info->format) {
case VIDEO_FORMAT_I420:
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8, 1, NULL, GS_RENDER_TARGET);
video->convert_textures[2] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8, 1, NULL, GS_RENDER_TARGET);
if (!video->convert_textures[2])
return false;
break;
case VIDEO_FORMAT_NV12:
video->convert_textures[1] = gs_texture_create(
ovi->output_width / 2, ovi->output_height / 2,
GS_R8G8, 1, NULL, GS_RENDER_TARGET);
break;
case VIDEO_FORMAT_I444:
video->convert_textures[1] = gs_texture_create(
ovi->output_width, ovi->output_height, GS_R8, 1,
NULL, GS_RENDER_TARGET);
video->convert_textures[2] = gs_texture_create(
ovi->output_width, ovi->output_height, GS_R8, 1,
NULL, GS_RENDER_TARGET);
if (!video->convert_textures[2])
return false;
break;
}
#ifdef _WIN32
}
#endif
if (!video->convert_texture)
if (!video->convert_textures[0])
return false;
if (!video->convert_textures[1])
return false;
return true;
}
/*
 * Creates the staging surfaces in video->copy_surfaces[i] used when GPU
 * conversion is active.
 *
 * Surface 0 is always output_width x output_height in GS_R8.  The
 * remaining surfaces depend on the format reported by the video output:
 *   - I420: surfaces 1 and 2, half width and half height, GS_R8
 *   - NV12: surface 1, half width and half height, GS_R8G8
 *   - I444: surfaces 1 and 2, full size, GS_R8
 * Any other format gets only surface 0.
 *
 * Returns false as soon as one creation fails (surfaces created before
 * the failure are left in place, not destroyed here), true otherwise.
 */
static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
{
	struct obs_core_video *video = &obs->video;
	const uint32_t full_w = ovi->output_width;
	const uint32_t full_h = ovi->output_height;
	const uint32_t half_w = full_w / 2;
	const uint32_t half_h = full_h / 2;

	video->copy_surfaces[i][0] =
		gs_stagesurface_create(full_w, full_h, GS_R8);
	if (video->copy_surfaces[i][0] == NULL)
		return false;

	const struct video_output_info *info =
		video_output_get_info(video->video);

	switch (info->format) {
	case VIDEO_FORMAT_I420:
		video->copy_surfaces[i][1] =
			gs_stagesurface_create(half_w, half_h, GS_R8);
		if (video->copy_surfaces[i][1] == NULL)
			return false;

		video->copy_surfaces[i][2] =
			gs_stagesurface_create(half_w, half_h, GS_R8);
		return video->copy_surfaces[i][2] != NULL;

	case VIDEO_FORMAT_NV12:
		video->copy_surfaces[i][1] =
			gs_stagesurface_create(half_w, half_h, GS_R8G8);
		return video->copy_surfaces[i][1] != NULL;

	case VIDEO_FORMAT_I444:
		video->copy_surfaces[i][1] =
			gs_stagesurface_create(full_w, full_h, GS_R8);
		if (video->copy_surfaces[i][1] == NULL)
			return false;

		video->copy_surfaces[i][2] =
			gs_stagesurface_create(full_w, full_h, GS_R8);
		return video->copy_surfaces[i][2] != NULL;

	default:
		break;
	}

	/* Formats without extra planes only need surface 0. */
	return true;
}
@ -207,25 +198,29 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
static bool obs_init_textures(struct obs_video_info *ovi)
{
struct obs_core_video *video = &obs->video;
uint32_t output_height = video->gpu_conversion
? video->conversion_height
: ovi->output_height;
size_t i;
for (i = 0; i < NUM_TEXTURES; i++) {
for (size_t i = 0; i < NUM_TEXTURES; i++) {
#ifdef _WIN32
if (video->using_nv12_tex) {
video->copy_surfaces[i] = gs_stagesurface_create_nv12(
ovi->output_width, ovi->output_height);
if (!video->copy_surfaces[i])
video->copy_surfaces[i][0] =
gs_stagesurface_create_nv12(ovi->output_width,
ovi->output_height);
if (!video->copy_surfaces[i][0])
return false;
} else {
#endif
video->copy_surfaces[i] = gs_stagesurface_create(
ovi->output_width, output_height, GS_RGBA);
if (!video->copy_surfaces[i])
return false;
if (video->gpu_conversion) {
if (!obs_init_gpu_copy_surfaces(ovi, i))
return false;
} else {
video->copy_surfaces[i][0] =
gs_stagesurface_create(
ovi->output_width,
ovi->output_height, GS_RGBA);
if (!video->copy_surfaces[i][0])
return false;
}
#ifdef _WIN32
}
#endif
@ -465,23 +460,45 @@ static void obs_free_video(void)
gs_enter_context(video->graphics);
if (video->mapped_surface) {
gs_stagesurface_unmap(video->mapped_surface);
video->mapped_surface = NULL;
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->mapped_surfaces[c]) {
gs_stagesurface_unmap(
video->mapped_surfaces[c]);
video->mapped_surfaces[c] = NULL;
}
}
for (size_t i = 0; i < NUM_TEXTURES; i++) {
gs_stagesurface_destroy(video->copy_surfaces[i]);
video->copy_surfaces[i] = NULL;
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->copy_surfaces[i][c]) {
gs_stagesurface_destroy(
video->copy_surfaces[i][c]);
video->copy_surfaces[i][c] = NULL;
}
}
}
gs_texture_destroy(video->render_texture);
gs_texture_destroy(video->convert_texture);
gs_texture_destroy(video->convert_uv_texture);
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->convert_textures[c]) {
gs_texture_destroy(video->convert_textures[c]);
video->convert_textures[c] = NULL;
}
}
for (size_t i = 0; i < NUM_TEXTURES; i++) {
for (size_t c = 0; c < NUM_CHANNELS; c++) {
if (video->copy_surfaces[i][c]) {
gs_stagesurface_destroy(
video->copy_surfaces[i][c]);
video->copy_surfaces[i][c] = NULL;
}
}
}
gs_texture_destroy(video->output_texture);
video->render_texture = NULL;
video->convert_texture = NULL;
video->convert_uv_texture = NULL;
video->output_texture = NULL;
gs_leave_context();