Merge pull request #1995 from jpark37/yuv-simplify
libobs: Separate textures for YUV output, fix chroma
This commit is contained in:
commit
164f731320
@ -15,25 +15,12 @@
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
******************************************************************************/
|
||||
|
||||
//#define DEBUGGING
|
||||
|
||||
uniform float u_plane_offset;
|
||||
uniform float v_plane_offset;
|
||||
|
||||
uniform float width;
|
||||
uniform float height;
|
||||
uniform float width_i;
|
||||
uniform float height_i;
|
||||
uniform float width_d2;
|
||||
uniform float height_d2;
|
||||
uniform float width_d2_i;
|
||||
uniform float height_d2_i;
|
||||
uniform float input_width;
|
||||
uniform float input_height;
|
||||
uniform float input_width_i;
|
||||
uniform float input_height_i;
|
||||
uniform float input_width_i_d2;
|
||||
uniform float input_height_i_d2;
|
||||
|
||||
uniform int int_width;
|
||||
uniform int int_input_width;
|
||||
@ -65,8 +52,17 @@ struct VertTexPos {
|
||||
float4 pos : POSITION;
|
||||
};
|
||||
|
||||
/* Vertex-shader output carrying two horizontal texture coordinates and one
 * shared vertical coordinate. As filled in by VSTexPosLeft:
 * uuv.x = left u, uuv.y = right u, uuv.z = v. */
struct VertTexPosWide {
|
||||
float3 uuv : TEXCOORD0;
|
||||
float4 pos : POSITION;
|
||||
};
|
||||
|
||||
struct FragTex {
|
||||
float2 uv : TEXCOORD0;
|
||||
float2 uv : TEXCOORD0;
|
||||
};
|
||||
|
||||
/* Pixel-shader input for the *_Wide shaders: the .xz and .yz swizzles of uuv
 * are used as the two texture coordinates that get sampled and averaged. */
struct FragTexWide {
|
||||
float3 uuv : TEXCOORD0;
|
||||
};
|
||||
|
||||
FragPos VSPos(uint id : VERTEXID)
|
||||
@ -82,7 +78,7 @@ FragPos VSPos(uint id : VERTEXID)
|
||||
return vert_out;
|
||||
}
|
||||
|
||||
VertTexPos VSPosTex(uint id : VERTEXID)
|
||||
VertTexPos VSTexPos(uint id : VERTEXID)
|
||||
{
|
||||
float idHigh = float(id >> 1);
|
||||
float idLow = float(id & uint(1));
|
||||
@ -99,225 +95,76 @@ VertTexPos VSPosTex(uint id : VERTEXID)
|
||||
return vert_out;
|
||||
}
|
||||
|
||||
/* Vertex shader used by the Planar_U_Left, Planar_V_Left and NV12_UV
 * techniques. Builds a clip-space position from the vertex id and outputs two
 * horizontal texture coordinates (uuv.x = left, uuv.y = right) that share one
 * vertical coordinate (uuv.z).
 * For vertex ids 0, 1, 2 the positions are (-1,-1), (-1,3), (3,-1). */
VertTexPosWide VSTexPosLeft(uint id : VERTEXID)
|
||||
{
|
||||
/* split the vertex id: idHigh = id >> 1, idLow = lowest bit */
float idHigh = float(id >> 1);
|
||||
float idLow = float(id & uint(1));
|
||||
|
||||
/* clip-space position: each axis ends up at either -1 or 3 */
float x = idHigh * 4.0 - 1.0;
|
||||
float y = idLow * 4.0 - 1.0;
|
||||
|
||||
/* the right sample uses the plain u; the left sample is shifted back by
 * width_i. NOTE(review): width_i is presumably one source texel in UV
 * units -- confirm where the width_i effect parameter is assigned. */
float u_right = idHigh * 2.0;
|
||||
float u_left = u_right - width_i;
|
||||
/* v runs in the opposite direction depending on obs_glsl_compile */
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
|
||||
|
||||
VertTexPosWide vert_out;
|
||||
vert_out.uuv.x = u_left;
|
||||
vert_out.uuv.y = u_right;
|
||||
vert_out.uuv.z = v;
|
||||
vert_out.pos = float4(x, y, 0.0, 1.0);
|
||||
return vert_out;
|
||||
}
|
||||
|
||||
/* used to prevent internal GPU precision issues width fmod in particular */
|
||||
#define PRECISION_OFFSET 0.2
|
||||
|
||||
float4 PSNV12(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
|
||||
if (byte_offset < u_plane_offset) {
|
||||
#ifdef DEBUGGING
|
||||
return float4(1.0, 1.0, 1.0, 1.0);
|
||||
#endif
|
||||
|
||||
float lum_u = floor(fmod(byte_offset, width)) * width_i;
|
||||
float lum_v = floor(byte_offset * width_i) * height_i;
|
||||
|
||||
/* move to texel centers to sample the 4 pixels properly */
|
||||
lum_u += width_i * 0.5;
|
||||
lum_v += height_i * 0.5;
|
||||
|
||||
float2 sample_pos0 = float2(lum_u, lum_v);
|
||||
float2 sample_pos1 = float2(lum_u += width_i, lum_v);
|
||||
float2 sample_pos2 = float2(lum_u += width_i, lum_v);
|
||||
float2 sample_pos3 = float2(lum_u + width_i, lum_v);
|
||||
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 out_val = float4(
|
||||
dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
|
||||
dot(color_vec_y.xyz, rgb3) + color_vec_y.w
|
||||
);
|
||||
|
||||
return out_val;
|
||||
} else {
|
||||
#ifdef DEBUGGING
|
||||
return float4(0.5, 0.2, 0.5, 0.2);
|
||||
#endif
|
||||
|
||||
float new_offset = byte_offset - u_plane_offset;
|
||||
|
||||
float ch_u = floor(fmod(new_offset, width)) * width_i;
|
||||
float ch_v = floor(new_offset * width_i) * height_d2_i;
|
||||
float width_i2 = width_i*2.0;
|
||||
|
||||
/* move to the borders of each set of 4 pixels to force it
|
||||
* to do bilinear averaging */
|
||||
ch_u += width_i;
|
||||
ch_v += height_i;
|
||||
|
||||
float2 sample_pos0 = float2(ch_u, ch_v);
|
||||
float2 sample_pos1 = float2(ch_u + width_i2, ch_v);
|
||||
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
|
||||
return float4(
|
||||
dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
|
||||
dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
|
||||
dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
|
||||
dot(color_vec_v.xyz, rgb1) + color_vec_v.w
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
float PSNV12_Y(FragPos frag_in) : TARGET
|
||||
float PS_Y(FragPos frag_in) : TARGET
|
||||
{
|
||||
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
|
||||
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
|
||||
return y;
|
||||
}
|
||||
|
||||
float2 PSNV12_UV(FragTex frag_in) : TARGET
|
||||
float2 PS_UV_Wide(FragTexWide frag_in) : TARGET
|
||||
{
|
||||
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
|
||||
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
|
||||
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
|
||||
float3 rgb = (rgb_left + rgb_right) * 0.5;
|
||||
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
|
||||
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
|
||||
return float2(u, v);
|
||||
}
|
||||
|
||||
float4 PSPlanar420(FragTex frag_in) : TARGET
|
||||
float PS_U(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
|
||||
float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;
|
||||
|
||||
if (byte_offset < u_plane_offset) {
|
||||
#ifdef DEBUGGING
|
||||
return float4(1.0, 1.0, 1.0, 1.0);
|
||||
#endif
|
||||
|
||||
float lum_u = floor(fmod(byte_offset, width)) * width_i;
|
||||
float lum_v = floor(byte_offset * width_i) * height_i;
|
||||
|
||||
/* move to texel centers to sample the 4 pixels properly */
|
||||
lum_u += width_i * 0.5;
|
||||
lum_v += height_i * 0.5;
|
||||
|
||||
sample_pos0 = float2(lum_u, lum_v);
|
||||
sample_pos1 = float2(lum_u += width_i, lum_v);
|
||||
sample_pos2 = float2(lum_u += width_i, lum_v);
|
||||
sample_pos3 = float2(lum_u + width_i, lum_v);
|
||||
|
||||
} else {
|
||||
#ifdef DEBUGGING
|
||||
return ((byte_offset < v_plane_offset) ?
|
||||
float4(0.5, 0.5, 0.5, 0.5) :
|
||||
float4(0.2, 0.2, 0.2, 0.2));
|
||||
#endif
|
||||
|
||||
float new_offset = byte_offset -
|
||||
((byte_offset < v_plane_offset) ?
|
||||
u_plane_offset : v_plane_offset);
|
||||
|
||||
float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
|
||||
float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
|
||||
float width_i2 = width_i*2.0;
|
||||
|
||||
/* move to the borders of each set of 4 pixels to force it
|
||||
* to do bilinear averaging */
|
||||
ch_u += width_i;
|
||||
ch_v += height_i;
|
||||
|
||||
/* set up coordinates for next chroma line, in case
|
||||
* (width / 2) % 4 == 2, i.e. the current set of 4 pixels is split
|
||||
* between the current and the next chroma line; do note that the next
|
||||
* chroma line is two source lines below the current source line */
|
||||
float ch_u_n = 0. + width_i;
|
||||
float ch_v_n = ch_v + height_i * 3;
|
||||
|
||||
sample_pos0 = float2(ch_u, ch_v);
|
||||
sample_pos1 = float2(ch_u += width_i2, ch_v);
|
||||
|
||||
ch_u += width_i2;
|
||||
// check if ch_u overflowed the current source and chroma line
|
||||
if (ch_u > 1.0) {
|
||||
sample_pos2 = float2(ch_u_n, ch_v_n);
|
||||
sample_pos2 = float2(ch_u_n + width_i2, ch_v_n);
|
||||
} else {
|
||||
sample_pos2 = float2(ch_u, ch_v);
|
||||
sample_pos3 = float2(ch_u + width_i2, ch_v);
|
||||
}
|
||||
}
|
||||
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 color_vec;
|
||||
if (byte_offset < u_plane_offset)
|
||||
color_vec = color_vec_y;
|
||||
else if (byte_offset < v_plane_offset)
|
||||
color_vec = color_vec_u;
|
||||
else
|
||||
color_vec = color_vec_v;
|
||||
|
||||
return float4(
|
||||
dot(color_vec.xyz, rgb0) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb1) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb2) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb3) + color_vec.w
|
||||
);
|
||||
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
|
||||
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
|
||||
return u;
|
||||
}
|
||||
|
||||
float4 PSPlanar444(FragTex frag_in) : TARGET
|
||||
float PS_V(FragTex frag_in) : TARGET
|
||||
{
|
||||
float v_mul = floor(frag_in.uv.y * input_height);
|
||||
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
|
||||
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
|
||||
return v;
|
||||
}
|
||||
|
||||
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
|
||||
byte_offset += PRECISION_OFFSET;
|
||||
/* Pixel shader producing a single U value: samples `image` at the left
 * (uuv.xz) and right (uuv.yz) coordinates, averages the two RGB results and
 * applies color_vec_u as dot(xyz, rgb) + w. */
float PS_U_Wide(FragTexWide frag_in) : TARGET
|
||||
{
|
||||
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
|
||||
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
|
||||
/* equal-weight average of the two horizontal samples */
float3 rgb = (rgb_left + rgb_right) * 0.5;
|
||||
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
|
||||
return u;
|
||||
}
|
||||
|
||||
float new_byte_offset = byte_offset;
|
||||
|
||||
if (byte_offset >= v_plane_offset)
|
||||
new_byte_offset -= v_plane_offset;
|
||||
else if (byte_offset >= u_plane_offset)
|
||||
new_byte_offset -= u_plane_offset;
|
||||
|
||||
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
|
||||
float v_val = floor(new_byte_offset * width_i) * height_i;
|
||||
|
||||
/* move to texel centers to sample the 4 pixels properly */
|
||||
u_val += width_i * 0.5;
|
||||
v_val += height_i * 0.5;
|
||||
|
||||
float2 sample_pos0 = float2(u_val, v_val);
|
||||
float2 sample_pos1 = float2(u_val += width_i, v_val);
|
||||
float2 sample_pos2 = float2(u_val += width_i, v_val);
|
||||
float2 sample_pos3 = float2(u_val + width_i, v_val);
|
||||
|
||||
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
|
||||
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
|
||||
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
|
||||
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
|
||||
|
||||
float4 color_vec;
|
||||
if (byte_offset < u_plane_offset)
|
||||
color_vec = color_vec_y;
|
||||
else if (byte_offset < v_plane_offset)
|
||||
color_vec = color_vec_u;
|
||||
else
|
||||
color_vec = color_vec_v;
|
||||
|
||||
return float4(
|
||||
dot(color_vec.xyz, rgb0) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb1) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb2) + color_vec.w,
|
||||
dot(color_vec.xyz, rgb3) + color_vec.w
|
||||
);
|
||||
/* Pixel shader producing a single V value: samples `image` at the left
 * (uuv.xz) and right (uuv.yz) coordinates, averages the two RGB results and
 * applies color_vec_v as dot(xyz, rgb) + w. */
float PS_V_Wide(FragTexWide frag_in) : TARGET
|
||||
{
|
||||
float3 rgb_left = image.Sample(def_sampler, frag_in.uuv.xz).rgb;
|
||||
float3 rgb_right = image.Sample(def_sampler, frag_in.uuv.yz).rgb;
|
||||
/* equal-weight average of the two horizontal samples */
float3 rgb = (rgb_left + rgb_right) * 0.5;
|
||||
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
|
||||
return v;
|
||||
}
|
||||
|
||||
float GetIntOffsetColor(int offset)
|
||||
@ -473,30 +320,48 @@ float4 PSBGR3_Full(FragTex frag_in) : TARGET
|
||||
return float4(rgb, 1.0);
|
||||
}
|
||||
|
||||
technique Planar420
|
||||
technique Planar_Y
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar420(frag_in);
|
||||
vertex_shader = VSPos(id);
|
||||
pixel_shader = PS_Y(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
technique Planar444
|
||||
technique Planar_U
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSPlanar444(frag_in);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PS_U(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
technique NV12
|
||||
technique Planar_V
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSNV12(frag_in);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PS_V(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
/* Single-pass technique pairing the two-coordinate vertex shader
 * (VSTexPosLeft) with the averaging U pixel shader (PS_U_Wide). */
technique Planar_U_Left
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSTexPosLeft(id);
|
||||
pixel_shader = PS_U_Wide(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
/* Single-pass technique pairing the two-coordinate vertex shader
 * (VSTexPosLeft) with the averaging V pixel shader (PS_V_Wide). */
technique Planar_V_Left
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSTexPosLeft(id);
|
||||
pixel_shader = PS_V_Wide(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -505,7 +370,7 @@ technique NV12_Y
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPos(id);
|
||||
pixel_shader = PSNV12_Y(frag_in);
|
||||
pixel_shader = PS_Y(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -513,8 +378,8 @@ technique NV12_UV
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
pixel_shader = PSNV12_UV(frag_in);
|
||||
vertex_shader = VSTexPosLeft(id);
|
||||
pixel_shader = PS_UV_Wide(frag_in);
|
||||
}
|
||||
}
|
||||
|
||||
@ -522,7 +387,7 @@ technique UYVY_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
|
||||
}
|
||||
}
|
||||
@ -531,7 +396,7 @@ technique YUY2_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
|
||||
}
|
||||
}
|
||||
@ -540,7 +405,7 @@ technique YVYU_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
|
||||
}
|
||||
}
|
||||
@ -549,7 +414,7 @@ technique I420_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPlanar420_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
@ -558,7 +423,7 @@ technique I422_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPlanar422_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
@ -567,7 +432,7 @@ technique I444_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSPlanar444_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
@ -576,7 +441,7 @@ technique NV12_Reverse
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSNV12_Reverse(frag_in);
|
||||
}
|
||||
}
|
||||
@ -585,7 +450,7 @@ technique Y800_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSY800_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
@ -594,7 +459,7 @@ technique Y800_Full
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSY800_Full(frag_in);
|
||||
}
|
||||
}
|
||||
@ -603,7 +468,7 @@ technique RGB_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSRGB_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
@ -612,7 +477,7 @@ technique BGR3_Limited
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSBGR3_Limited(frag_in);
|
||||
}
|
||||
}
|
||||
@ -621,7 +486,7 @@ technique BGR3_Full
|
||||
{
|
||||
pass
|
||||
{
|
||||
vertex_shader = VSPosTex(id);
|
||||
vertex_shader = VSTexPos(id);
|
||||
pixel_shader = PSBGR3_Full(frag_in);
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "obs.h"
|
||||
|
||||
#define NUM_TEXTURES 2
|
||||
#define NUM_CHANNELS 3
|
||||
#define MICROSECOND_DEN 1000000
|
||||
#define NUM_ENCODE_TEXTURES 3
|
||||
#define NUM_ENCODE_TEXTURE_FRAMES_TO_WAIT 1
|
||||
@ -235,11 +236,10 @@ struct obs_tex_frame {
|
||||
|
||||
struct obs_core_video {
|
||||
graphics_t *graphics;
|
||||
gs_stagesurf_t *copy_surfaces[NUM_TEXTURES];
|
||||
gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS];
|
||||
gs_texture_t *render_texture;
|
||||
gs_texture_t *output_texture;
|
||||
gs_texture_t *convert_texture;
|
||||
gs_texture_t *convert_uv_texture;
|
||||
gs_texture_t *convert_textures[NUM_CHANNELS];
|
||||
bool texture_rendered;
|
||||
bool textures_copied[NUM_TEXTURES];
|
||||
bool texture_converted;
|
||||
@ -258,7 +258,7 @@ struct obs_core_video {
|
||||
gs_effect_t *bilinear_lowres_effect;
|
||||
gs_effect_t *premultiplied_alpha_effect;
|
||||
gs_samplerstate_t *point_sampler;
|
||||
gs_stagesurf_t *mapped_surface;
|
||||
gs_stagesurf_t *mapped_surfaces[NUM_CHANNELS];
|
||||
int cur_texture;
|
||||
long raw_active;
|
||||
long gpu_encoder_active;
|
||||
@ -283,11 +283,9 @@ struct obs_core_video {
|
||||
bool thread_initialized;
|
||||
|
||||
bool gpu_conversion;
|
||||
const char *conversion_tech;
|
||||
uint32_t conversion_height;
|
||||
uint32_t plane_offsets[3];
|
||||
uint32_t plane_sizes[3];
|
||||
uint32_t plane_linewidth[3];
|
||||
const char *conversion_techs[NUM_CHANNELS];
|
||||
bool conversion_needed;
|
||||
float conversion_width_i;
|
||||
|
||||
uint32_t output_width;
|
||||
uint32_t output_height;
|
||||
|
@ -109,9 +109,11 @@ static inline void set_render_size(uint32_t width, uint32_t height)
|
||||
|
||||
static inline void unmap_last_surface(struct obs_core_video *video)
|
||||
{
|
||||
if (video->mapped_surface) {
|
||||
gs_stagesurface_unmap(video->mapped_surface);
|
||||
video->mapped_surface = NULL;
|
||||
for (int c = 0; c < NUM_CHANNELS; ++c) {
|
||||
if (video->mapped_surfaces[c]) {
|
||||
gs_stagesurface_unmap(video->mapped_surfaces[c]);
|
||||
video->mapped_surfaces[c] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -264,10 +266,24 @@ static inline gs_texture_t *render_output_texture(struct obs_core_video *video)
|
||||
return target;
|
||||
}
|
||||
|
||||
static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
|
||||
static void render_convert_plane(gs_effect_t *effect, gs_texture_t *texture,
|
||||
gs_texture_t *target, const char *tech_name)
|
||||
{
|
||||
gs_eparam_t *param = gs_effect_get_param_by_name(effect, name);
|
||||
gs_effect_set_float(param, val);
|
||||
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
|
||||
|
||||
const uint32_t width = gs_texture_get_width(target);
|
||||
const uint32_t height = gs_texture_get_height(target);
|
||||
|
||||
gs_set_render_target(target, NULL);
|
||||
set_render_size(width, height);
|
||||
|
||||
size_t passes = gs_technique_begin(tech);
|
||||
for (size_t i = 0; i < passes; i++) {
|
||||
gs_technique_begin_pass(tech, i);
|
||||
gs_draw(GS_TRIS, 0, 3);
|
||||
gs_technique_end_pass(tech);
|
||||
}
|
||||
gs_technique_end(tech);
|
||||
}
|
||||
|
||||
static const char *render_convert_texture_name = "render_convert_texture";
|
||||
@ -276,11 +292,6 @@ static void render_convert_texture(struct obs_core_video *video,
|
||||
{
|
||||
profile_start(render_convert_texture_name);
|
||||
|
||||
gs_texture_t *target = video->convert_texture;
|
||||
float fwidth = (float)video->output_width;
|
||||
float fheight = (float)video->output_height;
|
||||
size_t passes, i;
|
||||
|
||||
gs_effect_t *effect = video->conversion_effect;
|
||||
gs_eparam_t *color_vec_y =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_y");
|
||||
@ -289,20 +300,7 @@ static void render_convert_texture(struct obs_core_video *video,
|
||||
gs_eparam_t *color_vec_v =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_v");
|
||||
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
||||
gs_technique_t *tech =
|
||||
gs_effect_get_technique(effect, video->conversion_tech);
|
||||
|
||||
set_eparam(effect, "u_plane_offset", (float)video->plane_offsets[1]);
|
||||
set_eparam(effect, "v_plane_offset", (float)video->plane_offsets[2]);
|
||||
set_eparam(effect, "width", fwidth);
|
||||
set_eparam(effect, "height", fheight);
|
||||
set_eparam(effect, "width_i", 1.0f / fwidth);
|
||||
set_eparam(effect, "height_i", 1.0f / fheight);
|
||||
set_eparam(effect, "width_d2", fwidth * 0.5f);
|
||||
set_eparam(effect, "height_d2", fheight * 0.5f);
|
||||
set_eparam(effect, "width_d2_i", 1.0f / (fwidth * 0.5f));
|
||||
set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f));
|
||||
set_eparam(effect, "input_height", (float)video->conversion_height);
|
||||
gs_eparam_t *width_i = gs_effect_get_param_by_name(effect, "width_i");
|
||||
|
||||
struct vec4 vec_y, vec_u, vec_v;
|
||||
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
|
||||
@ -311,23 +309,39 @@ static void render_convert_texture(struct obs_core_video *video,
|
||||
video->color_matrix[2], video->color_matrix[3]);
|
||||
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
|
||||
video->color_matrix[10], video->color_matrix[11]);
|
||||
gs_effect_set_vec4(color_vec_y, &vec_y);
|
||||
gs_effect_set_vec4(color_vec_u, &vec_u);
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
|
||||
gs_effect_set_texture(image, texture);
|
||||
|
||||
gs_set_render_target(target, NULL);
|
||||
set_render_size(video->output_width, video->conversion_height);
|
||||
|
||||
gs_enable_blending(false);
|
||||
passes = gs_technique_begin(tech);
|
||||
for (i = 0; i < passes; i++) {
|
||||
gs_technique_begin_pass(tech, i);
|
||||
gs_draw(GS_TRIS, 0, 3);
|
||||
gs_technique_end_pass(tech);
|
||||
|
||||
if (video->convert_textures[0]) {
|
||||
gs_effect_set_texture(image, texture);
|
||||
gs_effect_set_vec4(color_vec_y, &vec_y);
|
||||
render_convert_plane(effect, texture,
|
||||
video->convert_textures[0],
|
||||
video->conversion_techs[0]);
|
||||
|
||||
if (video->convert_textures[1]) {
|
||||
gs_effect_set_texture(image, texture);
|
||||
gs_effect_set_vec4(color_vec_u, &vec_u);
|
||||
if (!video->convert_textures[2])
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
gs_effect_set_float(width_i, video->conversion_width_i);
|
||||
render_convert_plane(effect, texture,
|
||||
video->convert_textures[1],
|
||||
video->conversion_techs[1]);
|
||||
|
||||
if (video->convert_textures[2]) {
|
||||
gs_effect_set_texture(image, texture);
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
gs_effect_set_float(width_i,
|
||||
video->conversion_width_i);
|
||||
render_convert_plane(
|
||||
effect, texture,
|
||||
video->convert_textures[2],
|
||||
video->conversion_techs[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
gs_technique_end(tech);
|
||||
|
||||
gs_enable_blending(true);
|
||||
|
||||
video->texture_converted = true;
|
||||
@ -335,90 +349,32 @@ static void render_convert_texture(struct obs_core_video *video,
|
||||
profile_end(render_convert_texture_name);
|
||||
}
|
||||
|
||||
static void render_nv12(struct obs_core_video *video, gs_texture_t *texture,
|
||||
gs_texture_t *target, const char *tech_name,
|
||||
uint32_t width, uint32_t height)
|
||||
{
|
||||
gs_effect_t *effect = video->conversion_effect;
|
||||
gs_eparam_t *color_vec_y =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_y");
|
||||
gs_eparam_t *color_vec_u =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_u");
|
||||
gs_eparam_t *color_vec_v =
|
||||
gs_effect_get_param_by_name(effect, "color_vec_v");
|
||||
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
||||
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
|
||||
size_t passes, i;
|
||||
|
||||
struct vec4 vec_y, vec_u, vec_v;
|
||||
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
|
||||
video->color_matrix[6], video->color_matrix[7]);
|
||||
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
|
||||
video->color_matrix[2], video->color_matrix[3]);
|
||||
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
|
||||
video->color_matrix[10], video->color_matrix[11]);
|
||||
gs_effect_set_vec4(color_vec_y, &vec_y);
|
||||
gs_effect_set_vec4(color_vec_u, &vec_u);
|
||||
gs_effect_set_vec4(color_vec_v, &vec_v);
|
||||
|
||||
gs_effect_set_texture(image, texture);
|
||||
|
||||
gs_set_render_target(target, NULL);
|
||||
set_render_size(width, height);
|
||||
|
||||
gs_enable_blending(false);
|
||||
passes = gs_technique_begin(tech);
|
||||
for (i = 0; i < passes; i++) {
|
||||
gs_technique_begin_pass(tech, i);
|
||||
gs_draw(GS_TRIS, 0, 3);
|
||||
gs_technique_end_pass(tech);
|
||||
}
|
||||
gs_technique_end(tech);
|
||||
gs_enable_blending(true);
|
||||
}
|
||||
|
||||
static const char *render_convert_nv12_name = "render_convert_texture_nv12";
|
||||
static void render_convert_texture_nv12(struct obs_core_video *video,
|
||||
gs_texture_t *texture)
|
||||
{
|
||||
profile_start(render_convert_nv12_name);
|
||||
|
||||
render_nv12(video, texture, video->convert_texture, "NV12_Y",
|
||||
video->output_width, video->output_height);
|
||||
render_nv12(video, texture, video->convert_uv_texture, "NV12_UV",
|
||||
video->output_width / 2, video->output_height / 2);
|
||||
|
||||
video->texture_converted = true;
|
||||
|
||||
profile_end(render_convert_nv12_name);
|
||||
}
|
||||
|
||||
static const char *stage_output_texture_name = "stage_output_texture";
|
||||
static inline void stage_output_texture(struct obs_core_video *video,
|
||||
gs_texture_t *texture, int cur_texture)
|
||||
int cur_texture)
|
||||
{
|
||||
profile_start(stage_output_texture_name);
|
||||
|
||||
bool texture_ready;
|
||||
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];
|
||||
|
||||
if (video->gpu_conversion) {
|
||||
texture = video->convert_texture;
|
||||
texture_ready = video->texture_converted;
|
||||
} else {
|
||||
texture_ready = true;
|
||||
}
|
||||
|
||||
unmap_last_surface(video);
|
||||
|
||||
if (!texture_ready)
|
||||
goto end;
|
||||
if (!video->gpu_conversion) {
|
||||
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture][0];
|
||||
if (copy)
|
||||
gs_stage_texture(copy, video->output_texture);
|
||||
|
||||
gs_stage_texture(copy, texture);
|
||||
video->textures_copied[cur_texture] = true;
|
||||
} else if (video->texture_converted) {
|
||||
for (int i = 0; i < NUM_CHANNELS; i++) {
|
||||
gs_stagesurf_t *copy =
|
||||
video->copy_surfaces[cur_texture][i];
|
||||
if (copy)
|
||||
gs_stage_texture(copy,
|
||||
video->convert_textures[i]);
|
||||
}
|
||||
|
||||
video->textures_copied[cur_texture] = true;
|
||||
video->textures_copied[cur_texture] = true;
|
||||
}
|
||||
|
||||
end:
|
||||
profile_end(stage_output_texture_name);
|
||||
}
|
||||
|
||||
@ -458,13 +414,13 @@ static inline bool queue_frame(struct obs_core_video *video, bool raw_active,
|
||||
* reason. otherwise, it goes to the 'duplicate' case above, which
|
||||
* will ensure better performance. */
|
||||
if (raw_active || vframe_info->count > 1) {
|
||||
gs_copy_texture(tf.tex, video->convert_texture);
|
||||
gs_copy_texture(tf.tex, video->convert_textures[0]);
|
||||
} else {
|
||||
gs_texture_t *tex = video->convert_texture;
|
||||
gs_texture_t *tex_uv = video->convert_uv_texture;
|
||||
gs_texture_t *tex = video->convert_textures[0];
|
||||
gs_texture_t *tex_uv = video->convert_textures[1];
|
||||
|
||||
video->convert_texture = tf.tex;
|
||||
video->convert_uv_texture = tf.tex_uv;
|
||||
video->convert_textures[0] = tf.tex;
|
||||
video->convert_textures[1] = tf.tex_uv;
|
||||
|
||||
tf.tex = tex;
|
||||
tf.tex_uv = tex_uv;
|
||||
@ -529,17 +485,12 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
gs_texture_t *texture = render_output_texture(video);
|
||||
|
||||
#ifdef _WIN32
|
||||
if (gpu_active) {
|
||||
if (gpu_active)
|
||||
gs_flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (video->gpu_conversion) {
|
||||
if (video->using_nv12_tex)
|
||||
render_convert_texture_nv12(video, texture);
|
||||
else
|
||||
render_convert_texture(video, texture);
|
||||
}
|
||||
if (video->gpu_conversion)
|
||||
render_convert_texture(video, texture);
|
||||
|
||||
#ifdef _WIN32
|
||||
if (gpu_active) {
|
||||
@ -547,8 +498,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
output_gpu_encoders(video, raw_active);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (raw_active)
|
||||
stage_output_texture(video, texture, cur_texture);
|
||||
stage_output_texture(video, cur_texture);
|
||||
}
|
||||
|
||||
gs_set_render_target(NULL, NULL);
|
||||
@ -560,73 +512,41 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
|
||||
static inline bool download_frame(struct obs_core_video *video,
|
||||
int prev_texture, struct video_data *frame)
|
||||
{
|
||||
gs_stagesurf_t *surface = video->copy_surfaces[prev_texture];
|
||||
|
||||
if (!video->textures_copied[prev_texture])
|
||||
return false;
|
||||
|
||||
if (!gs_stagesurface_map(surface, &frame->data[0], &frame->linesize[0]))
|
||||
return false;
|
||||
for (int channel = 0; channel < NUM_CHANNELS; ++channel) {
|
||||
gs_stagesurf_t *surface =
|
||||
video->copy_surfaces[prev_texture][channel];
|
||||
if (surface) {
|
||||
if (!gs_stagesurface_map(surface, &frame->data[channel],
|
||||
&frame->linesize[channel]))
|
||||
return false;
|
||||
|
||||
video->mapped_surface = surface;
|
||||
video->mapped_surfaces[channel] = surface;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline uint32_t calc_linesize(uint32_t pos, uint32_t linesize)
|
||||
static const uint8_t *set_gpu_converted_plane(uint32_t width, uint32_t height,
|
||||
uint32_t linesize_input,
|
||||
uint32_t linesize_output,
|
||||
const uint8_t *in, uint8_t *out)
|
||||
{
|
||||
uint32_t size = pos % linesize;
|
||||
return size ? size : linesize;
|
||||
}
|
||||
|
||||
static void copy_dealign(uint8_t *dst, uint32_t dst_pos, uint32_t dst_linesize,
|
||||
const uint8_t *src, uint32_t src_pos,
|
||||
uint32_t src_linesize, uint32_t remaining)
|
||||
{
|
||||
while (remaining) {
|
||||
uint32_t src_remainder = src_pos % src_linesize;
|
||||
uint32_t dst_offset = dst_linesize - src_remainder;
|
||||
uint32_t src_offset = src_linesize - src_remainder;
|
||||
|
||||
if (remaining < dst_offset) {
|
||||
memcpy(dst + dst_pos, src + src_pos, remaining);
|
||||
src_pos += remaining;
|
||||
dst_pos += remaining;
|
||||
remaining = 0;
|
||||
} else {
|
||||
memcpy(dst + dst_pos, src + src_pos, dst_offset);
|
||||
src_pos += src_offset;
|
||||
dst_pos += dst_offset;
|
||||
remaining -= dst_offset;
|
||||
if ((width == linesize_input) && (width == linesize_output)) {
|
||||
size_t total = width * height;
|
||||
memcpy(out, in, total);
|
||||
in += total;
|
||||
} else {
|
||||
for (size_t y = 0; y < height; y++) {
|
||||
memcpy(out, in, width);
|
||||
out += linesize_output;
|
||||
in += linesize_input;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint32_t make_aligned_linesize_offset(uint32_t offset,
|
||||
uint32_t dst_linesize,
|
||||
uint32_t src_linesize)
|
||||
{
|
||||
uint32_t remainder = offset % dst_linesize;
|
||||
return (offset / dst_linesize) * src_linesize + remainder;
|
||||
}
|
||||
|
||||
static void fix_gpu_converted_alignment(struct obs_core_video *video,
|
||||
struct video_frame *output,
|
||||
const struct video_data *input)
|
||||
{
|
||||
uint32_t src_linesize = input->linesize[0];
|
||||
uint32_t dst_linesize = output->linesize[0] * 4;
|
||||
uint32_t src_pos = 0;
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
if (video->plane_linewidth[i] == 0)
|
||||
break;
|
||||
|
||||
src_pos = make_aligned_linesize_offset(
|
||||
video->plane_offsets[i], dst_linesize, src_linesize);
|
||||
|
||||
copy_dealign(output->data[i], 0, dst_linesize, input->data[0],
|
||||
src_pos, src_linesize, video->plane_sizes[i]);
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
static void set_gpu_converted_data(struct obs_core_video *video,
|
||||
@ -634,41 +554,91 @@ static void set_gpu_converted_data(struct obs_core_video *video,
|
||||
const struct video_data *input,
|
||||
const struct video_output_info *info)
|
||||
{
|
||||
if (input->linesize[0] == video->output_width * 4) {
|
||||
struct video_frame frame;
|
||||
if (video->using_nv12_tex) {
|
||||
const uint32_t width = info->width;
|
||||
const uint32_t height = info->height;
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
if (video->plane_linewidth[i] == 0)
|
||||
break;
|
||||
|
||||
frame.linesize[i] = video->plane_linewidth[i];
|
||||
frame.data[i] =
|
||||
input->data[0] + video->plane_offsets[i];
|
||||
}
|
||||
|
||||
video_frame_copy(output, &frame, info->format, info->height);
|
||||
|
||||
} else if (video->using_nv12_tex) {
|
||||
size_t width = info->width;
|
||||
size_t height = info->height;
|
||||
size_t height_d2 = height / 2;
|
||||
uint8_t *out_y = output->data[0];
|
||||
uint8_t *out_uv = output->data[1];
|
||||
uint8_t *in = input->data[0];
|
||||
|
||||
for (size_t y = 0; y < height; y++) {
|
||||
memcpy(out_y, in, width);
|
||||
out_y += output->linesize[0];
|
||||
in += input->linesize[0];
|
||||
}
|
||||
for (size_t y = 0; y < height_d2; y++) {
|
||||
memcpy(out_uv, in, width);
|
||||
out_uv += output->linesize[0];
|
||||
in += input->linesize[0];
|
||||
}
|
||||
const uint8_t *const in_uv = set_gpu_converted_plane(
|
||||
width, height, input->linesize[0], output->linesize[0],
|
||||
input->data[0], output->data[0]);
|
||||
|
||||
const uint32_t height_d2 = height / 2;
|
||||
set_gpu_converted_plane(width, height_d2, input->linesize[0],
|
||||
output->linesize[1], in_uv,
|
||||
output->data[1]);
|
||||
} else {
|
||||
fix_gpu_converted_alignment(video, output, input);
|
||||
switch (info->format) {
|
||||
case VIDEO_FORMAT_I420: {
|
||||
const uint32_t width = info->width;
|
||||
const uint32_t height = info->height;
|
||||
|
||||
set_gpu_converted_plane(width, height,
|
||||
input->linesize[0],
|
||||
output->linesize[0],
|
||||
input->data[0],
|
||||
output->data[0]);
|
||||
|
||||
const uint32_t width_d2 = width / 2;
|
||||
const uint32_t height_d2 = height / 2;
|
||||
|
||||
set_gpu_converted_plane(width_d2, height_d2,
|
||||
input->linesize[1],
|
||||
output->linesize[1],
|
||||
input->data[1],
|
||||
output->data[1]);
|
||||
|
||||
set_gpu_converted_plane(width_d2, height_d2,
|
||||
input->linesize[2],
|
||||
output->linesize[2],
|
||||
input->data[2],
|
||||
output->data[2]);
|
||||
|
||||
break;
|
||||
}
|
||||
case VIDEO_FORMAT_NV12: {
|
||||
const uint32_t width = info->width;
|
||||
const uint32_t height = info->height;
|
||||
|
||||
set_gpu_converted_plane(width, height,
|
||||
input->linesize[0],
|
||||
output->linesize[0],
|
||||
input->data[0],
|
||||
output->data[0]);
|
||||
|
||||
const uint32_t height_d2 = height / 2;
|
||||
set_gpu_converted_plane(width, height_d2,
|
||||
input->linesize[1],
|
||||
output->linesize[1],
|
||||
input->data[1],
|
||||
output->data[1]);
|
||||
|
||||
break;
|
||||
}
|
||||
case VIDEO_FORMAT_I444: {
|
||||
const uint32_t width = info->width;
|
||||
const uint32_t height = info->height;
|
||||
|
||||
set_gpu_converted_plane(width, height,
|
||||
input->linesize[0],
|
||||
output->linesize[0],
|
||||
input->data[0],
|
||||
output->data[0]);
|
||||
|
||||
set_gpu_converted_plane(width, height,
|
||||
input->linesize[1],
|
||||
output->linesize[1],
|
||||
input->data[1],
|
||||
output->data[1]);
|
||||
|
||||
set_gpu_converted_plane(width, height,
|
||||
input->linesize[2],
|
||||
output->linesize[2],
|
||||
input->data[2],
|
||||
output->data[2]);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
281
libobs/obs.c
281
libobs/obs.c
@ -42,117 +42,35 @@ static inline void make_video_info(struct video_output_info *vi,
|
||||
vi->cache_size = 6;
|
||||
}
|
||||
|
||||
#define PIXEL_SIZE 4
|
||||
|
||||
#define GET_ALIGN(val, align) (((val) + (align - 1)) & ~(align - 1))
|
||||
|
||||
static inline void set_420p_sizes(const struct obs_video_info *ovi)
|
||||
{
|
||||
struct obs_core_video *video = &obs->video;
|
||||
uint32_t chroma_pixels;
|
||||
uint32_t total_bytes;
|
||||
|
||||
chroma_pixels = (ovi->output_width * ovi->output_height / 4);
|
||||
chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE);
|
||||
|
||||
video->plane_offsets[0] = 0;
|
||||
video->plane_offsets[1] = ovi->output_width * ovi->output_height;
|
||||
video->plane_offsets[2] = video->plane_offsets[1] + chroma_pixels;
|
||||
|
||||
video->plane_linewidth[0] = ovi->output_width;
|
||||
video->plane_linewidth[1] = ovi->output_width / 2;
|
||||
video->plane_linewidth[2] = ovi->output_width / 2;
|
||||
|
||||
video->plane_sizes[0] = video->plane_offsets[1];
|
||||
video->plane_sizes[1] = video->plane_sizes[0] / 4;
|
||||
video->plane_sizes[2] = video->plane_sizes[1];
|
||||
|
||||
total_bytes = video->plane_offsets[2] + chroma_pixels;
|
||||
|
||||
video->conversion_height =
|
||||
(total_bytes / PIXEL_SIZE + ovi->output_width - 1) /
|
||||
ovi->output_width;
|
||||
|
||||
video->conversion_height = GET_ALIGN(video->conversion_height, 2);
|
||||
video->conversion_tech = "Planar420";
|
||||
}
|
||||
|
||||
static inline void set_nv12_sizes(const struct obs_video_info *ovi)
|
||||
{
|
||||
struct obs_core_video *video = &obs->video;
|
||||
uint32_t chroma_pixels;
|
||||
uint32_t total_bytes;
|
||||
|
||||
chroma_pixels = (ovi->output_width * ovi->output_height / 2);
|
||||
chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE);
|
||||
|
||||
video->plane_offsets[0] = 0;
|
||||
video->plane_offsets[1] = ovi->output_width * ovi->output_height;
|
||||
|
||||
video->plane_linewidth[0] = ovi->output_width;
|
||||
video->plane_linewidth[1] = ovi->output_width;
|
||||
|
||||
video->plane_sizes[0] = video->plane_offsets[1];
|
||||
video->plane_sizes[1] = video->plane_sizes[0] / 2;
|
||||
|
||||
total_bytes = video->plane_offsets[1] + chroma_pixels;
|
||||
|
||||
video->conversion_height =
|
||||
(total_bytes / PIXEL_SIZE + ovi->output_width - 1) /
|
||||
ovi->output_width;
|
||||
|
||||
video->conversion_height = GET_ALIGN(video->conversion_height, 2);
|
||||
video->conversion_tech = "NV12";
|
||||
}
|
||||
|
||||
static inline void set_444p_sizes(const struct obs_video_info *ovi)
|
||||
{
|
||||
struct obs_core_video *video = &obs->video;
|
||||
uint32_t chroma_pixels;
|
||||
uint32_t total_bytes;
|
||||
|
||||
chroma_pixels = (ovi->output_width * ovi->output_height);
|
||||
chroma_pixels = GET_ALIGN(chroma_pixels, PIXEL_SIZE);
|
||||
|
||||
video->plane_offsets[0] = 0;
|
||||
video->plane_offsets[1] = chroma_pixels;
|
||||
video->plane_offsets[2] = chroma_pixels + chroma_pixels;
|
||||
|
||||
video->plane_linewidth[0] = ovi->output_width;
|
||||
video->plane_linewidth[1] = ovi->output_width;
|
||||
video->plane_linewidth[2] = ovi->output_width;
|
||||
|
||||
video->plane_sizes[0] = chroma_pixels;
|
||||
video->plane_sizes[1] = chroma_pixels;
|
||||
video->plane_sizes[2] = chroma_pixels;
|
||||
|
||||
total_bytes = video->plane_offsets[2] + chroma_pixels;
|
||||
|
||||
video->conversion_height =
|
||||
(total_bytes / PIXEL_SIZE + ovi->output_width - 1) /
|
||||
ovi->output_width;
|
||||
|
||||
video->conversion_height = GET_ALIGN(video->conversion_height, 2);
|
||||
video->conversion_tech = "Planar444";
|
||||
}
|
||||
|
||||
static inline void calc_gpu_conversion_sizes(const struct obs_video_info *ovi)
|
||||
{
|
||||
obs->video.conversion_height = 0;
|
||||
memset(obs->video.plane_offsets, 0, sizeof(obs->video.plane_offsets));
|
||||
memset(obs->video.plane_sizes, 0, sizeof(obs->video.plane_sizes));
|
||||
memset(obs->video.plane_linewidth, 0,
|
||||
sizeof(obs->video.plane_linewidth));
|
||||
struct obs_core_video *video = &obs->video;
|
||||
|
||||
video->conversion_needed = false;
|
||||
video->conversion_techs[0] = NULL;
|
||||
video->conversion_techs[1] = NULL;
|
||||
video->conversion_techs[2] = NULL;
|
||||
video->conversion_width_i = 0.f;
|
||||
|
||||
switch ((uint32_t)ovi->output_format) {
|
||||
case VIDEO_FORMAT_I420:
|
||||
set_420p_sizes(ovi);
|
||||
video->conversion_needed = true;
|
||||
video->conversion_techs[0] = "Planar_Y";
|
||||
video->conversion_techs[1] = "Planar_U_Left";
|
||||
video->conversion_techs[2] = "Planar_V_Left";
|
||||
video->conversion_width_i = 1.f / (float)ovi->output_width;
|
||||
break;
|
||||
case VIDEO_FORMAT_NV12:
|
||||
set_nv12_sizes(ovi);
|
||||
video->conversion_needed = true;
|
||||
video->conversion_techs[0] = "NV12_Y";
|
||||
video->conversion_techs[1] = "NV12_UV";
|
||||
video->conversion_width_i = 1.f / (float)ovi->output_width;
|
||||
break;
|
||||
case VIDEO_FORMAT_I444:
|
||||
set_444p_sizes(ovi);
|
||||
video->conversion_needed = true;
|
||||
video->conversion_techs[0] = "Planar_Y";
|
||||
video->conversion_techs[1] = "Planar_U";
|
||||
video->conversion_techs[2] = "Planar_V";
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -167,7 +85,7 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
|
||||
? gs_nv12_available()
|
||||
: false;
|
||||
|
||||
if (!video->conversion_height) {
|
||||
if (!video->conversion_needed) {
|
||||
blog(LOG_INFO, "GPU conversion not available for format: %u",
|
||||
(unsigned int)ovi->output_format);
|
||||
video->gpu_conversion = false;
|
||||
@ -183,23 +101,96 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
|
||||
|
||||
#ifdef _WIN32
|
||||
if (video->using_nv12_tex) {
|
||||
gs_texture_create_nv12(&video->convert_texture,
|
||||
&video->convert_uv_texture,
|
||||
gs_texture_create_nv12(&video->convert_textures[0],
|
||||
&video->convert_textures[1],
|
||||
ovi->output_width, ovi->output_height,
|
||||
GS_RENDER_TARGET | GS_SHARED_KM_TEX);
|
||||
if (!video->convert_uv_texture)
|
||||
return false;
|
||||
} else {
|
||||
#endif
|
||||
video->convert_texture = gs_texture_create(
|
||||
ovi->output_width, video->conversion_height, GS_RGBA, 1,
|
||||
NULL, GS_RENDER_TARGET);
|
||||
video->convert_textures[0] =
|
||||
gs_texture_create(ovi->output_width, ovi->output_height,
|
||||
GS_R8, 1, NULL, GS_RENDER_TARGET);
|
||||
|
||||
const struct video_output_info *info =
|
||||
video_output_get_info(video->video);
|
||||
switch (info->format) {
|
||||
case VIDEO_FORMAT_I420:
|
||||
video->convert_textures[1] = gs_texture_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2,
|
||||
GS_R8, 1, NULL, GS_RENDER_TARGET);
|
||||
video->convert_textures[2] = gs_texture_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2,
|
||||
GS_R8, 1, NULL, GS_RENDER_TARGET);
|
||||
if (!video->convert_textures[2])
|
||||
return false;
|
||||
break;
|
||||
case VIDEO_FORMAT_NV12:
|
||||
video->convert_textures[1] = gs_texture_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2,
|
||||
GS_R8G8, 1, NULL, GS_RENDER_TARGET);
|
||||
break;
|
||||
case VIDEO_FORMAT_I444:
|
||||
video->convert_textures[1] = gs_texture_create(
|
||||
ovi->output_width, ovi->output_height, GS_R8, 1,
|
||||
NULL, GS_RENDER_TARGET);
|
||||
video->convert_textures[2] = gs_texture_create(
|
||||
ovi->output_width, ovi->output_height, GS_R8, 1,
|
||||
NULL, GS_RENDER_TARGET);
|
||||
if (!video->convert_textures[2])
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!video->convert_texture)
|
||||
if (!video->convert_textures[0])
|
||||
return false;
|
||||
if (!video->convert_textures[1])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool obs_init_gpu_copy_surfaces(struct obs_video_info *ovi, size_t i)
|
||||
{
|
||||
struct obs_core_video *video = &obs->video;
|
||||
|
||||
video->copy_surfaces[i][0] = gs_stagesurface_create(
|
||||
ovi->output_width, ovi->output_height, GS_R8);
|
||||
if (!video->copy_surfaces[i][0])
|
||||
return false;
|
||||
|
||||
const struct video_output_info *info =
|
||||
video_output_get_info(video->video);
|
||||
switch (info->format) {
|
||||
case VIDEO_FORMAT_I420:
|
||||
video->copy_surfaces[i][1] = gs_stagesurface_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2, GS_R8);
|
||||
if (!video->copy_surfaces[i][1])
|
||||
return false;
|
||||
video->copy_surfaces[i][2] = gs_stagesurface_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2, GS_R8);
|
||||
if (!video->copy_surfaces[i][2])
|
||||
return false;
|
||||
break;
|
||||
case VIDEO_FORMAT_NV12:
|
||||
video->copy_surfaces[i][1] = gs_stagesurface_create(
|
||||
ovi->output_width / 2, ovi->output_height / 2, GS_R8G8);
|
||||
if (!video->copy_surfaces[i][1])
|
||||
return false;
|
||||
break;
|
||||
case VIDEO_FORMAT_I444:
|
||||
video->copy_surfaces[i][1] = gs_stagesurface_create(
|
||||
ovi->output_width, ovi->output_height, GS_R8);
|
||||
if (!video->copy_surfaces[i][1])
|
||||
return false;
|
||||
video->copy_surfaces[i][2] = gs_stagesurface_create(
|
||||
ovi->output_width, ovi->output_height, GS_R8);
|
||||
if (!video->copy_surfaces[i][2])
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -207,25 +198,29 @@ static bool obs_init_gpu_conversion(struct obs_video_info *ovi)
|
||||
static bool obs_init_textures(struct obs_video_info *ovi)
|
||||
{
|
||||
struct obs_core_video *video = &obs->video;
|
||||
uint32_t output_height = video->gpu_conversion
|
||||
? video->conversion_height
|
||||
: ovi->output_height;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < NUM_TEXTURES; i++) {
|
||||
for (size_t i = 0; i < NUM_TEXTURES; i++) {
|
||||
#ifdef _WIN32
|
||||
if (video->using_nv12_tex) {
|
||||
video->copy_surfaces[i] = gs_stagesurface_create_nv12(
|
||||
ovi->output_width, ovi->output_height);
|
||||
if (!video->copy_surfaces[i])
|
||||
video->copy_surfaces[i][0] =
|
||||
gs_stagesurface_create_nv12(ovi->output_width,
|
||||
ovi->output_height);
|
||||
if (!video->copy_surfaces[i][0])
|
||||
return false;
|
||||
|
||||
} else {
|
||||
#endif
|
||||
video->copy_surfaces[i] = gs_stagesurface_create(
|
||||
ovi->output_width, output_height, GS_RGBA);
|
||||
if (!video->copy_surfaces[i])
|
||||
return false;
|
||||
if (video->gpu_conversion) {
|
||||
if (!obs_init_gpu_copy_surfaces(ovi, i))
|
||||
return false;
|
||||
} else {
|
||||
video->copy_surfaces[i][0] =
|
||||
gs_stagesurface_create(
|
||||
ovi->output_width,
|
||||
ovi->output_height, GS_RGBA);
|
||||
if (!video->copy_surfaces[i][0])
|
||||
return false;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
}
|
||||
#endif
|
||||
@ -465,23 +460,45 @@ static void obs_free_video(void)
|
||||
|
||||
gs_enter_context(video->graphics);
|
||||
|
||||
if (video->mapped_surface) {
|
||||
gs_stagesurface_unmap(video->mapped_surface);
|
||||
video->mapped_surface = NULL;
|
||||
for (size_t c = 0; c < NUM_CHANNELS; c++) {
|
||||
if (video->mapped_surfaces[c]) {
|
||||
gs_stagesurface_unmap(
|
||||
video->mapped_surfaces[c]);
|
||||
video->mapped_surfaces[c] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < NUM_TEXTURES; i++) {
|
||||
gs_stagesurface_destroy(video->copy_surfaces[i]);
|
||||
video->copy_surfaces[i] = NULL;
|
||||
for (size_t c = 0; c < NUM_CHANNELS; c++) {
|
||||
if (video->copy_surfaces[i][c]) {
|
||||
gs_stagesurface_destroy(
|
||||
video->copy_surfaces[i][c]);
|
||||
video->copy_surfaces[i][c] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gs_texture_destroy(video->render_texture);
|
||||
gs_texture_destroy(video->convert_texture);
|
||||
gs_texture_destroy(video->convert_uv_texture);
|
||||
|
||||
for (size_t c = 0; c < NUM_CHANNELS; c++) {
|
||||
if (video->convert_textures[c]) {
|
||||
gs_texture_destroy(video->convert_textures[c]);
|
||||
video->convert_textures[c] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < NUM_TEXTURES; i++) {
|
||||
for (size_t c = 0; c < NUM_CHANNELS; c++) {
|
||||
if (video->copy_surfaces[i][c]) {
|
||||
gs_stagesurface_destroy(
|
||||
video->copy_surfaces[i][c]);
|
||||
video->copy_surfaces[i][c] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gs_texture_destroy(video->output_texture);
|
||||
video->render_texture = NULL;
|
||||
video->convert_texture = NULL;
|
||||
video->convert_uv_texture = NULL;
|
||||
video->output_texture = NULL;
|
||||
|
||||
gs_leave_context();
|
||||
|
Loading…
x
Reference in New Issue
Block a user