From aa22b61e3ec71acf31ce46d0ecf2bf2755cf6b22 Mon Sep 17 00:00:00 2001 From: James Park Date: Sun, 2 Jun 2019 06:49:38 -0700 Subject: [PATCH] libobs: Full-screen triangle format conversions The cache coherency of rasterization for full-screen passes is better using an oversized triangle that is clipped rather than two triangles. Traversal order of rasterization is GPU-specific, but will almost certainly be better using an undivided primitive. A smaller benefit is that quads along the diagonal are not evaluated multiple times, but that's minor in comparison. Redo format shaders to bypass vertex buffer and input layout. Add global shader bool "obs_glsl_compile" to make API-specific decisions, i.e. handle upside-down UVs. gs_ortho is not needed for format conversion because the vertex shader does not use ViewProj anymore. This can be applied to more situations, but start small first. Testbed full screen passes, Intel HD Graphics 530: RGBA -> UYVX: 467 -> 439 us, ~6% savings UYVX -> uv: 295 -> 239 us, ~19% savings --- libobs-d3d11/d3d11-shader.cpp | 13 ++++--- libobs-d3d11/d3d11-shaderprocessor.cpp | 11 ++++-- libobs-d3d11/d3d11-subsystem.cpp | 2 +- libobs-opengl/gl-shaderparser.c | 19 +++++++--- libobs-opengl/gl-subsystem.c | 6 ++-- libobs-opengl/gl-vertexbuffer.c | 2 +- libobs/data/format_conversion.effect | 49 +++++++++++++++----------- libobs/obs-source.c | 4 +-- libobs/obs-video.c | 5 ++- 9 files changed, 67 insertions(+), 44 deletions(-) diff --git a/libobs-d3d11/d3d11-shader.cpp b/libobs-d3d11/d3d11-shader.cpp index 1a710486b..35f4a7cd5 100644 --- a/libobs-d3d11/d3d11-shader.cpp +++ b/libobs-d3d11/d3d11-shader.cpp @@ -68,11 +68,14 @@ gs_vertex_shader::gs_vertex_shader(gs_device_t *device, const char *file, if (FAILED(hr)) throw HRError("Failed to create vertex shader", hr); - hr = device->device->CreateInputLayout(layoutData.data(), - (UINT)layoutData.size(), - data.data(), data.size(), layout.Assign()); - if (FAILED(hr)) - throw HRError("Failed to create input 
layout", hr); + const UINT layoutSize = (UINT)layoutData.size(); + if (layoutSize > 0) { + hr = device->device->CreateInputLayout(layoutData.data(), + (UINT)layoutSize, + data.data(), data.size(), layout.Assign()); + if (FAILED(hr)) + throw HRError("Failed to create input layout", hr); + } viewProj = gs_shader_get_param_by_name(this, "ViewProj"); world = gs_shader_get_param_by_name(this, "World"); diff --git a/libobs-d3d11/d3d11-shaderprocessor.cpp b/libobs-d3d11/d3d11-shaderprocessor.cpp index 7b0b14db4..801f76662 100644 --- a/libobs-d3d11/d3d11-shaderprocessor.cpp +++ b/libobs-d3d11/d3d11-shaderprocessor.cpp @@ -22,9 +22,9 @@ using namespace std; static const char *semanticInputNames[] = - {"POSITION", "NORMAL", "COLOR", "TANGENT", "TEXCOORD"}; + {"POSITION", "NORMAL", "COLOR", "TANGENT", "TEXCOORD", "VERTEXID"}; static const char *semanticOutputNames[] = - {"SV_Position", "NORMAL", "COLOR", "TANGENT", "TEXCOORD"}; + {"SV_Position", "NORMAL", "COLOR", "TANGENT", "TEXCOORD", "VERTEXID"}; static const char *ConvertSemanticName(const char *name) { @@ -111,7 +111,8 @@ static void BuildInputLayoutFromVars(shader_parser *parser, darray *vars, shader_var *var = array+i; if (var->mapping) { - AddInputLayoutVar(var, layout); + if (strcmp(var->mapping, "VERTEXID") != 0) + AddInputLayoutVar(var, layout); } else { shader_struct *st = shader_parser_getstruct(parser, var->type); @@ -197,6 +198,8 @@ void ShaderProcessor::BuildSamplers(vector> &samplers) void ShaderProcessor::BuildString(string &outputString) { stringstream output; + output << "static const bool obs_glsl_compile = false;\n\n"; + cf_token *token = cf_preprocessor_get_tokens(&parser.cfp.pp); while (token->type != CFTOKEN_NONE) { /* cheaply just replace specific tokens */ @@ -214,6 +217,8 @@ void ShaderProcessor::BuildString(string &outputString) throw "texture_rect is not supported in D3D"; else if (strref_cmp(&token->str, "sampler_state") == 0) output << "SamplerState"; + else if (strref_cmp(&token->str, 
"VERTEXID") == 0) + output << "SV_VertexID"; else output.write(token->str.array, token->str.len); diff --git a/libobs-d3d11/d3d11-subsystem.cpp b/libobs-d3d11/d3d11-subsystem.cpp index 775580a30..5803ae365 100644 --- a/libobs-d3d11/d3d11-subsystem.cpp +++ b/libobs-d3d11/d3d11-subsystem.cpp @@ -1503,7 +1503,7 @@ void device_draw(gs_device_t *device, enum gs_draw_mode draw_mode, if (!device->curPixelShader) throw "No pixel shader specified"; - if (!device->curVertexBuffer) + if (!device->curVertexBuffer && (num_verts == 0)) throw "No vertex buffer specified"; if (!device->curSwapChain && !device->curRenderTarget) diff --git a/libobs-opengl/gl-shaderparser.c b/libobs-opengl/gl-shaderparser.c index 2c607acf7..532a20e18 100644 --- a/libobs-opengl/gl-shaderparser.c +++ b/libobs-opengl/gl-shaderparser.c @@ -163,7 +163,7 @@ static void gl_write_storage_var(struct gl_shader_parser *glsp, if (st) { gl_unwrap_storage_struct(glsp, st, var->name, input, prefix); - } else { + } else if (!input || strcmp(var->mapping, "VERTEXID")) { struct gl_parser_attrib attrib; gl_parser_attrib_init(&attrib); @@ -536,9 +536,13 @@ static void gl_write_main_storage_assign(struct gl_shader_parser *glsp, if (!dstr_is_empty(&dst_copy)) dstr_cat_dstr(&glsp->gl_string, &dst_copy); dstr_cat(&glsp->gl_string, " = "); - if (src) - dstr_cat(&glsp->gl_string, src); - dstr_cat(&glsp->gl_string, var->name); + if (input && (strcmp(var->mapping, "VERTEXID") == 0)) + dstr_cat(&glsp->gl_string, "uint(gl_VertexID)"); + else { + if (src) + dstr_cat(&glsp->gl_string, src); + dstr_cat(&glsp->gl_string, var->name); + } dstr_cat(&glsp->gl_string, ";\n"); if (!input) @@ -628,6 +632,12 @@ static void gl_rename_attributes(struct gl_shader_parser *glsp) size_t val; if (attrib->input) { + if (strcmp(attrib->mapping, "VERTEXID") == 0) { + dstr_replace(&glsp->gl_string, attrib->name.array, + "gl_VertexID"); + continue; + } + prefix = glsp->input_prefix; val = input_idx++; } else { @@ -653,6 +663,7 @@ static bool 
gl_shader_buildstring(struct gl_shader_parser *glsp) } dstr_copy(&glsp->gl_string, "#version 150\n\n"); + dstr_cat(&glsp->gl_string, "const bool obs_glsl_compile = true;\n\n"); gl_write_params(glsp); gl_write_inputs(glsp, main_func); gl_write_outputs(glsp, main_func); diff --git a/libobs-opengl/gl-subsystem.c b/libobs-opengl/gl-subsystem.c index d91a38d1a..9c1a68478 100644 --- a/libobs-opengl/gl-subsystem.c +++ b/libobs-opengl/gl-subsystem.c @@ -891,7 +891,7 @@ void device_begin_scene(gs_device_t *device) clear_textures(device); } -static inline bool can_render(const gs_device_t *device) +static inline bool can_render(const gs_device_t *device, uint32_t num_verts) { if (!device->cur_vertex_shader) { blog(LOG_ERROR, "No vertex shader specified"); @@ -903,7 +903,7 @@ static inline bool can_render(const gs_device_t *device) return false; } - if (!device->cur_vertex_buffer) { + if (!device->cur_vertex_buffer && (num_verts == 0)) { blog(LOG_ERROR, "No vertex buffer specified"); return false; } @@ -977,7 +977,7 @@ void device_draw(gs_device_t *device, enum gs_draw_mode draw_mode, gs_effect_t *effect = gs_get_effect(); struct gs_program *program; - if (!can_render(device)) + if (!can_render(device, num_verts)) goto fail; if (effect) diff --git a/libobs-opengl/gl-vertexbuffer.c b/libobs-opengl/gl-vertexbuffer.c index a56e6515a..016de4be4 100644 --- a/libobs-opengl/gl-vertexbuffer.c +++ b/libobs-opengl/gl-vertexbuffer.c @@ -257,7 +257,7 @@ bool load_vb_buffers(struct gs_program *program, struct gs_vertex_buffer *vb, struct gs_shader *shader = program->vertex_shader; size_t i; - if (!gl_bind_vertex_array(vb->vao)) + if (vb && !gl_bind_vertex_array(vb->vao)) return false; for (i = 0; i < shader->attribs.num; i++) { diff --git a/libobs/data/format_conversion.effect b/libobs/data/format_conversion.effect index 5cdda7a28..93e4163b2 100644 --- a/libobs/data/format_conversion.effect +++ b/libobs/data/format_conversion.effect @@ -17,8 +17,6 @@ //#define DEBUGGING -uniform float4x4 
ViewProj; - uniform float u_plane_offset; uniform float v_plane_offset; @@ -59,11 +57,20 @@ struct VertInOut { float2 uv : TEXCOORD0; }; -VertInOut VSDefault(VertInOut vert_in) +VertInOut VSDefault(uint id : VERTEXID) { + float idHigh = float(id >> 1); + float idLow = float(id & uint(1)); + + float x = idHigh * 4.0 - 1.0; + float y = idLow * 4.0 - 1.0; + + float u = idHigh * 2.0; + float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0); + VertInOut vert_out; - vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj); - vert_out.uv = vert_in.uv; + vert_out.pos = float4(x, y, 0.0, 1.0); + vert_out.uv = float2(u, v); return vert_out; } @@ -407,7 +414,7 @@ technique Planar420 { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPlanar420(vert_in); } } @@ -416,7 +423,7 @@ technique Planar444 { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPlanar444(vert_in); } } @@ -425,7 +432,7 @@ technique NV12 { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSNV12(vert_in); } } @@ -434,7 +441,7 @@ technique NV12_Y { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSNV12_Y(vert_in); } } @@ -443,7 +450,7 @@ technique NV12_UV { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSNV12_UV(vert_in); } } @@ -452,7 +459,7 @@ technique UYVY_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3); } } @@ -461,7 +468,7 @@ technique YUY2_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0); } } @@ -470,7 +477,7 @@ technique YVYU_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0); } } @@ -479,7 
+486,7 @@ technique I420_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPlanar420_Reverse(vert_in); } } @@ -488,7 +495,7 @@ technique I444_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSPlanar444_Reverse(vert_in); } } @@ -497,7 +504,7 @@ technique NV12_Reverse { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSNV12_Reverse(vert_in); } } @@ -506,7 +513,7 @@ technique Y800_Limited { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSY800_Limited(vert_in); } } @@ -515,7 +522,7 @@ technique Y800_Full { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSY800_Full(vert_in); } } @@ -524,7 +531,7 @@ technique RGB_Limited { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSRGB_Limited(vert_in); } } @@ -533,7 +540,7 @@ technique BGR3_Limited { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSBGR3_Limited(vert_in); } } @@ -542,7 +549,7 @@ technique BGR3_Full { pass { - vertex_shader = VSDefault(vert_in); + vertex_shader = VSDefault(id); pixel_shader = PSBGR3_Full(vert_in); } } diff --git a/libobs/obs-source.c b/libobs/obs-source.c index bab8f3568..6c9cb77e0 100644 --- a/libobs/obs-source.c +++ b/libobs/obs-source.c @@ -1672,9 +1672,7 @@ static bool update_async_texrender(struct obs_source *source, sizeof(float) * 3); } - gs_ortho(0.f, (float)cx, 0.f, (float)cy, -100.f, 100.f); - - gs_draw_sprite(tex, 0, cx, cy); + gs_draw(GS_TRIS, 0, 3); gs_technique_end_pass(tech); gs_technique_end(tech); diff --git a/libobs/obs-video.c b/libobs/obs-video.c index 79eca0e44..2e0515edc 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -293,8 +293,7 @@ static void render_convert_texture(struct obs_core_video *video) passes = gs_technique_begin(tech); 
for (i = 0; i < passes; i++) { gs_technique_begin_pass(tech, i); - gs_draw_sprite(texture, 0, video->output_width, - video->conversion_height); + gs_draw(GS_TRIS, 0, 3); gs_technique_end_pass(tech); } gs_technique_end(tech); @@ -324,7 +323,7 @@ static void render_nv12(struct obs_core_video *video, gs_texture_t *target, passes = gs_technique_begin(tech); for (i = 0; i < passes; i++) { gs_technique_begin_pass(tech, i); - gs_draw_sprite(texture, 0, width, height); + gs_draw(GS_TRIS, 0, 3); gs_technique_end_pass(tech); } gs_technique_end(tech);