Make a number of key optimizations

- Changed glMapBuffer to glMapBufferRange to allow invalidation.  Using
   just glMapBuffer alone was causing some unacceptable stalls.

 - Changed dynamic buffers from GL_DYNAMIC_DRAW to GL_STREAM_DRAW
   because I had misunderstood the OpenGL specification

 - Added _OPENGL and _D3D11 builtin preprocessor macros to effects to
   allow special processing if needed

 - Added fmod support to shaders (NOTE: D3D and GL do not function
   identically with negative numbers when using this.  Positive numbers
   however function identically)

 - Created a planar conversion shader that converts from packed YUV to
   planar 420 right on the GPU without any CPU processing.  This also
   reduces the required GPU download size to approximately 37.5% of its
   normal size.  GPU usage is down by 10 entire percentage points
   despite the extra required pass.
This commit is contained in:
jp9000
2014-02-16 19:28:21 -07:00
parent fc368f663e
commit 2dbbffe4a2
21 changed files with 470 additions and 24 deletions

View File

@@ -19,6 +19,7 @@
#include <util/c99defs.h>
EXPORT const char *device_preprocessor_name(void);
EXPORT device_t device_create(struct gs_init_data *data);
EXPORT void device_destroy(device_t device);
EXPORT void device_entercontext(device_t device);

View File

@@ -155,8 +155,12 @@ bool update_buffer(GLenum target, GLuint buffer, void *data, size_t size)
if (!gl_bind_buffer(target, buffer))
return false;
ptr = glMapBuffer(target, GL_WRITE_ONLY);
success = gl_success("glMapBuffer");
/* glMapBufferRange with these flags will actually give far better
* performance than a plain glMapBuffer call */
ptr = glMapBufferRange(target, 0, size,
GL_MAP_WRITE_BIT |
GL_MAP_INVALIDATE_BUFFER_BIT);
success = gl_success("glMapBufferRange");
if (success && ptr) {
memcpy(ptr, data, size);
glUnmapBuffer(target);

View File

@@ -239,7 +239,7 @@ static inline void gl_write_structs(struct gl_shader_parser *glsp)
* clip -> (unsupported)
* ddx -> dFdx
* ddy -> dFdy
* fmod -> (unsupported)
* fmod -> mod (XXX: these are different if sign is negative)
* frac -> fract
* lerp -> mix
* lit -> (unsupported)
@@ -367,6 +367,8 @@ static bool gl_write_intrinsic(struct gl_shader_parser *glsp,
dstr_cat(&glsp->gl_string, "fract");
} else if (strref_cmp(&token->str, "lerp") == 0) {
dstr_cat(&glsp->gl_string, "mix");
} else if (strref_cmp(&token->str, "fmod") == 0) {
dstr_cat(&glsp->gl_string, "mod");
} else if (strref_cmp(&token->str, "rsqrt") == 0) {
dstr_cat(&glsp->gl_string, "inversesqrt");
} else if (strref_cmp(&token->str, "saturate") == 0) {

View File

@@ -163,6 +163,11 @@ void convert_sampler_info(struct gs_sampler_state *sampler,
info->max_anisotropy, sampler->max_anisotropy);
}
/* Returns the name of the builtin preprocessor macro this backend exposes
 * to effects.  The returned string has static storage duration, so the
 * caller must not free or modify it. */
const char *device_preprocessor_name(void)
{
	static const char backend_macro[] = "_OPENGL";

	return backend_macro;
}
device_t device_create(struct gs_init_data *info)
{
struct gs_device *device = bzalloc(sizeof(struct gs_device));

View File

@@ -20,7 +20,7 @@
static bool create_buffers(struct gs_vertex_buffer *vb)
{
GLenum usage = vb->dynamic ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW;
GLenum usage = vb->dynamic ? GL_STREAM_DRAW : GL_STATIC_DRAW;
size_t i;
if (!gl_create_buffer(GL_ARRAY_BUFFER, &vb->vertex_buffer,