Make a number of key optimizations
- Changed glMapBuffer to glMapBufferRange to allow invalidation. Using glMapBuffer alone was causing some unacceptable stalls.
- Changed dynamic buffers from GL_DYNAMIC_DRAW to GL_STREAM_DRAW because I had misunderstood the OpenGL specification.
- Added _OPENGL and _D3D11 builtin preprocessor macros to effects to allow special processing if needed.
- Added fmod support to shaders. (NOTE: D3D and GL do not behave identically when fmod is used with negative numbers. With positive numbers, however, they behave identically.)
- Created a planar conversion shader that converts from packed YUV to planar 4:2:0 right on the GPU without any CPU processing. This also reduces the required GPU download size to approximately 37.5% of its original size. GPU usage is down by 10 entire percentage points despite the extra required pass.
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
|
||||
#include <util/c99defs.h>
|
||||
|
||||
EXPORT const char *device_preprocessor_name(void);
|
||||
EXPORT device_t device_create(struct gs_init_data *data);
|
||||
EXPORT void device_destroy(device_t device);
|
||||
EXPORT void device_entercontext(device_t device);
|
||||
|
@@ -155,8 +155,12 @@ bool update_buffer(GLenum target, GLuint buffer, void *data, size_t size)
|
||||
if (!gl_bind_buffer(target, buffer))
|
||||
return false;
|
||||
|
||||
ptr = glMapBuffer(target, GL_WRITE_ONLY);
|
||||
success = gl_success("glMapBuffer");
|
||||
/* glMapBufferRange with these flags will actually give far better
|
||||
* performance than a plain glMapBuffer call */
|
||||
ptr = glMapBufferRange(target, 0, size,
|
||||
GL_MAP_WRITE_BIT |
|
||||
GL_MAP_INVALIDATE_BUFFER_BIT);
|
||||
success = gl_success("glMapBufferRange");
|
||||
if (success && ptr) {
|
||||
memcpy(ptr, data, size);
|
||||
glUnmapBuffer(target);
|
||||
|
@@ -239,7 +239,7 @@ static inline void gl_write_structs(struct gl_shader_parser *glsp)
|
||||
* clip -> (unsupported)
|
||||
* ddx -> dFdx
|
||||
* ddy -> dFdy
|
||||
* fmod -> (unsupported)
|
||||
* fmod -> mod (XXX: these are different if sign is negative)
|
||||
* frac -> fract
|
||||
* lerp -> mix
|
||||
* lit -> (unsupported)
|
||||
@@ -367,6 +367,8 @@ static bool gl_write_intrinsic(struct gl_shader_parser *glsp,
|
||||
dstr_cat(&glsp->gl_string, "fract");
|
||||
} else if (strref_cmp(&token->str, "lerp") == 0) {
|
||||
dstr_cat(&glsp->gl_string, "mix");
|
||||
} else if (strref_cmp(&token->str, "fmod") == 0) {
|
||||
dstr_cat(&glsp->gl_string, "mod");
|
||||
} else if (strref_cmp(&token->str, "rsqrt") == 0) {
|
||||
dstr_cat(&glsp->gl_string, "inversesqrt");
|
||||
} else if (strref_cmp(&token->str, "saturate") == 0) {
|
||||
|
@@ -163,6 +163,11 @@ void convert_sampler_info(struct gs_sampler_state *sampler,
|
||||
info->max_anisotropy, sampler->max_anisotropy);
|
||||
}
|
||||
|
||||
/* Returns the name of the builtin preprocessor macro for this graphics
 * backend; this implementation always returns the literal "_OPENGL". */
const char *device_preprocessor_name(void)
|
||||
{
|
||||
return "_OPENGL";
|
||||
}
|
||||
|
||||
device_t device_create(struct gs_init_data *info)
|
||||
{
|
||||
struct gs_device *device = bzalloc(sizeof(struct gs_device));
|
||||
|
@@ -20,7 +20,7 @@
|
||||
|
||||
static bool create_buffers(struct gs_vertex_buffer *vb)
|
||||
{
|
||||
GLenum usage = vb->dynamic ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW;
|
||||
GLenum usage = vb->dynamic ? GL_STREAM_DRAW : GL_STATIC_DRAW;
|
||||
size_t i;
|
||||
|
||||
if (!gl_create_buffer(GL_ARRAY_BUFFER, &vb->vertex_buffer,
|
||||
|
Reference in New Issue
Block a user