libobs/graphics: Add SSE intrinsics for matrix4_transpose

This commit is contained in:
hwdro 2016-05-26 21:37:03 -07:00 committed by jp9000
parent 017d77b399
commit e3847109be

View File

@@ -261,25 +261,38 @@ bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)
/*
 * Stores the transpose of *m in *dst.
 *
 * dst and m may point to the same matrix: in that case the source is first
 * copied to a local and the transpose is taken from the copy, so the writes
 * below never clobber a row that still has to be read.
 *
 * With NO_INTRINSICS defined the sixteen elements are assigned one by one;
 * otherwise the four rows are interleaved with SSE unpack instructions.
 */
void matrix4_transpose(struct matrix4 *dst, const struct matrix4 *m)
{
	if (dst == m) {
		struct matrix4 temp = *m;
		matrix4_transpose(dst, &temp);
		return;
	}

#ifdef NO_INTRINSICS
	dst->x.x = m->x.x;
	dst->x.y = m->y.x;
	dst->x.z = m->z.x;
	dst->x.w = m->t.x;
	dst->y.x = m->x.y;
	dst->y.y = m->y.y;
	dst->y.z = m->z.y;
	dst->y.w = m->t.y;
	dst->z.x = m->x.z;
	dst->z.y = m->y.z;
	dst->z.z = m->z.z;
	dst->z.w = m->t.z;
	dst->t.x = m->x.w;
	dst->t.y = m->y.w;
	dst->t.z = m->z.w;
	dst->t.w = m->t.w;
#else
	/*
	 * Two rounds of unpack (interleave) perform the 4x4 transpose.
	 * Lane comments assume the .m member overlays x, y, z, w in that
	 * order -- TODO confirm against the vec4 definition.
	 *
	 * Round 1 pairs rows x/z and y/t:
	 *   a0 = (x.x, z.x, x.y, z.y)    a1 = (y.x, t.x, y.y, t.y)
	 *   a2 = (x.z, z.z, x.w, z.w)    a3 = (y.z, t.z, y.w, t.w)
	 */
	__m128 a0 = _mm_unpacklo_ps(m->x.m, m->z.m);
	__m128 a1 = _mm_unpacklo_ps(m->y.m, m->t.m);
	__m128 a2 = _mm_unpackhi_ps(m->x.m, m->z.m);
	__m128 a3 = _mm_unpackhi_ps(m->y.m, m->t.m);

	/* Round 2 interleaves the pairs into complete source columns. */
	dst->x.m = _mm_unpacklo_ps(a0, a1);
	dst->y.m = _mm_unpackhi_ps(a0, a1);
	dst->z.m = _mm_unpacklo_ps(a2, a3);
	dst->t.m = _mm_unpackhi_ps(a2, a3);
#endif
}