Fixed inline assembler for gcc in irrMath. Removed some code using the irrMath functions.
git-svn-id: svn://svn.code.sf.net/p/irrlicht/code/trunk@894 dfc29bdd-3216-0410-991c-e03cc46cb475master
parent
8b0f65e138
commit
f3a8858e09
|
@ -232,27 +232,30 @@ namespace core
|
|||
}
|
||||
|
||||
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
|
||||
REALINLINE void clearFPUException ()
|
||||
{
|
||||
#ifdef feclearexcept
|
||||
feclearexcept(FE_ALL_EXCEPT);
|
||||
#elif defined(_MSC_VER)
|
||||
__asm fnclex;
|
||||
#elif defined(__GNUC__)
|
||||
__asm__ __volatile__ ("fclex \n\t");
|
||||
#else
|
||||
# warn clearFPUException not supported.
|
||||
#endif
|
||||
}
|
||||
|
||||
// comes from Nvidia
|
||||
#if 1
|
||||
REALINLINE f32 reciprocal_squareroot(const f32 x)
|
||||
{
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
// comes from Nvidia
|
||||
#if 1
|
||||
u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
|
||||
f32 y = *(f32*)&tmp;
|
||||
return y * (1.47f - 0.47f * x * y * y);
|
||||
}
|
||||
#endif
|
||||
|
||||
// an sse2 version
|
||||
#if 0
|
||||
REALINLINE f32 reciprocal_squareroot(const f32 x)
|
||||
{
|
||||
#else
|
||||
// an sse2 version
|
||||
__asm
|
||||
{
|
||||
movss xmm0, x
|
||||
|
@ -260,117 +263,137 @@ namespace core
|
|||
movss x, xmm0
|
||||
}
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
#else // no fast math
|
||||
return 1.f / sqrtf ( x );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
//! i do not divide through 0.. (fpu expection)
|
||||
// instead set f to a high value to get a return value near zero..
|
||||
// -1000000000000.f.. is use minus to stay negative..
|
||||
// must test's here (plane.normal dot anything ) checks on <= 0.f
|
||||
REALINLINE f32 reciprocal ( const f32 f )
|
||||
{
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
//! i do not divide through 0.. (fpu expection)
|
||||
// instead set f to a high value to get a return value near zero..
|
||||
// -1000000000000.f.. is use minus to stay negative..
|
||||
// must test's here (plane.normal dot anything ) checks on <= 0.f
|
||||
return 1.f / f;
|
||||
//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
|
||||
//return 1.f / FR ( x );
|
||||
#else // no fast math
|
||||
return 1.f / f;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
REALINLINE f32 reciprocal_approxim ( const f32 p )
|
||||
{
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
register u32 x = 0x7F000000 - IR ( p );
|
||||
const f32 r = FR ( x );
|
||||
return r * (2.0f - p * r);
|
||||
#else // no fast math
|
||||
return 1.f / p;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
REALINLINE s32 floor32(f32 x)
|
||||
{
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
const f32 h = 0.5f;
|
||||
|
||||
s32 t;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
__asm
|
||||
{
|
||||
fld x
|
||||
fld x
|
||||
fsub h
|
||||
fistp t
|
||||
fistp t
|
||||
}
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
__asm__ __volatile__ (
|
||||
"fsub %2 \n\t"
|
||||
"fistpl %0"
|
||||
: "=m" (t)
|
||||
: "t" (x), "f" (h)
|
||||
: "st"
|
||||
);
|
||||
#else
|
||||
# warn IRRLICHT_FAST_MATH not supported.
|
||||
return (s32) floorf ( x );
|
||||
#endif
|
||||
return t;
|
||||
#else // no fast math
|
||||
return (s32) floorf ( x );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
REALINLINE s32 ceil32 ( f32 x )
|
||||
{
|
||||
#ifdef IRRLICHT_FAST_MATH
|
||||
const f32 h = 0.5f;
|
||||
|
||||
s32 t;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
__asm
|
||||
{
|
||||
fld x
|
||||
fld x
|
||||
fadd h
|
||||
fistp t
|
||||
fistp t
|
||||
}
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
__asm__ __volatile__ (
|
||||
"fadd %2 \n\t"
|
||||
"fistpl %0 \n\t"
|
||||
: "=m"(t)
|
||||
: "t"(x), "f"(h)
|
||||
: "st"
|
||||
);
|
||||
#else
|
||||
# warn IRRLICHT_FAST_MATH not supported.
|
||||
return (s32) ceilf ( x );
|
||||
#endif
|
||||
return t;
|
||||
|
||||
#else // not fast math
|
||||
return (s32) ceilf ( x );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
REALINLINE s32 round32(f32 x)
|
||||
{
|
||||
#if defined(IRRLICHT_FAST_MATH)
|
||||
s32 t;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
__asm
|
||||
{
|
||||
fld x
|
||||
fistp t
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
__asm__ __volatile__ (
|
||||
"fistpl %0 \n\t"
|
||||
: "=m"(t)
|
||||
: "t"(x)
|
||||
: "st"
|
||||
);
|
||||
#else
|
||||
|
||||
REALINLINE void clearFPUException ()
|
||||
{
|
||||
# warn IRRLICHT_FAST_MATH not supported.
|
||||
return (s32) round(x);
|
||||
#endif
|
||||
return t;
|
||||
#else // no fast math
|
||||
return (s32) round(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
inline f32 reciprocal_squareroot(const f32 x)
|
||||
{
|
||||
return 1.f / sqrtf ( x );
|
||||
}
|
||||
|
||||
|
||||
inline f32 reciprocal ( const f32 x )
|
||||
{
|
||||
return 1.f / x;
|
||||
}
|
||||
|
||||
inline f32 reciprocal_approxim ( const f32 x )
|
||||
{
|
||||
return 1.f / x;
|
||||
}
|
||||
|
||||
|
||||
inline s32 floor32 ( f32 x )
|
||||
{
|
||||
return (s32) floorf ( x );
|
||||
}
|
||||
|
||||
inline s32 ceil32 ( f32 x )
|
||||
{
|
||||
return (s32) ceilf ( x );
|
||||
}
|
||||
|
||||
inline s32 round32 ( f32 x )
|
||||
{
|
||||
return (s32) ( x + 0.5f );
|
||||
}
|
||||
|
||||
|
||||
inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
|
||||
{
|
||||
return a > b ? (a > c ? a : c) : (b > c ? b : c);
|
||||
|
@ -381,8 +404,6 @@ namespace core
|
|||
return a < b ? (a < c ? a : c) : (b < c ? b : c);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
inline f32 fract ( f32 x )
|
||||
{
|
||||
return x - floorf ( x );
|
||||
|
@ -390,10 +411,9 @@ namespace core
|
|||
|
||||
inline f32 round ( f32 x )
|
||||
{
|
||||
return floorf ( x + 0.5f );
|
||||
return ::round(x);
|
||||
}
|
||||
|
||||
|
||||
} // end namespace core
|
||||
} // end namespace irr
|
||||
|
||||
|
|
|
@ -1957,7 +1957,7 @@ public:
|
|||
|
||||
virtual void setString(const char* text)
|
||||
{
|
||||
sscanf(text, "0x%x", (int*)(&Value));
|
||||
sscanf(text, "0x%x", (unsigned int*)(&Value));
|
||||
}
|
||||
|
||||
virtual E_ATTRIBUTE_TYPE getType() const
|
||||
|
|
|
@ -61,7 +61,7 @@ void COpenGLExtensionHandler::dump() const
|
|||
void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
|
||||
{
|
||||
const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(glGetString(GL_VERSION)));
|
||||
Version = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f);
|
||||
Version = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
|
||||
if ( Version >= 102)
|
||||
os::Printer::log("OpenGL driver version is 1.2 or better.", ELL_INFORMATION);
|
||||
else
|
||||
|
@ -375,7 +375,7 @@ void COpenGLExtensionHandler::initExtensions(bool stencilBuffer)
|
|||
else
|
||||
{
|
||||
const f32 ver = core::fast_atof(reinterpret_cast<const c8*>(shaderVersion));
|
||||
ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32((ver-floor(ver))*10.0f);
|
||||
ShaderLanguageVersion = core::floor32(ver)*100+core::ceil32(core::fract(ver)*10.0f);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -989,13 +989,6 @@ inline f32 CQ3LevelMesh::Blend( const f64 s[3], const f64 t[3], const tBSPVertex
|
|||
return (f32) res;
|
||||
}
|
||||
|
||||
//!helper function
|
||||
inline s32 s32_min ( s32 a, s32 b)
|
||||
{
|
||||
s32 mask = (a - b) >> 31;
|
||||
return (a & mask) | (b & ~mask);
|
||||
}
|
||||
|
||||
void CQ3LevelMesh::S3DVertex2TCoords_64::copyto ( video::S3DVertex2TCoords &dest ) const
|
||||
{
|
||||
dest.Pos.X = core::round ( (f32) Pos.X );
|
||||
|
@ -1041,10 +1034,10 @@ void CQ3LevelMesh::copy ( S3DVertex2TCoords_64 * dest, const tBSPVertex * source
|
|||
|
||||
if ( vertexcolor )
|
||||
{
|
||||
u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 );
|
||||
u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 );
|
||||
u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 );
|
||||
u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 );
|
||||
u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
|
||||
u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
|
||||
u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
|
||||
u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );
|
||||
|
||||
dest->Color.set ( a * 1.f/255.f,
|
||||
r * 1.f/255.f,
|
||||
|
@ -1082,10 +1075,10 @@ inline void CQ3LevelMesh::copy ( video::S3DVertex2TCoords * dest, const tBSPVert
|
|||
|
||||
if ( vertexcolor )
|
||||
{
|
||||
u32 a = s32_min ( source->color[3] * quake3::defaultModulate, 255 );
|
||||
u32 r = s32_min ( source->color[0] * quake3::defaultModulate, 255 );
|
||||
u32 g = s32_min ( source->color[1] * quake3::defaultModulate, 255 );
|
||||
u32 b = s32_min ( source->color[2] * quake3::defaultModulate, 255 );
|
||||
u32 a = core::s32_min ( source->color[3] * quake3::defaultModulate, 255 );
|
||||
u32 r = core::s32_min ( source->color[0] * quake3::defaultModulate, 255 );
|
||||
u32 g = core::s32_min ( source->color[1] * quake3::defaultModulate, 255 );
|
||||
u32 b = core::s32_min ( source->color[2] * quake3::defaultModulate, 255 );
|
||||
|
||||
dest->Color.color = a << 24 | r << 16 | g << 8 | b;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue