From 44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 22 Dec 2009 05:40:49 +0100 Subject: [PATCH] Use SSE variants for IRFPM_FLOOR/CEIL/TRUNC unless SSE4.1 available. --- src/lj_asm.c | 18 ++++++++++++++++++ src/lj_vm.h | 3 +++ 2 files changed, 21 insertions(+) diff --git a/src/lj_asm.c b/src/lj_asm.c index f26a40a5..c2cc4342 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2020,6 +2020,16 @@ static void asm_fpmath(ASMState *as, IRIns *ir) as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ } *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ + } else if (fpm <= IRFPM_TRUNC) { + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + ra_destreg(as, ir, RID_XMM0); + emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse : + fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); + ra_left(as, RID_XMM0, ir->op1); } else { int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ Reg dest = ir->r; @@ -3275,6 +3285,14 @@ static void asm_setup_regsp(ASMState *as, Trace *T) if (inloop) as->modset = RSET_SCRATCH; break; + case IR_FPMATH: + if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { + ir->prev = REGSP_HINT(RID_XMM0); + if (inloop) + as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); + continue; + } + break; /* Non-constant shift counts need to be in RID_ECX. */ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) diff --git a/src/lj_vm.h b/src/lj_vm.h index e4adc8db..07adc36d 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,6 +37,9 @@ LJ_ASMF void lj_vm_exit_interp(void); LJ_ASMF void lj_vm_floor(void); LJ_ASMF void lj_vm_ceil(void); LJ_ASMF void lj_vm_trunc(void); +LJ_ASMF void lj_vm_floor_sse(void); +LJ_ASMF void lj_vm_ceil_sse(void); +LJ_ASMF void lj_vm_trunc_sse(void); LJ_ASMF void lj_vm_exp(void); LJ_ASMF void lj_vm_exp2(void); LJ_ASMF void lj_vm_pow(void);