Use fastcall for remaining 1-arg/2-arg calls from interpreter.

Simplifies conversion to x64 calling conventions.
master
Mike Pall 2009-12-27 17:42:41 +01:00
parent 690760aa38
commit bc47063708
15 changed files with 1252 additions and 1308 deletions

View File

@ -588,14 +588,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| // - A return back from a lua_call() with (high) nresults adjustment.
| mov L:RB->top, BASE // Save current top held in BASE (yes).
| mov NRESULTS, RD // Need to fill only remainder with nil.
|.if X64
| mov CARG2d, RA // Caveat: CARG1d may be RA.
| mov CARG1d, L:RB
|.else
| mov ARG2, RA // Grow by wanted nresults+1.
| mov ARG1, L:RB
|.endif
| call extern lj_state_growstack // (lua_State *L, int n)
| mov FCARG2, RA
| mov FCARG1, L:RB
| call extern lj_state_growstack@8 // (lua_State *L, int n)
| mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
| jmp <3
|
@ -653,11 +648,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
|//-- Grow stack on-demand -----------------------------------------------
|
|->gate_c_growstack: // Grow stack for C function.
|.if X64
| mov CARG2d, LUA_MINSTACK
|.else
| mov ARG2, LUA_MINSTACK
|.endif
| mov FCARG2, LUA_MINSTACK
| jmp >1
|
|->gate_lv_growstack: // Grow stack for vararg Lua function.
@ -677,17 +668,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov L:RB->base, BASE
| mov L:RB->top, RC
| mov SAVE_PC, PC
|.if X64
| mov CARG2d, RA
| mov CARG1d, L:RB // Caveat: CARG1d may be RA.
|.else
| mov ARG2, RA
| mov ARG1, L:RB
|.endif
| mov FCARG2, RA
|1:
| mov FCARG1, L:RB
| // L:RB = L, L->base = new base, L->top = top
| // SAVE_PC = initial PC+1 (undefined for C functions)
| call extern lj_state_growstack // (lua_State *L, int n)
| call extern lj_state_growstack@8 // (lua_State *L, int n)
| mov RA, L:RB->base
| mov RC, L:RB->top
| mov LFUNC:RB, [RA-8]
@ -1189,20 +1175,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| jmp aword LFUNC:RB->gate
|
|->vmeta_len:
|.if X64
| mov L:RB, SAVE_L
| mov L:RB->base, BASE // Caveat: CARG2d may be BASE.
| lea CARG2d, [BASE+RD*8]
| mov CARG1d, L:RB
|.else
| lea RD, [BASE+RD*8]
| mov L:RB, SAVE_L
| mov ARG2, RD
| mov ARG1, L:RB
| mov L:RB->base, BASE
|.endif
| lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
| mov L:FCARG1, L:RB
| mov SAVE_PC, PC
| call extern lj_meta_len // (lua_State *L, TValue *o)
| call extern lj_meta_len@8 // (lua_State *L, TValue *o)
| // TValue * (metamethod) returned in eax (RC).
| mov BASE, L:RB->base
| jmp ->vmeta_binop // Binop call for compatibility.
@ -1243,19 +1221,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
|//-- Argument coercion for 'for' statement ------------------------------
|
|->vmeta_for:
|.if X64
| mov L:RB, SAVE_L
| mov L:RB->base, BASE // Caveat: CARG2d may be BASE.
| mov CARG2d, RA
| mov CARG1d, L:RB // Caveat: CARG1d may be RA.
|.else
| mov L:RB, SAVE_L
| mov ARG2, RA
| mov ARG1, L:RB
| mov L:RB->base, BASE
|.endif
| mov FCARG2, RA // Caveat: FCARG2 == BASE
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
| mov SAVE_PC, PC
| call extern lj_meta_for // (lua_State *L, TValue *base)
| call extern lj_meta_for@8 // (lua_State *L, TValue *base)
| mov BASE, L:RB->base
| mov RC, [PC-4]
| movzx RA, RCH
@ -1572,30 +1543,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
|.ffunc_1 ipairs_aux
| cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
| cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
| // Caveat: xmm0/xmm1/ARG2 used in getinth call, too.
if (sse) {
| movsd xmm0, qword [RA+8]
| sseconst_1 xmm1, RBa
|.if X64WIN
| addsd xmm1, xmm0
| cvtsd2si RC, xmm1
| movsd qword [RA-8], xmm1
|.else
| addsd xmm0, xmm1
| cvtsd2si RC, xmm0
| movsd qword [RA-8], xmm0
| .if not X64
| mov ARG2, RC
| .endif
|.endif
} else {
|.if not X64
| fld qword [RA+8]
| fld1
| faddp st1
| fist ARG2
| fist ARG1
| fstp qword [RA-8]
| mov RC, ARG2
| mov RC, ARG1
|.endif
}
| mov TAB:RB, [RA]
@ -1611,14 +1572,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| jmp ->fff_res2
|2: // Check for empty hash part first. Otherwise call C function.
| cmp dword TAB:RB->hmask, 0; je ->fff_res0
|.if X64
| mov CARG1d, TAB:RB
|.else
| mov ARG1, TAB:RB
|.endif
| mov TMP1, BASE // Save BASE and RA.
|.if X64 and not X64WIN
| mov FCARG1, TAB:RB
| mov RB, RA
| call extern lj_tab_getinth // (GCtab *t, int32_t key)
|.else
| xchg FCARG1, TAB:RB // Caveat: FCARG1 == RA
|.endif
| mov FCARG2, RC
| call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
| // cTValue * or NULL returned in eax (RC).
| mov RA, RB
| mov BASE, TMP1
@ -1825,28 +1787,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov RD, 1+2 // nresults+1 = 1 + false + error.
| jmp <7
|.else
|.if X64
| mov CARG2d, L:PC
| mov CARG1d, L:RB
|.else
| mov ARG2, L:PC
| mov ARG1, L:RB
|.endif
| call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
| mov FCARG2, L:PC
| mov FCARG1, L:RB
| call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
| // Error function does not return.
|.endif
|
|9: // Handle stack expansion on return from yield.
| mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
| mov L:RA->top, KBASE // Undo coroutine stack clearing.
|.if X64
| mov CARG2d, PC
| mov CARG1d, L:RB
| mov L:RA, TMP1
|.else
| mov ARG2, PC
| mov ARG1, L:RB
| mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
|.endif
| call extern lj_state_growstack // (lua_State *L, int n)
| mov L:RA->top, KBASE // Undo coroutine stack clearing.
| mov FCARG2, PC
| mov FCARG1, L:RB
| call extern lj_state_growstack@8 // (lua_State *L, int n)
| mov BASE, L:RB->base
| jmp <4 // Retry the stack move.
|.endmacro
@ -2493,13 +2449,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov TMP1, BASE // Save old BASE (relative).
| mov L:RB->base, RA
| lea RC, [RA+NARGS:RC*8-8]
| mov ARG1, L:RB
| lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
| mov L:RB->top, RC
| mov CFUNC:RA, [RA-8]
| mov CFUNC:RC, [RA-8]
| cmp BASE, L:RB->maxstack
| ja >5 // Need to grow stack.
| call aword CFUNC:RA->f // (lua_State *L)
|.if X64
| mov CARG1d, L:RB
|.else
| mov ARG1, L:RB
|.endif
| call aword CFUNC:RC->f // (lua_State *L)
| // Either throws an error or recovers and returns 0 or NRESULTS (+1).
| test RC, RC; jnz >3
|1: // Returned 0: retry fast path.
@ -2526,8 +2486,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| jmp ->fff_res
|
|5: // Grow stack for fallback handler.
| mov ARG2, LUA_MINSTACK
| call extern lj_state_growstack // (lua_State *L, int n)
| mov FCARG2, LUA_MINSTACK
| mov FCARG1, L:RB
| call extern lj_state_growstack@8 // (lua_State *L, int n)
| jmp <1 // Dumb retry (goes through ff first).
|
|->fff_gcstep: // Call GC step function.
@ -2541,13 +2502,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov L:RB->base, RA
| lea RC, [RA+NARGS:RC*8-8]
|.if X64
| mov CARG1d, L:RB
|.else
| mov ARG1, L:RB
|.endif
| mov FCARG1, L:RB
| mov L:RB->top, RC
| call extern lj_gc_step // (lua_State *L)
| call extern lj_gc_step@4 // (lua_State *L)
| mov RA, L:RB->base
| mov RC, L:RB->top
| sub RC, RA
@ -2619,17 +2576,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
#if LJ_HASJIT
| mov L:RB, SAVE_L
| mov L:RB->base, BASE
|.if X64
| mov CARG2d, PC
| lea CARG1d, [DISPATCH+GG_DISP2J]
|.else
| lea RA, [DISPATCH+GG_DISP2J]
| mov ARG2, PC
| mov ARG1, RA
|.endif
| mov FCARG2, PC
| lea FCARG1, [DISPATCH+GG_DISP2J]
| mov [DISPATCH+DISPATCH_J(L)], L:RB
| mov SAVE_PC, PC
| call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
| call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
| jmp <4
#endif
|
@ -2637,17 +2588,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
#if LJ_HASJIT
| mov L:RB, SAVE_L
| mov L:RB->base, BASE
|.if X64
| mov CARG2d, PC
| lea CARG1d, [DISPATCH+GG_DISP2J]
|.else
| lea RA, [DISPATCH+GG_DISP2J]
| mov ARG2, PC
| mov ARG1, RA
|.endif
| mov FCARG2, PC
| lea FCARG1, [DISPATCH+GG_DISP2J]
| mov [DISPATCH+DISPATCH_J(L)], L:RB
| mov SAVE_PC, PC
| call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
| call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
| mov BASE, L:RB->base
| // Dispatch the first instruction and optionally record it.
| ins_next
@ -2689,12 +2634,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
| mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
| mov [DISPATCH+DISPATCH_J(L)], L:RB
| lea RC, [esp+16]
| mov L:RB->base, BASE
| lea RA, [DISPATCH+GG_DISP2J]
| mov ARG2, RC
| mov ARG1, RA
| call extern lj_trace_exit // (jit_State *J, ExitState *ex)
| lea FCARG2, [esp+16]
| lea FCARG1, [DISPATCH+GG_DISP2J]
| call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
| // Interpreter C frame returned in eax.
| mov esp, eax // Reposition stack to C frame.
| mov BASE, L:RB->base
@ -3863,11 +3806,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| mov L:RB, SAVE_L
| cmp dword L:RB->openupval, 0
| je >1
| lea RA, [BASE+RA*8]
| mov ARG2, RA
| mov ARG1, L:RB
| mov L:RB->base, BASE
| call extern lj_func_closeuv // (lua_State *L, TValue *level)
| lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
| call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
| mov BASE, L:RB->base
|1:
| ins_next
@ -4456,7 +4398,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| jbe <3 // No vararg slots?
| mov RB, RC
| shr RB, 3
| mov ARG2, RB // Store this for stack growth below.
| add RB, 1
| mov NRESULTS, RB // NRESULTS = #varargs+1
| mov L:RB, SAVE_L
@ -4479,8 +4420,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| mov L:RB->top, RA
| mov SAVE_PC, PC
| sub KBASE, BASE // Need delta, because BASE may change.
| mov ARG1, L:RB
| call extern lj_state_growstack // (lua_State *L, int n)
| mov FCARG2, NRESULTS
| sub FCARG2, 1
| mov FCARG1, L:RB
| call extern lj_state_growstack@8 // (lua_State *L, int n)
| mov BASE, L:RB->base
| mov RA, L:RB->top
| add KBASE, BASE

File diff suppressed because it is too large Load Diff

View File

@ -507,10 +507,11 @@ LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux)
/* Inline declarations. */
LJ_ASMF void lj_ff_coroutine_wrap_aux(void);
LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co);
LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
lua_State *co);
/* Error handler, called from assembler VM. */
void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
{
co->top--; copyTV(L, L->top, co->top); L->top++;
if (tvisstr(L->top-1))

View File

@ -103,7 +103,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
}
/* Close all open upvalues pointing to some stack level or above. */
void lj_func_closeuv(lua_State *L, TValue *level)
void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
{
GCupval *uv;
global_State *g = G(L);

View File

@ -13,7 +13,7 @@ LJ_FUNC GCproto *lj_func_newproto(lua_State *L);
LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt);
/* Upvalues. */
LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level);
LJ_FUNCA void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level);
LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv);
/* Functions (closures). */

View File

@ -613,7 +613,7 @@ static size_t gc_onestep(lua_State *L)
}
/* Perform a limited amount of incremental GC steps. */
int lj_gc_step(lua_State *L)
int LJ_FASTCALL lj_gc_step(lua_State *L)
{
global_State *g = G(L);
MSize lim;

View File

@ -42,7 +42,7 @@ enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize };
LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
LJ_FUNC void lj_gc_freeall(global_State *g);
LJ_FUNCA int lj_gc_step(lua_State *L);
LJ_FUNCA int LJ_FASTCALL lj_gc_step(lua_State *L);
LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
LJ_FUNC void lj_gc_fullgc(lua_State *L);

View File

@ -269,7 +269,7 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
}
/* Helper for LEN. __len metamethod. */
TValue *lj_meta_len(lua_State *L, cTValue *o)
TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o)
{
cTValue *mo = lj_meta_lookup(L, o, MM_len);
if (tvisnil(mo)) {
@ -349,7 +349,7 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
}
/* Helper for FORI. Coercion. */
void lj_meta_for(lua_State *L, TValue *base)
void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base)
{
if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT);
if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM);

View File

@ -24,10 +24,10 @@ LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k);
LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb,
cTValue *rc, BCReg op);
LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left);
LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o);
LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base);
LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base);
#endif

View File

@ -85,7 +85,7 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
}
/* Try to grow stack. */
void lj_state_growstack(lua_State *L, MSize need)
void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
{
if (L->stacksize > LJ_STACK_MAXEX) /* overflow while handling overflow? */
lj_err_throw(L, LUA_ERRERR);
@ -99,7 +99,7 @@ void lj_state_growstack(lua_State *L, MSize need)
}
}
void lj_state_growstack1(lua_State *L)
void LJ_FASTCALL lj_state_growstack1(lua_State *L)
{
/* Single-argument entry point: forwards to lj_state_growstack() with need = 1. */
lj_state_growstack(L, 1);
}

View File

@ -16,8 +16,8 @@
LJ_FUNC void lj_state_relimitstack(lua_State *L);
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
LJ_FUNC void lj_state_growstack1(lua_State *L);
LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
{

View File

@ -371,7 +371,7 @@ void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
/* -- Table getters ------------------------------------------------------- */
cTValue *lj_tab_getinth(GCtab *t, int32_t key)
cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
{
TValue k;
Node *n;

View File

@ -18,7 +18,7 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
/* Caveat: all getters except lj_tab_get() can return NULL! */
LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key);
LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);

View File

@ -552,7 +552,7 @@ static void trace_new(jit_State *J)
}
/* A hotcount triggered. Start recording a root trace. */
void lj_trace_hot(jit_State *J, const BCIns *pc)
void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc)
{
lua_State *L = J->L;
L->top = curr_topL(L); /* Only called from Lua and NRESULTS is not used. */
@ -564,7 +564,7 @@ void lj_trace_hot(jit_State *J, const BCIns *pc)
}
/* A trace exited. Restore interpreter state and check for hot exits. */
void *lj_trace_exit(jit_State *J, void *exptr)
void * LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
{
lua_State *L = J->L;
void *cf;

View File

@ -32,8 +32,8 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
/* Event handling. */
LJ_FUNC void lj_trace_ins(jit_State *J);
LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc);
LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr);
LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
LJ_FUNCA void * LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
/* Signal asynchronous abort of trace or end of trace. */
#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)