Record calls to vararg functions.

This loop is now roughly 1000x faster than the Lua interpreter:
  local function f(a,b,...) end; for i=1,2e8 do f(1,2,i) end
Yet another silly microbenchmark -- I know.
master
Mike Pall 2010-09-12 01:37:02 +02:00
parent b72ae54dc0
commit c2c08ba9b3
4 changed files with 56 additions and 16 deletions

View File

@ -384,17 +384,18 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
callhook(L, LUA_HOOKRET, -1);
}
/* Initialize call. Ensure stack space and clear missing parameters. */
static void call_init(lua_State *L, GCfunc *fn)
/* Initialize call. Ensure stack space and return # of missing parameters. */
static int call_init(lua_State *L, GCfunc *fn)
{
if (isluafunc(fn)) {
MSize numparams = funcproto(fn)->numparams;
TValue *o;
lj_state_checkstack(L, numparams);
for (o = L->base + numparams; L->top < o; L->top++)
setnilV(L->top); /* Clear missing parameters. */
int numparams = funcproto(fn)->numparams;
int gotparams = (int)(L->top - L->base);
lj_state_checkstack(L, (MSize)numparams);
numparams -= gotparams;
return numparams >= 0 ? numparams : 0;
} else {
lj_state_checkstack(L, LUA_MINSTACK);
return 0;
}
}
@ -407,7 +408,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
#if LJ_HASJIT
jit_State *J = G2J(g);
#endif
call_init(L, fn);
int missing = call_init(L, fn);
#if LJ_HASJIT
J->L = L;
if ((uintptr_t)pc & 1) { /* Marker for hot call. */
@ -420,8 +421,15 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
}
#endif
if ((g->hookmask & LUA_MASKCALL))
if ((g->hookmask & LUA_MASKCALL)) {
int i;
for (i = 0; i < missing; i++) /* Add missing parameters. */
setnilV(L->top++);
callhook(L, LUA_HOOKCALL, -1);
/* Preserve modifications of missing parameters by lua_setlocal(). */
while (missing-- > 0 && tvisnil(L->top - 1))
L->top--;
}
#if LJ_HASJIT
out:
#endif

View File

@ -570,6 +570,17 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
frame = frame_prevd(frame);
}
if (frame_isvarg(frame)) {
BCReg cbase = (BCReg)frame_delta(frame);
lua_assert(J->framedepth != 1);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL);
lua_assert(J->baseslot > 1);
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
frame = frame_prevd(frame);
}
if (frame_islua(frame)) { /* Return to Lua frame. */
BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
@ -1840,7 +1851,6 @@ static void rec_func_setup(jit_State *J)
BCReg s, numparams = pt->numparams;
if ((pt->flags & PROTO_NO_JIT))
lj_trace_err(J, LJ_TRERR_CJITOFF);
lua_assert(!(pt->flags & PROTO_IS_VARARG));
if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
/* Fill up missing parameters with nil. */
@ -1850,6 +1860,27 @@ static void rec_func_setup(jit_State *J)
J->maxslot = numparams;
}
/*
** Record the setup of a Lua vararg function.
**
** The function slot and the fixed arguments are copied to a new frame
** that begins J->maxslot+1 slots above the current base. The original
** fixed argument slots are overwritten with nil. Afterwards the recorder
** state (base, baseslot, maxslot, framedepth) refers to the new frame.
** Aborts the trace with LJ_TRERR_STACKOV if the new frame would not fit
** below LJ_MAX_JSLOTS.
*/
static void rec_func_vararg(jit_State *J)
{
  GCproto *pt = J->pt;
  BCReg nargs = J->maxslot;  /* Slot count before the frame is moved. */
  BCReg delta = nargs + 1;  /* Offset of the new base from the old one. */
  BCReg nfix, i;
  lua_assert((pt->flags & PROTO_IS_VARARG));
  if (J->baseslot + delta + pt->framesize >= LJ_MAX_JSLOTS)
    lj_trace_err(J, LJ_TRERR_STACKOV);
  /* The function goes into the slot just below the new base. */
  J->base[delta-1] = J->base[-1];
  /* Only as many fixed parameters as were actually passed are moved. */
  nfix = pt->numparams;
  if (nfix > nargs)
    nfix = nargs;
  /* Move each fixarg up and leave nil behind in its original slot. */
  i = 0;
  while (i < nfix) {
    J->base[delta+i] = J->base[i];
    J->base[i] = TREF_NIL;
    i++;
  }
  /* Switch the recorder over to the new frame. */
  J->baseslot += delta;
  J->base += delta;
  J->framedepth++;
  J->maxslot = nfix;
}
/* Record entry to a Lua function. */
static void rec_func_lua(jit_State *J)
{
@ -2258,8 +2289,11 @@ void lj_record_ins(jit_State *J)
break;
case BC_FUNCV:
rec_func_vararg(J);
rec_func_lua(J);
break;
case BC_JFUNCV:
lj_trace_err(J, LJ_TRERR_NYIVF);
lua_assert(0); /* Cannot happen. No hotcall counting for vararg funcs. */
break;
case BC_FUNCC:

View File

@ -87,15 +87,14 @@ static void snapshot_framelinks(jit_State *J, SnapEntry *map)
if (frame_islua(frame)) {
map[f++] = SNAP_MKPC(frame_pc(frame));
frame = frame_prevl(frame);
} else if (frame_ispcall(frame)) {
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
frame = frame_prevd(frame);
} else if (frame_iscont(frame)) {
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame));
frame = frame_prevd(frame);
} else {
lua_assert(0);
lua_assert(!frame_isc(frame));
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
frame = frame_prevd(frame);
}
}
lua_assert(f == (MSize)(1 + J->framedepth));

View File

@ -23,7 +23,6 @@ TREDEF(BADTYPE, "bad argument type")
TREDEF(CJITOFF, "call to JIT-disabled function")
TREDEF(CUNROLL, "call unroll limit reached")
TREDEF(DOWNREC, "down-recursion, restarting")
TREDEF(NYIVF, "NYI: vararg function")
TREDEF(NYICF, "NYI: C function %p")
TREDEF(NYIFF, "NYI: FastFunc %s")
TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")