Fast forward to sync public repo.

Compile math.sinh(), math.cosh(), math.tanh() and math.random().
Compile various io.*() functions.
Drive the GC forward on string allocations in the parser.
Improve KNUM fuse vs. load heuristics.
Add abstract C call handling to IR.
master
Mike Pall 2009-12-08 20:35:29 +01:00
parent 5287b93264
commit 3f1f9e11f4
44 changed files with 1218 additions and 766 deletions

View File

@ -319,7 +319,7 @@ enable it <b>after</b> running <tt>luaL_openlibs</tt>.
</p>
<p>
LuaJIT already intercepts exception handling for systems using
ELF/DWARF2 stack unwinding (e.g. Linux). This is a zero-cost mechanism
DWARF2 stack unwinding (e.g. Linux, OSX). This is a zero-cost mechanism
and always enabled. You don't need to use any wrapper functions,
except when you want to get a more specific error message than
<tt>"C++&nbsp;exception"</tt>.

View File

@ -48,10 +48,27 @@ The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJI
</p>
<p>
Please check the
<a href="http://luajit.org/luajit_changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
<a href="http://luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
to see whether newer versions are available.
</p>
<div class="major" style="background: #d0d0d0;">
<h2 id="snap">Development Snapshot</h2>
<ul>
<li>Add abstract C call handling to IR.</li>
<li>Improve KNUM fuse vs. load heuristics.</li>
<li>Drive the GC forward on string allocations in the parser.</li>
<li>Compile various <tt>io.*()</tt> functions.</li>
<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt>
and <tt>math.random()</tt>.</li>
<li>Fix <tt>lua_tocfunction()</tt>.</li>
<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li>
<li>Fix PHI marking algorithm for references from variant slots.</li>
<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li>
<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li>
</ul>
</div>
<div class="major" style="background: #ffd0d0;">
<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 &mdash; 2009-11-09</h2>
<ul>
@ -59,14 +76,14 @@ to see whether newer versions are available.
<li>Allow C++ exception conversion on all platforms
using a wrapper function.</li>
<li>Automatically catch C++ exceptions and rethrow Lua error
(ELF/DWARF2 only).</li>
(DWARF2 only).</li>
<li>Check for the correct x87 FPU precision at strategic points.</li>
<li>Always use wrappers for libm functions.</li>
<li>Resurrect metamethod name strings before copying them.</li>
<li>Mark current trace, even if compiler is idle.</li>
<li>Ensure FILE metatable is created only once.</li>
<li>Fix type comparisons when different integer types are involved.</li>
<li>Fix getmetatable() recording.</li>
<li>Fix <tt>getmetatable()</tt> recording.</li>
<li>Fix TDUP with dead keys in template table.</li>
<li><tt>jit.flush(tr)</tt> returns status.
Prevent manual flush of a trace that's still linked.</li>
@ -234,7 +251,7 @@ on a separate line.</li>
<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
<li>Miscellaneous doc changes. Added a section about
<a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li>
<a href="install.html#embedding">embedding LuaJIT</a>.</li>
</ul>
<p>
This release is in sync with Coco 1.1.0 (see the

View File

@ -46,17 +46,15 @@ You can also send any questions you have directly to me:
<script type="text/javascript">
<!--
var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZa<b>cdefghijklmnopqrstuvwxyz"
function xD(s)
var xS="@-:\" .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<abc>defghijklmnopqrstuvwxyz";function xD(s)
{var len=s.length;var r="";for(var i=0;i<len;i++)
{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)
c=xS.charAt(66-n);r+=c;}
{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)c=xS.charAt(69-n);r+=c;}
document.write("<"+"p>"+r+"<"+"/p>\n");}
//-->
</script>
<script type="text/javascript">
<!--
xD("ewYKA7vu-EIwslx7 K9A.t41C")
xD("fyZKB8xv\"FJytmz8.KAB0u52D")
//--></script>
<noscript>
<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13">

View File

@ -8,6 +8,7 @@
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
<meta name="description" content="LuaJIT is a Just-In-Time (JIT) compiler for the Lua language.">
</head>
<body>
<div id="site">

View File

@ -30,7 +30,7 @@
-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
--
-- local out = {
-- -- Do something wich each line:
-- -- Do something with each line:
-- write = function(t, ...) io.write(...) end,
-- close = function(t) end,
-- flush = function(t) end,

View File

@ -144,7 +144,7 @@ local colortype_ansi = {
[0] = "%s",
"%s",
"%s",
"%s",
"\027[36m%s\027[m",
"\027[32m%s\027[m",
"%s",
"\027[1m%s\027[m",
@ -199,9 +199,9 @@ margin-right: 2em;
span.irt_str { color: #00a000; }
span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
span.irt_tab { color: #c00000; }
span.irt_udt { color: #00c0c0; }
span.irt_num { color: #0000c0; }
span.irt_int { color: #c000c0; }
span.irt_udt, span.irt_lud { color: #00c0c0; }
span.irt_num { color: #4040c0; }
span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
</style>
]]
@ -210,7 +210,7 @@ local colorize, irtype
-- Lookup table to convert some literals into names.
local litname = {
["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
["XLOAD "] = { [0] = "", "unaligned", },
["XLOAD "] = { [0] = "", "R", "U", "RU", },
["TOINT "] = { [0] = "check", "index", "", },
["FLOAD "] = vmdef.irfield,
["FREF "] = vmdef.irfield,
@ -313,6 +313,27 @@ local function ridsp_name(ridsp)
return ""
end
-- Write the argument list of a CALL* instruction to the output.
-- `ins` is an IR reference within trace `tr`. Negative references are
-- rendered through formatk(); non-negative ones are looked up with traceir().
-- A CARG instruction links to the preceding arguments through its first
-- operand and carries one argument in its second operand.
local function dumpcallargs(tr, ins)
  if ins < 0 then
    out:write(formatk(tr, ins))
    return
  end
  local _, ot, lhs, rhs = traceir(tr, ins)
  local namepos = 6*shr(ot, 8)
  if sub(vmdef.irnames, namepos+1, namepos+6) ~= "CARG " then
    -- Not an argument link: print the reference itself as a 4-digit number.
    out:write(format("%04d", ins))
    return
  end
  -- Emit the earlier arguments first, then this link's own argument.
  dumpcallargs(tr, lhs)
  if rhs >= 0 then
    out:write(" ", format("%04d", rhs))
  else
    out:write(" ", formatk(tr, rhs))
  end
end
-- Dump IR and interleaved snapshots.
local function dump_ir(tr, dumpsnap, dumpreg)
local info = traceinfo(tr)
@ -348,7 +369,8 @@ local function dump_ir(tr, dumpsnap, dumpreg)
else
out:write(format("%04d ------ LOOP ------------\n", ins))
end
elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then
elseif op ~= "NOP " and op ~= "CARG " and
(dumpreg or op ~= "RENAME") then
if dumpreg then
out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
else
@ -359,7 +381,11 @@ local function dump_ir(tr, dumpsnap, dumpreg)
band(ot, 128) == 0 and " " or "+",
irtype[t], op))
local m1 = band(m, 3)
if m1 ~= 3 then -- op1 != IRMnone
if sub(op, 1, 4) == "CALL" then
out:write(format("%-10s (", vmdef.ircall[op2]))
if op1 ~= -1 then dumpcallargs(tr, op1) end
out:write(")")
elseif m1 ~= 3 then -- op1 != IRMnone
if op1 < 0 then
out:write(formatk(tr, op1))
else

View File

@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \
lj_libdef.h
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \
lj_lib.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \
lj_target.h lj_target_x86.h
lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \
lj_vm.h lj_target.h lj_target_x86.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
lj_ir.h lj_dispatch.h
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
lj_traceerr.h
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
lj_bc.h lj_traceerr.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \

View File

@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME)
};
const char *const irfield_names[] = {
#define FLNAME(name, type, field) #name,
#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
NULL
};
/* NULL-terminated table of IR call names, generated from IRCALLDEF. */
const char *const ircall_names[] = {
/* Only the name field is stringized; nargs, kind, type and flags are unused here. */
#define IRCALLNAME(name, nargs, kind, type, flags) #name,
IRCALLDEF(IRCALLNAME)
#undef IRCALLNAME
NULL
};
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx)
}
fprintf(ctx->fp, "}\n\n");
fprintf(ctx->fp, "ircall = {\n[0]=");
for (i = 0; ircall_names[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
fprintf(ctx->fp, "}\n\n");
fprintf(ctx->fp, "traceerr = {\n[0]=");
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);

View File

@ -102,5 +102,6 @@ extern const char *const bc_names[];
extern const char *const ir_names[];
extern const char *const irfpm_names[];
extern const char *const irfield_names[];
extern const char *const ircall_names[];
#endif

View File

@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
{
const char *sym = ctx->extnames[r->sym];
const char *p = strchr(sym, '@');
char buf[80];
if (p) {
/* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */
strncpy(buf, sym, p-sym);
buf[p-sym] = '\0';
sym = buf;
}
switch (ctx->mode) {
case BUILD_elfasm:
if (r->type)

View File

@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
for (i = 0; irfield_names[i]; i++)
if (!strcmp(irfield_names[i], p+5))
return i;
} else if (allowlit && !strncmp(p, "IRCALL_", 7)) {
for (i = 0; ircall_names[i]; i++)
if (!strcmp(ircall_names[i], p+7))
return i;
} else if (allowany && !strcmp("any", p)) {
return 0xff;
} else {

View File

@ -85,6 +85,7 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
#define PEOBJ_RELOC_DIR32 0x06
#define PEOBJ_SYM_PREFIX "_"
#define PEOBJ_SYMF_PREFIX "@"
#elif LJ_TARGET_X64
#define PEOBJ_ARCH_TARGET 0x8664
#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
for (i = 0; ctx->extnames[i]; i++) {
sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]);
const char *sym = ctx->extnames[i];
const char *p = strchr(sym, '@');
if (p) {
#ifdef PEOBJ_SYMF_PREFIX
sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym);
#else
strncpy(name, sym, p-sym);
name[p-sym] = '\0';
#endif
} else {
sprintf(name, PEOBJ_SYM_PREFIX "%s", sym);
}
emit_peobj_sym(ctx, name, 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
}

View File

@ -30,6 +30,9 @@
|.define RD, RC
|.define RDL, RCL
|
|.define FCARG1, ecx // Fastcall arguments.
|.define FCARG2, edx
|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State
|.type GL, global_State
@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| mov RB, LJ_TNUMX
|7:
| not RB
| mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)]
| mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
| jmp <2
|
|.ffunc_2 setmetatable
@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| jmp ->fff_res1
|3: // Handle numbers inline, unless a number base metatable is present.
| cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
| cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
| cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
| jne ->fff_fallback
| ffgccheck // Caveat: uses label 1.
| mov L:RB, SAVE_L
| mov ARG1, L:RB
| mov ARG2, RA
| mov L:RB->base, RA // Add frame since C call can throw.
| mov [RA-4], PC
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov ARG3, BASE // Save BASE.
| call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
| mov FCARG2, RA // Caveat: FCARG2 == BASE
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
| call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
| // GCstr returned in eax (RC).
| mov RA, L:RB->base
| mov BASE, ARG3
@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
|
|.ffunc_1 table_getn
| cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
| mov TAB:RB, [RA]
| mov ARG1, TAB:RB
| mov RB, RA // Save RA and BASE.
| mov ARG2, BASE
| call extern lj_tab_len // (GCtab *t)
| mov ARG2, BASE // Save RA and BASE.
| mov RB, RA
| mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
| call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov RA, RB // Restore RA and BASE.
@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
|2:
| checktab RD, ->vmeta_len
| mov TAB:RD, [BASE+RD*8]
| mov ARG1, TAB:RD
| mov TAB:FCARG1, [BASE+RD*8]
| mov RB, BASE // Save BASE.
| call extern lj_tab_len // (GCtab *t)
| call extern lj_tab_len@4 // (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov BASE, RB // Restore BASE.
@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
break;
case BC_USETV:
#define TV2MARKOFS \
((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
| ins_AD // RA = upvalue #, RD = src
| // Really ugly code due to the lack of a 4th free register.
| mov LFUNC:RB, [BASE-8]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
| jnz >4
|1:
| mov RA, [BASE+RD*8]
|2:
| cmp byte UPVAL:RB->closed, 0
| mov RB, UPVAL:RB->v
| mov RA, [BASE+RD*8]
| mov RD, [BASE+RD*8+4]
| mov [RB], RA
| mov [RB+4], RD
|3:
| jz >1
| // Check barrier for closed upvalue.
| test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
| jnz >2
|1:
| ins_next
|
|4: // Upvalue is black. Check if new value is collectable and white.
| mov RA, [BASE+RD*8+4]
| sub RA, LJ_TISGCV
| cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
|2: // Upvalue is black. Check if new value is collectable and white.
| sub RD, LJ_TISGCV
| cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
| jbe <1
| mov GCOBJ:RA, [BASE+RD*8]
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
| jz <2
| // Crossed a write barrier. So move the barrier forward.
| mov ARG2, UPVAL:RB
| mov ARG3, GCOBJ:RA
| mov RB, UPVAL:RB->v
| mov RD, [BASE+RD*8+4]
| mov [RB], GCOBJ:RA
| mov [RB+4], RD
|->BC_USETV_Z:
| mov L:RB, SAVE_L
| lea GL:RA, [DISPATCH+GG_DISP2G]
| mov L:RB->base, BASE
| mov ARG1, GL:RA
| call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
| mov BASE, L:RB->base
| jmp <3
| jz <1
| // Crossed a write barrier. Move the barrier forward.
| xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
| lea GL:FCARG1, [DISPATCH+GG_DISP2G]
| call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
| mov BASE, RB // Restore BASE.
| jmp <1
break;
#undef TV2MARKOFS
case BC_USETS:
| ins_AND // RA = upvalue #, RD = str const (~)
| mov LFUNC:RB, [BASE-8]
| mov GCOBJ:RD, [KBASE+RD*4]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
| mov RA, UPVAL:RB->v
| mov dword [RA+4], LJ_TSTR
| mov [RA], GCOBJ:RD
| mov GCOBJ:RA, [KBASE+RD*4]
| mov RD, UPVAL:RB->v
| mov [RD], GCOBJ:RA
| mov dword [RD+4], LJ_TSTR
| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
| jnz >2
|1:
| ins_next
|
|2: // Upvalue is black. Check if string is white.
| test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
|2: // Check if string is white and ensure upvalue is closed.
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
| jz <1
| // Crossed a write barrier. So move the barrier forward.
| mov ARG3, GCOBJ:RD
| mov ARG2, UPVAL:RB
| jmp ->BC_USETV_Z
| cmp byte UPVAL:RB->closed, 0
| jz <1
| // Crossed a write barrier. Move the barrier forward.
| mov RB, BASE // Save BASE (FCARG2 == BASE).
| mov FCARG2, RD
| lea GL:FCARG1, [DISPATCH+GG_DISP2G]
| call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
| mov BASE, RB // Restore BASE.
| jmp <1
break;
case BC_USETN:
| ins_AD // RA = upvalue #, RD = num const
@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|2:
| call extern lj_gc_step_fixtop // (lua_State *L)
| mov ARG1, L:RB // Args owned by callee. Set it again.
| mov L:FCARG1, L:RB
| call extern lj_gc_step_fixtop@4 // (lua_State *L)
| jmp <1
break;
case BC_TDUP:
| ins_AND // RA = dst, RD = table const (~) (holding template table)
| mov TAB:RD, [KBASE+RD*4]
| mov L:RB, SAVE_L
| mov ARG2, TAB:RD
| mov ARG1, L:RB
| mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
| mov SAVE_PC, PC
| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
| mov L:RB->base, BASE
| jae >3
|2:
| call extern lj_tab_dup // (lua_State *L, Table *kt)
| mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
| mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
| call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
| // Table * returned in eax (RC).
| mov BASE, L:RB->base
| movzx RA, PC_RA
@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|3:
| call extern lj_gc_step_fixtop // (lua_State *L)
| mov ARG1, L:RB // Args owned by callee. Set it again.
| mov L:FCARG1, L:RB
| call extern lj_gc_step_fixtop@4 // (lua_State *L)
| movzx RD, PC_RD // Need to reload RD.
| not RD
| jmp <2
break;

View File

@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
int32_t base = lj_lib_optint(L, 2, 10);
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) {
if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) {
setnumV(L->base-1, numV(o));
return FFH_RES(1);
}
@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
return FFH_RES(1);
}
LJLIB_PUSH("nil")
LJLIB_PUSH("false")
LJLIB_PUSH("true")
LJLIB_ASM(tostring) LJLIB_REC(.)
{
TValue *o = lj_lib_checkany(L, 1);
@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
GCstr *s;
if (tvisnum(o)) {
s = lj_str_fromnum(L, &o->n);
} else if (tvisnil(o)) {
s = lj_str_newlit(L, "nil");
} else if (tvisfalse(o)) {
s = lj_str_newlit(L, "false");
} else if (tvistrue(o)) {
s = lj_str_newlit(L, "true");
} else if (tvispri(o)) {
s = strV(lj_lib_upvalue(L, -itype(o)));
} else {
if (tvisfunc(o) && isffunc(funcV(o)))
lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);

View File

@ -17,14 +17,28 @@
#include "lualib.h"
#include "lj_obj.h"
#include "lj_err.h"
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_str.h"
#include "lj_ff.h"
#include "lj_trace.h"
#include "lj_lib.h"
/* Index of standard handles in function environment. */
#define IO_INPUT 1
#define IO_OUTPUT 2
/* Userdata payload for I/O file. */
typedef struct IOFileUD {
FILE *fp; /* File handle. NULL means the file is closed. */
uint32_t type; /* File type: one IOFILE_TYPE_* value, optionally ORed with IOFILE_FLAG_*. */
} IOFileUD;
/* File kinds. Extract the kind from IOFileUD.type with IOFILE_TYPE_MASK. */
#define IOFILE_TYPE_FILE 0 /* Regular file. */
#define IOFILE_TYPE_PIPE 1 /* Pipe. */
#define IOFILE_TYPE_STDF 2 /* Standard file handle. */
#define IOFILE_TYPE_MASK 3
/* Flag bit stored in IOFileUD.type alongside the file kind. */
#define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */
/* Userdata object referenced by slot `id` of the global state's gcroot array. */
#define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud)
/* IOFileUD payload of the userdata returned by IOSTDF_UD(L, id). */
#define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id))))
/* -- Error handling ------------------------------------------------------ */
@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname)
return 1;
} else {
int en = errno; /* Lua API calls may change this value. */
lua_pushnil(L);
setnilV(L->top++);
if (fname)
lua_pushfstring(L, "%s: %s", fname, strerror(en));
else
lua_pushfstring(L, "%s", strerror(en));
lua_pushinteger(L, en);
setintV(L->top++, en);
lj_trace_abort(G(L));
return 3;
}
}
static void io_file_error(lua_State *L, int arg, const char *fname)
/* -- Open/close helpers -------------------------------------------------- */
static IOFileUD *io_tofilep(lua_State *L)
{
lua_pushfstring(L, "%s: %s", fname, strerror(errno));
luaL_argerror(L, arg, lua_tostring(L, -1));
if (!(L->base < L->top && tvisudata(L->base) &&
udataV(L->base)->udtype == UDTYPE_IO_FILE))
lj_err_argtype(L, 1, "FILE*");
return (IOFileUD *)uddata(udataV(L->base));
}
/* -- Open helpers -------------------------------------------------------- */
#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
static FILE *io_tofile(lua_State *L)
static IOFileUD *io_tofile(lua_State *L)
{
FILE **f = io_tofilep(L);
if (*f == NULL)
IOFileUD *iof = io_tofilep(L);
if (iof->fp == NULL)
lj_err_caller(L, LJ_ERR_IOCLFL);
return *f;
return iof;
}
static FILE **io_file_new(lua_State *L)
static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
{
FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *));
*pf = NULL;
luaL_getmetatable(L, LUA_FILEHANDLE);
lua_setmetatable(L, -2);
return pf;
IOFileUD *iof = IOSTDF_IOF(L, id);
if (iof->fp == NULL)
lj_err_caller(L, LJ_ERR_IOSTDCL);
return iof->fp;
}
/* -- Close helpers ------------------------------------------------------- */
static int lj_cf_io_std_close(lua_State *L)
static IOFileUD *io_file_new(lua_State *L)
{
lua_pushnil(L);
lua_pushliteral(L, "cannot close standard file");
return 2;
IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
GCudata *ud = udataV(L->top-1);
ud->udtype = UDTYPE_IO_FILE;
/* NOBARRIER: The GCudata is new (marked white). */
setgcrefr(ud->metatable, curr_func(L)->c.env);
iof->fp = NULL;
iof->type = IOFILE_TYPE_FILE;
return iof;
}
static int lj_cf_io_pipe_close(lua_State *L)
static IOFileUD *io_file_open(lua_State *L, const char *mode)
{
FILE **p = io_tofilep(L);
const char *fname = strdata(lj_lib_checkstr(L, 1));
IOFileUD *iof = io_file_new(L);
iof->fp = fopen(fname, mode);
if (iof->fp == NULL)
luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
return iof;
}
static int io_file_close(lua_State *L, IOFileUD *iof)
{
int ok;
if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) {
ok = (fclose(iof->fp) == 0);
} else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) {
#if defined(LUA_USE_POSIX)
int ok = (pclose(*p) != -1);
ok = (pclose(iof->fp) != -1);
#elif defined(LUA_USE_WIN)
int ok = (_pclose(*p) != -1);
ok = (_pclose(iof->fp) != -1);
#else
int ok = 0;
ok = 0;
#endif
*p = NULL;
} else {
lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
setnilV(L->top++);
lua_pushliteral(L, "cannot close standard file");
return 2;
}
iof->fp = NULL;
return io_pushresult(L, ok, NULL);
}
static int lj_cf_io_file_close(lua_State *L)
{
FILE **p = io_tofilep(L);
int ok = (fclose(*p) == 0);
*p = NULL;
return io_pushresult(L, ok, NULL);
}
static int io_file_close(lua_State *L)
{
lua_getfenv(L, 1);
lua_getfield(L, -1, "__close");
return (lua_tocfunction(L, -1))(L);
}
/* -- Read/write helpers -------------------------------------------------- */
static int io_file_readnum(lua_State *L, FILE *fp)
{
lua_Number d;
if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
lua_pushnumber(L, d);
setnumV(L->top++, d);
return 1;
} else {
return 0; /* read fails */
return 0;
}
}
static int test_eof(lua_State *L, FILE *fp)
static int io_file_testeof(lua_State *L, FILE *fp)
{
int c = getc(fp);
ungetc(c, fp);
@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
n -= nr; /* still have to read `n' chars */
} while (n > 0 && nr == rlen); /* until end of count or eof */
luaL_pushresult(&b); /* close buffer */
return (n == 0 || lua_objlen(L, -1) > 0);
return (n == 0 || strV(L->top-1)->len > 0);
}
static int io_file_read(lua_State *L, FILE *fp, int start)
@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
lj_err_arg(L, n+1, LJ_ERR_INVFMT);
} else if (tvisnum(L->base+n)) {
size_t len = (size_t)lj_lib_checkint(L, n+1);
ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp);
ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp);
} else {
lj_err_arg(L, n+1, LJ_ERR_INVOPT);
}
@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
LJLIB_CF(io_method_close)
{
if (lua_isnone(L, 1))
lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT);
io_tofile(L);
return io_file_close(L);
IOFileUD *iof = L->base < L->top ? io_tofile(L) :
IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
return io_file_close(L, iof);
}
LJLIB_CF(io_method_read)
{
return io_file_read(L, io_tofile(L), 1);
return io_file_read(L, io_tofile(L)->fp, 1);
}
LJLIB_CF(io_method_write)
LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
{
return io_file_write(L, io_tofile(L), 1);
return io_file_write(L, io_tofile(L)->fp, 1);
}
LJLIB_CF(io_method_flush)
LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
{
return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL);
return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
}
LJLIB_CF(io_method_seek)
{
FILE *fp = io_tofile(L);
FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
lua_Number ofs;
int res;
@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek)
LJLIB_CF(io_method_setvbuf)
{
FILE *fp = io_tofile(L);
FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
if (opt == 0) opt = _IOFBF;
else if (opt == 1) opt = _IOLBF;
else if (opt == 2) opt = _IONBF;
return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL);
return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL);
}
/* Forward declaration. */
static void io_file_lines(lua_State *L, int idx, int toclose);
LJLIB_PUSH(top-2) /* io_lines_iter */
LJLIB_CF(io_method_lines)
{
io_tofile(L);
io_file_lines(L, 1, 0);
return 1;
setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
setudataV(L, L->top+1, udataV(L->base));
L->top += 2;
return 2;
}
LJLIB_CF(io_method___gc)
{
FILE *fp = *io_tofilep(L);
if (fp != NULL) io_file_close(L);
IOFileUD *iof = io_tofilep(L);
if (iof->fp != NULL)
io_file_close(L, iof);
return 0;
}
LJLIB_CF(io_method___tostring)
{
FILE *fp = *io_tofilep(L);
if (fp == NULL)
lua_pushliteral(L, "file (closed)");
IOFileUD *iof = io_tofilep(L);
if (iof->fp != NULL)
lua_pushfstring(L, "file (%p)", iof->fp);
else
lua_pushfstring(L, "file (%p)", fp);
lua_pushliteral(L, "file (closed)");
return 1;
}
@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index)
LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
static FILE *io_file_get(lua_State *L, int findex)
{
GCtab *fenv = tabref(curr_func(L)->c.env);
GCudata *ud = udataV(&tvref(fenv->array)[findex]);
FILE *fp = *(FILE **)uddata(ud);
if (fp == NULL)
lj_err_caller(L, LJ_ERR_IOSTDCL);
return fp;
}
LJLIB_CF(io_open)
{
const char *fname = luaL_checkstring(L, 1);
const char *mode = luaL_optstring(L, 2, "r");
FILE **pf = io_file_new(L);
*pf = fopen(fname, mode);
return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
const char *fname = strdata(lj_lib_checkstr(L, 1));
GCstr *s = lj_lib_optstr(L, 2);
const char *mode = s ? strdata(s) : "r";
IOFileUD *iof = io_file_new(L);
iof->fp = fopen(fname, mode);
return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
}
/*
** io.popen(prog [, mode]): run prog through popen()/_popen() and wrap the
** resulting stream in a new file userdata tagged IOFILE_TYPE_PIPE.
** Argument 1 must be a string; argument 2 is an optional mode string that
** defaults to "r". Returns 1 (the new userdata) on success, otherwise the
** failure results produced by io_pushresult(). On builds with neither
** LUA_USE_POSIX nor LUA_USE_WIN an error is raised instead.
*/
LJLIB_CF(io_popen)
{
#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
  const char *fname = strdata(lj_lib_checkstr(L, 1));
  GCstr *s = lj_lib_optstr(L, 2);
  const char *mode = s ? strdata(s) : "r";
  IOFileUD *iof = io_file_new(L);
  iof->type = IOFILE_TYPE_PIPE;
#ifdef LUA_USE_POSIX
  fflush(NULL);  /* Flush all open output streams before starting the child. */
  iof->fp = popen(fname, mode);
#else
  iof->fp = _popen(fname, mode);
#endif
  return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
#else
  /* luaL_error() does not return; the explicit return keeps every path of
  ** this int function well-formed and follows the documented idiom. */
  return luaL_error(L, LUA_QL("popen") " not supported");
#endif
}
LJLIB_CF(io_tmpfile)
{
FILE **pf = io_file_new(L);
*pf = tmpfile();
return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1;
IOFileUD *iof = io_file_new(L);
iof->fp = tmpfile();
return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL);
}
LJLIB_CF(io_close)
@ -373,169 +405,112 @@ LJLIB_CF(io_close)
LJLIB_CF(io_read)
{
return io_file_read(L, io_file_get(L, IO_INPUT), 0);
return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0);
}
LJLIB_CF(io_write)
LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
{
return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0);
}
LJLIB_CF(io_flush)
LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
{
return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
}
LJLIB_NOREG LJLIB_CF(io_lines_iter)
static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
{
FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
int ok;
if (fp == NULL)
lj_err_caller(L, LJ_ERR_IOCLFL);
ok = io_file_readline(L, fp);
if (ferror(fp))
return luaL_error(L, "%s", strerror(errno));
if (ok)
return 1;
if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */
L->top = L->base+1;
setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
io_file_close(L);
}
return 0;
}
static void io_file_lines(lua_State *L, int idx, int toclose)
{
lua_pushvalue(L, idx);
lua_pushboolean(L, toclose);
lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
funcV(L->top-1)->c.ffid = FF_io_lines_iter;
}
LJLIB_CF(io_lines)
{
if (lua_isnoneornil(L, 1)) { /* no arguments? */
/* will iterate over default input */
lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
return lj_cf_io_method_lines(L);
} else {
const char *fname = luaL_checkstring(L, 1);
FILE **pf = io_file_new(L);
*pf = fopen(fname, "r");
if (*pf == NULL)
io_file_error(L, 1, fname);
io_file_lines(L, lua_gettop(L), 1);
return 1;
}
}
static int io_std_get(lua_State *L, int fp, const char *mode)
{
if (!lua_isnoneornil(L, 1)) {
const char *fname = lua_tostring(L, 1);
if (fname) {
FILE **pf = io_file_new(L);
*pf = fopen(fname, mode);
if (*pf == NULL)
io_file_error(L, 1, fname);
if (L->base < L->top && !tvisnil(L->base)) {
if (tvisudata(L->base)) {
io_tofile(L);
L->top = L->base+1;
} else {
io_tofile(L); /* check that it's a valid file handle */
lua_pushvalue(L, 1);
io_file_open(L, mode);
}
lua_rawseti(L, LUA_ENVIRONINDEX, fp);
/* NOBARRIER: The standard I/O handles are GC roots. */
setgcref(G(L)->gcroot[id], gcV(L->top-1));
} else {
setudataV(L, L->top++, IOSTDF_UD(L, id));
}
/* return current value */
lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
return 1;
}
LJLIB_CF(io_input)
{
return io_std_get(L, IO_INPUT, "r");
return io_std_getset(L, GCROOT_IO_INPUT, "r");
}
LJLIB_CF(io_output)
{
return io_std_get(L, IO_OUTPUT, "w");
return io_std_getset(L, GCROOT_IO_OUTPUT, "w");
}
/*
** Iterator step for io.lines() and file:lines(): read the next line from the
** file userdata passed as first argument. A stream error is raised as a Lua
** error. When no line was read and the file carries IOFILE_FLAG_CLOSE, the
** file is closed before the iteration ends.
*/
LJLIB_NOREG LJLIB_CF(io_lines_iter)
{
  IOFileUD *file = io_tofile(L);
  int got = io_file_readline(L, file->fp);
  if (ferror(file->fp))
    lj_err_callermsg(L, strerror(errno));
  if (got)
    return got;
  /* Nothing read: close the file if requested. Results pushed by the close
  ** helper are not returned, since zero results are reported. */
  if ((file->type & IOFILE_FLAG_CLOSE) != 0)
    io_file_close(L, file);
  return 0;
}
/*
** io.lines([fname]): return the line iterator function and the file userdata
** it operates on, as the two topmost stack slots. With a non-nil first
** argument the named file is opened for reading; otherwise the current
** default input file is used.
*/
LJLIB_PUSH(top-3) /* io_lines_iter */
LJLIB_CF(io_lines)
{
if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */
IOFileUD *iof = io_file_open(L, "r");
/* Mark the file so io_lines_iter closes it once no more lines are read. */
iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE;
/* Store the iterator (upvalue 1) in the slot directly below the new userdata.
** NOTE(review): presumably this overwrites the fname argument when exactly
** one argument was passed -- confirm behaviour with extra arguments. */
setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1)));
} else { /* io.lines() iterates over stdin. */
/* Push the iterator followed by the userdata in the GCROOT_IO_INPUT slot. */
setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT));
L->top += 2;
}
return 2;
}
LJLIB_CF(io_type)
{
void *ud;
luaL_checkany(L, 1);
ud = lua_touserdata(L, 1);
lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1))
lua_pushnil(L); /* not a file */
else if (*((FILE **)ud) == NULL)
lua_pushliteral(L, "closed file");
else
cTValue *o = lj_lib_checkany(L, 1);
if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE))
setnilV(L->top++);
else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL)
lua_pushliteral(L, "file");
else
lua_pushliteral(L, "closed file");
return 1;
}
LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */
LJLIB_CF(io_popen)
{
#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
const char *fname = luaL_checkstring(L, 1);
const char *mode = luaL_optstring(L, 2, "r");
FILE **pf = io_file_new(L);
#ifdef LUA_USE_POSIX
fflush(NULL);
*pf = popen(fname, mode);
#else
*pf = _popen(fname, mode);
#endif
return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
#else
luaL_error(L, LUA_QL("popen") " not supported");
#endif
}
#include "lj_libdef.h"
/* ------------------------------------------------------------------------ */
static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname)
static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
{
FILE **pf = io_file_new(L);
IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
GCudata *ud = udataV(L->top-1);
GCtab *envt = tabV(L->top-2);
*pf = fp;
setgcref(ud->env, obj2gco(envt));
lj_gc_objbarrier(L, obj2gco(ud), envt);
if (k > 0) {
lua_pushvalue(L, -1);
lua_rawseti(L, -5, k);
}
lua_setfield(L, -3, fname);
}
static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
{
lua_createtable(L, narr, 1);
lua_pushcfunction(L, cls);
lua_setfield(L, -2, "__close");
ud->udtype = UDTYPE_IO_FILE;
/* NOBARRIER: The GCudata is new (marked white). */
setgcref(ud->metatable, gcV(L->top-3));
iof->fp = fp;
iof->type = IOFILE_TYPE_STDF;
lua_setfield(L, -2, name);
return obj2gco(ud);
}
LUALIB_API int luaopen_io(lua_State *L)
{
lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
if (tvisnil(L->top-1)) {
LJ_LIB_REG_(L, NULL, io_method);
lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
}
io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */
io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */
lua_pushcfunction(L, lj_cf_io_lines_iter);
funcV(L->top-1)->c.ffid = FF_io_lines_iter;
LJ_LIB_REG_(L, NULL, io_method);
copyTV(L, L->top, L->top-1); L->top++;
lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
LJ_LIB_REG(L, io);
io_fenv_new(L, 0, lj_cf_io_std_close);
io_std_new(L, stdin, IO_INPUT, "stdin");
io_std_new(L, stdout, IO_OUTPUT, "stdout");
io_std_new(L, stderr, 0, "stderr");
L->top--;
setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin"));
setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout"));
io_std_new(L, stderr, "stderr");
return 1;
}

View File

@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
LJLIB_ASM_(math_sinh)
LJLIB_ASM_(math_cosh)
LJLIB_ASM_(math_tanh)
LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
LJLIB_ASM_(math_frexp)
LJLIB_ASM_(math_modf) LJLIB_REC(.)
@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
*/
/* PRNG state. */
typedef struct TW223State {
struct RandomState {
uint64_t gen[4]; /* State of the 4 LFSR generators. */
int valid; /* State is valid. */
} TW223State;
};
/* Union needed for bit-pattern conversion between uint64_t and double. */
typedef union { uint64_t u64; double d; } U64double;
/* Update generator i and compute a running xor of all states. */
#define TW223_GEN(i, k, q, s) \
z = tw->gen[i]; \
z = rs->gen[i]; \
z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
r ^= z; tw->gen[i] = z;
r ^= z; rs->gen[i] = z;
/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
static LJ_NOINLINE double tw223_step(TW223State *tw)
LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
{
uint64_t z, r = 0;
U64double u;
TW223_GEN(0, 63, 31, 18)
TW223_GEN(1, 58, 19, 28)
TW223_GEN(2, 55, 24, 7)
TW223_GEN(3, 47, 21, 8)
u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
return u.d;
return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
}
/* PRNG initialization function. */
static void tw223_init(TW223State *tw, double d)
static void random_init(RandomState *rs, double d)
{
uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
int i;
@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d)
r >>= 8;
u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
tw->gen[i] = u.u64;
rs->gen[i] = u.u64;
}
tw->valid = 1;
rs->valid = 1;
for (i = 0; i < 10; i++)
tw223_step(tw);
lj_math_random_step(rs);
}
/* PRNG extract function. */
LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
LJLIB_CF(math_random)
LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
LJLIB_CF(math_random) LJLIB_REC(.)
{
int n = cast_int(L->top - L->base);
TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
U64double u;
double d;
if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0);
d = tw223_step(tw) - 1.0;
if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
u.u64 = lj_math_random_step(rs);
d = u.d - 1.0;
if (n > 0) {
double r1 = lj_lib_checknum(L, 1);
if (n == 1) {
@ -150,11 +150,11 @@ LJLIB_CF(math_random)
}
/* PRNG seed function. */
LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
LJLIB_CF(math_randomseed)
{
TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
tw223_init(tw, lj_lib_checknum(L, 1));
RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
random_init(rs, lj_lib_checknum(L, 1));
return 0;
}
@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed)
LUALIB_API int luaopen_math(lua_State *L)
{
TW223State *tw;
tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State));
tw->valid = 0; /* Use lazy initialization to save some time on startup. */
RandomState *rs;
rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
rs->valid = 0; /* Use lazy initialization to save some time on startup. */
LJ_LIB_REG(L, math);
#if defined(LUA_COMPAT_MOD)
lua_getfield(L, -1, "fmod");

View File

@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L)
{
GCtab *mt;
GCstr *mmstr;
global_State *g;
LJ_LIB_REG(L, string);
#if defined(LUA_COMPAT_GFIND)
lua_getfield(L, -1, "gmatch");
lua_setfield(L, -2, "gfind");
#endif
mt = lj_tab_new(L, 0, 1);
/* NOBARRIER: G(L)->mmname[] is a GC root. */
setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt));
mmstr = strref(G(L)->mmname[MM_index]);
if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
/* NOBARRIER: basemt is a GC root. */
g = G(L);
setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
mmstr = strref(g->mmname[MM_index]);
if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1));
mt->nomm = cast_byte(~(1u<<MM_index));
return 1;

View File

@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
size_t rsize = oldsize - nb;
newp = oldp;
if (rsize >= MIN_CHUNK_SIZE) {
mchunkptr remainder = chunk_plus_offset(newp, nb);
mchunkptr rem = chunk_plus_offset(newp, nb);
set_inuse(m, newp, nb);
set_inuse(m, remainder, rsize);
lj_alloc_free(m, chunk2mem(remainder));
set_inuse(m, rem, rsize);
lj_alloc_free(m, chunk2mem(rem));
}
} else if (next == m->top && oldsize + m->topsize > nb) {
/* Expand into top */

View File

@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx)
{
cTValue *o = index2adr(L, idx);
TValue tmp;
return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)));
return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp)));
}
LUA_API int lua_isstring(lua_State *L, int idx)
@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
TValue tmp;
if (LJ_LIKELY(tvisnum(o)))
return numV(o);
else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
return numV(&tmp);
else
return 0;
@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
TValue tmp;
if (tvisnum(o))
return numV(o);
else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
return numV(o);
else if (tvisnil(o))
return def;
else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
return 0;
@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
n = numV(o);
else if (tvisnil(o))
return def;
else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
mt = tabref(G(L)->basemt[itypemap(o)]);
mt = tabref(basemt_obj(G(L), o));
if (mt == NULL)
return 0;
settabV(L, L->top, mt);
@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM);
if (tvisbool(o)) {
/* NOBARRIER: g->basemt[] is a GC root. */
setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt));
setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt));
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));
setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt));
} else {
/* NOBARRIER: g->basemt[] is a GC root. */
setgcref(g->basemt[itypemap(o)], obj2gco(mt));
/* NOBARRIER: basemt is a GC root. */
setgcref(basemt_obj(g, o), obj2gco(mt));
}
}
L->top--;

View File

@ -13,6 +13,7 @@
#include "lj_gc.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_frame.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
@ -81,6 +82,10 @@ typedef struct ASMState {
#define IR(ref) (&as->ir[(ref)])
#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
#define ASMREF_TMP2 REF_FALSE /* Temp. register. */
#define ASMREF_L REF_NIL /* Stores register for L. */
/* Check for variant to invariant references. */
#define iscrossref(as, ref) ((ref) < as->sectref)
@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
{ MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
#define REX_64 (FORCE_REX|0x080000)
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#define REX_64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
{
uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
if (rex != 0x40) {
rex |= (rr >> 16);
if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
*--p = (MCode)rex;
}
@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
#define emit_setargr(as, narg, r) \
emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
#define emit_setargi(as, narg, imm) \
emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
#define emit_setargp(as, narg, ptr) \
emit_setargi(as, (narg), ptr2addr((ptr)))
/* -- Register allocator debugging ---------------------------------------- */
/* #define LUAJIT_DEBUG_RA */
@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
memset(as->phireg, 0, sizeof(as->phireg));
memset(as->cost, 0, sizeof(as->cost));
as->cost[RID_ESP] = REGCOST(~0u, 0u);
/* Start slots for spill slot allocation. */
as->evenspill = (SPS_FIRST+1)&~1;
as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
}
/* Rematerialize constants. */
@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
} else if (ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base);
} else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
lua_assert(irt_isnil(ir->t));
emit_getgl(as, r, jit_L);
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
ir->o == IR_KPTR || ir->o == IR_KNULL);
@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
return sps_scale(slot);
}
/* Give back the register temporarily held by ASMREF_TMP1/ASMREF_TMP2.
** The register must be assigned and must not have a spill slot. It is
** freed, flagged as modified and the IR slot is reset to RID_INIT.
** Returns the register that was released.
*/
static Reg ra_releasetmp(ASMState *as, IRRef ref)
{
  IRIns *tmpins = IR(ref);
  Reg tmpreg = tmpins->r;
  lua_assert(ra_hasreg(tmpreg) && !ra_hasspill(tmpins->s));
  ra_free(as, tmpreg);
  ra_modified(as, tmpreg);
  tmpins->r = RID_INIT;
  return tmpreg;
}
/* Restore a register (marked as free). Rematerialize or force a spill. */
static Reg ra_restore(ASMState *as, IRRef ref)
{
@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
/* Arch-specific field offsets. */
static const uint8_t field_ofs[IRFL__MAX+1] = {
#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
#define FLOFS(name, ofs) (uint8_t)(ofs),
IRFLDEF(FLOFS)
#undef FLOFS
0
@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
{
IRIns *irr;
lua_assert(ir->o == IR_STRREF);
as->mrm.idx = as->mrm.base = RID_NONE;
as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
if (irref_isk(ir->op1)) {
@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
}
}
/* Fill as->mrm with the memory operand described by an XLOAD reference.
** An IR_KPTR becomes a plain displacement (ir->i) without base or index
** register. Anything else must be an IR_STRREF and is passed on to
** asm_fusestrref().
*/
static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
{
  if (ir->o != IR_KPTR) {
    lua_assert(ir->o == IR_STRREF);
    asm_fusestrref(as, ir, allow);
    return;
  }
  as->mrm.ofs = ir->i;
  as->mrm.idx = RID_NONE;
  as->mrm.base = RID_NONE;
}
/* Fuse load into memory operand. */
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
{
@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
if (ir->o == IR_KNUM) {
RegSet avail = as->freeset & ~as->modset & RSET_FPR;
lua_assert(allow != RSET_EMPTY);
if (!(as->freeset & ~as->modset & RSET_FPR)) {
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
as->mrm.ofs = ptr2addr(ir_knum(ir));
as->mrm.base = as->mrm.idx = RID_NONE;
return RID_MRM;
@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_XLOAD) {
/* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
/* Generic fusion is only ok for 32 bit operand (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if (irt_isint(ir->t)) {
asm_fusestrref(as, IR(ir->op1), xallow);
if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
asm_fusexref(as, IR(ir->op1), xallow);
return RID_MRM;
}
}
@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return ra_allocref(as, ref, allow);
}
/* -- Calls --------------------------------------------------------------- */
/* Generate a call to a C function.
**
** as:   assembler state.
** ci:   call descriptor; supplies the target (ci->func), the flags and the
**       argument count (CCI_NARGS).
** args: IR references of the arguments, args[0] being the first argument.
**
** The call instruction is emitted before the argument setup.
** NOTE(review): this matches backwards machine code emission (emit_i8
** pre-decrements as->mcp), so the argument setup would execute before the
** call -- confirm against the emitter.
** Only the 32 bit x86 argument layout is implemented: LJ_64 builds stop at
** the #error below.
*/
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
/* Registers that may still be picked for argument values. */
RegSet allow = RSET_ALL;
uint32_t n, nargs = CCI_NARGS(ci);
/* Byte offset from ESP of the next stack argument. */
int32_t ofs = 0;
lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
emit_call(as, ci->func);
for (n = 0; n < nargs; n++) { /* Setup args. */
#if LJ_64
#error "NYI: 64 bit mode call argument setup"
#endif
IRIns *ir = IR(args[n]);
if (irt_isnum(ir->t)) {
/* Number arguments occupy 8 bytes of stack (ofs += 8 below). */
if ((ofs & 4) && irref_isk(args[n])) {
/* Split stores for unaligned FP consts. */
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
} else {
Reg r;
/* Abort the trace if every FPR is already taken by an earlier argument. */
if ((allow & RSET_FPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_FPR);
allow &= ~RID2RSET(r);
emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
}
ofs += 8;
} else {
/* The first two non-number arguments of a CCI_FASTCALL function are passed
** in ECX and EDX; ofs is not advanced for them.
*/
if ((ci->flags & CCI_FASTCALL) && n < 2) {
Reg r = n == 0 ? RID_ECX : RID_EDX;
/* NOTE(review): refs below ASMREF_TMP1 are handled as constants whose
** value is ir->i -- presumably every such ref is a 32 bit constant; confirm.
*/
if (args[n] < ASMREF_TMP1) {
emit_loadi(as, r, ir->i);
} else {
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
allow &= ~RID2RSET(r);
/* Copy from the register the value already lives in, or else allocate the
** value directly into the argument register.
*/
if (ra_hasreg(ir->r))
emit_movrr(as, r, ir->r);
else
ra_allocref(as, args[n], RID2RSET(r));
}
} else {
/* All other arguments are written to [ESP+ofs], 4 bytes each. */
if (args[n] < ASMREF_TMP1) {
emit_movmroi(as, RID_ESP, ofs, ir->i);
} else {
Reg r;
/* Abort the trace if every GPR is already taken by an earlier argument. */
if ((allow & RSET_GPR) == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_alloc1(as, args[n], allow & RSET_GPR);
allow &= ~RID2RSET(r);
emit_movtomro(as, r, RID_ESP, ofs);
}
ofs += 4;
}
}
}
}
/* Setup result reg/sp for call. Evict scratch regs.
**
** as: assembler state.
** ir: the IR instruction that receives the call result.
** ci: call descriptor; its flags select which registers are evicted
**     (CCI_NOFPRCLOBBER) and how a number result is fetched (CCI_CASTU64).
*/
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
/* Start with all scratch registers and remove what need not be evicted. */
RegSet drop = RSET_SCRATCH;
if ((ci->flags & CCI_NOFPRCLOBBER))
drop &= ~RSET_FPR;
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
ra_evictset(as, drop); /* Evictions must be performed first. */
/* Nothing else to do if the result is unused. */
if (ra_used(ir)) {
if (irt_isnum(ir->t)) {
int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
#if LJ_64
if ((ci->flags & CCI_CASTU64)) {
/* The result is taken from RID_RET and moved into the destination. */
Reg dest = ir->r;
if (ra_hasreg(dest)) {
ra_free(as, dest);
ra_modified(as, dest);
emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
} else {
/* NOTE(review): the x86 branch below writes RID_RET to the slot with
** emit_movtomro(); emit_movrmro() here may have the opposite direction --
** confirm that this really stores the result to the spill slot.
*/
emit_movrmro(as, RID_RET, RID_ESP, ofs);
}
} else {
ra_destreg(as, ir, RID_FPRET);
}
#else
/* Number result is in x87 st0 for x86 calling convention. */
Reg dest = ir->r;
/* If a destination register is assigned, release it and emit a load from
** the stack slot at ofs into it. The result is put into that slot by the
** stores emitted below.
*/
if (ra_hasreg(dest)) {
ra_free(as, dest);
ra_modified(as, dest);
emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
}
if ((ci->flags & CCI_CASTU64)) {
/* The result is the register pair RID_RET/RID_RETHI: write both halves. */
emit_movtomro(as, RID_RET, RID_ESP, ofs);
emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
} else {
/* Otherwise write the x87 result to the slot with FSTP (qword). */
emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
}
#endif
} else {
/* Non-number results must not have a primitive type; they use RID_RET. */
lua_assert(!irt_ispri(ir->t));
ra_destreg(as, ir, RID_RET);
}
}
}
/* Collect arguments from CALL* and ARG instructions.
**
** as:   assembler state (used by the IR() macro).
** ir:   the call instruction; its op1 starts the argument chain.
** ci:   call descriptor providing the argument count and the CCI_L flag.
** args: output array, filled in call order. The assertion below limits the
**       count to CCI_NARGS_MAX.
*/
static void asm_collectargs(ASMState *as, IRIns *ir,
const CCallInfo *ci, IRRef *args)
{
uint32_t n = CCI_NARGS(ci);
lua_assert(n <= CCI_NARGS_MAX);
/* With CCI_L the first argument is ASMREF_L and is not part of the IR chain;
** the remaining slots are filled relative to the advanced args pointer.
*/
if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
/* Follow the IR_CARG chain through op1, taking each op2 as an argument and
** filling the array from the highest index downwards.
*/
while (n-- > 1) {
ir = IR(ir->op1);
lua_assert(ir->o == IR_CARG);
args[n] = ir->op2;
}
/* The op1 of the last instruction visited is the first chained argument. */
args[0] = ir->op1;
lua_assert(IR(ir->op1)->o != IR_CARG);
}
/* Assemble a call instruction: look up the call descriptor selected by
** ir->op2, gather the argument references, set up the result and then
** generate the call with its argument setup.
*/
static void asm_call(ASMState *as, IRIns *ir)
{
  const CCallInfo *cinfo = &lj_ir_callinfo[ir->op2];
  IRRef argrefs[CCI_NARGS_MAX];
  asm_collectargs(as, ir, cinfo, argrefs);
  asm_setupresult(as, ir, cinfo);
  asm_gencall(as, cinfo, argrefs);
}
/* -- Type conversions ---------------------------------------------------- */
static void asm_tonum(ASMState *as, IRIns *ir)
@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
Reg str;
int32_t ofs;
RegSet drop = RSET_SCRATCH;
/* Force a spill slot for the destination register (if any). */
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
IRRef args[2];
RegSet drop = RSET_SCRATCH;
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
ra_evictset(as, drop);
asm_guardcc(as, CC_E);
emit_rr(as, XO_TEST, RID_RET, RID_RET);
/* int lj_str_numconv(const char *s, TValue *n) */
emit_call(as, lj_str_numconv);
ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
if (ofs == 0) {
emit_setargr(as, 2, RID_ESP);
} else {
emit_setargr(as, 2, RID_RET);
emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
}
emit_setargr(as, 1, RID_RET);
str = ra_alloc1(as, ir->op1, RSET_GPR);
emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
args[0] = ir->op1;
args[1] = ASMREF_TMP1;
asm_gencall(as, ci, args);
/* Store the result to the spill slot or slots SPS_TEMP1/2. */
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
RID_ESP, sps_scale(ir->s));
}
static void asm_tostr(ASMState *as, IRIns *ir)
{
IRIns *irl = IR(ir->op1);
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
IRRef args[2];
args[0] = ASMREF_L;
as->gcsteps++;
if (irt_isnum(irl->t)) {
/* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
emit_call(as, lj_str_fromnum);
emit_setargr(as, 1, RID_RET);
emit_getgl(as, RID_RET, jit_L);
emit_setargr(as, 2, RID_RET);
emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
args[1] = ASMREF_TMP1;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
RID_ESP, ra_spill(as, irl));
} else {
/* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
emit_call(as, lj_str_fromint);
emit_setargr(as, 1, RID_RET);
emit_getgl(as, RID_RET, jit_L);
emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
args[1] = ir->op1;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
}
}
@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
lua_assert(!irt_isnil(ir->t));
return irt_type(ir->t)-IRT_FALSE;
} else {
lua_assert(irt_isaddr(ir->t));
lua_assert(irt_isgcv(ir->t));
lo = u32ptr(ir_kgc(ir));
hi = lo - 0x04c11db7;
}
@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_newref(ASMState *as, IRIns *ir)
{
IRRef keyref = ir->op2;
IRIns *irkey = IR(keyref);
RegSet allow = RSET_GPR;
Reg tab, tmp;
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
tab = ra_alloc1(as, ir->op1, allow);
tmp = ra_scratch(as, rset_clear(allow, tab));
/* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
emit_call(as, lj_tab_newkey);
emit_setargr(as, 1, tmp);
emit_setargr(as, 2, tab);
emit_getgl(as, tmp, jit_L);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
IRRef args[3];
IRIns *irkey;
Reg tmp;
args[0] = ASMREF_L;
args[1] = ir->op1;
args[2] = ASMREF_TMP1;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
irkey = IR(ir->op2);
if (irt_isnum(irkey->t)) {
/* For numbers use the constant itself or a spill slot as a TValue. */
if (irref_isk(keyref)) {
emit_setargp(as, 3, ir_knum(irkey));
} else {
emit_setargr(as, 3, tmp);
if (irref_isk(ir->op2))
emit_loada(as, tmp, ir_knum(irkey));
else
emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
}
} else {
/* Otherwise use g->tmptv to hold the TValue. */
lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
emit_setargr(as, 3, tmp);
if (!irref_isk(keyref)) {
Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
if (!irref_isk(ir->op2)) {
Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
emit_movtomro(as, src, tmp, 0);
} else if (!irt_ispri(irkey->t)) {
emit_movmroi(as, tmp, 0, irkey->i);
@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
/* -- Loads and stores ---------------------------------------------------- */
static void asm_fload(ASMState *as, IRIns *ir)
static void asm_fxload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op xo;
asm_fusefref(as, ir, RSET_GPR);
if (ir->o == IR_FLOAD)
asm_fusefref(as, ir, RSET_GPR);
else
asm_fusexref(as, IR(ir->op1), RSET_GPR);
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
switch (irt_type(ir->t)) {
case IRT_I8: xo = XO_MOVSXb; break;
case IRT_U8: xo = XO_MOVZXb; break;
@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
}
}
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op xo;
asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
switch (irt_type(ir->t)) {
case IRT_I8: xo = XO_MOVSXb; break;
case IRT_U8: xo = XO_MOVZXb; break;
case IRT_I16: xo = XO_MOVSXw; break;
case IRT_U16: xo = XO_MOVZXw; break;
default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
}
emit_mrm(as, xo, dest, RID_MRM);
}
/* -- String ops ---------------------------------------------------------- */
/* -- Allocations --------------------------------------------------------- */
static void asm_snew(ASMState *as, IRIns *ir)
{
RegSet allow = RSET_GPR;
Reg left, right;
IRIns *irl;
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
irl = IR(ir->op1);
left = irl->r;
right = IR(ir->op2)->r;
if (ra_noreg(left)) {
lua_assert(irl->o == IR_STRREF);
/* Get register only for non-const STRREF. */
if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
if (ra_hasreg(right)) rset_clear(allow, right);
left = ra_allocref(as, ir->op1, allow);
}
}
if (ra_noreg(right) && !irref_isk(ir->op2)) {
if (ra_hasreg(left)) rset_clear(allow, left);
right = ra_allocref(as, ir->op2, allow);
}
/* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
emit_call(as, lj_str_new);
emit_setargr(as, 1, RID_RET);
emit_getgl(as, RID_RET, jit_L);
if (ra_noreg(left)) /* Use immediate for const STRREF. */
emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
(int32_t)sizeof(GCstr));
else
emit_setargr(as, 2, left);
if (ra_noreg(right))
emit_setargi(as, 3, IR(ir->op2)->i);
else
emit_setargr(as, 3, right);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
IRRef args[3];
args[0] = ASMREF_L;
args[1] = ir->op1;
args[2] = ir->op2;
as->gcsteps++;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
}
/* -- Table ops ----------------------------------------------------------- */
static void asm_tnew(ASMState *as, IRIns *ir)
{
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
/* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
emit_call(as, lj_tab_new);
emit_setargr(as, 1, RID_RET);
emit_setargi(as, 2, ir->op1);
emit_setargi(as, 3, ir->op2);
emit_getgl(as, RID_RET, jit_L);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
IRRef args[2];
args[0] = ASMREF_L;
args[1] = ASMREF_TMP1;
as->gcsteps++;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
}
static void asm_tdup(ASMState *as, IRIns *ir)
{
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
/* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
emit_call(as, lj_tab_dup);
emit_setargr(as, 1, RID_RET);
emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
emit_getgl(as, RID_RET, jit_L);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
IRRef args[2];
args[0] = ASMREF_L;
args[1] = ir->op1;
as->gcsteps++;
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
}
static void asm_tlen(ASMState *as, IRIns *ir)
{
ra_destreg(as, ir, RID_RET);
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
}
/* -- Write barriers ------------------------------------------------------ */
static void asm_tbar(ASMState *as, IRIns *ir)
{
@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
static void asm_obar(ASMState *as, IRIns *ir)
{
RegSet allow = RSET_GPR;
Reg obj, val;
GCobj *valp;
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2];
MCLabel l_end;
int32_t ofs;
ra_evictset(as, RSET_SCRATCH);
if (irref_isk(ir->op2)) {
valp = ir_kgc(IR(ir->op2));
val = RID_NONE;
} else {
valp = NULL;
val = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, val);
}
obj = ra_alloc1(as, ir->op1, allow);
l_end = emit_label(as);
Reg obj;
/* No need for other object barriers (yet). */
lua_assert(IR(ir->op1)->o == IR_UREFC);
ofs = -(int32_t)offsetof(GCupval, tv);
/* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
emit_call(as, lj_gc_barrieruv);
if (ofs == 0) {
emit_setargr(as, 2, obj);
} else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
emit_setargr(as, 2, obj);
emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
} else {
emit_setargr(as, 2, RID_RET);
emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
}
emit_setargp(as, 1, J2G(as->J));
if (valp)
emit_setargp(as, 3, valp);
else
emit_setargr(as, 3, val);
l_end = emit_label(as);
args[0] = ASMREF_TMP1;
args[1] = ir->op1;
asm_gencall(as, ci, args);
emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
obj = IR(ir->op1)->r;
emit_sjcc(as, CC_Z, l_end);
emit_i8(as, LJ_GC_WHITES);
if (valp)
emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
else
if (irref_isk(ir->op2)) {
GCobj *vp = ir_kgc(IR(ir->op2));
emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
} else {
Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
}
emit_sjcc(as, CC_Z, l_end);
emit_i8(as, LJ_GC_BLACK);
emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
ofs + (int32_t)offsetof(GChead, marked));
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
}
/* -- FP/int arithmetic and logic operations ------------------------------ */
@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
}
}
emit_mrm(as, XO_UCOMISD, left, right);
} else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
} else {
IRRef lref = ir->op1, rref = ir->op2;
IROp leftop = (IROp)(IR(lref)->o);
lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
/* Swap constants (only for ABC) and fusable loads to the right. */
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
} else {
Reg left;
if (opisfusableload((IROp)irl->o) &&
((irt_isi8(irl->t) && checki8(imm)) ||
(irt_isu8(irl->t) && checku8(imm)))) {
/* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
** loads are handled here. The IRT_I16/IRT_U16 loads should never be
** fused, since cmp word [mem], imm16 has a length-changing prefix.
((irt_isu8(irl->t) && checku8(imm)) ||
((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
(irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
/* Only the IRT_INT case is fused by asm_fuseload.
** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
** are handled here.
** Note that cmp word [mem], imm16 should not be generated,
** since it has a length-changing prefix. Compares of a word
** against a sign-extended imm8 are ok, however.
*/
IRType1 origt = irl->t; /* Temporarily flip types. */
irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
if (left == RID_MRM) { /* Fusion succeeded? */
asm_guardcc(as, cc);
emit_i8(as, imm);
emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
return;
} /* Otherwise handle register case as usual. */
} else {
@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
asm_guardcc(as, cc);
emit_mrm(as, XO_CMP, left, right);
}
} else { /* Handle ordered string compares. */
RegSet allow = RSET_GPR;
/* This assumes lj_str_cmp never uses any SSE registers. */
ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
asm_guardcc(as, cc);
emit_rr(as, XO_TEST, RID_RET, RID_RET);
emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
if (irref_isk(ir->op1)) {
emit_setargi(as, 1, IR(ir->op1)->i);
} else {
Reg left = ra_alloc1(as, ir->op1, allow);
rset_clear(allow, left);
emit_setargr(as, 1, left);
}
if (irref_isk(ir->op2)) {
emit_setargi(as, 2, IR(ir->op2)->i);
} else {
Reg right = ra_alloc1(as, ir->op2, allow);
emit_setargr(as, 2, right);
}
}
}
@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
/* -- GC handling --------------------------------------------------------- */
/* Sync all live GC values to Lua stack slots. */
static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
{
/* Some care must be taken when allocating registers here, since this is
** not part of the fast path. All scratch registers are evicted in the
** fast path, so it's easiest to force allocation from scratch registers
** only. This avoids register allocation state unification.
*/
RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
IRRef2 *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as, SnapShot *snap)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
IRRef args[2];
MCLabel l_end;
const BCIns *pc;
Reg tmp, base;
Reg base, lstate, tmp;
RegSet drop = RSET_SCRATCH;
/* Must evict BASE because the stack may be reallocated by the GC. */
if (ra_hasreg(IR(REF_BASE)->r))
drop |= RID2RSET(IR(REF_BASE)->r);
if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */
drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */
ra_evictset(as, drop);
base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
l_end = emit_label(as);
/* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
emit_call(as, lj_gc_step_jit);
emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
emit_setargr(as, 1, RID_RET);
emit_setargi(as, 3, (int32_t)as->gcsteps);
emit_getgl(as, RID_RET, jit_L);
pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
emit_setargp(as, 2, pc);
asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
args[0] = ASMREF_L;
args[1] = ASMREF_TMP1;
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
emit_loadi(as, tmp, (int32_t)as->gcsteps);
/* We don't know spadj yet, so get the C frame from L->cframe. */
emit_movmroi(as, tmp, CFRAME_OFS_PC,
(int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
lstate = IR(ASMREF_L)->r;
emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
/* It's ok if lstate is already in a non-scratch reg. But all allocations
** in the non-fast path must use a scratch reg. See comment above.
*/
base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
emit_movtomro(as, base, lstate, offsetof(lua_State, base));
asm_gc_sync(as, snap, base);
/* BASE/L get restored anyway, better do it inside the slow path. */
if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
ra_restore(as, ASMREF_L);
/* Jump around GC step if GC total < GC threshold. */
tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
emit_sjcc(as, CC_B, l_end);
@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
{
int32_t spadj;
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
spadj = sps_adjust(as);
spadj = sps_adjust(as->evenspill);
as->T->spadjust = (uint16_t)spadj;
emit_addptr(as, RID_ESP, -spadj);
}
@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
ra_free(as, r);
if (r != RID_BASE) {
ra_scratch(as, RID2RSET(RID_BASE));
emit_rr(as, XO_MOV, r, RID_BASE);
lua_assert(!ra_hasspill(ir->s));
if (ra_hasreg(r)) {
ra_free(as, r);
if (r != RID_BASE) {
ra_scratch(as, RID2RSET(RID_BASE));
emit_rr(as, XO_MOV, r, RID_BASE);
}
}
}
@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
}
/* Calculate stack frame adjustment. */
spadj = sps_adjust(as);
spadj = sps_adjust(as->evenspill);
spdelta = spadj - (int32_t)as->parent->spadjust;
if (spdelta < 0) { /* Don't shrink the stack frame. */
spadj = (int32_t)as->parent->spadjust;
@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
GCfunc *fn = ir_kfunc(IR(ir->op2));
if (isluafunc(fn)) {
BCReg fs = s + funcproto(fn)->framesize;
newbase = s;
if (secondbase == ~(BCReg)0) secondbase = s;
if (fs > topslot) topslot = fs;
if (s != 0) {
newbase = s;
if (secondbase == ~(BCReg)0) secondbase = s;
}
}
}
}
@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
/* Loads and stores. */
case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
case IR_FLOAD: asm_fload(as, ir); break;
case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
case IR_SLOAD: asm_sload(as, ir); break;
case IR_XLOAD: asm_xload(as, ir); break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
/* String ops. */
/* Allocations. */
case IR_SNEW: asm_snew(as, ir); break;
/* Table ops. */
case IR_TNEW: asm_tnew(as, ir); break;
case IR_TDUP: asm_tdup(as, ir); break;
case IR_TLEN: asm_tlen(as, ir); break;
/* Write barriers. */
case IR_TBAR: asm_tbar(as, ir); break;
case IR_OBAR: asm_obar(as, ir); break;
@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_TOSTR: asm_tostr(as, ir); break;
case IR_STRTO: asm_strto(as, ir); break;
/* Calls. */
case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
case IR_CARG: break;
default:
setintV(&as->J->errinfo, ir->o);
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
IRRef i, nins;
int inloop;
ra_setup(as);
/* Clear reg/sp for constants. */
for (i = T->nk; i < REF_BIAS; i++)
IR(i)->prev = REGSP_INIT;
@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
as->curins = nins;
inloop = 0;
as->evenspill = SPS_FIRST;
for (i = REF_FIRST; i < nins; i++) {
IRIns *ir = IR(i);
switch (ir->o) {
@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (i == as->stopins+1 && ir->op1 == ir->op2)
as->stopins++;
break;
case IR_CALLN: case IR_CALLL: case IR_CALLS: {
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
/* NYI: not fastcall-aware, but doesn't matter (yet). */
if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
as->evenspill = (int32_t)CCI_NARGS(ci);
#if LJ_64
ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
#else
ir->prev = REGSP_HINT(RID_RET);
#endif
if (inloop)
as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
continue;
}
/* C calls evict all scratch regs and return results in RID_RET. */
case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
case IR_NEWREF:
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (inloop)
as->modset = RSET_SCRATCH;
break;
/* Ordered string compares evict all integer scratch registers. */
case IR_LT: case IR_GE: case IR_LE: case IR_GT:
if (irt_isstr(ir->t) && inloop)
as->modset |= (RSET_SCRATCH & RSET_GPR);
break;
/* Non-constant shift counts need to be in RID_ECX. */
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
}
ir->prev = REGSP_INIT;
}
if ((as->evenspill & 1))
as->oddspill = as->evenspill++;
else
as->oddspill = 0;
}
/* -- Assembler core ------------------------------------------------------ */
@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
/* Setup register allocation. */
ra_setup(as);
asm_setup_regsp(as, T);
if (!as->loopref) {

View File

@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t;
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
#define checki16(x) ((x) == (int32_t)(int16_t)(x))
#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))

View File

@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o)
}
}
/* Mark the base metatables. */
static void gc_mark_basemt(global_State *g)
/* Mark GC roots. */
static void gc_mark_gcroot(global_State *g)
{
int i;
for (i = 0; i < BASEMT_MAX; i++)
if (tabref(g->basemt[i]) != NULL)
gc_markobj(g, tabref(g->basemt[i]));
ptrdiff_t i;
for (i = 0; i < GCROOT__MAX; i++)
if (gcref(g->gcroot[i]) != NULL)
gc_markobj(g, gcref(g->gcroot[i]));
}
/* Start a GC cycle and mark the root set. */
@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g)
gc_markobj(g, mainthread(g));
gc_markobj(g, tabref(mainthread(g)->env));
gc_marktv(g, &g->registrytv);
gc_mark_basemt(g);
gc_mark_gcroot(g);
g->gc.state = GCSpropagate;
}
@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L)
lua_assert(!iswhite(obj2gco(mainthread(g))));
gc_markobj(g, L); /* Mark running thread. */
gc_mark_curtrace(g); /* Mark current trace. */
gc_mark_basemt(g); /* Mark base metatables (again). */
gc_mark_gcroot(g); /* Mark GC roots (again). */
gc_propagate_gray(g); /* Propagate all of the above. */
setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L)
}
/* Ditto, but fix the stack top first. */
void lj_gc_step_fixtop(lua_State *L)
void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
{
if (curr_funcisL(L)) L->top = curr_topL(L);
lj_gc_step(L);
}
/* Perform multiple GC steps. Called from JIT-compiled code. */
void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps)
void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
{
cframe_pc(cframe_raw(L->cframe)) = pc;
L->top = curr_topL(L);
while (steps-- > 0 && lj_gc_step(L) == 0)
;
@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
makewhite(g, o); /* Make it white to avoid the following barrier. */
}
/* The reason for duplicating this is that it needs to be visible from ASM. */
void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v)
/* Specialized barrier for closed upvalue. Pass &uv->tv. */
void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv)
{
lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
lua_assert(o->gch.gct == ~LJ_TUPVAL);
/* Preserve invariant during propagation. Otherwise it doesn't matter. */
#define TV2MARKED(x) \
(*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked)))
if (g->gc.state == GCSpropagate)
gc_mark(g, v); /* Move frontier forward. */
gc_mark(g, gcV(tv));
else
makewhite(g, o); /* Make it white to avoid the following barrier. */
TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g);
#undef TV2MARKED
}
/* Close upvalue. Also needs a write barrier. */

View File

@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
LJ_FUNC void lj_gc_freeall(global_State *g);
LJ_FUNCA int lj_gc_step(lua_State *L);
LJ_FUNCA void lj_gc_step_fixtop(lua_State *L);
LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps);
LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* GC check: drive collector forward if the GC threshold has been reached. */
@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* Write barriers. */
LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v);
LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv);
LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);

View File

@ -6,16 +6,22 @@
#define lj_ir_c
#define LUA_CORE
/* For pointers to libc/libm functions. */
#include <stdio.h>
#include <math.h>
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_gc.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@ -32,6 +38,17 @@ IRDEF(IRMODE)
0
};
/* C call info for CALL* instructions.
**
** The table is generated from IRCALLDEF: one entry per listed function,
** followed by a { NULL, 0 } terminator entry. It is indexed by the call ID
** stored in the CALL* instruction (see lj_ir_call).
**
** Each entry holds the function pointer plus a single flags word made by
** OR-ing together the argument count, the call kind (CCI_CALL_*), the IR
** result type shifted by CCI_OTSHIFT and any extra CCI_* flags.
*/
LJ_DATADEF const CCallInfo lj_ir_callinfo[] = {
#define IRCALLCI(name, nargs, kind, type, flags) \
{ (ASMFunction)name, \
(nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) },
IRCALLDEF(IRCALLCI)
#undef IRCALLCI
{ NULL, 0 }
};
/* -- IR emitter ---------------------------------------------------------- */
/* Grow IR buffer at the top. */
@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
return TREF(ref, irt_t((ir->t = fins->t)));
}
/* Emit call to a C function.
**
** id selects the entry in lj_ir_callinfo. The variadic arguments are the
** IR references of the call arguments (each read as an IRRef). If the
** callee has the CCI_L flag, its implicit L argument is not passed here,
** so one argument less than CCI_NARGS is read.
**
** Returns the reference of the emitted CALL* instruction.
*/
TRef lj_ir_call(jit_State *J, IRCallID id, ...)
{
const CCallInfo *ci = &lj_ir_callinfo[id];
uint32_t n = CCI_NARGS(ci);
TRef tr = TREF_NIL;  /* Operand used when there are no explicit args. */
va_list argp;
va_start(argp, id);
if ((ci->flags & CCI_L)) n--;  /* Implicit L arg is not in the varargs. */
if (n > 0)
tr = va_arg(argp, IRRef);  /* First arg starts the chain. */
/* Each further arg is linked in as CARG(previous chain, arg). */
while (n-- > 1)
tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef));
va_end(argp);
if (CCI_OP(ci) == IR_CALLS)
J->needsnap = 1; /* Need snapshot after call with side effect. */
/* Op and result type come from the call info; op2 holds the call ID. */
return emitir(CCI_OPTYPE(ci), tr, id);
}
/* -- Interning of constants ---------------------------------------------- */
/*

View File

@ -8,6 +8,8 @@
#include "lj_obj.h"
/* -- IR instructions ----------------------------------------------------- */
/* IR instruction definition. Order matters, see below. */
#define IRDEF(_) \
/* Miscellaneous ops. */ \
@ -101,13 +103,12 @@
_(USTORE, S , ref, ref) \
_(FSTORE, S , ref, ref) \
\
/* String ops. */ \
_(SNEW, N , ref, ref) \
\
/* Table ops. */ \
/* Allocations. */ \
_(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \
_(TNEW, A , lit, lit) \
_(TDUP, A , ref, ___) \
_(TLEN, L , ref, ___) \
\
/* Write barriers. */ \
_(TBAR, S , ref, ___) \
_(OBAR, S , ref, ref) \
\
@ -118,6 +119,12 @@
_(TOSTR, N , ref, ___) \
_(STRTO, G , ref, ___) \
\
/* Calls. */ \
_(CALLN, N , ref, lit) \
_(CALLL, L , ref, lit) \
_(CALLS, S , ref, lit) \
_(CARG, N , ref, ref) \
\
/* End of list. */
/* IR opcodes (max. 256). */
@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
/* -- Named IR literals --------------------------------------------------- */
/* FPMATH sub-functions. ORDER FPM. */
#define IRFPMDEF(_) \
_(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM)
IRFPM__MAX
} IRFPMathOp;
/* FLOAD field IDs. */
/* FLOAD fields. */
#define IRFLDEF(_) \
_(STR_LEN, GCstr, len) \
_(FUNC_ENV, GCfunc, l.env) \
_(TAB_META, GCtab, metatable) \
_(TAB_ARRAY, GCtab, array) \
_(TAB_NODE, GCtab, node) \
_(TAB_ASIZE, GCtab, asize) \
_(TAB_HMASK, GCtab, hmask) \
_(TAB_NOMM, GCtab, nomm) \
_(UDATA_META, GCudata, metatable)
_(STR_LEN, offsetof(GCstr, len)) \
_(FUNC_ENV, offsetof(GCfunc, l.env)) \
_(TAB_META, offsetof(GCtab, metatable)) \
_(TAB_ARRAY, offsetof(GCtab, array)) \
_(TAB_NODE, offsetof(GCtab, node)) \
_(TAB_ASIZE, offsetof(GCtab, asize)) \
_(TAB_HMASK, offsetof(GCtab, hmask)) \
_(TAB_NOMM, offsetof(GCtab, nomm)) \
_(UDATA_META, offsetof(GCudata, metatable)) \
_(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
_(UDATA_FILE, sizeof(GCudata))
typedef enum {
#define FLENUM(name, type, field) IRFL_##name,
#define FLENUM(name, ofs) IRFL_##name,
IRFLDEF(FLENUM)
#undef FLENUM
IRFL__MAX
@ -183,7 +194,8 @@ IRFLDEF(FLENUM)
#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
/* XLOAD mode, stored in op2. */
#define IRXLOAD_UNALIGNED 1
#define IRXLOAD_READONLY 1 /* Load from read-only data. */
#define IRXLOAD_UNALIGNED 2 /* Unaligned load. */
/* TOINT mode, stored in op2. Ordered by strength of the checks. */
#define IRTOINT_CHECK 0 /* Number checked for integerness. */
@ -191,6 +203,67 @@ IRFLDEF(FLENUM)
#define IRTOINT_ANY 2 /* Any FP number is ok. */
#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
/* C call info for CALL* instructions.
**
** Layout of the flags word, as implied by the macros below:
**   bits 0-7     number of arguments (CCI_NARGS)
**   bits 8-11    CCI_* flags (CCI_L .. CCI_FASTCALL)
**   bits 16 up   combined IR op/type (CCI_OPTYPE), with the IR op
**                itself in bits 24 and up (CCI_OP)
*/
typedef struct CCallInfo {
ASMFunction func; /* Function pointer. */
uint32_t flags; /* Number of arguments and flags. */
} CCallInfo;
#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */
#define CCI_NARGS_MAX 16 /* Max. # of args. */
#define CCI_OTSHIFT 16
#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
#define CCI_OPSHIFT 24
#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
/* Call kinds: the IR opcode to emit, pre-shifted into the op field. */
#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
/* Same call kinds, combined with the fastcall convention flag. */
#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL)
#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL)
#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL)
/* C call info flags. */
#define CCI_L 0x0100 /* Implicit L arg. */
#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
/* Function definitions for CALL* instructions. */
#define IRCALLDEF(_) \
_(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(lj_str_new, 3, S, STR, CCI_L) \
_(lj_str_tonum, 2, FN, INT, 0) \
_(lj_str_fromint, 2, FN, STR, CCI_L) \
_(lj_str_fromnum, 2, FN, STR, CCI_L) \
_(lj_tab_new1, 2, FS, TAB, CCI_L) \
_(lj_tab_dup, 2, FS, TAB, CCI_L) \
_(lj_tab_newkey, 3, S, PTR, CCI_L) \
_(lj_tab_len, 1, FL, INT, 0) \
_(lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(lj_gc_barrieruv, 2, FS, NIL, 0) \
_(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
_(sinh, 1, N, NUM, 0) \
_(cosh, 1, N, NUM, 0) \
_(tanh, 1, N, NUM, 0) \
_(fputc, 2, S, INT, 0) \
_(fwrite, 4, S, INT, 0) \
_(fflush, 1, S, INT, 0) \
\
/* End of list. */
typedef enum {
#define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name,
IRCALLDEF(IRCALLENUM)
#undef IRCALLENUM
IRCALL__MAX
} IRCallID;
LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
/* -- IR operands --------------------------------------------------------- */
/* IR operand mode (2 bit). */
typedef enum {
IRMref, /* IR reference. */
@ -227,6 +300,8 @@ typedef enum {
LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
/* -- IR instruction types ------------------------------------------------ */
/* IR result type and flags (8 bit). */
typedef enum {
/* Map of itypes to non-negative numbers. ORDER LJ_T */
@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
/* Stored combined IR opcode and type. */
typedef uint16_t IROpT;
/* -- IR references ------------------------------------------------------- */
/* IR references. */
typedef uint16_t IRRef1; /* One stored reference. */
typedef uint32_t IRRef2; /* Two stored references. */
@ -382,6 +459,8 @@ typedef uint32_t TRef;
#define TREF_FALSE (TREF_PRI(IRT_FALSE))
#define TREF_TRUE (TREF_PRI(IRT_TRUE))
/* -- IR format ----------------------------------------------------------- */
/* IR instruction format (64 bit).
**
** 16 16 8 8 8 8
@ -425,5 +504,6 @@ typedef union IRIns {
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_knum(ir) (mref((ir)->ptr, cTValue))
#define ir_kptr(ir) (mref((ir)->ptr, void))
#endif

View File

@ -6,6 +6,8 @@
#ifndef _LJ_IROPT_H
#define _LJ_IROPT_H
#include <stdarg.h>
#include "lj_obj.h"
#include "lj_jit.h"
@ -13,6 +15,7 @@
/* IR emitter. */
LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
/* Save current IR in J->fold.ins, but do not emit it (yet). */
static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
/* Emit IR instructions with on-the-fly optimizations. */
LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim);
/* Special return values for the fold functions. */
enum {
@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
/* Dead-store elimination. */

View File

@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
(tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o)))))
(tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o)))))
lj_err_argt(L, narg, LUA_TNUMBER);
return numV(o);
}

View File

@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
#define LIBINIT_FFID 0xfe
#define LIBINIT_END 0xff
/* Exported library functions. */
typedef struct RandomState RandomState;
LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
#endif

View File

@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
mt = tabref(G(L)->basemt[itypemap(o)]);
mt = tabref(basemt_obj(G(L), o));
if (mt) {
cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
if (mo)
@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n)
{
if (tvisnum(o))
return o;
else if (tvisstr(o) && lj_str_numconv(strVdata(o), n))
else if (tvisstr(o) && lj_str_tonum(strV(o), n))
return n;
else
return NULL;
@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
top = curr_top(L);
setcont(top, ne ? lj_cont_condf : lj_cont_condt);
copyTV(L, top+1, mo);
it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA;
it = ~o1->gch.gct;
setgcV(L, top+2, &o1->gch, it);
setgcV(L, top+3, &o2->gch, it);
return top+2; /* Trigger metamethod call. */

View File

@ -315,7 +315,7 @@ typedef struct GCstr {
/* Userdata object. Payload follows. */
typedef struct GCudata {
GCHeader;
uint8_t unused1;
uint8_t udtype; /* Userdata type. */
uint8_t unused2;
GCRef env; /* Should be at same offset in GCfunc. */
MSize len; /* Size of payload. */
@ -323,6 +323,13 @@ typedef struct GCudata {
uint32_t align1; /* To force 8 byte alignment of the payload. */
} GCudata;
/* Userdata types. */
enum {
UDTYPE_USERDATA, /* Regular userdata. */
UDTYPE_IO_FILE, /* I/O library FILE. */
UDTYPE__MAX
};
#define uddata(u) ((void *)((u)+1))
#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
@ -496,7 +503,17 @@ MMDEF(MMENUM)
MM_FAST = MM_eq
} MMS;
#define BASEMT_MAX ((~LJ_TNUMX)+1)
/* GC root IDs. These are indexes into the gcroot array of global_State.
** The base metatables occupy the slots from GCROOT_BASEMT up to and
** including GCROOT_BASEMT_NUM; the remaining IDs follow after that.
*/
typedef enum {
GCROOT_BASEMT, /* Metatables for base types. */
GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */
GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */
GCROOT__MAX
} GCRootID;
/* Base metatable slot for an internal type tag (it); indexed by ~(it). */
#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)])
/* Base metatable slot for a tagged value (o); indexed by itypemap(o). */
#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
typedef struct GCState {
MSize total; /* Memory currently allocated. */
@ -544,7 +561,7 @@ typedef struct global_State {
volatile int32_t vmstate; /* VM state or current JIT code trace number. */
GCRef jit_L; /* Current JIT code lua_State or NULL. */
MRef jit_base; /* Current JIT code L->base. */
GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */
GCRef gcroot[GCROOT__MAX]; /* GC roots. */
GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
} global_State;

View File

@ -282,21 +282,50 @@ LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
TValue n;
if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
if (lj_str_tonum(ir_kstr(fleft), &n))
return lj_ir_knum(J, numV(&n));
return FAILFOLD;
}
LJFOLD(SNEW STRREF KINT)
LJFOLDF(kfold_snew)
/* SNEW of a constant pointer and a constant length: create the string
** right away and return it as a string constant.
*/
LJFOLD(SNEW KPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
return lj_ir_kstr(J, s);
}
/* SNEW with a constant length of zero yields the empty string constant,
** whatever the pointer operand is. Other constant lengths are left to
** the next matching fold rule.
*/
LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
if (fright->i == 0)
return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
return NEXTFOLD;
}
/* STRREF of a constant string and a constant offset folds to a constant
** pointer into the string data.
*/
LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
GCstr *str = ir_kstr(fleft);
/* The offset is asserted to lie strictly below the string length. */
lua_assert((MSize)fright->i < str->len);
return lj_ir_kptr(J, (char *)strdata(str) + fright->i);
}
LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
PHIBARRIER(fleft);
if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
const char *s = strdata(ir_kstr(IR(fleft->op1)));
int32_t ofs = IR(fleft->op2)->i;
return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
if (irref_isk(fins->op2) && fright->i == 0) {
return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
} else {
/* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
IRIns *ir = IR(fleft->op1);
IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
lua_assert(ir->o == IR_STRREF);
PHIBARRIER(ir);
fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
fins->op1 = str;
fins->ot = IRT(IR_STRREF, IRT_PTR);
return RETRYFOLD;
}
return NEXTFOLD;
}
@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp)
}
}
LJFOLD(LT KGC KGC)
LJFOLD(GE KGC KGC)
LJFOLD(LE KGC KGC)
LJFOLD(GT KGC KGC)
LJFOLDF(kfold_strcomp)
LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
if (irt_isstr(fins->t)) {
GCstr *a = ir_kstr(fleft);
GCstr *b = ir_kstr(fright);
return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
GCstr *a = ir_kstr(IR(fleft->op1));
GCstr *b = ir_kstr(IR(fleft->op2));
return INTFOLD(lj_str_cmp(a, b));
}
return NEXTFOLD;
}
@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc)
uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
len == 2 ? IRT(IR_XLOAD, IRT_U16) :
IRTI(IR_XLOAD));
TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
TRef tmp = emitir(ot, strref,
IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
if (len == 3)
tmp = emitir(IRTI(IR_BAND), tmp,
@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload)
LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)
LJFOLD(TLEN any)
LJFOLDX(lj_opt_fwd_tlen)
LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah)
/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len)
LJFOLDF(fload_str_len_kgc)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
return INTFOLD((int32_t)ir_kstr(fleft)->len);
return NEXTFOLD;
}
/* The length of a string created by SNEW is the second operand of the
** SNEW itself, so forward that reference instead of loading the field.
** Only done when the FOLD optimization is enabled.
*/
LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
/* NOTE(review): PHIBARRIER is defined elsewhere; presumably it gives up
** on this fold when the SNEW is a loop-carried (PHI) value -- confirm.
*/
PHIBARRIER(fleft);
return fleft->op2;
}
return NEXTFOLD;
}
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLDX(lj_opt_cse)
@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload)
return J->slot[fins->op1];
}
/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
LJFOLD(XLOAD STRREF any)
LJFOLDF(xload_str)
LJFOLD(XLOAD KPTR any)
LJFOLDF(xload_kptr)
{
if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
GCstr *str = ir_kstr(IR(fleft->op1));
int32_t ofs = IR(fleft->op2)->i;
lua_assert((MSize)ofs < str->len);
lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
}
return CSEFOLD;
/* Only fold read-only integer loads for now. */
if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t))
return INTFOLD(kfold_xload(fins, ir_kptr(fleft)));
return NEXTFOLD;
}
/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags.
**
** Walks the chain of previous XLOAD instructions (newest first) and reuses
** the first one that has the same address operand and the same result type.
** The search stops at instructions whose reference is not above the address
** operand. If nothing matches, the XLOAD is emitted.
*/
LJFOLD(XLOAD any any)
LJFOLDF(fwd_xload)
{
IRRef ref = J->chain[IR_XLOAD];
IRRef op1 = fins->op1;
while (ref > op1) {
/* op2 (the IRXLOAD_* flags) is deliberately not compared. */
if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
return ref;
ref = IR(ref)->prev;
}
return EMITFOLD;
}
/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
/* -- Write barriers ------------------------------------------------------ */
@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)
LJFOLD(NEWREF any any) /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any) /* Safeguard fallback. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLDF(store_raw)
{
return EMITFOLD;
}
LJFOLDX(lj_ir_emit)
/* ------------------------------------------------------------------------ */
@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
}
}
/* CSE with explicit search limit.
**
** Searches the chain of earlier instructions with the same opcode as the
** instruction being folded, newest first, but only those whose reference
** is above lim. Returns the first one with identical operands. Otherwise
** the instruction is emitted.
*/
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
IRRef ref = J->chain[fins->o];
/* Pack both operands (op1 low, op2 high) to compare them in one go. */
IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
while (ref > lim) {
if (IR(ref)->op12 == op12)
return ref;
ref = IR(ref)->prev;
}
return lj_ir_emit(J);
}
/* ------------------------------------------------------------------------ */
#undef IR

View File

@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J)
/* Undo any partial changes made by the loop optimization. */
static void loop_undo(jit_State *J, IRRef ins)
{
ptrdiff_t i;
lj_ir_rollback(J, ins);
for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
BPropEntry *bp = &J->bpropcache[i];
if (bp->val >= ins)
bp->key = 0;
}
for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
IRIns *ir = IR(ins);
irt_clearphi(ir->t);

View File

@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
ref = J->chain[IR_ULOAD];
while (ref > lim) {
IRIns *load = IR(ref);
if (load->op1 == uref)
return ref; /* Load forwarding. */
ref = load->prev;
}
return EMITFOLD; /* Conflict or no match. */
return lj_opt_cselim(J, lim);
}
/* USTORE elimination. */
@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
ref = J->chain[IR_FLOAD];
while (ref > lim) {
IRIns *load = IR(ref);
if (load->op1 == oref && load->op2 == fid)
return ref; /* Load forwarding. */
ref = load->prev;
}
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
return lj_opt_cselim(J, lim);
}
/* FSTORE elimination. */
@ -458,10 +444,10 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
/* -- TLEN forwarding ----------------------------------------------------- */
/* -- Forwarding of lj_tab_len -------------------------------------------- */
/* This is rather simplistic right now, but better than nothing. */
TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
{
IRRef tab = fins->op1; /* Table reference. */
IRRef lim = tab; /* Search limit. */
@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
}
/* Try to find a matching load. Below the conflicting store, if any. */
ref = J->chain[IR_TLEN];
while (ref > lim) {
IRIns *tlen = IR(ref);
if (tlen->op1 == tab)
return ref; /* Load forwarding. */
ref = tlen->prev;
}
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
return lj_opt_cselim(J, lim);
}
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */

View File

@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
{
lua_Number n;
if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc))
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
n = numV(vc);
/* Limit narrowing for pow to small exponents (or for two constants). */

View File

@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
GCstr *s = lj_str_new(L, str, len);
TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
lj_gc_check(L);
return s;
}

View File

@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
} else {
/* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]);
mt = tabref(basemt_obj(J2G(J), &ix->tabv));
if (mt == NULL)
return 0; /* No metamethod. */
mix.tab = lj_ir_ktab(J, mt);
@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
/* Get runtime value of int argument. */
static int32_t argv2int(jit_State *J, TValue *o)
{
if (tvisstr(o) && !lj_str_numconv(strVdata(o), o))
if (tvisstr(o) && !lj_str_tonum(strV(o), o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
return lj_num2bit(numV(o));
}
@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
/* Otherwise res[0] already contains the result. */
} else if (tref_isnumber(tr)) {
res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
} else if (tref_ispri(tr)) {
res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)]));
} else {
recff_err_nyi(J, rd);
}
@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = emitir(IRTN(IR_ATAN2), y, x);
}
/* Record a one-argument math function as an abstract C call.
** The first argument is converted to a number and passed to the call
** selected by rd->data; the call's result becomes the single result.
** NOTE(review): rd->data is presumably the IRCALL_* id for sinh/cosh/tanh
** set up by the fast-function table -- confirm at the registration site.
*/
static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, arg[0]);
res[0] = lj_ir_call(J, rd->data, tr);
}
static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef tr = arg[0];
if (tref_isinteger(arg[0])) {
if (tref_isinteger(tr)) {
res[0] = tr;
res[1] = lj_ir_kint(J, 0);
} else {
@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, arg[0]);
if (!tref_isnumber_str(arg[1]))
lj_trace_err(J, LJ_TRERR_BADTYPE);
res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]);
res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]);
UNUSED(rd);
}
@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = tr;
}
/* Record math.random().
** Emits a call to lj_math_random_step on the data area of the userdata
** stored in upvalue 0 of the recorded function, subtracts 1.0 from the
** result and then, depending on how many arguments are present, scales it
** using the formula noted on each branch. With no arguments the value
** after the subtraction is returned as is.
** NOTE(review): the subtraction suggests the step function yields a value
** in [1,2) -- confirm against lj_math_random_step.
*/
static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd)
{
GCudata *ud = udataV(&rd->fn->c.upvalue[0]);
TRef tr, one;
lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
/* The userdata payload address is embedded as a constant pointer. */
tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));
one = lj_ir_knum_one(J);
tr = emitir(IRTN(IR_SUB), tr, one);
if (arg[0]) {
TRef tr1 = lj_ir_tonum(J, arg[0]);
if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */
TRef tr2 = lj_ir_tonum(J, arg[1]);
tr2 = emitir(IRTN(IR_SUB), tr2, tr1);
tr2 = emitir(IRTN(IR_ADD), tr2, one);
tr = emitir(IRTN(IR_MUL), tr, tr2);
tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
tr = emitir(IRTN(IR_ADD), tr, tr1);
} else { /* d = floor(d*r1) + 1.0 */
tr = emitir(IRTN(IR_MUL), tr, tr1);
tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
tr = emitir(IRTN(IR_ADD), tr, one);
}
}
res[0] = tr;
}
/* -- Bit library fast functions ------------------------------------------ */
/* Record unary bit.tobit, bit.bnot, bit.bswap. */
@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0);
res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
}
} else { /* Empty range or range underflow: return no results. */
emitir(IRTGI(IR_LE), trend, trstart);
@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
res[0] = emitir(IRTI(IR_TLEN), arg[0], 0);
res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
} /* else: Interpreter will throw. */
UNUSED(rd);
}
@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */
TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
MSize len = lj_tab_len(t);
emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
rd->nres = 0;
if (tref_istab(arg[0]) && arg[1]) {
if (!arg[2]) { /* Simple push: t[#t+1] = v */
TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
RecordIndex ix;
ix.tab = arg[0];
@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
/* -- I/O library fast functions ------------------------------------------ */
/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
** no need to encode the alternate cases for any of the guards.
**
** id != 0: the file userdata is loaded from the global gcroot[id] slot.
** id == 0: the file userdata is the first argument; recording is aborted
**          if it is not a userdata, and a guard checks its udtype field.
** Returns the IR reference of the loaded (and guarded non-NULL) file field.
*/
static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id)
{
TRef tr, ud, fp;
if (id) { /* io.func() */
/* Load the userdata from the address of the gcroot slot. */
tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
} else { /* fp:method() */
ud = arg[0];
if (!tref_isudata(ud))
lj_trace_err(J, LJ_TRERR_BADTYPE);
/* Guard that the userdata is tagged as an I/O file. */
tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
}
/* Load the file field and guard that it is not NULL. */
/* NOTE(review): presumably NULL marks a closed file -- confirm in lib_io. */
fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE);
emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD));
return fp;
}
/* Record a write of all arguments to a file.
** rd->data is passed to recff_io_fp() to select the file (0 means the file
** is the first argument). Each value is converted to a string and written
** with an fputc or fwrite call. The call results are only guarded when
** rd->cres is non-zero. The recorded result is always true.
*/
static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef fp = recff_io_fp(J, res, rd->data);
TRef zero = lj_ir_kint(J, 0);
TRef one = lj_ir_kint(J, 1);
/* Method form: arg[0] is the file handle, so values start at arg[1]. */
ptrdiff_t i = rd->data == 0 ? 1 : 0;
for (; arg[i]; i++) {
TRef str = lj_ir_tostr(J, arg[i]);
TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero);
TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
/* A length known to be the constant 1 is written with fputc instead. */
if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
if (rd->cres != 0) /* Check result only if requested. */
emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
} else {
/* Element size is 1, so the guard compares the returned count to len. */
TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp);
if (rd->cres != 0) /* Check result only if requested. */
emitir(IRTGI(IR_EQ), tr, len);
}
}
res[0] = TREF_TRUE;
}
/* Record a flush of a file.
** rd->data is passed to recff_io_fp() to select the file. Emits a call to
** fflush and, only when rd->cres is non-zero, a guard that it returned 0.
** The recorded result is always true.
*/
static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef fp = recff_io_fp(J, res, rd->data);
TRef tr = lj_ir_call(J, IRCALL_fflush, fp);
if (rd->cres != 0) /* Check result only if requested. */
emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0));
res[0] = TREF_TRUE;
}
/* -- Record calls and returns -------------------------------------------- */
#undef arg
@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J)
if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
} else if (ta == IRT_STR) {
if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
rc = lj_ir_kint(J, 0);
ta = IRT_INT;
} else {
rec_mm_comp(J, &ix, (int)op);
break;
@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J)
if (tref_isstr(rc)) {
rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
} else if (tref_istab(rc)) {
rc = emitir(IRTI(IR_TLEN), rc, 0);
rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
} else {
ix.tab = rc;
copyTV(J->L, &ix.tabv, &ix.keyv);
@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J)
/* fallthrough */
case BC_CALL:
callop:
if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
}
rec_call(J, ra, (int)(rb-1), (int)(rc-1));
break;
@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T)
BCReg j;
for (j = 0; j < s; j++)
if (snap_ref(map[j]) == ref) {
if (ir->o == IR_FRAME && irt_isfunc(ir->t))
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
lua_assert(s != 0);
J->baseslot = s+1;
J->framedepth++;
}
tr = J->slot[j];
goto dupslot;
}
@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T)
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
J->baseslot = s+1;
J->framedepth++;
if (s != 0) {
J->baseslot = s+1;
J->framedepth++;
}
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {

View File

@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr)
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
if (isluafunc(fn)) {
TValue *fs;
newbase = o+1;
fs = newbase + funcproto(fn)->framesize;
fs = o+1 + funcproto(fn)->framesize;
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
if (s != 0) newbase = o+1;
}
}
}
@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr)
setnilV(o); /* Clear unreferenced slots of newly added frames. */
}
}
if (newbase) { /* Clear remainder of newly added frames. */
L->base = newbase;
if (ntop >= L->maxstack) { /* Need to grow the stack again. */
MSize need = (MSize)(ntop - o);
L->top = o;
lj_state_growstack(L, need);
o = L->top;
ntop = o + need;
}
L->top = curr_topL(L);
for (; o < ntop; o++)
setnilV(o);
} else { /* Must not clear slots of existing frame. */
L->top = curr_topL(L);
if (newbase) L->base = newbase;
if (ntop >= L->maxstack) { /* Need to grow the stack again. */
MSize need = (MSize)(ntop - o);
L->top = o;
lj_state_growstack(L, need);
o = L->top;
ntop = o + need;
}
L->top = curr_topL(L);
for (; o < ntop; o++) /* Clear remainder of newly added frames. */
setnilV(o);
lua_assert(map + nslots == flinks-1);
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
}

View File

@ -17,7 +17,7 @@
LJ_FUNC void lj_state_relimitstack(lua_State *L);
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
LJ_FUNCA void lj_state_growstack1(lua_State *L);
LJ_FUNC void lj_state_growstack1(lua_State *L);
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
{

View File

@ -21,7 +21,7 @@
/* -- String interning ---------------------------------------------------- */
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
int32_t lj_str_cmp(GCstr *a, GCstr *b)
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
{
MSize i, n = a->len > b->len ? b->len : a->len;
for (i = 0; i < n; i += 4) {
@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
/* -- Type conversions ---------------------------------------------------- */
/* Convert string object to number.
** Thin wrapper: passes the character data of the string object to
** lj_str_numconv() and returns its result unchanged. The converted value
** is stored through n by lj_str_numconv().
*/
int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n)
{
return lj_str_numconv(strdata(str), n);
}
/* Convert string to number. */
int lj_str_numconv(const char *s, TValue *n)
int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n)
{
lua_Number sign = 1;
const uint8_t *p = (const uint8_t *)s;
@ -167,7 +173,7 @@ parsedbl:
}
/* Convert number to string. */
GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
{
char s[LUAI_MAXNUMBER2STR];
lua_Number n = *np;
@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
}
/* Convert integer to string. */
GCstr *lj_str_fromint(lua_State *L, int32_t k)
GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
{
char s[1+10];
char *p = s+sizeof(s);

View File

@ -11,7 +11,7 @@
#include "lj_obj.h"
/* String interning. */
LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b);
LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */
LJ_FUNCA int lj_str_numconv(const char *s, TValue *n);
LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np);
LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k);
LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n);
LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n);
LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
/* String formatting. */
LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);

View File

@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
return t;
}
/* Create a new table from a single packed size argument.
** The low 24 bits of ahsize are passed as the first size argument to
** newtab() and the upper 8 bits as the second. The array part is cleared,
** and the hash part too if the table has one (hmask > 0).
** NOTE(review): by analogy with lj_tab_new(L, asize, hbits) the two fields
** are presumably asize and hbits -- confirm against newtab().
*/
GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
{
GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24);
clearapart(t);
if (t->hmask > 0) clearhpart(t);
return t;
}
/* Duplicate a table. */
GCtab *lj_tab_dup(lua_State *L, const GCtab *kt)
GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
{
GCtab *t;
uint32_t asize, hmask;
@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
{
uint32_t b, sum, na = 0, sz = 0, nn = *narray;
for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++)
if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) {
for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++)
if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) {
sz = (2u<<b)+1;
na = sum;
}
@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j)
** Try to find a boundary in table `t'. A `boundary' is an integer index
** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
*/
MSize lj_tab_len(GCtab *t)
MSize LJ_FASTCALL lj_tab_len(GCtab *t)
{
MSize j = (MSize)t->asize;
if (j > 1 && tvisnil(arrayslot(t, j-1))) {

View File

@ -11,7 +11,8 @@
#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt);
LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
(inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
LJ_FUNCA MSize lj_tab_len(GCtab *t);
LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
#endif

View File

@ -32,6 +32,11 @@ enum {
/* Calling conventions. */
RID_RET = RID_EAX,
#if LJ_64
RID_FPRET = RID_XMM0,
#else
RID_RETHI = RID_EDX,
#endif
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_EDX, /* Interpreter BASE. */
@ -98,8 +103,8 @@ enum {
};
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3))
#define sps_scale(slot) (4 * (int32_t)(slot))
#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
/* -- Exit state ---------------------------------------------------------- */
@ -185,6 +190,7 @@ typedef enum {
XO_ARITHib = XO_(80),
XO_ARITHi = XO_(81),
XO_ARITHi8 = XO_(83),
XO_ARITHiw8 = XO_66(83),
XO_SHIFTi = XO_(c1),
XO_SHIFT1 = XO_(d1),
XO_SHIFTcl = XO_(d3),
@ -216,6 +222,7 @@ typedef enum {
XO_CVTSI2SD = XO_f20f(2a),
XO_CVTSD2SI = XO_f20f(2d),
XO_CVTTSD2SI= XO_f20f(2c),
XO_MOVD = XO_660f(6e),
XO_MOVDto = XO_660f(7e),
XO_FLDq = XO_(dd), XOg_FLDq = 0,

View File

@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
global_State *g = G(L);
newwhite(g, ud); /* Not finalized. */
ud->gct = ~LJ_TUDATA;
ud->udtype = UDTYPE_USERDATA;
ud->len = sz;
/* NOBARRIER: The GCudata is new (marked white). */
setgcrefnull(ud->metatable);