PPC: Integrate and enable JIT compiler.

master
Mike Pall 2011-10-24 16:43:51 +02:00
parent fa9ade356b
commit 0cf8c20be8
7 changed files with 2603 additions and 6 deletions

View File

@ -163,7 +163,6 @@
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
#define LJ_ARCH_NOJIT 1
#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE

View File

@ -161,6 +161,8 @@ IRFLDEF(FLOFS)
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
#include "lj_emit_arm.h"
#elif LJ_TARGET_PPC
#include "lj_emit_ppc.h"
#else
#error "Missing instruction emitter for target CPU"
#endif
@ -1205,8 +1207,10 @@ static void asm_loop(ASMState *as)
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
#include "lj_asm_arm.h"
#elif LJ_TARGET_PPC
#include "lj_asm_ppc.h"
#else
#error "Missing instruction emitter for target CPU"
#error "Missing assembler for target CPU"
#endif
/* -- Head of trace ------------------------------------------------------- */

2074
src/lj_asm_ppc.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -526,9 +526,13 @@ again:
idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1));
sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));
idx = crec_reassoc_ofs(J, idx, &ofs, sz);
#if LJ_TARGET_ARM || LJ_TARGET_PPC
/* Hoist base add to allow fusion of index/shift into operands. */
if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs
#if LJ_TARGET_ARM
/* Hoist base add to allow fusion of shifts into operands. */
if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) {
&& (sz == 1 || sz == 4)
#endif
) {
ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
ofs = 0;
}

232
src/lj_emit_ppc.h Normal file
View File

@ -0,0 +1,232 @@
/*
** PPC instruction emitter.
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/
/* -- Emit basic instructions --------------------------------------------- */
/* Emit an instruction with three register operands (T, A and B fields).
** The write pointer is decremented before the store, so machine code is
** laid down towards lower addresses.
*/
static void emit_tab(ASMState *as, PPCIns pi, Reg rt, Reg ra, Reg rb)
{
  as->mcp--;
  *as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | PPCF_B(rb);
}

/* Operand-order variants of emit_tab: the source register rs is placed in
** the T field, unused fields are zero.
*/
#define emit_asb(as, pi, ra, rs, rb)	emit_tab(as, (pi), (rs), (ra), (rb))
#define emit_as(as, pi, ra, rs)		emit_tab(as, (pi), (rs), (ra), 0)
#define emit_ab(as, pi, ra, rb)		emit_tab(as, (pi), 0, (ra), (rb))
/* Emit an instruction with T and A register fields plus an immediate.
** Only the low 16 bits of i are encoded into the instruction word.
*/
static void emit_tai(ASMState *as, PPCIns pi, Reg rt, Reg ra, int32_t i)
{
  as->mcp--;
  *as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | (i & 0xffff);
}

/* Immediate forms with one of the register fields left at zero, or with the
** source register rs placed in the T field.
*/
#define emit_ti(as, pi, rt, i)		emit_tai(as, (pi), (rt), 0, (i))
#define emit_ai(as, pi, ra, i)		emit_tai(as, (pi), 0, (ra), (i))
#define emit_asi(as, pi, ra, rs, i)	emit_tai(as, (pi), (rs), (ra), (i))

/* FP forms: register IDs are reduced to a 5 bit field number with &31. */
#define emit_fab(as, pi, rf, ra, rb) \
  emit_tab(as, (pi), (rf)&31, (ra)&31, (rb)&31)
#define emit_fb(as, pi, rf, rb)		emit_tab(as, (pi), (rf)&31, 0, (rb)&31)
#define emit_fac(as, pi, rf, ra, rc) \
  emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, 0)
#define emit_facb(as, pi, rf, ra, rc, rb) \
  emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, (rb)&31)
/* FP load/store form: rf is masked, the base register ra is passed as is. */
#define emit_fai(as, pi, rf, ra, i)	emit_tai(as, (pi), (rf)&31, (ra), (i))
/* Emit a rotate-and-mask instruction.
** n goes into the B field, b and e into the MB and ME fields. All three are
** 5 bit wide instruction fields, so every value must be in the range 0..31.
*/
static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs,
                     int32_t n, int32_t b, int32_t e)
{
  /* An out-of-range value would spill into the neighbouring field (and a
  ** negative one would be an undefined left shift), so check all of them
  ** here and not only n in the wrappers below.
  */
  lua_assert(n >= 0 && n < 32 && b >= 0 && b < 32 && e >= 0 && e < 32);
  *--as->mcp = pi | PPCF_T(rs) | PPCF_A(ra) | PPCF_B(n) |
               PPCF_MB(b) | PPCF_ME(e);
}

/* Shift left by n: rotate by n and keep mask bits 0..31-n. */
static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n)
{
  lua_assert(n >= 0 && n < 32);
  emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n);
}

/* Rotate left by n: rotate by n with the full mask 0..31. */
static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
{
  lua_assert(n >= 0 && n < 32);
  emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31);
}
/* -- Emit loads/stores --------------------------------------------------- */
/* Prefer rematerialization of BASE/L from global_State over spills.
** Evaluates to true for every reference up to and including REF_BASE.
*/
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Try to find a one step delta relative to another constant.
** Walks all currently allocated GPRs. If one of them is associated with a
** reference below ASMREF_L whose value is within a signed 16 bit distance
** of i, a single ADDI from that register into t is emitted.
** Returns 1 if an instruction was emitted, 0 otherwise.
*/
static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
{
  RegSet work = ~as->freeset & RSET_GPR;
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lua_assert(r != t);
    if (ref < ASMREF_L) {
      int32_t k = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
      /* The distance of two arbitrary 32 bit values may not fit into an
      ** int32_t. Subtract as unsigned to get well-defined wrap-around; the
      ** emitted ADDI wraps modulo 2^32, too.
      */
      int32_t delta = (int32_t)((uint32_t)i - (uint32_t)k);
      if (checki16(delta)) {
        emit_tai(as, PPCI_ADDI, t, r, delta);
        return 1;
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
/* Load a 32 bit constant into a GPR.
** Strategies, in order of preference:
**   1. LI for constants that fit into a signed 16 bit immediate.
**   2. ADDI relative to RID_JGL for constants within 64K of global_State.
**   3. ADDI relative to a constant already held in another register.
**   4. LIS, followed by ORI if the low 16 bits are non-zero.
** Code is emitted backwards, so the ORI below is emitted before the LIS.
*/
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  if (checki16(i)) {
    emit_ti(as, PPCI_LI, r, i);
  } else {
    if ((i & 0xffff)) {
      /* Distance from global_State. Subtract as unsigned: the difference of
      ** an arbitrary constant and an address may overflow an int32_t.
      */
      uint32_t gofs = (uint32_t)i - (uint32_t)i32ptr(J2G(as->J));
      if (gofs < 65536) {
        /* RID_JGL holds global_State + 32768, hence the bias. */
        emit_tai(as, PPCI_ADDI, r, RID_JGL, (int32_t)gofs - 32768);
        return;
      } else if (emit_kdelta1(as, r, i)) {
        return;
      }
      emit_asi(as, PPCI_ORI, r, r, i);
    }
    emit_ti(as, PPCI_LIS, r, (i >> 16));
  }
}

/* Load an address into a GPR. */
#define emit_loada(as, r, addr)		emit_loadi(as, (r), i32ptr((addr)))
static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);

/* Get/set from constant pointer.
** Emits the load/store pi of register r from/to the address p. Addresses
** within 64K of global_State are accessed relative to RID_JGL. Otherwise a
** register from allow is allocated for the address with its low 16 bits
** removed and the (sign-extended) low 16 bits become the displacement.
*/
static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
{
  int32_t i = i32ptr(p);
  /* Unsigned subtraction: the distance of two addresses may wrap. */
  uint32_t gofs = (uint32_t)i - (uint32_t)i32ptr(J2G(as->J));
  Reg base;
  if (gofs < 65536) {
    /* RID_JGL holds global_State + 32768, hence the bias. */
    i = (int32_t)gofs - 32768;
    base = RID_JGL;
  } else {
    /* Subtracting the sign-extended low half overflows a signed int for
    ** addresses in 0x7fff8000..0x7fffffff, so compute the base with
    ** wrapping unsigned arithmetic.
    */
    uint32_t hi = (uint32_t)i - (uint32_t)(int16_t)i;
    base = ra_allock(as, (int32_t)hi, allow);
  }
  emit_tai(as, pi, r, base, i);
}

/* Load an FP number constant from the address tv into an FPR. */
#define emit_loadn(as, r, tv) \
  emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR)
/* Get/set global_State fields.
** ofs is the byte offset of the field inside global_State.
*/
static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
{
  /* Addressing is relative to RID_JGL, so remove the 32768 bias. */
  int32_t disp = ofs - 32768;
  emit_tai(as, pi, r, RID_JGL, disp);
}

/* Load a word-sized global_State field into register r. */
#define emit_getgl(as, r, field) \
  emit_lsglptr(as, PPCI_LWZ, (r), (int32_t)offsetof(global_State, field))
/* Store register r into a word-sized global_State field. */
#define emit_setgl(as, r, field) \
  emit_lsglptr(as, PPCI_STW, (r), (int32_t)offsetof(global_State, field))

/* Trace number is determined from per-trace exit stubs. */
#define emit_setvmstate(as, i)		UNUSED(i)
/* -- Emit control-flow instructions -------------------------------------- */
/* Label for internal jumps. */
typedef MCode *MCLabel;

/* Return label pointing to current PC. */
#define emit_label(as)		((as)->mcp)

/* Emit a conditional branch pi with condition cc to target.
** The displacement must fit into a signed 16 bit field.
*/
static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
{
  /* The branch occupies the slot below the current write pointer, hence
  ** the adjustment by one instruction (4 bytes).
  */
  ptrdiff_t delta = ((char *)target - (char *)as->mcp) + 4;
  lua_assert(((delta + 0x8000) >> 16) == 0);
  /* Flip the PPCF_Y bit for negative displacements (presumably the static
  ** branch prediction hint -- confirm against the ISA).
  */
  if ((delta & 0x8000))
    pi ^= PPCF_Y;
  as->mcp--;
  *as->mcp = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
}
/* Emit a call to target.
** A direct BL is used when the word displacement fits into a signed 24 bit
** field. Otherwise the target address is placed in a register and the call
** is made through CTR (MTCTR followed by BCTRL).
*/
static void emit_call(ASMState *as, void *target)
{
/* Reserve the slot for the call; the displacement is relative to it. */
MCode *p = --as->mcp;
ptrdiff_t delta = (char *)target - (char *)p;
if ((((delta>>2) + 0x00800000) >> 24) == 0) {
*p = PPCI_BL | (delta & 0x03fffffcu);
} else { /* Target out of range: need indirect call. Don't use arg reg. */
/* Exclude R0 up to and including the last GPR argument register. */
RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
Reg r = ra_allock(as, i32ptr(target), allow);
*p = PPCI_BCTRL;
/* MTCTR goes into the slot just below BCTRL, i.e. the preceding address. */
p[-1] = PPCI_MTCTR | PPCF_T(r);
as->mcp = p-1;
}
}
/* -- Emit generic operations --------------------------------------------- */
/* GPR move. Note: src is expanded twice. */
#define emit_mr(as, dst, src) \
  emit_asb(as, PPCI_MR, (dst), (src), (src))

/* Generic move between two regs.
** The register class is derived from dst: IDs below RID_MAX_GPR are GPRs,
** everything else is treated as an FPR. ir is not used on this target.
*/
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  UNUSED(ir);
  if (dst >= RID_MAX_GPR) {
    emit_fb(as, PPCI_FMR, dst, src);
    return;
  }
  emit_mr(as, dst, src);
}
/* Generic load of register from stack slot.
** ofs is the byte displacement from RID_SP. For FPRs the type of ir selects
** between a double (LFD) and a single precision (LFS) load.
*/
static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  if (r >= RID_MAX_GPR) {
    PPCIns pi = irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS;
    emit_fai(as, pi, r, RID_SP, ofs);
    return;
  }
  emit_tai(as, PPCI_LWZ, r, RID_SP, ofs);
}
/* Generic store of register to stack slot.
** ofs is the byte displacement from RID_SP. For FPRs the type of ir selects
** between a double (STFD) and a single precision (STFS) store.
*/
static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  if (r >= RID_MAX_GPR) {
    PPCIns pi = irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS;
    emit_fai(as, pi, r, RID_SP, ofs);
    return;
  }
  emit_tai(as, PPCI_STW, r, RID_SP, ofs);
}
/* Emit a compare (for equality) with a constant operand.
** Constants that fit a signed or an unsigned 16 bit immediate need a single
** compare. Everything else XORs the upper half away into RID_TMP and then
** compares RID_TMP against the lower half.
*/
static void emit_cmpi(ASMState *as, Reg r, int32_t k)
{
  if (checki16(k)) {
    emit_ai(as, PPCI_CMPWI, r, k);
    return;
  }
  if (checku16(k)) {
    emit_ai(as, PPCI_CMPLWI, r, k);
    return;
  }
  /* Emitted backwards: the XORIS ends up in front of the CMPLWI. */
  emit_ai(as, PPCI_CMPLWI, RID_TMP, k);
  emit_asi(as, PPCI_XORIS, RID_TMP, r, (k >> 16));
}
/* Add offset to pointer.
** Nothing is emitted for a zero offset. Offsets outside the signed 16 bit
** range additionally get an ADDIS with the upper half, rounded by 32768.
*/
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (!ofs)
    return;
  emit_tai(as, PPCI_ADDI, r, r, ofs);
  if (checki16(ofs))
    return;
  emit_tai(as, PPCI_ADDIS, r, r, (ofs + 32768) >> 16);
}
/* Adjust the stack pointer by ofs bytes; nothing is emitted if ofs is zero.
** Emits a store-with-update of RID_TMP at RID_SP - ofs. RID_TMP is set up
** before that (in execution order) as RID_SP plus CFRAME_SIZE plus the
** spadjust of the parent trace, if there is one.
*/
static void emit_spsub(ASMState *as, int32_t ofs)
{
  int32_t framesz;
  if (!ofs)
    return;
  framesz = CFRAME_SIZE + (as->parent ? as->parent->spadjust : 0);
  emit_tai(as, PPCI_STWU, RID_TMP, RID_SP, -ofs);
  emit_tai(as, PPCI_ADDI, RID_TMP, RID_SP, framesz);
}

View File

@ -50,21 +50,30 @@ typedef uint32_t RegSP;
/* -- Register sets ------------------------------------------------------- */
/* Bitset for registers. 32 registers suffice right now.
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
#endif
#define RID2RSET(r) (((RegSet)1) << (r))
#define RSET_EMPTY 0
#define RSET_EMPTY ((RegSet)0)
#define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo))
#define rset_test(rs, r) (((rs) >> (r)) & 1)
#define rset_set(rs, r) (rs |= RID2RSET(r))
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
#if LJ_TARGET_PPC
#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
#else
#define rset_picktop(rs) ((Reg)lj_fls(rs))
#define rset_pickbot(rs) ((Reg)lj_ffs(rs))
#endif
/* -- Register allocation cost -------------------------------------------- */
@ -127,6 +136,8 @@ typedef uint32_t RegCost;
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
#include "lj_target_arm.h"
#elif LJ_TARGET_PPC
#include "lj_target_ppc.h"
#else
#error "Missing include for target CPU"
#endif

273
src/lj_target_ppc.h Normal file
View File

@ -0,0 +1,273 @@
/*
** Definitions for PPC CPUs.
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_TARGET_PPC_H
#define _LJ_TARGET_PPC_H
/* -- Registers IDs ------------------------------------------------------- */
/* List of the 32 GPR names in register number order. The entries at index
** 1, 2 and 13 are named SP, SYS1 and SYS2.
*/
#define GPRDEF(_) \
_(R0) _(SP) _(SYS1) _(R3) _(R4) _(R5) _(R6) _(R7) \
_(R8) _(R9) _(R10) _(R11) _(R12) _(SYS2) _(R14) _(R15) \
_(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
_(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31)
/* List of the 32 FPR names in register number order. */
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
_(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
_(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
/* Empty list. NOTE(review): presumably for virtual register IDs, of which
** this target defines none -- confirm against the other targets.
*/
#define VRIDDEF(_)
/* Turns a register name from the lists above into an RID_* enumerator. */
#define RIDENUM(name) RID_##name,
/* Register IDs. The FPR IDs directly follow the GPR IDs, so RID_F0 is 32
** and RID_MAX is 64.
*/
enum {
GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
/* Temporary register; part of RSET_FIXED, i.e. never allocated. */
RID_TMP = RID_R0,
/* Calling conventions. */
RID_RET = RID_R3,
/* NOTE(review): RETHI/RETLO look like the high/low word registers of a
** two-register result -- confirm against the calling convention.
*/
RID_RETHI = RID_R3,
RID_RETLO = RID_R4,
RID_FPRET = RID_F1,
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_R14, /* Interpreter BASE. */
RID_LPC = RID_R16, /* Interpreter PC. */
RID_DISPATCH = RID_R17, /* Interpreter DISPATCH table. */
RID_LREG = RID_R18, /* Interpreter L. */
RID_JGL = RID_R31, /* On-trace: global_State + 32768. */
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_R0,
RID_MAX_GPR = RID_R31+1,
RID_MIN_FPR = RID_F0,
RID_MAX_FPR = RID_F31+1,
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
};
/* Range of registers that may hold constants: all GPRs. */
#define RID_NUM_KREF RID_NUM_GPR
#define RID_MIN_KREF RID_R0
/* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except TMP, SP, SYS1, SYS2 and JGL. */
#define RSET_FIXED \
(RID2RSET(RID_TMP)|RID2RSET(RID_SP)|RID2RSET(RID_SYS1)|\
RID2RSET(RID_SYS2)|RID2RSET(RID_JGL))
/* All GPRs minus the fixed ones. The subtraction clears exactly the fixed
** bits, because every fixed register lies inside the GPR range.
*/
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
/* Scratch registers: R3-R12 and F0-F13. NOTE(review): presumably the
** registers not preserved across calls -- confirm against the ABI.
*/
#define RSET_SCRATCH_GPR (RSET_RANGE(RID_R3, RID_R12+1))
#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F13+1))
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
/* Argument registers: eight GPRs (R3-R10) and eight FPRs (F1-F8). */
#define REGARG_FIRSTGPR RID_R3
#define REGARG_LASTGPR RID_R10
#define REGARG_NUMGPR 8
#define REGARG_FIRSTFPR RID_F1
#define REGARG_LASTFPR RID_F8
#define REGARG_NUMFPR 8
/* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use.
** [sp+12] tmplo word \
** [sp+ 8] tmphi word / tmp dword, parameter area for callee
** [sp+ 4] tmpw, LR of callee
** [sp+ 0] stack chain
*/
/* Number of fixed spill slots and index of the first generally usable one. */
#define SPS_FIXED	7
#define SPS_FIRST	4

/* Stack offsets for temporary slots. Used for FP<->int conversions etc.
** These are byte offsets from sp; TMP and TMPHI name the same word.
*/
#define SPOFS_TMPW	4
#define SPOFS_TMP	8
#define SPOFS_TMPHI	8
#define SPOFS_TMPLO	12

/* Byte offset of a spill slot: every slot is 4 bytes wide. */
#define sps_scale(slot)		((int32_t)(slot) * 4)
/* Slots needed beyond the fixed ones, rounded up to a multiple of 4. */
#define sps_align(slot)		(((slot) + (3 - SPS_FIXED)) & ~3)
/* -- Exit state ---------------------------------------------------------- */
/* This definition must match with the *.dasc file(s). */
/* Register and spill slot contents at a trace exit. The member order (FPRs,
** GPRs, spill slots) is part of the layout and must not be changed.
*/
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
/* Highest exit + 1 indicates stack check. */
#define EXITSTATE_CHECKEXIT 1
/* Return the address of a per-trace exit stub.
** Starts right behind the machine code of the trace, skips any padding and
** then indexes past a 3 instruction prefix to the stub for exitno.
*/
static LJ_AINLINE MCode *exitstub_trace_addr(GCtrace *T, ExitNo exitno)
{
  /* Keep this in-sync with asm_exitstub_*. */
  MCode *stub = (MCode *)((char *)T->mcode + T->szmcode);
  for (; *stub == 0x60000000; stub++)  /* Skip PPCI_NOP. */
    ;
  return stub + 3 + exitno;
}
/* -- Instructions -------------------------------------------------------- */
/* Instruction fields. */
/* Condition code: the low two bits of cc are placed at bit 16, bit 2 of cc
** is placed at bit 24 of the instruction word.
*/
#define PPCF_CC(cc) ((((cc) & 3) << 16) | (((cc) & 4) << 22))
/* Register fields, each 5 bits apart: T at bit 21, A at 16, B at 11, C at 6. */
#define PPCF_T(r) ((r) << 21)
#define PPCF_A(r) ((r) << 16)
#define PPCF_B(r) ((r) << 11)
#define PPCF_C(r) ((r) << 6)
/* Mask begin/end fields of the rotate instructions. MB shares its position
** with the C field.
*/
#define PPCF_MB(n) ((n) << 6)
#define PPCF_ME(n) ((n) << 1)
/* Bit toggled by emit_condbranch for negative branch displacements. */
#define PPCF_Y 0x00200000
/* Lowest instruction bit. NOTE(review): presumably the record (Rc) bit --
** confirm against the ISA.
*/
#define PPCF_DOT 0x00000001
/* Base encodings of the instructions used by the JIT backend. Register,
** immediate and condition fields are zero here and get OR'ed in by the
** emitter (see the PPCF_* macros).
** Some names are aliases sharing one encoding: MR/OR, NOP/ORI, LI/ADDI and
** LIS/ADDIS.
*/
typedef enum PPCIns {
/* Integer instructions. */
PPCI_MR = 0x7c000378,
PPCI_NOP = 0x60000000,
PPCI_LI = 0x38000000,
PPCI_LIS = 0x3c000000,
PPCI_ADD = 0x7c000214,
PPCI_ADDC = 0x7c000014,
PPCI_ADDO = 0x7c000614,
PPCI_ADDE = 0x7c000114,
PPCI_ADDZE = 0x7c000194,
PPCI_ADDME = 0x7c0001d4,
PPCI_ADDI = 0x38000000,
PPCI_ADDIS = 0x3c000000,
PPCI_ADDIC = 0x30000000,
PPCI_ADDICDOT = 0x34000000,
PPCI_SUBF = 0x7c000050,
PPCI_SUBFC = 0x7c000010,
PPCI_SUBFO = 0x7c000450,
PPCI_SUBFE = 0x7c000110,
PPCI_SUBFZE = 0x7c000190,
PPCI_SUBFME = 0x7c0001d0,
PPCI_SUBFIC = 0x20000000,
PPCI_NEG = 0x7c0000d0,
PPCI_AND = 0x7c000038,
PPCI_ANDC = 0x7c000078,
PPCI_NAND = 0x7c0003b8,
PPCI_ANDIDOT = 0x70000000,
PPCI_ANDISDOT = 0x74000000,
PPCI_OR = 0x7c000378,
PPCI_NOR = 0x7c0000f8,
PPCI_ORI = 0x60000000,
PPCI_ORIS = 0x64000000,
PPCI_XOR = 0x7c000278,
PPCI_EQV = 0x7c000238,
PPCI_XORI = 0x68000000,
PPCI_XORIS = 0x6c000000,
PPCI_CMPW = 0x7c000000,
PPCI_CMPLW = 0x7c000040,
PPCI_CMPWI = 0x2c000000,
PPCI_CMPLWI = 0x28000000,
PPCI_MULLW = 0x7c0001d6,
PPCI_MULLI = 0x1c000000,
PPCI_MULLWO = 0x7c0005d6,
PPCI_EXTSB = 0x7c000774,
PPCI_EXTSH = 0x7c000734,
PPCI_SLW = 0x7c000030,
PPCI_SRW = 0x7c000430,
PPCI_SRAW = 0x7c000630,
PPCI_SRAWI = 0x7c000670,
PPCI_RLWNM = 0x5c000000,
PPCI_RLWINM = 0x54000000,
PPCI_RLWIMI = 0x50000000,
/* Branches. The *L variants only differ in the lowest bit. */
PPCI_B = 0x48000000,
PPCI_BL = 0x48000001,
PPCI_BC = 0x40800000,
PPCI_BCL = 0x40800001,
PPCI_BCTR = 0x4e800420,
PPCI_BCTRL = 0x4e800421,
PPCI_CRANDC = 0x4c000102,
PPCI_CRAND = 0x4c000202,
PPCI_CRORC = 0x4c000342,
PPCI_CROR = 0x4c000382,
PPCI_MFLR = 0x7c0802a6,
PPCI_MTCTR = 0x7c0903a6,
PPCI_MCRXR = 0x7c000400,
/* Load/store instructions. */
PPCI_LWZ = 0x80000000,
PPCI_LBZ = 0x88000000,
PPCI_STW = 0x90000000,
PPCI_STB = 0x98000000,
PPCI_LHZ = 0xa0000000,
PPCI_LHA = 0xa8000000,
PPCI_STH = 0xb0000000,
PPCI_STWU = 0x94000000,
PPCI_LFS = 0xc0000000,
PPCI_LFD = 0xc8000000,
PPCI_STFS = 0xd0000000,
PPCI_STFD = 0xd8000000,
/* Indexed (register + register) load/store forms. */
PPCI_LWZX = 0x7c00002e,
PPCI_LBZX = 0x7c0000ae,
PPCI_STWX = 0x7c00012e,
PPCI_STBX = 0x7c0001ae,
PPCI_LHZX = 0x7c00022e,
PPCI_LHAX = 0x7c0002ae,
PPCI_STHX = 0x7c00032e,
PPCI_LFSX = 0x7c00042e,
PPCI_LFDX = 0x7c0004ae,
PPCI_STFSX = 0x7c00052e,
PPCI_STFDX = 0x7c0005ae,
/* FP instructions. */
PPCI_FMR = 0xfc000090,
PPCI_FNEG = 0xfc000050,
PPCI_FABS = 0xfc000210,
PPCI_FRSP = 0xfc000018,
PPCI_FCTIWZ = 0xfc00001e,
PPCI_FADD = 0xfc00002a,
PPCI_FSUB = 0xfc000028,
PPCI_FMUL = 0xfc000032,
PPCI_FDIV = 0xfc000024,
PPCI_FMADD = 0xfc00003a,
PPCI_FMSUB = 0xfc000038,
PPCI_FNMSUB = 0xfc00003c,
PPCI_FCMPU = 0xfc000000,
PPCI_FSEL = 0xfc00002e,
} PPCIns;
/* Branch condition codes. The enumerator value is the encoding consumed by
** PPCF_CC(), so the order must not be changed. Flipping bit 2 (value 4)
** yields the opposite condition: GE/LT, LE/GT, NE/EQ and NS/SO.
*/
typedef enum PPCCC {
CC_GE, CC_LE, CC_NE, CC_NS, CC_LT, CC_GT, CC_EQ, CC_SO
} PPCCC;
#endif