ocaml/asmcomp/power/emit.mlp

931 lines
31 KiB
Plaintext

(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Emission of PowerPC assembly code *)
module StringSet = Set.Make(struct type t = string let compare = compare end)
open Location
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Layout of the stack. The stack is kept 16-aligned. *)
let stack_offset = ref 0
let frame_size () =
let size =
!stack_offset + (* Trap frame, outgoing parameters *)
4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + (* Local variables *)
(if !contains_calls then 4 else 0) in (* The return address *)
Misc.align size 16
let slot_offset loc cls =
match loc with
Local n ->
if cls = 0
then !stack_offset + num_stack_slots.(1) * 8 + n * 4
else !stack_offset + n * 8
| Incoming n -> frame_size() + n
| Outgoing n -> n
(* Output a symbol *)
let emit_symbol =
match Config.system with
| "elf" | "bsd" -> (fun s -> Emitaux.emit_symbol '.' s)
| "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s)
| _ -> assert false
(* Output a label *)
let label_prefix =
match Config.system with
| "elf" | "bsd" -> ".L"
| "rhapsody" -> "L"
| _ -> assert false
let emit_label lbl =
emit_string label_prefix; emit_int lbl
(* Section switching *)
let data_space =
match Config.system with
| "elf" | "bsd" -> " .section \".data\"\n"
| "rhapsody" -> " .data\n"
| _ -> assert false
let code_space =
match Config.system with
| "elf" | "bsd" -> " .section \".text\"\n"
| "rhapsody" -> " .text\n"
| _ -> assert false
let rodata_space =
match Config.system with
| "elf" | "bsd" -> " .section \".rodata\"\n"
| "rhapsody" -> " .const\n"
| _ -> assert false
(* Output a pseudo-register *)
let emit_reg r =
match r.loc with
Reg r -> emit_string (register_name r)
| _ -> fatal_error "Emit.emit_reg"
let use_full_regnames =
Config.system = "rhapsody"
let emit_gpr r =
if use_full_regnames then emit_char 'r';
emit_int r
let emit_fpr r =
if use_full_regnames then emit_char 'f';
emit_int r
let emit_ccr r =
if use_full_regnames then emit_string "cr";
emit_int r
(* Output a stack reference *)
let emit_stack r =
match r.loc with
Stack s ->
let ofs = slot_offset s (register_class r) in `{emit_int ofs}({emit_gpr 1})`
| _ -> fatal_error "Emit.emit_stack"
(* Split a 32-bit integer constants in two 16-bit halves *)
let low n = n land 0xFFFF
let high n = n asr 16
let nativelow n = Nativeint.to_int n land 0xFFFF
let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16)
let is_immediate n =
n <= 32767 && n >= -32768
let is_native_immediate n =
n <= Nativeint.of_int 32767 && n >= Nativeint.of_int (-32768)
(* Output a "upper 16 bits" or "lower 16 bits" operator. *)
let emit_upper emit_fun arg =
match Config.system with
| "elf" | "bsd" ->
emit_fun arg; emit_string "@ha"
| "rhapsody" ->
emit_string "ha16("; emit_fun arg; emit_string ")"
| _ -> assert false
let emit_lower emit_fun arg =
match Config.system with
| "elf" | "bsd" ->
emit_fun arg; emit_string "@l"
| "rhapsody" ->
emit_string "lo16("; emit_fun arg; emit_string ")"
| _ -> assert false
(* Output a load or store operation *)
let emit_symbol_offset (s, d) =
emit_symbol s;
if d > 0 then `+`;
if d <> 0 then emit_int d
let emit_load_store instr addressing_mode addr n arg =
match addressing_mode with
Ibased(s, d) ->
` addis {emit_gpr 11}, 0, {emit_upper emit_symbol_offset (s,d)}\n`;
` {emit_string instr} {emit_reg arg}, {emit_lower emit_symbol_offset (s,d)}({emit_gpr 11})\n`
| Iindexed ofs ->
if is_immediate ofs then
` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
else begin
` lis {emit_gpr 0}, {emit_int(high ofs)}\n`;
if low ofs <> 0 then
` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`;
` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n`
end
| Iindexed2 ->
` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n`
(* After a comparison, extract the result as 0 or 1 *)
let emit_set_comp cmp res =
` mfcr {emit_gpr 0}\n`;
let bitnum =
match cmp with
Ceq | Cne -> 2
| Cgt | Cle -> 1
| Clt | Cge -> 0 in
` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`;
begin match cmp with
Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n`
| _ -> ()
end
(* Record live pointers at call points *)
type frame_descr =
{ fd_lbl: int; (* Return address *)
fd_frame_size: int; (* Size of stack frame *)
fd_live_offset: int list } (* Offsets/regs of live addresses *)
let frame_descriptors = ref([] : frame_descr list)
let record_frame live =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
live_offset := (r lsl 1) + 1 :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset } :: !frame_descriptors;
`{emit_label lbl}:`
let emit_frame fd =
` .long {emit_label fd.fd_lbl} + 4\n`;
` .short {emit_int fd.fd_frame_size}\n`;
` .short {emit_int (List.length fd.fd_live_offset)}\n`;
List.iter
(fun n ->
` .short {emit_int n}\n`)
fd.fd_live_offset;
` .align 2\n`
(* Record floating-point literals *)
let float_literals = ref ([] : (string * int) list)
(* Record external C functions to be called in a position-independent way
(for MacOSX) *)
let pic_externals = (Config.system = "rhapsody")
let external_functions = ref StringSet.empty
let emit_external s =
` .non_lazy_symbol_pointer\n`;
`L{emit_symbol s}$non_lazy_ptr:\n`;
` .indirect_symbol {emit_symbol s}\n`;
` .long 0\n`
(* Names for conditional branches after comparisons *)
let branch_for_comparison = function
Ceq -> "beq" | Cne -> "bne"
| Cle -> "ble" | Cgt -> "bgt"
| Cge -> "bge" | Clt -> "blt"
let name_for_int_comparison = function
Isigned cmp -> ("cmpw", branch_for_comparison cmp)
| Iunsigned cmp -> ("cmplw", branch_for_comparison cmp)
(* Names for various instructions *)
let name_for_intop = function
Iadd -> "add"
| Imul -> "mullw"
| Idiv -> "divw"
| Iand -> "and"
| Ior -> "or"
| Ixor -> "xor"
| Ilsl -> "slw"
| Ilsr -> "srw"
| Iasr -> "sraw"
| _ -> Misc.fatal_error "Emit.Intop"
let name_for_intop_imm = function
Iadd -> "addi"
| Imul -> "mulli"
| Iand -> "andi."
| Ior -> "ori"
| Ixor -> "xori"
| Ilsl -> "slwi"
| Ilsr -> "srwi"
| Iasr -> "srawi"
| _ -> Misc.fatal_error "Emit.Intop_imm"
let name_for_floatop1 = function
Inegf -> "fneg"
| Iabsf -> "fabs"
| _ -> Misc.fatal_error "Emit.Iopf1"
let name_for_floatop2 = function
Iaddf -> "fadd"
| Isubf -> "fsub"
| Imulf -> "fmul"
| Idivf -> "fdiv"
| _ -> Misc.fatal_error "Emit.Iopf2"
let name_for_specific = function
Imultaddf -> "fmadd"
| Imultsubf -> "fmsub"
| _ -> Misc.fatal_error "Emit.Ispecific"
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Names of functions defined in the current file *)
let defined_functions = ref StringSet.empty
(* Label of glue code for calling the GC *)
let call_gc_label = ref 0
(* Label of jump table *)
let lbl_jumptbl = ref 0
(* List of all labels in jumptable (reverse order) *)
let jumptbl_entries = ref []
(* Number of jumptable entries *)
let num_jumptbl_entries = ref 0
(* Fixup conditional branches that exceed hardware allowed range *)
let load_store_size = function
Ibased(s, d) -> 2
| Iindexed ofs -> if is_immediate ofs then 1 else 3
| Iindexed2 -> 1
let instr_size = function
Lend -> 0
| Lop(Imove | Ispill | Ireload) -> 1
| Lop(Iconst_int n) -> if is_native_immediate n then 1 else 2
| Lop(Iconst_float s) -> 2
| Lop(Iconst_symbol s) -> 2
| Lop(Icall_ind) -> 2
| Lop(Icall_imm s) -> 1
| Lop(Itailcall_ind) -> 5
| Lop(Itailcall_imm s) -> if s = !function_name then 1 else 4
| Lop(Iextcall(s, true)) -> 3
| Lop(Iextcall(s, false)) -> if pic_externals then 4 else 1
| Lop(Istackoffset n) -> 1
| Lop(Iload(chunk, addr)) ->
if chunk = Byte_signed
then load_store_size addr + 1
else load_store_size addr
| Lop(Istore(chunk, addr)) -> load_store_size addr
| Lop(Ialloc n) -> 4
| Lop(Ispecific(Ialloc_far n)) -> 5
| Lop(Iintop Imod) -> 3
| Lop(Iintop(Icomp cmp)) -> 4
| Lop(Iintop op) -> 1
| Lop(Iintop_imm(Idiv, n)) -> 2
| Lop(Iintop_imm(Imod, n)) -> 4
| Lop(Iintop_imm(Icomp cmp, n)) -> 4
| Lop(Iintop_imm(op, n)) -> 1
| Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1
| Lop(Ifloatofint) -> 9
| Lop(Iintoffloat) -> 4
| Lop(Ispecific sop) -> 1
| Lreloadretaddr -> 2
| Lreturn -> 2
| Llabel lbl -> 0
| Lbranch lbl -> 1
| Lcondbranch(tst, lbl) -> 2
| Lcondbranch3(lbl0, lbl1, lbl2) ->
1 + (if lbl0 = None then 0 else 1)
+ (if lbl1 = None then 0 else 1)
+ (if lbl2 = None then 0 else 1)
| Lswitch jumptbl -> 8
| Lsetuptrap lbl -> 1
| Lpushtrap -> 4
| Lpoptrap -> 2
| Lraise -> 6
let label_map code =
let map = Hashtbl.create 37 in
let rec fill_map pc instr =
match instr.desc with
Lend -> (pc, map)
| Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next
| op -> fill_map (pc + instr_size op) instr.next
in fill_map 0 code
let max_branch_offset = 8180
(* 14-bit signed offset in words. Remember to cut some slack
for multi-word instructions where the branch can be anywhere in
the middle. 12 words of slack is plenty. *)
let branch_overflows map pc_branch lbl_dest =
let pc_dest = Hashtbl.find map lbl_dest in
let delta = pc_dest - (pc_branch + 1) in
delta <= -max_branch_offset || delta >= max_branch_offset
let opt_branch_overflows map pc_branch opt_lbl_dest =
match opt_lbl_dest with
None -> false
| Some lbl_dest -> branch_overflows map pc_branch lbl_dest
let fixup_branches codesize map code =
let expand_optbranch lbl n arg next =
match lbl with
None -> next
| Some l ->
instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l))
arg [||] next in
let rec fixup did_fix pc instr =
match instr.desc with
Lend -> did_fix
| Lcondbranch(test, lbl) when branch_overflows map pc lbl ->
let lbl2 = new_label() in
let cont =
instr_cons (Lbranch lbl) [||] [||]
(instr_cons (Llabel lbl2) [||] [||] instr.next) in
instr.desc <- Lcondbranch(invert_test test, lbl2);
instr.next <- cont;
fixup true (pc + 2) instr.next
| Lcondbranch3(lbl0, lbl1, lbl2)
when opt_branch_overflows map pc lbl0
|| opt_branch_overflows map pc lbl1
|| opt_branch_overflows map pc lbl2 ->
let cont =
expand_optbranch lbl0 0 instr.arg
(expand_optbranch lbl1 1 instr.arg
(expand_optbranch lbl2 2 instr.arg instr.next)) in
instr.desc <- cont.desc;
instr.next <- cont.next;
fixup true pc instr
| Lop(Ialloc n) when codesize - pc >= max_branch_offset ->
instr.desc <- Lop(Ispecific(Ialloc_far n));
fixup true (pc + 4) instr.next
| op ->
fixup did_fix (pc + instr_size op) instr.next
in fixup false 0 code
(* Iterate branch expansion till all conditional branches are OK *)
let rec branch_normalization code =
let (codesize, map) = label_map code in
if codesize >= max_branch_offset && fixup_branches codesize map code
then branch_normalization code
else ()
(* Output the assembly code for an instruction *)
let rec emit_instr i dslot =
match i.desc with
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
match (src, dst) with
{loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
` mr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
` fmr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
` stw {emit_reg src}, {emit_stack dst}\n`
| {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
` stfd {emit_reg src}, {emit_stack dst}\n`
| {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
` lwz {emit_reg dst}, {emit_stack src}\n`
| {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
` lfd {emit_reg dst}, {emit_stack src}\n`
| (_, _) ->
fatal_error "Emit: Imove"
end
| Lop(Iconst_int n) ->
if is_native_immediate n then
` li {emit_reg i.res.(0)}, {emit_nativeint n}\n`
else begin
` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`;
if nativelow n <> 0 then
` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n`
end
| Lop(Iconst_float s) ->
let lbl = new_label() in
float_literals := (s, lbl) :: !float_literals;
` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`;
` lfd {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n`
| Lop(Iconst_symbol s) ->
` addis {emit_reg i.res.(0)}, 0, {emit_upper emit_symbol s}\n`;
` addi {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_lower emit_symbol s}\n`
| Lop(Icall_ind) ->
` mtctr {emit_reg i.arg.(0)}\n`;
record_frame i.live;
` bctrl\n`
| Lop(Icall_imm s) ->
record_frame i.live;
` bl {emit_symbol s}\n`
| Lop(Itailcall_ind) ->
let n = frame_size() in
` mtctr {emit_reg i.arg.(0)}\n`;
if !contains_calls then begin
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` mtlr {emit_gpr 11}\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`
end;
` bctr\n`
| Lop(Itailcall_imm s) ->
if s = !function_name then
` b {emit_label !tailrec_entry_point}\n`
else begin
let n = frame_size() in
if !contains_calls then begin
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` mtlr {emit_gpr 11}\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`
end;
` b {emit_symbol s}\n`
end
| Lop(Iextcall(s, alloc)) ->
if alloc then begin
if pic_externals then begin
external_functions := StringSet.add s !external_functions;
` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`;
` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`
end else begin
` addis {emit_gpr 11}, 0, {emit_upper emit_symbol s}\n`;
` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_symbol s}\n`
end;
record_frame i.live;
` bl {emit_symbol "caml_c_call"}\n`
end else begin
if pic_externals then begin
external_functions := StringSet.add s !external_functions;
` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`;
` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`;
` mtctr {emit_gpr 11}\n`;
` bctrl\n`
end else
` bl {emit_symbol s}\n`
end
| Lop(Istackoffset n) ->
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int (-n)}\n`;
stack_offset := !stack_offset + n
| Lop(Iload(chunk, addr)) ->
let loadinstr =
match chunk with
Byte_unsigned -> "lbz"
| Byte_signed -> "lbz"
| Sixteen_unsigned -> "lhz"
| Sixteen_signed -> "lha"
| Single -> "lfs"
| Double | Double_u -> "lfd"
| _ -> "lwz" in
emit_load_store loadinstr addr i.arg 0 i.res.(0);
if chunk = Byte_signed then
` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Istore(chunk, addr)) ->
let storeinstr =
match chunk with
Byte_unsigned | Byte_signed -> "stb"
| Sixteen_unsigned | Sixteen_signed -> "sth"
| Single -> "stfs"
| Double | Double_u -> "stfd"
| _ -> "stw" in
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc n) ->
if !call_gc_label = 0 then call_gc_label := new_label();
` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
` cmplw {emit_gpr 31}, {emit_gpr 30}\n`;
` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n`;
record_frame i.live;
` bltl {emit_label !call_gc_label}\n`
| Lop(Ispecific(Ialloc_far n)) ->
if !call_gc_label = 0 then call_gc_label := new_label();
let lbl = new_label() in
` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
` cmplw {emit_gpr 31}, {emit_gpr 30}\n`;
` bge {emit_label lbl}\n`;
record_frame i.live;
` bl {emit_label !call_gc_label}\n`;
`{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n`
| Lop(Iintop Isub) -> (* subfc has swapped arguments *)
` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop Imod) ->
` divw {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` mullw {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop(Icomp cmp)) ->
begin match cmp with
Isigned c ->
` cmpw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_set_comp c i.res.(0)
| Iunsigned c ->
` cmplw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_set_comp c i.res.(0)
end
| Lop(Iintop Icheckbound) ->
` twlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop op) ->
let instr = name_for_intop op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(Isub, n)) ->
` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n`
| Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *)
let l = Misc.log2 n in
` srawi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *)
let l = Misc.log2 n in
` srawi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
` addze {emit_gpr 0}, {emit_gpr 0}\n`;
` slwi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`;
` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop_imm(Icomp cmp, n)) ->
begin match cmp with
Isigned c ->
` cmpwi {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_set_comp c i.res.(0)
| Iunsigned c ->
` cmplwi {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_set_comp c i.res.(0)
end
| Lop(Iintop_imm(Icheckbound, n)) ->
` twllei {emit_reg i.arg.(0)}, {emit_int n}\n`
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_intop_imm op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n`
| Lop(Inegf | Iabsf as op) ->
let instr = name_for_floatop1 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
let instr = name_for_floatop2 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Ifloatofint) ->
let lbl = new_label() in
float_literals := ("4.503601774854144e15", lbl) :: !float_literals;
(* That float above represents 0x4330000080000000 *)
` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`;
` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`;
` lis {emit_gpr 0}, 0x4330\n`;
` stwu {emit_gpr 0}, -8({emit_gpr 1})\n`;
` xoris {emit_gpr 0}, {emit_reg i.arg.(0)}, 0x8000\n`;
` stw {emit_gpr 0}, 4({emit_gpr 1})\n`;
` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 8\n`;
` fsub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_fpr 0}\n`
| Lop(Iintoffloat) ->
` fctiwz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`;
` stfdu {emit_fpr 0}, -8({emit_gpr 1})\n`;
` lwz {emit_reg i.res.(0)}, 4({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 8\n`
| Lop(Ispecific sop) ->
let instr = name_for_specific sop in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
| Lreloadretaddr ->
let n = frame_size() in
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` mtlr {emit_gpr 11}\n`
| Lreturn ->
let n = frame_size() in
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` blr\n`
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
` b {emit_label lbl}\n`
| Lcondbranch(tst, lbl) ->
begin match tst with
Itruetest ->
` cmpwi {emit_reg i.arg.(0)}, 0\n`;
emit_delay dslot;
` bne {emit_label lbl}\n`
| Ifalsetest ->
` cmpwi {emit_reg i.arg.(0)}, 0\n`;
emit_delay dslot;
` beq {emit_label lbl}\n`
| Iinttest cmp ->
let (comp, branch) = name_for_int_comparison cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_delay dslot;
` {emit_string branch} {emit_label lbl}\n`
| Iinttest_imm(cmp, n) ->
let (comp, branch) = name_for_int_comparison cmp in
` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_delay dslot;
` {emit_string branch} {emit_label lbl}\n`
| Ifloattest(cmp, neg) ->
` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
(* bit 0 = lt, bit 1 = gt, bit 2 = eq *)
let (bitnum, negtst) =
match cmp with
Ceq -> (2, neg)
| Cne -> (2, not neg)
| Cle -> ` cror 3, 0, 2\n`; (* lt or eq *)
(3, neg)
| Cgt -> (1, neg)
| Cge -> ` cror 3, 1, 2\n`; (* gt or eq *)
(3, neg)
| Clt -> (0, neg) in
emit_delay dslot;
if negtst
then ` bf {emit_int bitnum}, {emit_label lbl}\n`
else ` bt {emit_int bitnum}, {emit_label lbl}\n`
| Ioddtest ->
` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
` bne {emit_label lbl}\n`
| Ieventest ->
` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
` beq {emit_label lbl}\n`
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmpwi {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
begin match lbl0 with
None -> ()
| Some lbl -> ` blt {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` beq {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` bgt {emit_label lbl}\n`
end
| Lswitch jumptbl ->
if !lbl_jumptbl = 0 then lbl_jumptbl := new_label();
` addis {emit_gpr 11}, 0, {emit_upper emit_label !lbl_jumptbl}\n`;
` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_label !lbl_jumptbl}\n`;
` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`;
` slwi {emit_gpr 0}, {emit_gpr 0}, 2\n`;
` lwzx {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
` mtctr {emit_gpr 0}\n`;
` bctr\n`;
for i = 0 to Array.length jumptbl - 1 do
jumptbl_entries := jumptbl.(i) :: !jumptbl_entries;
incr num_jumptbl_entries
done
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`
| Lpushtrap ->
stack_offset := !stack_offset + 16;
` mflr {emit_gpr 0}\n`;
` stwu {emit_gpr 0}, -16({emit_gpr 1})\n`;
` stw {emit_gpr 29}, 4({emit_gpr 1})\n`;
` mr {emit_gpr 29}, {emit_gpr 1}\n`
| Lpoptrap ->
` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`;
stack_offset := !stack_offset - 16
| Lraise ->
` lwz {emit_gpr 0}, 0({emit_gpr 29})\n`;
` mr {emit_gpr 1}, {emit_gpr 29}\n`;
` mtlr {emit_gpr 0}\n`;
` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`;
` blr\n`
and emit_delay = function
None -> ()
| Some i -> emit_instr i None
(* Checks if a pseudo-instruction expands to instructions
that do not branch and do not affect CR0 nor R12. *)
let is_simple_instr i =
match i.desc with
Lop op ->
begin match op with
Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind |
Iextcall(_, _) -> false
| Ialloc(_) -> false
| Iintop(Icomp _) -> false
| Iintop_imm(Iand, _) -> false
| Iintop_imm(Icomp _, _) -> false
| _ -> true
end
| Lreloadretaddr -> true
| _ -> false
let no_interference res arg =
try
for i = 0 to Array.length arg - 1 do
for j = 0 to Array.length res - 1 do
if arg.(i).loc = res.(j).loc then raise Exit
done
done;
true
with Exit ->
false
(* Emit a sequence of instructions, trying to fill delay slots for branches *)
let rec emit_all i =
match i with
{desc = Lend} -> ()
| {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}}
when is_simple_instr i & no_interference i.res i.next.arg ->
emit_instr i.next (Some i);
emit_all i.next.next
| _ ->
emit_instr i None;
emit_all i.next
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
defined_functions := StringSet.add fundecl.fun_name !defined_functions;
tailrec_entry_point := new_label();
stack_offset := 0;
call_gc_label := 0;
float_literals := [];
` .globl {emit_symbol fundecl.fun_name}\n`;
begin match Config.system with
| "elf" | "bsd" ->
` .type {emit_symbol fundecl.fun_name}, @function\n`
| _ -> ()
end;
emit_string code_space;
` .align 2\n`;
`{emit_symbol fundecl.fun_name}:\n`;
let n = frame_size() in
if !contains_calls then begin
` mflr {emit_gpr 0}\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`;
` stw {emit_gpr 0}, {emit_int(n - 4)}({emit_gpr 1})\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`
end;
`{emit_label !tailrec_entry_point}:\n`;
branch_normalization fundecl.fun_body;
emit_all fundecl.fun_body;
(* Emit the glue code to call the GC *)
if !call_gc_label > 0 then begin
`{emit_label !call_gc_label}:\n`;
` b {emit_symbol "caml_call_gc"}\n`
end;
(* Emit the floating-point literals *)
if !float_literals <> [] then begin
emit_string rodata_space;
` .align 3\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}: .double 0d{emit_string f}\n`)
!float_literals
end
(* Emission of data *)
let declare_global_data s =
` .globl {emit_symbol s}\n`;
if Config.system = "elf" || Config.system = "bsd" then
` .type {emit_symbol s}, @object\n`
let emit_item = function
Cglobal_symbol s ->
declare_global_data s
| Cdefine_symbol s ->
`{emit_symbol s}:\n`;
| Cdefine_label lbl ->
`{emit_label (lbl + 100000)}:\n`
| Cint8 n ->
` .byte {emit_int n}\n`
| Cint16 n ->
` .short {emit_int n}\n`
| Cint32 n ->
` .long {emit_nativeint n}\n`
| Cint n ->
` .long {emit_nativeint n}\n`
| Csingle f ->
` .float 0d{emit_string f}\n`
| Cdouble f ->
` .double 0d{emit_string f}\n`
| Csymbol_address s ->
` .long {emit_symbol s}\n`
| Clabel_address lbl ->
` .long {emit_label (lbl + 100000)}\n`
| Cstring s ->
emit_bytes_directive " .byte " s
| Cskip n ->
if n > 0 then ` .space {emit_int n}\n`
| Calign n ->
` .align {emit_int (Misc.log2 n)}\n`
let data l =
emit_string data_space;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
defined_functions := StringSet.empty;
external_functions := StringSet.empty;
num_jumptbl_entries := 0;
jumptbl_entries := [];
lbl_jumptbl := 0;
(* Emit the beginning of the segments *)
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
emit_string data_space;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
emit_string code_space;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`
let end_assembly() =
(* Emit the jump table *)
if !num_jumptbl_entries > 0 then begin
emit_string code_space;
`{emit_label !lbl_jumptbl}:\n`;
List.iter
(fun lbl -> ` .long {emit_label lbl} - {emit_label !lbl_jumptbl}\n`)
(List.rev !jumptbl_entries);
jumptbl_entries := []
end;
if pic_externals then
(* Emit the pointers to external functions *)
StringSet.iter emit_external !external_functions;
(* Emit the end of the segments *)
emit_string code_space;
let lbl_end = Compilenv.make_symbol (Some "code_end") in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
emit_string data_space;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
(* Emit the frame descriptors *)
emit_string rodata_space;
let lbl = Compilenv.make_symbol (Some "frametable") in
declare_global_data lbl;
`{emit_symbol lbl}:\n`;
` .long {emit_int (List.length !frame_descriptors)}\n`;
List.iter emit_frame !frame_descriptors;
frame_descriptors := []