ocaml/asmcomp/power/emit.mlp

1100 lines
37 KiB
Plaintext
Raw Normal View History

(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Emission of PowerPC assembly code *)
module StringSet = Set.Make(struct type t = string let compare = compare end)
open Location
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Layout of the stack *)
(* In the TOC-based model:
The bottom 32 bytes of the stack are reserved at all times
for a standard linkage area.
In this area, the word at offset +20 is used by glue code and others to
save the TOC register.
The bottom two words are used as temporaries and for trap frames.
The stack is kept 16-aligned.
In the absolute-address model:
No reserved space at the bottom of the stack.
The stack is kept 16-aligned. *)
let stack_linkage_area = if toc then 32 else 0
let trap_frame_size = if toc then 32 else 16
let stack_offset = ref 0
let frame_size () =
let size =
stack_linkage_area + (* The bottom linkage area *)
!stack_offset + (* Trap frame, outgoing parameters *)
4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + (* Local variables *)
(if !contains_calls then 4 else 0) in (* The return address *)
Misc.align size 16
let slot_offset loc cls =
match loc with
Local n ->
if cls = 0
then stack_linkage_area + !stack_offset + num_stack_slots.(1) * 8 + n * 4
else stack_linkage_area + !stack_offset + n * 8
| Incoming n -> frame_size() + n
| Outgoing n -> n
(* Output a symbol *)
let emit_symbol =
match Config.system with
"aix" | "elf" | "bsd" -> (fun s -> Emitaux.emit_symbol '.' s)
| "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s)
| _ -> assert false
let emit_codesymbol s =
if toc then emit_char '.';
emit_symbol s
(* Output a label *)
let label_prefix =
match Config.system with
"aix" -> "L.."
| "elf" | "bsd" -> ".L"
| "rhapsody" -> "L"
| _ -> assert false
let emit_label lbl =
emit_string label_prefix; emit_int lbl
(* Section switching *)
let data_space =
match Config.system with
"aix" -> " .csect .data[RW]\n"
| "elf" | "bsd" -> " .section \".data\"\n"
| "rhapsody" -> " .data\n"
| _ -> assert false
let code_space =
match Config.system with
"aix" -> " .csect .text[PR]\n"
| "elf" | "bsd" -> " .section \".text\"\n"
| "rhapsody" -> " .text\n"
| _ -> assert false
let rodata_space =
match Config.system with
"aix" -> " .csect .data[RW]\n" (* ?? *)
| "elf" | "bsd" -> " .section \".rodata\"\n"
| "rhapsody" -> " .const\n"
| _ -> assert false
(* Output a pseudo-register *)
let emit_reg r =
match r.loc with
Reg r -> emit_string (register_name r)
| _ -> fatal_error "Emit.emit_reg"
let use_full_regnames =
Config.system = "rhapsody"
let emit_gpr r =
if use_full_regnames then emit_char 'r';
emit_int r
let emit_fpr r =
if use_full_regnames then emit_char 'f';
emit_int r
let emit_ccr r =
if use_full_regnames then emit_string "cr";
emit_int r
(* Output a stack reference *)
let emit_stack r =
match r.loc with
Stack s ->
let ofs = slot_offset s (register_class r) in `{emit_int ofs}({emit_gpr 1})`
| _ -> fatal_error "Emit.emit_stack"
(* Split a 32-bit integer constants in two 16-bit halves *)
let low n = n land 0xFFFF
let high n = n asr 16
let nativelow n = Nativeint.to_int n land 0xFFFF
let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16)
let is_immediate n =
n <= 32767 && n >= -32768
let is_native_immediate n =
n <= Nativeint.of_int 32767 && n >= Nativeint.of_int (-32768)
(* Output a "upper 16 bits" or "lower 16 bits" operator
(for the absolute addressing mode) *)
let emit_upper emit_fun arg =
match Config.system with
"elf" | "bsd" ->
emit_fun arg; emit_string "@ha"
| "rhapsody" ->
emit_string "ha16("; emit_fun arg; emit_string ")"
| _ -> assert false
let emit_lower emit_fun arg =
match Config.system with
"elf" | "bsd" ->
emit_fun arg; emit_string "@l"
| "rhapsody" ->
emit_string "lo16("; emit_fun arg; emit_string ")"
| _ -> assert false
(* Output a load or store operation *)
let emit_symbol_offset (s, d) =
emit_symbol s;
if d > 0 then `+`;
if d <> 0 then emit_int d
let emit_load_store instr addressing_mode addr n arg =
match addressing_mode with
Ibased(s, d) ->
(* Only relevant in the absolute model *)
` addis {emit_gpr 11}, 0, {emit_upper emit_symbol_offset (s,d)}\n`;
` {emit_string instr} {emit_reg arg}, {emit_lower emit_symbol_offset (s,d)}({emit_gpr 11})\n`
| Iindexed ofs ->
if is_immediate ofs then
` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
else begin
` lis {emit_gpr 0}, {emit_int(high ofs)}\n`;
if low ofs <> 0 then
` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`;
` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n`
end
| Iindexed2 ->
` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n`
(* After a comparison, extract the result as 0 or 1 *)
let emit_set_comp cmp res =
` mfcr {emit_gpr 0}\n`;
let bitnum =
match cmp with
Ceq | Cne -> 2
| Cgt | Cle -> 1
| Clt | Cge -> 0 in
` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`;
begin match cmp with
Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n`
| _ -> ()
end
(* Record live pointers at call points *)
type frame_descr =
{ fd_lbl: int; (* Return address *)
fd_frame_size: int; (* Size of stack frame *)
fd_live_offset: int list } (* Offsets/regs of live addresses *)
let frame_descriptors = ref([] : frame_descr list)
let record_frame live =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
live_offset := (r lsl 1) + 1 :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset } :: !frame_descriptors;
`{emit_label lbl}:`
let emit_frame fd =
` .long {emit_label fd.fd_lbl} + 4\n`;
` .short {emit_int fd.fd_frame_size}\n`;
` .short {emit_int (List.length fd.fd_live_offset)}\n`;
List.iter
(fun n ->
` .short {emit_int n}\n`)
fd.fd_live_offset;
` .align 2\n`
(* Record symbols and floating-point constants (for the TOC model).
These will go in the toc section. *)
let label_constant table constant =
try
Hashtbl.find table constant
with Not_found ->
let lbl = new_label() in
Hashtbl.add table constant lbl;
lbl
let symbol_constants = (Hashtbl.create 17 : (string, int) Hashtbl.t)
let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
let label_symbol s = label_constant symbol_constants s
let label_float s = label_constant float_constants s
let emit_symbol_constant symb lbl =
`{emit_label lbl}: .tc {emit_symbol symb}[TC], {emit_symbol symb}\n`
let emit_float_constant float lbl =
`{emit_label lbl}: .tc FD_`;
for i = 0 to 7 do
emit_printf "%02x" (Char.code (String.unsafe_get float i))
done;
`[TC], 0x`;
for i = 0 to 3 do
emit_printf "%02x" (Char.code (String.unsafe_get float i))
done;
`, 0x`;
for i = 4 to 7 do
emit_printf "%02x" (Char.code (String.unsafe_get float i))
done;
`\n`
(* Record floating-point literals (for the ELF model) *)
let float_literals = ref ([] : (string * int) list)
(* Record external C functions to be called in a position-independent way
(for Rhapsody) *)
let pic_externals = (Config.system = "rhapsody")
let external_functions = ref StringSet.empty
let emit_external s =
` .non_lazy_symbol_pointer\n`;
`L{emit_symbol s}$non_lazy_ptr:\n`;
` .indirect_symbol {emit_symbol s}\n`;
` .long 0\n`
(* Names for conditional branches after comparisons *)
let branch_for_comparison = function
Ceq -> "beq" | Cne -> "bne"
| Cle -> "ble" | Cgt -> "bgt"
| Cge -> "bge" | Clt -> "blt"
let name_for_int_comparison = function
Isigned cmp -> ("cmpw", branch_for_comparison cmp)
| Iunsigned cmp -> ("cmplw", branch_for_comparison cmp)
(* Names for various instructions *)
let name_for_intop = function
Iadd -> "add"
| Imul -> "mullw"
| Idiv -> if powerpc then "divw" else "divs"
| Iand -> "and"
| Ior -> "or"
| Ixor -> "xor"
| Ilsl -> "slw"
| Ilsr -> "srw"
| Iasr -> "sraw"
| _ -> Misc.fatal_error "Emit.Intop"
let name_for_intop_imm = function
Iadd -> "addi"
| Imul -> "mulli"
| Iand -> "andi."
| Ior -> "ori"
| Ixor -> "xori"
| Ilsl -> "slwi"
| Ilsr -> "srwi"
| Iasr -> "srawi"
| _ -> Misc.fatal_error "Emit.Intop_imm"
let name_for_floatop1 = function
Inegf -> "fneg"
| Iabsf -> "fabs"
| _ -> Misc.fatal_error "Emit.Iopf1"
let name_for_floatop2 = function
Iaddf -> "fadd"
| Isubf -> "fsub"
| Imulf -> "fmul"
| Idivf -> "fdiv"
| _ -> Misc.fatal_error "Emit.Iopf2"
let name_for_specific = function
Imultaddf -> "fmadd"
| Imultsubf -> "fmsub"
| _ -> Misc.fatal_error "Emit.Ispecific"
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Names of functions defined in the current file *)
let defined_functions = ref StringSet.empty
(* Label of glue code for calling the GC *)
let call_gc_label = ref 0
(* Label of jump table *)
let lbl_jumptbl = ref 0
(* List of all labels in jumptable (reverse order) *)
let jumptbl_entries = ref []
(* Number of jumptable entries *)
let num_jumptbl_entries = ref 0
(* Fixup conditional branches that exceed hardware allowed range *)
let load_store_size = function
Ibased(s, d) -> 2
| Iindexed ofs -> if is_immediate ofs then 1 else 3
| Iindexed2 -> 1
let instr_size = function
Lend -> 0
| Lop(Imove | Ispill | Ireload) -> 1
| Lop(Iconst_int n) -> if is_native_immediate n then 1 else 2
| Lop(Iconst_float s) -> if toc then 1 else 2
| Lop(Iconst_symbol s) -> if toc then 1 else 2
| Lop(Icall_ind) -> if toc then 6 else 2
| Lop(Icall_imm s) ->
if toc && not (StringSet.mem s !defined_functions) then 2 else 1
| Lop(Itailcall_ind) -> if toc then 7 else 5
| Lop(Itailcall_imm s) ->
if s = !function_name then 1
else if not toc || StringSet.mem s !defined_functions then 4
else 8
| Lop(Iextcall(s, true)) -> if toc then 2 else 3
| Lop(Iextcall(s, false)) -> if pic_externals then 4 else 1
| Lop(Istackoffset n) -> 1
| Lop(Iload(chunk, addr)) ->
if chunk = Byte_signed
then load_store_size addr + 1
else load_store_size addr
| Lop(Istore(chunk, addr)) -> load_store_size addr
| Lop(Ialloc n) -> 4
| Lop(Ispecific(Ialloc_far n)) -> 5
| Lop(Iintop Imod) -> if powerpc then 3 else 2
| Lop(Iintop(Icomp cmp)) -> 4
| Lop(Iintop op) -> 1
| Lop(Iintop_imm(Idiv, n)) -> 2
| Lop(Iintop_imm(Imod, n)) -> 4
| Lop(Iintop_imm(Icomp cmp, n)) -> 4
| Lop(Iintop_imm(op, n)) -> 1
| Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1
| Lop(Ifloatofint) -> 9
| Lop(Iintoffloat) -> 4
| Lop(Ispecific sop) -> 1
| Lreloadretaddr -> 2
| Lreturn -> 2
| Llabel lbl -> 0
| Lbranch lbl -> 1
| Lcondbranch(tst, lbl) -> 2
| Lcondbranch3(lbl0, lbl1, lbl2) ->
1 + (if lbl0 = None then 0 else 1)
+ (if lbl1 = None then 0 else 1)
+ (if lbl2 = None then 0 else 1)
| Lswitch jumptbl -> 8
| Lsetuptrap lbl -> 1
| Lpushtrap -> if toc then 5 else 4
| Lpoptrap -> 2
| Lraise -> if toc then 7 else 6
let label_map code =
let map = Hashtbl.create 37 in
let rec fill_map pc instr =
match instr.desc with
Lend -> (pc, map)
| Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next
| op -> fill_map (pc + instr_size op) instr.next
in fill_map 0 code
let max_branch_offset = 8180
(* 14-bit signed offset in words. Remember to cut some slack
for multi-word instructions where the branch can be anywhere in
the middle. 12 words of slack is plenty. *)
let branch_overflows map pc_branch lbl_dest =
let pc_dest = Hashtbl.find map lbl_dest in
let delta = pc_dest - (pc_branch + 1) in
delta <= -max_branch_offset || delta >= max_branch_offset
let opt_branch_overflows map pc_branch opt_lbl_dest =
match opt_lbl_dest with
None -> false
| Some lbl_dest -> branch_overflows map pc_branch lbl_dest
let fixup_branches codesize map code =
let expand_optbranch lbl n arg next =
match lbl with
None -> next
| Some l ->
instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l))
arg [||] next in
let rec fixup did_fix pc instr =
match instr.desc with
Lend -> did_fix
| Lcondbranch(test, lbl) when branch_overflows map pc lbl ->
let lbl2 = new_label() in
let cont =
instr_cons (Lbranch lbl) [||] [||]
(instr_cons (Llabel lbl2) [||] [||] instr.next) in
instr.desc <- Lcondbranch(invert_test test, lbl2);
instr.next <- cont;
fixup true (pc + 2) instr.next
| Lcondbranch3(lbl0, lbl1, lbl2)
when opt_branch_overflows map pc lbl0
|| opt_branch_overflows map pc lbl1
|| opt_branch_overflows map pc lbl2 ->
let cont =
expand_optbranch lbl0 0 instr.arg
(expand_optbranch lbl1 1 instr.arg
(expand_optbranch lbl2 2 instr.arg instr.next)) in
instr.desc <- cont.desc;
instr.next <- cont.next;
fixup true pc instr
| Lop(Ialloc n) when codesize - pc >= max_branch_offset ->
instr.desc <- Lop(Ispecific(Ialloc_far n));
fixup true (pc + 4) instr.next
| op ->
fixup did_fix (pc + instr_size op) instr.next
in fixup false 0 code
(* Iterate branch expansion till all conditional branches are OK *)
let rec branch_normalization code =
let (codesize, map) = label_map code in
if codesize >= max_branch_offset && fixup_branches codesize map code
then branch_normalization code
else ()
(* Output the assembly code for an instruction *)
let rec emit_instr i dslot =
match i.desc with
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
match (src, dst) with
{loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
` mr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
` fmr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
` stw {emit_reg src}, {emit_stack dst}\n`
| {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
` stfd {emit_reg src}, {emit_stack dst}\n`
| {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
` lwz {emit_reg dst}, {emit_stack src}\n`
| {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
` lfd {emit_reg dst}, {emit_stack src}\n`
| (_, _) ->
fatal_error "Emit: Imove"
end
| Lop(Iconst_int n) ->
if is_native_immediate n then
` li {emit_reg i.res.(0)}, {emit_nativeint n}\n`
else begin
` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`;
if nativelow n <> 0 then
` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n`
end
| Lop(Iconst_float s) ->
if toc then begin
let repr = (Obj.magic (float_of_string s) : string) in
let lbl = label_float repr in
` lfd {emit_reg i.res.(0)}, {emit_label lbl}(2) # {emit_string s}\n`
end else begin
let lbl = new_label() in
float_literals := (s, lbl) :: !float_literals;
` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`;
` lfd {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n`
end
| Lop(Iconst_symbol s) ->
if toc then begin
let lbl = label_symbol s in
` lwz {emit_reg i.res.(0)}, {emit_label lbl}(2) # {emit_symbol s}\n`
end else begin
` addis {emit_reg i.res.(0)}, 0, {emit_upper emit_symbol s}\n`;
` addi {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_lower emit_symbol s}\n`
end
| Lop(Icall_ind) ->
if toc then begin
` lwz 0, 0({emit_reg i.arg.(0)})\n`;
` stw 2, 20(1)\n`;
` mtlr 0\n`;
` lwz 2, 4({emit_reg i.arg.(0)})\n`;
record_frame i.live;
` blrl\n`;
` lwz 2, 20(1)\n`
end else begin
` mtlr {emit_reg i.arg.(0)}\n`;
record_frame i.live;
` blrl\n`
end
| Lop(Icall_imm s) ->
record_frame i.live;
` bl {emit_codesymbol s}\n`;
if toc && not (StringSet.mem s !defined_functions) then
` cror 31, 31, 31\n` (* nop *)
| Lop(Itailcall_ind) ->
let n = frame_size() in
if toc then begin
` lwz 0, 0({emit_reg i.arg.(0)})\n`;
` lwz 2, 4({emit_reg i.arg.(0)})\n`;
` mtctr 0\n`
end else begin
` mtctr {emit_reg i.arg.(0)}\n`
end;
if !contains_calls then begin
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` mtlr {emit_gpr 11}\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`
end;
` bctr\n`
| Lop(Itailcall_imm s) ->
if s = !function_name then
` b {emit_label !tailrec_entry_point}\n`
else if not toc || StringSet.mem s !defined_functions then begin
let n = frame_size() in
if !contains_calls then begin
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` mtlr {emit_gpr 11}\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`
end;
` b {emit_codesymbol s}\n`
end else begin
(* Tailcalling a function that has a possibly different TOC
is difficult, because the callee's TOC must be loaded in r2,
but ours must not be stored in 20(r1), which would overwrite
our caller's saved TOC. Hence we can't go through the
standard glue code. Here, we just proceed as in tailcall_ind. *)
let lbl = label_symbol s in
let n = frame_size() in
` lwz 12, {emit_label lbl}(2) # {emit_symbol s}\n`;
if !contains_calls then begin
` lwz 11, {emit_int(n - 4)}(1)\n`;
` lwz 0, 0(12)\n`;
` lwz 2, 4(12)\n`;
` mtctr 0\n`;
` addi 1, 1, {emit_int n}\n`;
` mtlr 11\n`
end else begin
` lwz 0, 0(12)\n`;
` lwz 2, 4(12)\n`;
` mtctr 0\n`;
if n > 0 then
` addi 1, 1, {emit_int n}\n`
end;
` bctr\n`
end
| Lop(Iextcall(s, alloc)) ->
if alloc then begin
if toc then begin
let lbl = label_symbol s in
` lwz 11, {emit_label lbl}(2) # {emit_symbol s}\n`
end else if pic_externals then begin
external_functions := StringSet.add s !external_functions;
` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`;
` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`
end else begin
` addis {emit_gpr 11}, 0, {emit_upper emit_symbol s}\n`;
` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_symbol s}\n`
end;
record_frame i.live;
` bl {emit_codesymbol "caml_c_call"}\n`
end else begin
if pic_externals then begin
external_functions := StringSet.add s !external_functions;
` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`;
` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`;
` mtlr {emit_gpr 11}\n`;
` blrl\n`
end else
` bl {emit_codesymbol s}\n`
end;
if toc then
` cror 31, 31, 31\n` (* nop *)
| Lop(Istackoffset n) ->
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int (-n)}\n`;
stack_offset := !stack_offset + n
| Lop(Iload(chunk, addr)) ->
let loadinstr =
match chunk with
Byte_unsigned -> "lbz"
| Byte_signed -> "lbz"
| Sixteen_unsigned -> "lhz"
| Sixteen_signed -> "lha"
| Single -> "lfs"
| Double | Double_u -> "lfd"
| _ -> "lwz" in
emit_load_store loadinstr addr i.arg 0 i.res.(0);
if chunk = Byte_signed then
` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Istore(chunk, addr)) ->
let storeinstr =
match chunk with
Byte_unsigned | Byte_signed -> "stb"
| Sixteen_unsigned | Sixteen_signed -> "sth"
| Single -> "stfs"
| Double | Double_u -> "stfd"
| _ -> "stw" in
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc n) ->
if !call_gc_label = 0 then call_gc_label := new_label();
` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
` cmplw {emit_gpr 31}, {emit_gpr 30}\n`;
` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n`;
record_frame i.live;
` bltl {emit_label !call_gc_label}\n`
| Lop(Ispecific(Ialloc_far n)) ->
if !call_gc_label = 0 then call_gc_label := new_label();
let lbl = new_label() in
` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`;
` cmplw {emit_gpr 31}, {emit_gpr 30}\n`;
` bge {emit_label lbl}\n`;
record_frame i.live;
` bl {emit_label !call_gc_label}\n`;
`{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n`
| Lop(Iintop Isub) -> (* subf has swapped arguments *)
(* Use subfc instead of subf for RS6000 compatibility. *)
` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop Imod) ->
if powerpc then begin
` divw {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` mullw {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
end else begin
` divs {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` mfmq {emit_reg i.res.(0)}\n`
end
| Lop(Iintop(Icomp cmp)) ->
begin match cmp with
Isigned c ->
` cmpw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_set_comp c i.res.(0)
| Iunsigned c ->
` cmplw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_set_comp c i.res.(0)
end
| Lop(Iintop Icheckbound) ->
` twlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop op) ->
let instr = name_for_intop op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(Isub, n)) ->
` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n`
| Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *)
let l = Misc.log2 n in
` srawi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *)
let l = Misc.log2 n in
` srawi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`;
` addze {emit_gpr 0}, {emit_gpr 0}\n`;
` slwi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`;
` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop_imm(Icomp cmp, n)) ->
begin match cmp with
Isigned c ->
` cmpwi {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_set_comp c i.res.(0)
| Iunsigned c ->
` cmplwi {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_set_comp c i.res.(0)
end
| Lop(Iintop_imm(Icheckbound, n)) ->
` twllei {emit_reg i.arg.(0)}, {emit_int n}\n`
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_intop_imm op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n`
| Lop(Inegf | Iabsf as op) ->
let instr = name_for_floatop1 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
let instr = name_for_floatop2 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Ifloatofint) ->
if toc then begin
let lbl = label_float "\067\048\000\000\128\000\000\000" in
(* That string above represents 0x4330000080000000 *)
` lfd 0, {emit_label lbl}(2)\n`
end else begin
let lbl = new_label() in
float_literals := ("4.503601774854144e15", lbl) :: !float_literals;
(* That float above also represents 0x4330000080000000 *)
` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`;
` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`
end;
` lis {emit_gpr 0}, 0x4330\n`;
` stwu {emit_gpr 0}, -8({emit_gpr 1})\n`;
` xoris {emit_gpr 0}, {emit_reg i.arg.(0)}, 0x8000\n`;
` stw {emit_gpr 0}, 4({emit_gpr 1})\n`;
` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 8\n`;
` fsub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_fpr 0}\n`
| Lop(Iintoffloat) ->
` fctiwz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`;
` stfdu {emit_fpr 0}, -8({emit_gpr 1})\n`;
` lwz {emit_reg i.res.(0)}, 4({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, 8\n`
| Lop(Ispecific sop) ->
let instr = name_for_specific sop in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
| Lreloadretaddr ->
let n = frame_size() in
` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`;
` mtlr {emit_gpr 11}\n`
| Lreturn ->
let n = frame_size() in
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`;
` blr\n`
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
` b {emit_label lbl}\n`
| Lcondbranch(tst, lbl) ->
begin match tst with
Itruetest ->
` cmpwi {emit_reg i.arg.(0)}, 0\n`;
emit_delay dslot;
` bne {emit_label lbl}\n`
| Ifalsetest ->
` cmpwi {emit_reg i.arg.(0)}, 0\n`;
emit_delay dslot;
` beq {emit_label lbl}\n`
| Iinttest cmp ->
let (comp, branch) = name_for_int_comparison cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
emit_delay dslot;
` {emit_string branch} {emit_label lbl}\n`
| Iinttest_imm(cmp, n) ->
let (comp, branch) = name_for_int_comparison cmp in
` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`;
emit_delay dslot;
` {emit_string branch} {emit_label lbl}\n`
| Ifloattest(cmp, neg) ->
` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
(* bit 0 = lt, bit 1 = gt, bit 2 = eq *)
let (bitnum, negtst) =
match cmp with
Ceq -> (2, neg)
| Cne -> (2, not neg)
| Cle -> ` cror 3, 0, 2\n`; (* lt or eq *)
(3, neg)
| Cgt -> (1, neg)
| Cge -> ` cror 3, 1, 2\n`; (* gt or eq *)
(3, neg)
| Clt -> (0, neg) in
emit_delay dslot;
if negtst
then ` bf {emit_int bitnum}, {emit_label lbl}\n`
else ` bt {emit_int bitnum}, {emit_label lbl}\n`
| Ioddtest ->
` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
` bne {emit_label lbl}\n`
| Ieventest ->
` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
` beq {emit_label lbl}\n`
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmpwi {emit_reg i.arg.(0)}, 1\n`;
emit_delay dslot;
begin match lbl0 with
None -> ()
| Some lbl -> ` blt {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` beq {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` bgt {emit_label lbl}\n`
end
| Lswitch jumptbl ->
if !lbl_jumptbl = 0 then lbl_jumptbl := new_label();
if toc then begin
` lwz 11, {emit_label !lbl_jumptbl}(2)\n`
end else begin
` addis {emit_gpr 11}, 0, {emit_upper emit_label !lbl_jumptbl}\n`;
` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_label !lbl_jumptbl}\n`
end;
` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`;
` slwi {emit_gpr 0}, {emit_gpr 0}, 2\n`;
` lwzx {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`;
` mtctr {emit_gpr 0}\n`;
` bctr\n`;
for i = 0 to Array.length jumptbl - 1 do
jumptbl_entries := jumptbl.(i) :: !jumptbl_entries;
incr num_jumptbl_entries
done
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`
| Lpushtrap ->
stack_offset := !stack_offset + trap_frame_size;
` mflr {emit_gpr 0}\n`;
` stwu {emit_gpr 0}, -{emit_int trap_frame_size}({emit_gpr 1})\n`;
` stw {emit_gpr 29}, 4({emit_gpr 1})\n`;
if toc then
` stw {emit_gpr 2}, 20({emit_gpr 1})\n`;
` mr {emit_gpr 29}, {emit_gpr 1}\n`
| Lpoptrap ->
` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int trap_frame_size}\n`;
stack_offset := !stack_offset - trap_frame_size
| Lraise ->
` lwz {emit_gpr 0}, 0({emit_gpr 29})\n`;
` mr {emit_gpr 1}, {emit_gpr 29}\n`;
` mtlr {emit_gpr 0}\n`;
` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`;
if toc then
` lwz {emit_gpr 2}, 20({emit_gpr 1})\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int trap_frame_size}\n`;
` blr\n`
and emit_delay = function
None -> ()
| Some i -> emit_instr i None
(* Checks if a pseudo-instruction expands to instructions
that do not branch and do not affect CR0 nor R12. *)
let is_simple_instr i =
match i.desc with
Lop op ->
begin match op with
Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind |
Iextcall(_, _) -> false
| Ialloc(_) -> false
| Iintop(Icomp _) -> false
| Iintop_imm(Iand, _) -> false
| Iintop_imm(Icomp _, _) -> false
| _ -> true
end
| Lreloadretaddr -> true
| _ -> false
let no_interference res arg =
try
for i = 0 to Array.length arg - 1 do
for j = 0 to Array.length res - 1 do
if arg.(i).loc = res.(j).loc then raise Exit
done
done;
true
with Exit ->
false
(* Emit a sequence of instructions, trying to fill delay slots for branches *)
let rec emit_all i =
match i with
{desc = Lend} -> ()
| {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}}
when is_simple_instr i & no_interference i.res i.next.arg ->
emit_instr i.next (Some i);
emit_all i.next.next
| _ ->
emit_instr i None;
emit_all i.next
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
defined_functions := StringSet.add fundecl.fun_name !defined_functions;
tailrec_entry_point := new_label();
stack_offset := 0;
call_gc_label := 0;
float_literals := [];
` .globl {emit_symbol fundecl.fun_name}\n`;
begin match Config.system with
"aix" ->
` .globl .{emit_symbol fundecl.fun_name}\n`;
` .csect {emit_symbol fundecl.fun_name}[DS]\n`;
`{emit_symbol fundecl.fun_name}:\n`;
` .long .{emit_symbol fundecl.fun_name}, TOC[tc0], 0\n`
| "elf" | "bsd" ->
` .type {emit_symbol fundecl.fun_name}, @function\n`
| _ -> ()
end;
emit_string code_space;
` .align 2\n`;
`{emit_codesymbol fundecl.fun_name}:\n`;
let n = frame_size() in
if !contains_calls then begin
` mflr {emit_gpr 0}\n`;
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`;
` stw {emit_gpr 0}, {emit_int(n - 4)}({emit_gpr 1})\n`
end else begin
if n > 0 then
` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`
end;
`{emit_label !tailrec_entry_point}:\n`;
branch_normalization fundecl.fun_body;
emit_all fundecl.fun_body;
(* Emit the glue code to call the GC *)
if !call_gc_label > 0 then begin
`{emit_label !call_gc_label}:\n`;
if toc then begin
` mflr 0\n`; (* Save return address in r0 *)
` bl .caml_call_gc\n`;
` cror 31, 31, 31\n`; (* nop *)
` blr\n` (* Will re-execute the allocation *)
end else begin
` b {emit_symbol "caml_call_gc"}\n`
end
end;
(* Emit the floating-point literals *)
if !float_literals <> [] then begin
emit_string rodata_space;
` .align 3\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}: .double 0d{emit_string f}\n`)
!float_literals
end
(* Emission of data *)
let declare_global_data s =
` .globl {emit_symbol s}\n`;
if Config.system = "elf" || Config.system = "bsd" then
` .type {emit_symbol s}, @object\n`
let emit_item = function
Cglobal_symbol s ->
declare_global_data s
| Cdefine_symbol s ->
`{emit_symbol s}:\n`;
| Cdefine_label lbl ->
`{emit_label (lbl + 100000)}:\n`
| Cint8 n ->
` .byte {emit_int n}\n`
| Cint16 n ->
` .short {emit_int n}\n`
| Cint32 n ->
` .long {emit_nativeint n}\n`
| Cint n ->
` .long {emit_nativeint n}\n`
| Csingle f ->
` .float 0d{emit_string f}\n`
| Cdouble f ->
` .double 0d{emit_string f}\n`
| Csymbol_address s ->
` .long {emit_symbol s}\n`
| Clabel_address lbl ->
` .long {emit_label (lbl + 100000)}\n`
| Cstring s ->
emit_bytes_directive " .byte " s
| Cskip n ->
if n > 0 then ` .space {emit_int n}\n`
| Calign n ->
` .align {emit_int (Misc.log2 n)}\n`
let data l =
emit_string data_space;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
Hashtbl.clear symbol_constants;
Hashtbl.clear float_constants;
defined_functions := StringSet.empty;
external_functions := StringSet.empty;
num_jumptbl_entries := 0;
jumptbl_entries := [];
lbl_jumptbl := 0;
(* Emit the beginning of the segments *)
let lbl_begin = Compilenv.current_unit_name() ^ "__data_begin" in
emit_string data_space;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.current_unit_name() ^ "__code_begin" in
emit_string code_space;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`
let end_assembly() =
(* Emit the jump table *)
if !num_jumptbl_entries > 0 then begin
let lbl_tbl =
if toc then begin
let lbl_tbl = new_label() in
` .toc\n`;
`{emit_label !lbl_jumptbl}: .tc {emit_label lbl_tbl}[TC], {emit_label lbl_tbl}\n`;
lbl_tbl
end else !lbl_jumptbl in
emit_string code_space;
`{emit_label lbl_tbl}:\n`;
List.iter
(fun lbl -> ` .long {emit_label lbl} - {emit_label lbl_tbl}\n`)
(List.rev !jumptbl_entries);
jumptbl_entries := []
end;
if toc then begin
(* Emit the table of constants *)
` .toc\n`;
Hashtbl.iter emit_symbol_constant symbol_constants;
Hashtbl.iter emit_float_constant float_constants
end;
if pic_externals then
(* Emit the pointers to external functions *)
StringSet.iter emit_external !external_functions;
(* Emit the end of the segments *)
emit_string code_space;
let lbl_end = Compilenv.current_unit_name() ^ "__code_end" in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
emit_string data_space;
let lbl_end = Compilenv.current_unit_name() ^ "__data_end" in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
(* Emit the frame descriptors *)
emit_string rodata_space;
let lbl = Compilenv.current_unit_name() ^ "__frametable" in
declare_global_data lbl;
`{emit_symbol lbl}:\n`;
` .long {emit_int (List.length !frame_descriptors)}\n`;
List.iter emit_frame !frame_descriptors;
frame_descriptors := []