(***********************************************************************) (* *) (* OCaml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0. *) (* *) (***********************************************************************) (* Emission of PowerPC assembly code *) module StringSet = Set.Make(struct type t = string let compare (x:t) y = compare x y end) open Misc open Cmm open Arch open Proc open Reg open Mach open Linearize open Emitaux (* Layout of the stack. The stack is kept 16-aligned. *) let stack_offset = ref 0 let frame_size () = let size = !stack_offset + (* Trap frame, outgoing parameters *) size_int * num_stack_slots.(0) + (* Local int variables *) size_float * num_stack_slots.(1) + (* Local float variables *) (if !contains_calls then size_int else 0) in (* The return address *) Misc.align size 16 let slot_offset loc cls = match loc with Local n -> if cls = 0 then !stack_offset + num_stack_slots.(1) * size_float + n * size_int else !stack_offset + n * size_float | Incoming n -> frame_size() + n | Outgoing n -> n (* Output a symbol *) let emit_symbol = match Config.system with | "elf" | "bsd" | "bsd_elf" -> (fun s -> Emitaux.emit_symbol '.' s) | "rhapsody" -> (fun s -> emit_char '_'; Emitaux.emit_symbol '$' s) | _ -> assert false (* Output a label *) let label_prefix = match Config.system with | "elf" | "bsd" | "bsd_elf" -> ".L" | "rhapsody" -> "L" | _ -> assert false let emit_label lbl = emit_string label_prefix; emit_int lbl let emit_data_label lbl = emit_string label_prefix; emit_string "d"; emit_int lbl (* Section switching *) let data_space = match Config.system with | "elf" | "bsd" | "bsd_elf" -> " .section \".data\"\n" | "rhapsody" -> " .data\n" | _ -> assert false let code_space = match Config.system with | "elf" | "bsd" | "bsd_elf" -> " .section \".text\"\n" | "rhapsody" -> " .text\n" | _ -> assert false let rodata_space = match Config.system with | "elf" | "bsd" | "bsd_elf" -> " .section \".rodata\"\n" | "rhapsody" -> " .const\n" | _ -> assert false (* Names of instructions that differ in 32 and 64-bit modes *) let lg = if ppc64 then "ld" else "lwz" let stg = if ppc64 then "std" else "stw" let lwa = if ppc64 then "lwa" else "lwz" let cmpg = if ppc64 then "cmpd" else "cmpw" let cmplg = if ppc64 then "cmpld" else "cmplw" let datag = if ppc64 then ".quad" else ".long" let aligng = if ppc64 then 3 else 2 let mullg = if ppc64 then "mulld" else "mullw" let divg = if ppc64 then "divd" else "divw" let tglle = if ppc64 then "tdlle" else "twlle" let sragi = if ppc64 then "sradi" else "srawi" let slgi = if ppc64 then "sldi" else "slwi" let fctigz = if ppc64 then "fctidz" else "fctiwz" (* Output a pseudo-register *) let emit_reg r = match r.loc with Reg r -> emit_string (register_name r) | _ -> fatal_error "Emit.emit_reg" let use_full_regnames = Config.system = "rhapsody" let emit_gpr r = if use_full_regnames then emit_char 'r'; emit_int r let emit_fpr r = if use_full_regnames then emit_char 'f'; emit_int r let emit_ccr r = if use_full_regnames then emit_string "cr"; emit_int r (* Output a stack reference *) let emit_stack r = match r.loc with Stack s -> let ofs = slot_offset s (register_class r) in `{emit_int ofs}({emit_gpr 1})` | _ -> fatal_error "Emit.emit_stack" (* Split a 32-bit integer constants in two 16-bit halves *) let low n = n land 0xFFFF let high n = n asr 16 let nativelow n = Nativeint.to_int n land 0xFFFF let nativehigh n = Nativeint.to_int (Nativeint.shift_right n 16) let is_immediate n = n <= 32767 && n >= -32768 let is_native_immediate n = n <= 32767n && n >= -32768n (* Output a "upper 16 bits" or "lower 16 bits" operator. *) let emit_upper emit_fun arg = match Config.system with | "elf" | "bsd" | "bsd_elf" -> emit_fun arg; emit_string "@ha" | "rhapsody" -> emit_string "ha16("; emit_fun arg; emit_string ")" | _ -> assert false let emit_lower emit_fun arg = match Config.system with | "elf" | "bsd" | "bsd_elf" -> emit_fun arg; emit_string "@l" | "rhapsody" -> emit_string "lo16("; emit_fun arg; emit_string ")" | _ -> assert false (* Output a load or store operation *) let emit_symbol_offset (s, d) = emit_symbol s; if d > 0 then `+`; if d <> 0 then emit_int d let valid_offset instr ofs = ofs land 3 = 0 || (instr <> "ld" && instr <> "std") let emit_load_store instr addressing_mode addr n arg = match addressing_mode with Ibased(s, d) -> ` addis {emit_gpr 11}, 0, {emit_upper emit_symbol_offset (s,d)}\n`; ` {emit_string instr} {emit_reg arg}, {emit_lower emit_symbol_offset (s,d)}({emit_gpr 11})\n` | Iindexed ofs -> if is_immediate ofs && valid_offset instr ofs then ` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n` else begin ` lis {emit_gpr 0}, {emit_int(high ofs)}\n`; if low ofs <> 0 then ` ori {emit_gpr 0}, {emit_gpr 0}, {emit_int(low ofs)}\n`; ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_gpr 0}\n` end | Iindexed2 -> ` {emit_string instr}x {emit_reg arg}, {emit_reg addr.(n)}, {emit_reg addr.(n+1)}\n` (* After a comparison, extract the result as 0 or 1 *) let emit_set_comp cmp res = ` mfcr {emit_gpr 0}\n`; let bitnum = match cmp with Ceq | Cne -> 2 | Cgt | Cle -> 1 | Clt | Cge -> 0 in ` rlwinm {emit_reg res}, {emit_gpr 0}, {emit_int(bitnum+1)}, 31, 31\n`; begin match cmp with Cne | Cle | Cge -> ` xori {emit_reg res}, {emit_reg res}, 1\n` | _ -> () end (* Record live pointers at call points *) let record_frame live dbg = let lbl = new_label() in let live_offset = ref [] in Reg.Set.iter (function {typ = Addr; loc = Reg r} -> live_offset := (r lsl 1) + 1 :: !live_offset | {typ = Addr; loc = Stack s} as reg -> live_offset := slot_offset s (register_class reg) :: !live_offset | _ -> ()) live; frame_descriptors := { fd_lbl = lbl; fd_frame_size = frame_size(); fd_live_offset = !live_offset; fd_debuginfo = dbg } :: !frame_descriptors; `{emit_label lbl}:\n` (* Record floating-point and large integer literals *) let float_literals = ref ([] : (int64 * int) list) let int_literals = ref ([] : (nativeint * int) list) (* Record external C functions to be called in a position-independent way (for MacOSX) *) let pic_externals = (Config.system = "rhapsody") let external_functions = ref StringSet.empty let emit_external s = ` .non_lazy_symbol_pointer\n`; `L{emit_symbol s}$non_lazy_ptr:\n`; ` .indirect_symbol {emit_symbol s}\n`; ` {emit_string datag} 0\n` (* Names for conditional branches after comparisons *) let branch_for_comparison = function Ceq -> "beq" | Cne -> "bne" | Cle -> "ble" | Cgt -> "bgt" | Cge -> "bge" | Clt -> "blt" let name_for_int_comparison = function Isigned cmp -> (cmpg, branch_for_comparison cmp) | Iunsigned cmp -> (cmplg, branch_for_comparison cmp) (* Names for various instructions *) let name_for_intop = function Iadd -> "add" | Imul -> if ppc64 then "mulld" else "mullw" | Imulh -> if ppc64 then "mulhd" else "mulhw" | Idiv -> if ppc64 then "divd" else "divw" | Iand -> "and" | Ior -> "or" | Ixor -> "xor" | Ilsl -> if ppc64 then "sld" else "slw" | Ilsr -> if ppc64 then "srd" else "srw" | Iasr -> if ppc64 then "srad" else "sraw" | _ -> Misc.fatal_error "Emit.Intop" let name_for_intop_imm = function Iadd -> "addi" | Imul -> "mulli" | Iand -> "andi." | Ior -> "ori" | Ixor -> "xori" | Ilsl -> if ppc64 then "sldi" else "slwi" | Ilsr -> if ppc64 then "srdi" else "srwi" | Iasr -> if ppc64 then "sradi" else "srawi" | _ -> Misc.fatal_error "Emit.Intop_imm" let name_for_floatop1 = function Inegf -> "fneg" | Iabsf -> "fabs" | _ -> Misc.fatal_error "Emit.Iopf1" let name_for_floatop2 = function Iaddf -> "fadd" | Isubf -> "fsub" | Imulf -> "fmul" | Idivf -> "fdiv" | _ -> Misc.fatal_error "Emit.Iopf2" let name_for_specific = function Imultaddf -> "fmadd" | Imultsubf -> "fmsub" | _ -> Misc.fatal_error "Emit.Ispecific" (* Name of current function *) let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 (* Names of functions defined in the current file *) let defined_functions = ref StringSet.empty (* Label of glue code for calling the GC *) let call_gc_label = ref 0 (* Fixup conditional branches that exceed hardware allowed range *) let load_store_size = function Ibased(s, d) -> 2 | Iindexed ofs -> if is_immediate ofs then 1 else 3 | Iindexed2 -> 1 let instr_size = function Lend -> 0 | Lop(Imove | Ispill | Ireload) -> 1 | Lop(Iconst_int n | Iconst_blockheader n) -> if is_native_immediate n then 1 else 2 | Lop(Iconst_float s) -> 2 | Lop(Iconst_symbol s) -> 2 | Lop(Icall_ind) -> 2 | Lop(Icall_imm s) -> 1 | Lop(Itailcall_ind) -> 5 | Lop(Itailcall_imm s) -> if s = !function_name then 1 else 4 | Lop(Iextcall(s, true)) -> 3 | Lop(Iextcall(s, false)) -> if pic_externals then 4 else 1 | Lop(Istackoffset n) -> 1 | Lop(Iload(chunk, addr)) -> if chunk = Byte_signed then load_store_size addr + 1 else load_store_size addr | Lop(Istore(chunk, addr, _)) -> load_store_size addr | Lop(Ialloc n) -> 4 | Lop(Ispecific(Ialloc_far n)) -> 5 | Lop(Iintop Imod) -> 3 | Lop(Iintop(Icomp cmp)) -> 4 | Lop(Iintop op) -> 1 | Lop(Iintop_imm(Icomp cmp, n)) -> 4 | Lop(Iintop_imm(op, n)) -> 1 | Lop(Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf) -> 1 | Lop(Ifloatofint) -> 9 | Lop(Iintoffloat) -> 4 | Lop(Ispecific sop) -> 1 | Lreloadretaddr -> 2 | Lreturn -> 2 | Llabel lbl -> 0 | Lbranch lbl -> 1 | Lcondbranch(tst, lbl) -> 2 | Lcondbranch3(lbl0, lbl1, lbl2) -> 1 + (if lbl0 = None then 0 else 1) + (if lbl1 = None then 0 else 1) + (if lbl2 = None then 0 else 1) | Lswitch jumptbl -> 8 | Lsetuptrap lbl -> 1 | Lpushtrap -> 4 | Lpoptrap -> 2 | Lraise _ -> 6 let label_map code = let map = Hashtbl.create 37 in let rec fill_map pc instr = match instr.desc with Lend -> (pc, map) | Llabel lbl -> Hashtbl.add map lbl pc; fill_map pc instr.next | op -> fill_map (pc + instr_size op) instr.next in fill_map 0 code let max_branch_offset = 8180 (* 14-bit signed offset in words. Remember to cut some slack for multi-word instructions where the branch can be anywhere in the middle. 12 words of slack is plenty. *) let branch_overflows map pc_branch lbl_dest = let pc_dest = Hashtbl.find map lbl_dest in let delta = pc_dest - (pc_branch + 1) in delta <= -max_branch_offset || delta >= max_branch_offset let opt_branch_overflows map pc_branch opt_lbl_dest = match opt_lbl_dest with None -> false | Some lbl_dest -> branch_overflows map pc_branch lbl_dest let fixup_branches codesize map code = let expand_optbranch lbl n arg next = match lbl with None -> next | Some l -> instr_cons (Lcondbranch(Iinttest_imm(Isigned Ceq, n), l)) arg [||] next in let rec fixup did_fix pc instr = match instr.desc with Lend -> did_fix | Lcondbranch(test, lbl) when branch_overflows map pc lbl -> let lbl2 = new_label() in let cont = instr_cons (Lbranch lbl) [||] [||] (instr_cons (Llabel lbl2) [||] [||] instr.next) in instr.desc <- Lcondbranch(invert_test test, lbl2); instr.next <- cont; fixup true (pc + 2) instr.next | Lcondbranch3(lbl0, lbl1, lbl2) when opt_branch_overflows map pc lbl0 || opt_branch_overflows map pc lbl1 || opt_branch_overflows map pc lbl2 -> let cont = expand_optbranch lbl0 0 instr.arg (expand_optbranch lbl1 1 instr.arg (expand_optbranch lbl2 2 instr.arg instr.next)) in instr.desc <- cont.desc; instr.next <- cont.next; fixup true pc instr | Lop(Ialloc n) when codesize - pc >= max_branch_offset -> instr.desc <- Lop(Ispecific(Ialloc_far n)); fixup true (pc + 4) instr.next | op -> fixup did_fix (pc + instr_size op) instr.next in fixup false 0 code (* Iterate branch expansion till all conditional branches are OK *) let rec branch_normalization code = let (codesize, map) = label_map code in if codesize >= max_branch_offset && fixup_branches codesize map code then branch_normalization code else () (* Output the assembly code for an instruction *) let rec emit_instr i dslot = match i.desc with Lend -> () | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in if src.loc <> dst.loc then begin match (src, dst) with {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} -> ` mr {emit_reg dst}, {emit_reg src}\n` | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> ` fmr {emit_reg dst}, {emit_reg src}\n` | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} -> ` {emit_string stg} {emit_reg src}, {emit_stack dst}\n` | {loc = Reg rs; typ = Float}, {loc = Stack sd} -> ` stfd {emit_reg src}, {emit_stack dst}\n` | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} -> ` {emit_string lg} {emit_reg dst}, {emit_stack src}\n` | {loc = Stack ss; typ = Float}, {loc = Reg rd} -> ` lfd {emit_reg dst}, {emit_stack src}\n` | (_, _) -> fatal_error "Emit: Imove" end | Lop(Iconst_int n | Iconst_blockheader n) -> if is_native_immediate n then ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin ` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`; if nativelow n <> 0 then ` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n` end else begin let lbl = new_label() in int_literals := (n, lbl) :: !int_literals; ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` {emit_string lg} {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n` end | Lop(Iconst_float f) -> let lbl = new_label() in float_literals := (Int64.bits_of_float f, lbl) :: !float_literals; ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` lfd {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n` | Lop(Iconst_symbol s) -> ` addis {emit_reg i.res.(0)}, 0, {emit_upper emit_symbol s}\n`; ` addi {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_lower emit_symbol s}\n` | Lop(Icall_ind) -> ` mtctr {emit_reg i.arg.(0)}\n`; ` bctrl\n`; record_frame i.live i.dbg | Lop(Icall_imm s) -> ` bl {emit_symbol s}\n`; record_frame i.live i.dbg | Lop(Itailcall_ind) -> let n = frame_size() in ` mtctr {emit_reg i.arg.(0)}\n`; if !contains_calls then begin ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`; ` mtlr {emit_gpr 11}\n` end else begin if n > 0 then ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n` end; ` bctr\n` | Lop(Itailcall_imm s) -> if s = !function_name then ` b {emit_label !tailrec_entry_point}\n` else begin let n = frame_size() in if !contains_calls then begin ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`; ` mtlr {emit_gpr 11}\n` end else begin if n > 0 then ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n` end; ` b {emit_symbol s}\n` end | Lop(Iextcall(s, alloc)) -> if alloc then begin if pic_externals then begin external_functions := StringSet.add s !external_functions; ` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`; ` {emit_string lg} {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n` end else begin ` addis {emit_gpr 11}, 0, {emit_upper emit_symbol s}\n`; ` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_symbol s}\n` end; ` bl {emit_symbol "caml_c_call"}\n`; record_frame i.live i.dbg end else begin if pic_externals then begin external_functions := StringSet.add s !external_functions; ` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`; ` {emit_string lg} {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`; ` mtctr {emit_gpr 11}\n`; ` bctrl\n` end else ` bl {emit_symbol s}\n` end | Lop(Istackoffset n) -> ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int (-n)}\n`; stack_offset := !stack_offset + n | Lop(Iload(chunk, addr)) -> let loadinstr = match chunk with Byte_unsigned -> "lbz" | Byte_signed -> "lbz" | Sixteen_unsigned -> "lhz" | Sixteen_signed -> "lha" | Thirtytwo_unsigned -> "lwz" | Thirtytwo_signed -> if ppc64 then "lwa" else "lwz" | Word -> lg | Single -> "lfs" | Double | Double_u -> "lfd" in emit_load_store loadinstr addr i.arg 0 i.res.(0); if chunk = Byte_signed then ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` | Lop(Istore(chunk, addr, _)) -> let storeinstr = match chunk with Byte_unsigned | Byte_signed -> "stb" | Sixteen_unsigned | Sixteen_signed -> "sth" | Thirtytwo_unsigned | Thirtytwo_signed -> "stw" | Word -> stg | Single -> "stfs" | Double | Double_u -> "stfd" in emit_load_store storeinstr addr i.arg 1 i.arg.(0) | Lop(Ialloc n) -> if !call_gc_label = 0 then call_gc_label := new_label(); ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; ` {emit_string cmplg} {emit_gpr 31}, {emit_gpr 30}\n`; ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n`; ` bltl {emit_label !call_gc_label}\n`; record_frame i.live Debuginfo.none | Lop(Ispecific(Ialloc_far n)) -> if !call_gc_label = 0 then call_gc_label := new_label(); let lbl = new_label() in ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; ` {emit_string cmplg} {emit_gpr 31}, {emit_gpr 30}\n`; ` bge {emit_label lbl}\n`; ` bl {emit_label !call_gc_label}\n`; record_frame i.live Debuginfo.none; `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n` | Lop(Iintop Isub) -> (* subfc has swapped arguments *) ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` | Lop(Iintop Imod) -> ` {emit_string divg} {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` {emit_string mullg} {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`; ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` | Lop(Iintop(Icomp cmp)) -> begin match cmp with Isigned c -> ` {emit_string cmpg} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; emit_set_comp c i.res.(0) | Iunsigned c -> ` {emit_string cmplg} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; emit_set_comp c i.res.(0) end | Lop(Iintop Icheckbound) -> if !Clflags.debug then record_frame Reg.Set.empty i.dbg; ` {emit_string tglle} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> let instr = name_for_intop op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop_imm(Isub, n)) -> ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` | Lop(Iintop_imm(Icomp cmp, n)) -> begin match cmp with Isigned c -> ` {emit_string cmpg}i {emit_reg i.arg.(0)}, {emit_int n}\n`; emit_set_comp c i.res.(0) | Iunsigned c -> ` {emit_string cmplg}i {emit_reg i.arg.(0)}, {emit_int n}\n`; emit_set_comp c i.res.(0) end | Lop(Iintop_imm(Icheckbound, n)) -> if !Clflags.debug then record_frame Reg.Set.empty i.dbg; ` {emit_string tglle}i {emit_reg i.arg.(0)}, {emit_int n}\n` | Lop(Iintop_imm(op, n)) -> let instr = name_for_intop_imm op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` | Lop(Inegf | Iabsf as op) -> let instr = name_for_floatop1 op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> let instr = name_for_floatop2 op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Ifloatofint) -> if ppc64 then begin ` stdu {emit_reg i.arg.(0)}, -16({emit_gpr 1})\n`; ` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` end else begin let lbl = new_label() in float_literals := (0x4330000080000000L, lbl) :: !float_literals; ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`; ` lis {emit_gpr 0}, 0x4330\n`; ` stwu {emit_gpr 0}, -16({emit_gpr 1})\n`; ` xoris {emit_gpr 0}, {emit_reg i.arg.(0)}, 0x8000\n`; ` stw {emit_gpr 0}, 4({emit_gpr 1})\n`; ` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; ` fsub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_fpr 0}\n` end | Lop(Iintoffloat) -> let ofs = if ppc64 then 0 else 4 in ` {emit_string fctigz} {emit_fpr 0}, {emit_reg i.arg.(0)}\n`; ` stfdu {emit_fpr 0}, -16({emit_gpr 1})\n`; ` {emit_string lg} {emit_reg i.res.(0)}, {emit_int ofs}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n` | Lop(Ispecific sop) -> let instr = name_for_specific sop in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` | Lreloadretaddr -> let n = frame_size() in ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` mtlr {emit_gpr 11}\n` | Lreturn -> let n = frame_size() in if n > 0 then ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`; ` blr\n` | Llabel lbl -> `{emit_label lbl}:\n` | Lbranch lbl -> ` b {emit_label lbl}\n` | Lcondbranch(tst, lbl) -> begin match tst with Itruetest -> ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 0\n`; emit_delay dslot; ` bne {emit_label lbl}\n` | Ifalsetest -> ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 0\n`; emit_delay dslot; ` beq {emit_label lbl}\n` | Iinttest cmp -> let (comp, branch) = name_for_int_comparison cmp in ` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; emit_delay dslot; ` {emit_string branch} {emit_label lbl}\n` | Iinttest_imm(cmp, n) -> let (comp, branch) = name_for_int_comparison cmp in ` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`; emit_delay dslot; ` {emit_string branch} {emit_label lbl}\n` | Ifloattest(cmp, neg) -> ` fcmpu {emit_ccr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; (* bit 0 = lt, bit 1 = gt, bit 2 = eq *) let (bitnum, negtst) = match cmp with Ceq -> (2, neg) | Cne -> (2, not neg) | Cle -> ` cror 3, 0, 2\n`; (* lt or eq *) (3, neg) | Cgt -> (1, neg) | Cge -> ` cror 3, 1, 2\n`; (* gt or eq *) (3, neg) | Clt -> (0, neg) in emit_delay dslot; if negtst then ` bf {emit_int bitnum}, {emit_label lbl}\n` else ` bt {emit_int bitnum}, {emit_label lbl}\n` | Ioddtest -> ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; emit_delay dslot; ` bne {emit_label lbl}\n` | Ieventest -> ` andi. {emit_gpr 0}, {emit_reg i.arg.(0)}, 1\n`; emit_delay dslot; ` beq {emit_label lbl}\n` end | Lcondbranch3(lbl0, lbl1, lbl2) -> ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 1\n`; emit_delay dslot; begin match lbl0 with None -> () | Some lbl -> ` blt {emit_label lbl}\n` end; begin match lbl1 with None -> () | Some lbl -> ` beq {emit_label lbl}\n` end; begin match lbl2 with None -> () | Some lbl -> ` bgt {emit_label lbl}\n` end | Lswitch jumptbl -> let lbl = new_label() in ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; ` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_label lbl}\n`; ` {emit_string slgi} {emit_gpr 0}, {emit_reg i.arg.(0)}, 2\n`; ` {emit_string lwa}x {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ` mtctr {emit_gpr 0}\n`; ` bctr\n`; emit_string rodata_space; `{emit_label lbl}:`; for i = 0 to Array.length jumptbl - 1 do ` .long {emit_label jumptbl.(i)} - {emit_label lbl}\n` done; emit_string code_space | Lsetuptrap lbl -> ` bl {emit_label lbl}\n` | Lpushtrap -> stack_offset := !stack_offset + 16; ` mflr {emit_gpr 0}\n`; ` {emit_string stg}u {emit_gpr 0}, -16({emit_gpr 1})\n`; ` {emit_string stg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` mr {emit_gpr 29}, {emit_gpr 1}\n` | Lpoptrap -> ` {emit_string lg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; stack_offset := !stack_offset - 16 | Lraise k -> begin match !Clflags.debug, k with | true, Lambda.Raise_regular -> ` bl {emit_symbol "caml_raise_exn"}\n`; record_frame Reg.Set.empty i.dbg | true, Lambda.Raise_reraise -> ` bl {emit_symbol "caml_reraise_exn"}\n`; record_frame Reg.Set.empty i.dbg | false, _ | true, Lambda.Raise_notrace -> ` {emit_string lg} {emit_gpr 0}, 0({emit_gpr 29})\n`; ` mr {emit_gpr 1}, {emit_gpr 29}\n`; ` mtctr {emit_gpr 0}\n`; ` {emit_string lg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; ` bctr\n` end and emit_delay = function None -> () | Some i -> emit_instr i None (* Checks if a pseudo-instruction expands to instructions that do not branch and do not affect CR0 nor R12. *) let is_simple_instr i = match i.desc with Lop op -> begin match op with Icall_imm _ | Icall_ind | Itailcall_imm _ | Itailcall_ind | Iextcall(_, _) -> false | Ialloc(_) -> false | Iintop(Icomp _) -> false | Iintop_imm(Iand, _) -> false | Iintop_imm(Icomp _, _) -> false | _ -> true end | Lreloadretaddr -> true | _ -> false let no_interference res arg = try for i = 0 to Array.length arg - 1 do for j = 0 to Array.length res - 1 do if arg.(i).loc = res.(j).loc then raise Exit done done; true with Exit -> false (* Emit a sequence of instructions, trying to fill delay slots for branches *) let rec emit_all i = match i with {desc = Lend} -> () | {next = {desc = (Lcondbranch(_, _) | Lcondbranch3(_, _, _))}} when is_simple_instr i && no_interference i.res i.next.arg -> emit_instr i.next (Some i); emit_all i.next.next | _ -> emit_instr i None; emit_all i.next (* Emission of a function declaration *) let fundecl fundecl = function_name := fundecl.fun_name; defined_functions := StringSet.add fundecl.fun_name !defined_functions; tailrec_entry_point := new_label(); stack_offset := 0; call_gc_label := 0; float_literals := []; int_literals := []; if Config.system = "rhapsody" && not !Clflags.output_c_object && is_generic_function fundecl.fun_name then (* PR#4690 *) ` .private_extern {emit_symbol fundecl.fun_name}\n` else ` .globl {emit_symbol fundecl.fun_name}\n`; begin match Config.system with | "elf" | "bsd" | "bsd_elf" -> ` .type {emit_symbol fundecl.fun_name}, @function\n` | _ -> () end; emit_string code_space; ` .align 2\n`; `{emit_symbol fundecl.fun_name}:\n`; let n = frame_size() in if !contains_calls then begin ` mflr {emit_gpr 0}\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`; ` {emit_string stg} {emit_gpr 0}, {emit_int(n - size_addr)}({emit_gpr 1})\n` end else begin if n > 0 then ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n` end; `{emit_label !tailrec_entry_point}:\n`; branch_normalization fundecl.fun_body; emit_all fundecl.fun_body; (* Emit the glue code to call the GC *) if !call_gc_label > 0 then begin `{emit_label !call_gc_label}:\n`; ` b {emit_symbol "caml_call_gc"}\n` end; (* Emit the numeric literals *) if !float_literals <> [] || !int_literals <> [] then begin emit_string rodata_space; ` .align 3\n`; List.iter (fun (f, lbl) -> `{emit_label lbl}:`; if ppc64 then emit_float64_directive ".quad" f else emit_float64_split_directive ".long" f) !float_literals; List.iter (fun (n, lbl) -> `{emit_label lbl}: {emit_string datag} {emit_nativeint n}\n`) !int_literals end (* Emission of data *) let declare_global_data s = ` .globl {emit_symbol s}\n`; match Config.system with | "elf" | "bsd" | "bsd_elf" -> ` .type {emit_symbol s}, @object\n` | "rhapsody" -> () | _ -> assert false let emit_item = function Cglobal_symbol s -> declare_global_data s | Cdefine_symbol s -> `{emit_symbol s}:\n`; | Cdefine_label lbl -> `{emit_data_label lbl}:\n` | Cint8 n -> ` .byte {emit_int n}\n` | Cint16 n -> ` .short {emit_int n}\n` | Cint32 n -> ` .long {emit_nativeint n}\n` | Cint n -> ` {emit_string datag} {emit_nativeint n}\n` | Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f) | Cdouble f -> if ppc64 then emit_float64_directive ".quad" (Int64.bits_of_float f) else emit_float64_split_directive ".long" (Int64.bits_of_float f) | Csymbol_address s -> ` {emit_string datag} {emit_symbol s}\n` | Clabel_address lbl -> ` {emit_string datag} {emit_data_label lbl}\n` | Cstring s -> emit_bytes_directive " .byte " s | Cskip n -> if n > 0 then ` .space {emit_int n}\n` | Calign n -> ` .align {emit_int (Misc.log2 n)}\n` let data l = emit_string data_space; List.iter emit_item l (* Beginning / end of an assembly file *) let begin_assembly() = defined_functions := StringSet.empty; external_functions := StringSet.empty; (* Emit the beginning of the segments *) let lbl_begin = Compilenv.make_symbol (Some "data_begin") in emit_string data_space; declare_global_data lbl_begin; `{emit_symbol lbl_begin}:\n`; let lbl_begin = Compilenv.make_symbol (Some "code_begin") in emit_string code_space; declare_global_data lbl_begin; `{emit_symbol lbl_begin}:\n` let end_assembly() = if pic_externals then (* Emit the pointers to external functions *) StringSet.iter emit_external !external_functions; (* Emit the end of the segments *) emit_string code_space; let lbl_end = Compilenv.make_symbol (Some "code_end") in declare_global_data lbl_end; `{emit_symbol lbl_end}:\n`; ` .long 0\n`; emit_string data_space; let lbl_end = Compilenv.make_symbol (Some "data_end") in declare_global_data lbl_end; `{emit_symbol lbl_end}:\n`; ` {emit_string datag} 0\n`; (* Emit the frame descriptors *) emit_string rodata_space; let lbl = Compilenv.make_symbol (Some "frametable") in declare_global_data lbl; `{emit_symbol lbl}:\n`; emit_frames { efa_label = (fun l -> ` {emit_string datag} {emit_label l}\n`); efa_16 = (fun n -> ` .short {emit_int n}\n`); efa_32 = (fun n -> ` .long {emit_int32 n}\n`); efa_word = (fun n -> ` {emit_string datag} {emit_int n}\n`); efa_align = (fun n -> ` .align {emit_int (Misc.log2 n)}\n`); efa_label_rel = (fun lbl ofs -> ` .long ({emit_label lbl} - .) + {emit_int32 ofs}\n`); efa_def_label = (fun l -> `{emit_label l}:\n`); efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000")) }