(***********************************************************************) (* *) (* Objective Caml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* Automatique. Distributed only by permission. *) (* *) (***********************************************************************) (* $Id$ *) (* Emission of HP PA-RISC assembly code *) (* Must come before open Reg... *) module StringSet = Set.Make(struct type t = string let compare = compare end) open Misc open Cmm open Arch open Proc open Reg open Mach open Linearize open Emitaux (* Adaptation to HPUX and NextStep *) let hpux = match Config.system with "hpux" -> true | "nextstep" -> false | _ -> fatal_error "Emit_hppa.hpux" (* Tradeoff between code size and code speed *) let fastcode_flag = ref true (* Layout of the stack *) (* Always keep the stack 8-aligned. *) let stack_offset = ref 0 let frame_size () = let size = !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + (if !contains_calls then 4 else 0) in Misc.align size 8 let slot_offset loc cl = match loc with Incoming n -> -frame_size() - n | Local n -> if cl = 0 then - !stack_offset - num_stack_slots.(1) * 8 - n * 4 - 4 else - !stack_offset - n * 8 - 8 | Outgoing n -> -n (* Output a label *) let label_prefix = if hpux then "L$" else "L" let emit_label lbl = emit_string label_prefix; emit_int lbl (* Output a symbol *) let symbol_prefix = if hpux then "" else "_" let emit_symbol s = emit_string symbol_prefix; Emitaux.emit_symbol '$' s (* Output a pseudo-register *) let emit_reg r = match r.loc with Reg r -> emit_string (register_name r) | _ -> fatal_error "Emit.emit_reg" (* Output low address / high address prefixes *) let low_prefix = if hpux then "RR'" else "R\`" let high_prefix = if hpux then "LR'" else "L\`" let is_immediate n = (n < 16) && (n >= -16) (* 5 bits *) let emit_int_low n = emit_string low_prefix; emit_int n let emit_int_high n = emit_string high_prefix; emit_int n let emit_nativeint_low n = emit_string low_prefix; emit_nativeint n let emit_nativeint_high n = emit_string high_prefix; emit_nativeint n let emit_symbol_low s = if hpux then `RR'{emit_symbol s}-$global$` else `R\`{emit_symbol s}` let load_symbol_high s = if hpux then ` addil LR'{emit_symbol s}-$global$, %r27\n` else ` ldil L\`{emit_symbol s}, %r1\n` let load_symbol_offset_high s ofs = if hpux then ` addil LR'{emit_symbol s}-$global$+{emit_int ofs}, %r27\n` else ` ldil L\`{emit_symbol s}+{emit_int ofs}, %r1\n` (* Record imported and defined symbols *) let used_symbols = ref StringSet.empty let defined_symbols = ref StringSet.empty let called_symbols = ref StringSet.empty let use_symbol s = if hpux then used_symbols := StringSet.add s !used_symbols let define_symbol s = defined_symbols := StringSet.add s !defined_symbols let call_symbol s = if hpux then begin used_symbols := StringSet.add s !used_symbols; called_symbols := StringSet.add s !called_symbols end (* An external symbol is code if either it is branched to, or it does not start with an uppercase letter. *) let emit_import s = if not(StringSet.mem s !defined_symbols) then begin ` .import {emit_symbol s}`; if StringSet.mem s !called_symbols or s.[0] < 'A' or s.[0] > 'Z' then `, code\n` else `, data\n` end let emit_imports () = StringSet.iter emit_import !used_symbols; used_symbols := StringSet.empty; defined_symbols := StringSet.empty; called_symbols := StringSet.empty (* Output an integer load / store *) let is_offset n = (n < 8192) && (n >= -8192) (* 14 bits *) let is_offset_native n = Nativeint.cmp n 8192 < 0 && Nativeint.cmp n (-8192) >= 0 let emit_load instr addr arg dst = match addr with Ibased(s, 0) -> use_symbol s; load_symbol_high s; ` {emit_string instr} {emit_symbol_low s}(%r1), {emit_reg dst}\n` | Ibased(s, ofs) -> load_symbol_offset_high s ofs; use_symbol s; ` {emit_string instr} {emit_symbol_low s}+{emit_int ofs}(%r1), {emit_reg dst}\n` | Iindexed ofs -> if is_offset ofs then ` {emit_string instr} {emit_int ofs}({emit_reg arg.(0)}), {emit_reg dst}\n` else begin ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`; ` {emit_string instr} {emit_int_low ofs}(%r1), {emit_reg dst}\n` end let emit_store instr addr arg src = match addr with Ibased(s, 0) -> use_symbol s; load_symbol_high s; ` {emit_string instr} {emit_reg src}, {emit_symbol_low s}(%r1)\n` | Ibased(s, ofs) -> use_symbol s; load_symbol_offset_high s ofs; ` {emit_string instr} {emit_reg src}, {emit_symbol_low s}+{emit_int ofs}(%r1)\n` | Iindexed ofs -> if is_offset ofs then ` {emit_string instr} {emit_reg src}, {emit_int ofs}({emit_reg arg.(1)})\n` else begin ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`; ` {emit_string instr} {emit_reg src}, {emit_int_low ofs}(%r1)\n` end (* Output a floating-point load / store *) let emit_float_load addr arg dst = match addr with Ibased(s, 0) -> use_symbol s; load_symbol_high s; ` ldo {emit_symbol_low s}(%r1), %r1\n`; ` fldws 0(%r1), {emit_reg dst}L\n`; ` fldws 4(%r1), {emit_reg dst}R\n` | Ibased(s, ofs) -> use_symbol s; load_symbol_offset_high s ofs; ` ldo {emit_symbol_low s}+{emit_int ofs}(%r1), %r1\n`; ` fldws 0(%r1), {emit_reg dst}L\n`; ` fldws 4(%r1), {emit_reg dst}R\n` | Iindexed ofs -> if is_immediate ofs & is_immediate (ofs+4) then begin ` fldws {emit_int ofs}({emit_reg arg.(0)}), {emit_reg dst}L\n`; ` fldws {emit_int (ofs+4)}({emit_reg arg.(0)}), {emit_reg dst}R\n` end else begin if is_offset ofs then ` ldo {emit_int ofs}({emit_reg arg.(0)}), %r1\n` else begin ` addil {emit_int_high ofs}, {emit_reg arg.(0)}\n`; ` ldo {emit_int_low ofs}(%r1), %r1\n` end; ` fldws 0(%r1), {emit_reg dst}L\n`; ` fldws 4(%r1), {emit_reg dst}R\n` end let emit_float_store addr arg src = match addr with Ibased(s, 0) -> use_symbol s; load_symbol_high s; ` ldo {emit_symbol_low s}(%r1), %r1\n`; ` fstws {emit_reg src}L, 0(%r1)\n`; ` fstws {emit_reg src}R, 4(%r1)\n` | Ibased(s, ofs) -> use_symbol s; load_symbol_offset_high s ofs; ` ldo {emit_symbol_low s}+{emit_int ofs}(%r1), %r1\n`; ` fstws {emit_reg src}L, 0(%r1)\n`; ` fstws {emit_reg src}R, 4(%r1)\n` | Iindexed ofs -> if is_immediate ofs & is_immediate (ofs+4) then begin ` fstws {emit_reg src}L, {emit_int ofs}({emit_reg arg.(1)})\n`; ` fstws {emit_reg src}R, {emit_int(ofs+4)}({emit_reg arg.(1)})\n` end else begin if is_offset ofs then ` ldo {emit_int ofs}({emit_reg arg.(1)}), %r1\n` else begin ` addil {emit_int_high ofs}, {emit_reg arg.(1)}\n`; ` ldo {emit_int_low ofs}(%r1), %r1\n` end; ` fstws {emit_reg src}L, 0(%r1)\n`; ` fstws {emit_reg src}R, 4(%r1)\n` end (* Output an align directive. Under HPUX: alignment = number of bytes Undex NextStep: alignment = log2 of number of bytes *) let emit_align n = if hpux then ` .align {emit_int n}\n` else ` .align {emit_int(Misc.log2 n)}\n` (* Record live pointers at call points *) type frame_descr = { fd_lbl: int; (* Return address *) fd_frame_size: int; (* Size of stack frame *) fd_live_offset: int list } (* Offsets/regs of live addresses *) let frame_descriptors = ref([] : frame_descr list) let record_frame live = let lbl = new_label() in let live_offset = ref [] in Reg.Set.iter (function {typ = Addr; loc = Reg r} -> live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Addr; loc = Stack s} as reg -> live_offset := slot_offset s (register_class reg) :: !live_offset | _ -> ()) live; frame_descriptors := { fd_lbl = lbl; fd_frame_size = frame_size(); fd_live_offset = !live_offset } :: !frame_descriptors; `{emit_label lbl}:\n` let emit_frame fd = ` .long {emit_label fd.fd_lbl} + 3\n`; ` .short {emit_int fd.fd_frame_size}\n`; ` .short {emit_int (List.length fd.fd_live_offset)}\n`; List.iter (fun n -> ` .short {emit_int n}\n`) fd.fd_live_offset; emit_align 4 (* Record floating-point constants *) let float_constants = ref ([] : (int * string) list) let emit_float_constant (lbl, cst) = if hpux then begin ` .space $TEXT$\n`; ` .subspa $LIT$\n` end else ` .literal8\n`; emit_align 8; `{emit_label lbl}: .double {emit_string cst}\n` (* Record external calls and generate stub code for these *) let stub_label_table = (Hashtbl.create 19 : (string, int) Hashtbl.t) let stub_label symb = try Hashtbl.find stub_label_table symb with Not_found -> let lbl = new_label() in Hashtbl.add stub_label_table symb lbl; lbl let emit_stub symb lbl = `{emit_label lbl}: ldil L\`{emit_symbol symb}, %r1\n`; ` ble,n {emit_symbol_low symb}(4, %r1)\n` let emit_stubs () = ` .text\n`; emit_align 4; Hashtbl.iter emit_stub stub_label_table (* Describe the registers used to pass arguments to a C function *) let describe_call arg = ` .CALL RTNVAL=NO`; let pos = ref 0 in for i = 0 to Array.length arg - 1 do if !pos < 4 then begin match arg.(i).typ with Float -> `, ARGW{emit_int !pos}=FR, ARGW{emit_int(!pos + 1)}=FU`; pos := !pos + 2 | _ -> `, ARGW{emit_int !pos}=GR`; pos := !pos + 1 end done; `\n` (* Output a function call *) let emit_call s retreg = if hpux then begin ` bl {emit_symbol s}, {emit_string retreg}\n`; call_symbol s end else if StringSet.mem s !defined_symbols then ` bl {emit_symbol s}, {emit_string retreg}\n` else begin let lbl = stub_label s in ` jbsr {emit_symbol s}, {emit_string retreg}, {emit_label lbl}\n` end (* Names of various instructions *) let name_for_int_operation = function Iadd -> "add" | Isub -> "sub" | Iand -> "and" | Ior -> "or" | Ixor -> "xor" | _ -> Misc.fatal_error "Emit.name_for_int_operation" let name_for_float_operation = function Iaddf -> "fadd,dbl" | Isubf -> "fsub,dbl" | Imulf -> "fmpy,dbl" | Idivf -> "fdiv,dbl" | _ -> Misc.fatal_error "Emit.name_for_float_operation" let name_for_specific_operation = function Ishift1add -> "sh1add" | Ishift2add -> "sh2add" | Ishift3add -> "sh3add" let name_for_int_comparison = function Isigned Ceq -> "=" | Isigned Cne -> "<>" | Isigned Cle -> "<=" | Isigned Cgt -> ">" | Isigned Clt -> "<" | Isigned Cge -> ">=" | Iunsigned Ceq -> "=" | Iunsigned Cne -> "<>" | Iunsigned Cle -> "<<=" | Iunsigned Cgt -> ">>" | Iunsigned Clt -> "<<" | Iunsigned Cge -> ">>=" let name_for_float_comparison cmp neg = match cmp with Ceq -> if neg then "=" else "!=" | Cne -> if neg then "!=" else "=" | Cle -> if neg then "<=" else "!<=" | Cgt -> if neg then ">" else "!>" | Clt -> if neg then "<" else "!<" | Cge -> if neg then ">=" else "!>=" let negate_int_comparison = function Isigned cmp -> Isigned(Cmm.negate_comparison cmp) | Iunsigned cmp -> Iunsigned(Cmm.negate_comparison cmp) let swap_int_comparison = function Isigned cmp -> Isigned(Cmm.swap_comparison cmp) | Iunsigned cmp -> Iunsigned(Cmm.swap_comparison cmp) (* Output the assembly code for an instruction *) (* Name of current function *) let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 (* Label of trap for out-of-range accesses *) let range_check_trap = ref 0 let rec emit_instr i dslot = match i.desc with Lend -> () | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in begin match (src, dst) with {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} -> ` copy {emit_reg src}, {emit_reg dst}\n` | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> ` fcpy,dbl {emit_reg src}, {emit_reg dst}\n` | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} -> let ofs = slot_offset sd 0 in ` stw {emit_reg src}, {emit_int ofs}(%r30)\n` | {loc = Reg rs; typ = Float}, {loc = Stack sd} -> let ofs = slot_offset sd 1 in if is_immediate ofs then ` fstds {emit_reg src}, {emit_int ofs}(%r30)\n` else begin ` ldo {emit_int ofs}(%r30), %r1\n`; ` fstds {emit_reg src}, 0(%r1)\n` end | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} -> let ofs = slot_offset ss 0 in ` ldw {emit_int ofs}(%r30), {emit_reg dst}\n` | {loc = Stack ss; typ = Float}, {loc = Reg rd} -> let ofs = slot_offset ss 1 in if is_immediate ofs then ` fldds {emit_int ofs}(%r30), {emit_reg dst}\n` else begin ` ldo {emit_int ofs}(%r30), %r1\n`; ` fldds 0(%r1), {emit_reg dst}\n` end | (_, _) -> fatal_error "Emit: Imove" end | Lop(Iconst_int n) -> if is_offset_native n then ` ldi {emit_nativeint n}, {emit_reg i.res.(0)}\n` else begin ` ldil {emit_nativeint_high n}, {emit_reg i.res.(0)}\n`; ` ldo {emit_nativeint_low n}({emit_reg i.res.(0)}), {emit_reg i.res.(0)}\n` end | Lop(Iconst_float s) -> let lbl = new_label() in float_constants := (lbl, s) :: !float_constants; ` ldil {emit_string high_prefix}{emit_label lbl}, %r1\n`; ` ldo {emit_string low_prefix}{emit_label lbl}(%r1), %r1\n`; ` fldds 0(%r1), {emit_reg i.res.(0)}\n` | Lop(Iconst_symbol s) -> use_symbol s; load_symbol_high s; ` ldo {emit_symbol_low s}(%r1), {emit_reg i.res.(0)}\n` | Lop(Icall_ind) -> ` ble 0(4, {emit_reg i.arg.(0)})\n`; (* retaddr in %r31 *) ` copy %r31, %r2\n`; (* in delay slot: save retaddr in %r2 *) record_frame i.live | Lop(Icall_imm s) -> emit_call s "%r2"; fill_delay_slot dslot; record_frame i.live | Lop(Itailcall_ind) -> let n = frame_size() in ` bv 0({emit_reg i.arg.(0)})\n`; if !contains_calls (* in delay slot *) then ` ldwm {emit_int(-n)}(%r30), %r2\n` else ` ldo {emit_int(-n)}(%r30), %r30\n` | Lop(Itailcall_imm s) -> let n = frame_size() in if s = !function_name then begin ` b,n {emit_label !tailrec_entry_point}\n` end else begin emit_call s "%r0"; if !contains_calls (* in delay slot *) then ` ldwm {emit_int(-n)}(%r30), %r2\n` else ` ldo {emit_int(-n)}(%r30), %r30\n` end | Lop(Iextcall(s, alloc)) -> if alloc then begin call_symbol s; if hpux then begin ` ldil LP'{emit_symbol s}, %r22\n`; describe_call i.arg; emit_call "caml_c_call" "%r2"; ` ldo RP'{emit_symbol s}(%r22), %r22\n` (* in delay slot *) end else begin ` ldil L\`{emit_symbol s}, %r22\n`; emit_call "caml_c_call" "%r2"; ` ldo {emit_symbol_low s}(%r22), %r22\n` (* in delay slot *) end; record_frame i.live end else begin if hpux then describe_call i.arg; emit_call s "%r2"; fill_delay_slot dslot end | Lop(Istackoffset n) -> ` ldo {emit_int n}(%r30), %r30\n`; stack_offset := !stack_offset + n | Lop(Iload(chunk, addr)) -> begin match i.res.(0).typ with Int | Addr -> let loadinstr = match chunk with Word -> "ldw" | Byte_unsigned | Byte_signed -> "ldb" | Sixteen_unsigned | Sixteen_signed -> "ldh" in emit_load loadinstr addr i.arg i.res.(0); begin match chunk with Byte_signed -> ` extrs {emit_reg i.res.(0)}, 31, 8, {emit_reg i.res.(0)}\n` | Sixteen_signed -> ` extrs {emit_reg i.res.(0)}, 31, 16, {emit_reg i.res.(0)}\n` | _ -> () end | Float -> emit_float_load addr i.arg i.res.(0) end | Lop(Istore(chunk, addr)) -> begin match i.arg.(0).typ with Int | Addr -> let storeinstr = match chunk with Word -> "stw" | Byte_unsigned | Byte_signed -> "stb" | Sixteen_unsigned | Sixteen_signed -> "sth" in emit_store storeinstr addr i.arg i.arg.(0) | Float -> emit_float_store addr i.arg i.arg.(0) end | Lop(Ialloc n) -> if !fastcode_flag then begin let lbl_cont = new_label() in ` ldw 0(%r4), %r1\n`; ` ldo {emit_int (-n)}(%r3), %r3\n`; ` comb,>>= %r3, %r1, {emit_label lbl_cont}\n`; ` addi 4, %r3, {emit_reg i.res.(0)}\n`; (* in delay slot *) emit_call "caml_call_gc" "%r2"; (* Cannot use %r1 to pass size, since clobbered by glue call code *) ` ldi {emit_int n}, %r29\n`; (* in delay slot *) record_frame i.live; ` addi 4, %r3, {emit_reg i.res.(0)}\n`; `{emit_label lbl_cont}:\n` end else begin emit_call "caml_alloc" "%r2"; (* Cannot use %r1 either *) ` ldi {emit_int n}, %r29\n`; (* in delay slot *) record_frame i.live; ` addi 4, %r3, {emit_reg i.res.(0)}\n` (* in delay slot *) end | Lop(Iintop Imul) -> ` stws,ma {emit_reg i.arg.(0)}, 8(%r30)\n`; ` stw {emit_reg i.arg.(1)}, -4(%r30)\n`; ` fldws -8(%r30), %fr31L\n`; ` fldws -4(%r30), %fr31R\n`; ` xmpyu %fr31L, %fr31R, %fr31\n`; ` fstws %fr31R, -8(%r30)\n`; (* poor scheduling *) ` ldws,mb -8(%r30), {emit_reg i.res.(0)}\n` | Lop(Iintop Idiv) -> (* Arguments are assumed to be in %r26 and %r25, result in %r29 *) if hpux then ` bl $$divI, %r31\n` else begin ` ldil L\`$$divI, %r1\n`; ` ble R\`$$divI(4, %r1)\n` end; fill_delay_slot dslot | Lop(Iintop Imod) -> (* Arguments are assumed to be in %r26 and %r25, result in %r29 *) if hpux then ` bl $$remI, %r31\n` else begin ` ldil L\`$$remI, %r1\n`; ` ble R\`$$remI(4, %r1)\n` end; fill_delay_slot dslot | Lop(Iintop Ilsl) -> ` subi 31, {emit_reg i.arg.(1)}, %r1\n`; ` mtsar %r1\n`; ` zvdep {emit_reg i.arg.(0)}, 32, {emit_reg i.res.(0)}\n` | Lop(Iintop Ilsr) -> ` mtsar {emit_reg i.arg.(1)}\n`; ` vshd %r0, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Iintop Iasr) -> ` subi 31, {emit_reg i.arg.(1)}, %r1\n`; ` mtsar %r1\n`; ` vextrs {emit_reg i.arg.(0)}, 32, {emit_reg i.res.(0)}\n` | Lop(Iintop(Icomp cmp)) -> let comp = name_for_int_comparison(negate_int_comparison cmp) in ` comclr,{emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`; ` ldi 1, {emit_reg i.res.(0)}\n` | Lop(Iintop Icheckbound) -> if !range_check_trap = 0 then range_check_trap := new_label(); ` comclr,>> {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, %r0\n`; ` b,n {emit_label !range_check_trap}\n` | Lop(Iintop op) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iadd, n)) -> ` addi {emit_int n}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Isub, n)) -> ` addi {emit_int(-n)}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Idiv, n)) -> let l = Misc.log2 n in ` comclr,>= {emit_reg i.arg.(0)}, %r0, %r1\n`; ` zdepi -1, 31, {emit_int l}, %r1\n`; ` add {emit_reg i.arg.(0)}, %r1, %r1\n`; ` extrs %r1, {emit_int(31-l)}, {emit_int(32-l)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Imod, n)) -> let l = Misc.log2 n in ` comclr,>= {emit_reg i.arg.(0)}, %r0, %r1\n`; ` zdepi -1, 31, {emit_int l}, %r1\n`; ` add {emit_reg i.arg.(0)}, %r1, %r1\n`; ` depi 0, 31, {emit_int l}, %r1\n`; ` sub {emit_reg i.arg.(0)}, %r1, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Ilsl, n)) -> let n = n land 31 in ` zdep {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Ilsr, n)) -> let n = n land 31 in ` extru {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Iasr, n)) -> let n = n land 31 in ` extrs {emit_reg i.arg.(0)}, {emit_int(31-n)}, {emit_int(32-n)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Icomp cmp, n)) -> let comp = name_for_int_comparison(negate_int_comparison(swap_int_comparison cmp)) in ` comiclr,{emit_string comp} {emit_int n}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`; ` ldi 1, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Icheckbound, n)) -> if !range_check_trap = 0 then range_check_trap := new_label(); ` comiclr,<< {emit_int n}, {emit_reg i.arg.(0)}, %r0\n`; ` b,n {emit_label !range_check_trap}\n` | Lop(Iintop_imm(op, n)) -> fatal_error "Emit_hppa: Iintop_imm" | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> let instr = name_for_float_operation op in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` | Lop(Inegf) -> ` fsub,dbl 0, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Iabsf) -> ` fabs,dbl {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n` | Lop(Ifloatofint) -> ` stws,ma {emit_reg i.arg.(0)}, 8(%r30)\n`; ` fldws,mb -8(%r30), %fr31L\n`; ` fcnvxf,sgl,dbl %fr31L, {emit_reg i.res.(0)}\n` | Lop(Iintoffloat) -> ` fcnvfxt,dbl,sgl {emit_reg i.arg.(0)}, %fr31L\n`; ` fstws,ma %fr31L, 8(%r30)\n`; ` ldws,mb -8(%r30), {emit_reg i.res.(0)}\n` | Lop(Ispecific sop) -> let instr = name_for_specific_operation sop in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` | Lreloadretaddr -> let n = frame_size() in ` ldw {emit_int(-n)}(%r30), %r2\n` | Lreturn -> let n = frame_size() in ` bv 0(%r2)\n`; ` ldo {emit_int(-n)}(%r30), %r30\n` (* in delay slot *) | Llabel lbl -> `{emit_label lbl}:\n` | Lbranch lbl -> begin match dslot with None -> ` b,n {emit_label lbl}\n` | Some i -> ` b {emit_label lbl}\n`; emit_instr i None end | Lcondbranch(tst, lbl) -> begin match tst with Itruetest -> emit_comib "<>" "=" 0 i.arg lbl dslot | Ifalsetest -> emit_comib "=" "<>" 0 i.arg lbl dslot | Iinttest cmp -> let comp = name_for_int_comparison cmp and negcomp = name_for_int_comparison(negate_int_comparison cmp) in emit_comb comp negcomp i.arg lbl dslot | Iinttest_imm(cmp, n) -> let scmp = swap_int_comparison cmp in let comp = name_for_int_comparison scmp and negcomp = name_for_int_comparison(negate_int_comparison scmp) in emit_comib comp negcomp n i.arg lbl dslot | Ifloattest(cmp, neg) -> let comp = name_for_float_comparison cmp neg in ` fcmp,dbl,{emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` ftest\n`; ` b {emit_label lbl}\n`; fill_delay_slot dslot | Ioddtest -> emit_comib "OD" "EV" 0 i.arg lbl dslot | Ieventest -> emit_comib "EV" "OD" 0 i.arg lbl dslot end | Lcondbranch3(lbl0, lbl1, lbl2) -> begin match lbl0 with None -> () | Some lbl -> emit_comib "=" "<>" 0 i.arg lbl None end; begin match lbl1 with None -> () | Some lbl -> emit_comib "=" "<>" 1 i.arg lbl None end; begin match lbl2 with None -> () | Some lbl -> emit_comib "=" "<>" 2 i.arg lbl None end | Lswitch jumptbl -> ` blr {emit_reg i.arg.(0)}, 0\n`; fill_delay_slot dslot; for i = 0 to Array.length jumptbl - 1 do ` b {emit_label jumptbl.(i)}\n`; ` nop\n` done | Lsetuptrap lbl -> ` bl {emit_label lbl}, %r1\n`; fill_delay_slot dslot | Lpushtrap -> stack_offset := !stack_offset + 8; ` stws,ma %r5, 8(%r30)\n`; ` stw %r1, -4(%r30)\n`; ` copy %r30, %r5\n` | Lpoptrap -> ` ldws,mb -8(%r30), %r5\n`; stack_offset := !stack_offset - 8 | Lraise -> ` ldw -4(%r5), %r1\n`; ` copy %r5, %r30\n`; ` bv 0(%r1)\n`; ` ldws,mb -8(%r30), %r5\n` (* in delay slot *) and fill_delay_slot = function None -> ` nop\n` | Some i -> emit_instr i None and emit_delay_slot = function None -> () | Some i -> emit_instr i None and emit_comb comp negcomp arg lbl dslot = if lbl >= 0 then begin ` comb,{emit_string comp} {emit_reg arg.(0)}, {emit_reg arg.(1)}, {emit_label lbl}\n`; fill_delay_slot dslot end else begin emit_delay_slot dslot; ` comclr,{emit_string negcomp} {emit_reg arg.(0)}, {emit_reg arg.(1)}, %r0\n`; ` b,n {emit_label (-lbl)}\n` end and emit_comib comp negcomp cst arg lbl dslot = if lbl >= 0 then begin ` comib,{emit_string comp} {emit_int cst}, {emit_reg arg.(0)}, {emit_label lbl}\n`; fill_delay_slot dslot end else begin emit_delay_slot dslot; ` comiclr,{emit_string negcomp} {emit_int cst}, {emit_reg arg.(0)}, %r0\n`; ` b,n {emit_label (-lbl)}\n` end (* Checks if a pseudo-instruction expands to exactly one machine instruction that does not branch. *) let is_one_instr i = match i.desc with Lop op -> begin match op with Imove | Ispill | Ireload -> begin match (i.arg.(0), i.res.(0)) with ({typ = Float; loc = Stack s}, _) -> is_immediate(slot_offset s 1) | (_, {typ = Float; loc = Stack s}) -> is_immediate(slot_offset s 1) | (_, _) -> true end | Iconst_int n -> is_offset_native n | Istackoffset _ -> true | Iload(_, Iindexed n) -> i.res.(0).typ <> Float & is_offset n | Istore(_, Iindexed n) -> i.arg.(0).typ <> Float & is_offset n | Iintop(Iadd | Isub | Iand | Ior | Ixor) -> true | Iintop_imm((Iadd | Isub | Ilsl | Ilsr | Iasr), _) -> true | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf -> true | Ispecific _ -> true | _ -> false end | Lreloadretaddr -> true | _ -> false let no_interference res arg = try for i = 0 to Array.length arg - 1 do for j = 0 to Array.length res - 1 do if arg.(i).loc = res.(j).loc then raise Exit done done; true with Exit -> false (* Emit a sequence of instructions, trying to fill delay slots for branches *) let rec emit_all i = match i with {desc = Lend} -> () | {next = {desc = Lop(Icall_imm _) | Lop(Iextcall(_, false)) | Lop(Iintop(Idiv | Imod)) | Lbranch _ | Lsetuptrap _ }} when is_one_instr i -> emit_instr i.next (Some i); emit_all i.next.next | {next = {desc = Lcondbranch(_, _) | Lswitch _}} when is_one_instr i & no_interference i.res i.next.arg -> emit_instr i.next (Some i); emit_all i.next.next | _ -> emit_instr i None; emit_all i.next (* Estimate the size of an instruction, in actual HPPA instructions *) let is_float_stack r = match r with {loc = Stack _; typ = Float} -> true | _ -> false let sizeof_instr i = match i.desc with Lend -> 0 | Lop op -> begin match op with Imove | Ispill | Ireload -> if is_float_stack i.arg.(0) || is_float_stack i.res.(0) then 2 (* ldo/fxxx *) else 1 | Iconst_int n -> if is_offset_native n then 1 else 2 (* ldi or ldil/ldo *) | Iconst_float _ -> 3 (* ldil/ldo/fldds *) | Iconst_symbol _ -> 2 (* addil/ldo *) | Icall_ind -> 2 (* ble/copy *) | Icall_imm _ -> 2 (* bl/nop *) | Itailcall_ind -> 2 (* bv/ldwm *) | Itailcall_imm _ -> 2 (* bl/ldwm *) | Iextcall(_, alloc) -> if alloc then 3 (* ldil/bl/ldo *) else 2 (* bl/nop *) | Istackoffset _ -> 1 (* ldo *) | Iload(chunk, addr) -> if i.res.(0).typ = Float then 4 (* addil/ldo/fldws/fldws *) else (match addr with Iindexed ofs when is_offset ofs -> 1 | _ -> 2) + (match chunk with Byte_signed -> 1 | Sixteen_signed -> 1 | _ -> 0) | Istore(chunk, addr) -> if i.arg.(0).typ = Float then 4 (* addil/ldo/fstws/fstws *) else (match addr with Iindexed ofs when is_offset ofs -> 1 | _ -> 2) | Ialloc _ -> if !fastcode_flag then 7 else 3 | Iintop Imul -> 7 | Iintop(Idiv | Imod) -> 3 (* ldil/ble/nop *) | Iintop Ilsl -> 3 (* subi/mtsar/zvdep *) | Iintop Ilsr -> 2 (* mtsar/vshd *) | Iintop Iasr -> 3 (* subi/mtsar/vextrs *) | Iintop(Icomp _) -> 2 (* comclr/ldi *) | Iintop Icheckbound -> 2 (* comclr/b,n *) | Iintop _ -> 1 | Iintop_imm(Idiv, _) -> 4 (* comclr/zdepi/add/extrs *) | Iintop_imm(Imod, _) -> 5 (* comclr/zdepi/add/extrs/sub *) | Iintop_imm(Icomp _, _) -> 2 (* comiclr/ldi *) | Iintop_imm(Icheckbound, _) -> 2 (* comiclr/b,n *) | Iintop_imm(_, _) -> 1 | Ifloatofint -> 3 (* stws,ma/fldws,mb/fcnvxf *) | Iintoffloat -> 3 (* fcnfxt/fstws/ldws *) | _ (* Inegf|Iabsf|Iaddf|Isubf|Imulf|Idivf|Ispecific _ *) -> 1 end | Lreloadretaddr -> 1 | Lreturn -> 2 | Llabel _ -> 0 | Lbranch _ -> 1 (* b,n *) | Lcondbranch(Ifloattest(_, _), _) -> 4 (* fcmp/ftest/b/nop *) | Lcondbranch(_, _) -> 2 (* comb/nop or comclr/b,n *) | Lcondbranch3(_, _, _) -> 6 (* worst case: three comib/nop or comclr/b,n *) | Lswitch tbl -> 2 + 2 * Array.length tbl (* blr/nop b/nop *) | Lsetuptrap _ -> 2 (* bl/nop *) | Lpushtrap -> 3 (* stws,ma/stw/copy *) | Lpoptrap -> 1 (* ldws,mb *) | Lraise -> 4 (* ldw/copy/bv/ldws,mb *) (* Estimate the position of all labels in function body and rewrite long conditional branches with a negative label. *) let fixup_cond_branches funbody = let label_position = (Hashtbl.create 87 : (label, int) Hashtbl.t) in let rec estimate_labels pos i = match i.desc with Lend -> () | Llabel lbl -> Hashtbl.add label_position lbl pos; estimate_labels pos i.next | _ -> estimate_labels (pos + sizeof_instr i) i.next in let long_branch currpos lbl = try let displ = Hashtbl.find label_position lbl - currpos in (* Branch offset is stored in 12 bits, giving a range of -2048 to +2047. Here, we allow 10% error in estimating the code positions. *) displ < -1843 || displ > 1842 with Not_found -> fatal_error "Emit_hppa.long_branch" in let rec fix_branches pos i = match i.desc with Lend -> () | Lcondbranch(tst, lbl) -> if long_branch pos lbl then i.desc <- Lcondbranch(tst, -lbl); fix_branches (pos + sizeof_instr i) i.next | Lcondbranch3(opt1, opt2, opt3) -> let fix_opt = function None -> None | Some lbl -> Some(if long_branch pos lbl then -lbl else lbl) in i.desc <- Lcondbranch3(fix_opt opt1, fix_opt opt2, fix_opt opt3); fix_branches (pos + sizeof_instr i) i.next | _ -> fix_branches (pos + sizeof_instr i) i.next in estimate_labels 0 funbody; fix_branches 0 funbody (* Emission of a function declaration *) let fundecl fundecl = fixup_cond_branches fundecl.fun_body; function_name := fundecl.fun_name; fastcode_flag := fundecl.fun_fast; tailrec_entry_point := new_label(); stack_offset := 0; float_constants := []; define_symbol fundecl.fun_name; range_check_trap := 0; let n = frame_size() in if hpux then begin ` .code\n`; ` .align 4\n`; ` .export {emit_symbol fundecl.fun_name}, entry, priv_lev=3\n`; `{emit_symbol fundecl.fun_name}:\n`; ` .proc\n`; if !contains_calls then ` .callinfo frame={emit_int n}, calls, save_rp\n` else ` .callinfo frame={emit_int n}, no_calls\n`; ` .entry\n` end else begin ` .text\n`; ` .align 2\n`; ` .globl {emit_symbol fundecl.fun_name}\n`; `{emit_symbol fundecl.fun_name}:\n` end; if !contains_calls then ` stwm %r2, {emit_int n}(%r30)\n` else if n > 0 then ` ldo {emit_int n}(%r30), %r30\n`; `{emit_label !tailrec_entry_point}:\n`; emit_all fundecl.fun_body; if !range_check_trap > 0 then begin `{emit_label !range_check_trap}:\n`; if hpux then begin emit_call "array_bound_error" "%r31"; ` nop\n` end else begin ` ldil L\`{emit_symbol "array_bound_error"}, %r1\n`; ` ble,n {emit_symbol_low "array_bound_error"}(4, %r1)\n` end end; if hpux then begin ` .exit\n`; ` .procend\n` end; List.iter emit_float_constant !float_constants (* Emission of data *) let emit_global s = define_symbol s; if hpux then ` .export {emit_symbol s}, data\n` else ` .globl {emit_symbol s}\n` let emit_item = function Cdefine_symbol s -> emit_global s; `{emit_symbol s}:\n` | Cdefine_label lbl -> `{emit_label (lbl + 100000)}:\n` | Cint8 n -> ` .byte {emit_int n}\n` | Cint16 n -> ` .short {emit_int n}\n` | Cint n -> ` .long {emit_nativeint n}\n` | Cfloat f -> ` .double {emit_string f}\n` | Csymbol_address s -> ` .long {emit_symbol s}\n` | Clabel_address lbl -> ` .long {emit_label(lbl + 100000)}\n` | Cstring s -> emit_string_directive " .ascii " s | Cskip n -> if n > 0 then if hpux then ` .block {emit_int n}\n` else ` .space {emit_int n}\n` | Calign n -> emit_align n let data l = ` .data\n`; List.iter emit_item l (* Beginning / end of an assembly file *) let begin_assembly() = if hpux then begin ` .space $PRIVATE$\n`; ` .subspa $DATA$,quad=1,align=8,access=31\n`; ` .subspa $BSS$,quad=1,align=8,access=31,zero,sort=82\n`; ` .space $TEXT$\n`; ` .subspa $LIT$,quad=0,align=8,access=44\n`; ` .subspa $CODE$,quad=0,align=8,access=44,code_only\n`; ` .import $global$, data\n`; ` .import $$divI, millicode\n`; ` .import $$remI, millicode\n` end; used_symbols := StringSet.empty; defined_symbols := StringSet.empty; called_symbols := StringSet.empty; Hashtbl.clear stub_label_table; let lbl_begin = Compilenv.current_unit_name() ^ "_data_begin" in ` .data\n`; emit_global lbl_begin; `{emit_symbol lbl_begin}:\n`; let lbl_begin = Compilenv.current_unit_name() ^ "_code_begin" in ` .code\n`; emit_global lbl_begin; `{emit_symbol lbl_begin}:\n` let end_assembly() = if not hpux then emit_stubs(); ` .code\n`; let lbl_end = Compilenv.current_unit_name() ^ "_code_end" in emit_global lbl_end; `{emit_symbol lbl_end}:\n`; ` .data\n`; let lbl_end = Compilenv.current_unit_name() ^ "_data_end" in emit_global lbl_end; `{emit_symbol lbl_end}:\n`; ` .long 0\n`; let lbl = Compilenv.current_unit_name() ^ "_frametable" in emit_global lbl; `{emit_symbol lbl}:\n`; ` .long {emit_int (List.length !frame_descriptors)}\n`; List.iter emit_frame !frame_descriptors; frame_descriptors := []; if hpux then emit_imports()