ocaml/asmcomp/arm/emit.mlp

959 lines
34 KiB
Plaintext

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 1998 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2012 Benedikt Meurer. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Emission of ARM assembly code *)
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Tradeoff between code size and code speed *)
let fastcode_flag = ref true
(* Output a label *)
let emit_label lbl =
emit_string ".L"; emit_int lbl
let emit_data_label lbl =
emit_string ".Ld"; emit_int lbl
(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
let emit_call s =
if !Clflags.dlcode || !Clflags.pic_code
then `bl {emit_symbol s}(PLT)`
else `bl {emit_symbol s}`
let emit_jump s =
if !Clflags.dlcode || !Clflags.pic_code
then `b {emit_symbol s}(PLT)`
else `b {emit_symbol s}`
(* Output a pseudo-register *)
let emit_reg = function
{loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit_arm.emit_reg"
(* Layout of the stack frame *)
let stack_offset = ref 0
let frame_size () =
let sz =
!stack_offset +
4 * num_stack_slots.(0) +
8 * num_stack_slots.(1) +
8 * num_stack_slots.(2) +
(if !contains_calls then 4 else 0)
in Misc.align sz 8
let slot_offset loc cl =
match loc with
Incoming n ->
assert (n >= 0);
frame_size() + n
| Local n ->
if cl = 0
then !stack_offset + n * 4
else !stack_offset + num_stack_slots.(0) * 4 + n * 8
| Outgoing n ->
assert (n >= 0);
n
(* Output a stack reference *)
let emit_stack r =
match r.loc with
| Stack s ->
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit_arm.emit_stack"
(* Output an addressing mode *)
let emit_addressing addr r n =
match addr with
Iindexed ofs ->
`[{emit_reg r.(n)}, #{emit_int ofs}]`
(* Record live pointers at call points *)
let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
| {typ = Val; loc = Reg r} ->
live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Val; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| {typ = Addr} as r ->
Misc.fatal_error ("bad GC root " ^ Reg.name r)
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset;
fd_debuginfo = dbg } :: !frame_descriptors;
lbl
let record_frame live dbg =
let lbl = record_frame_label live dbg in `{emit_label lbl}:`
(* Record calls to the GC -- we've moved them out of the way *)
type gc_call =
{ gc_lbl: label; (* Entry label *)
gc_return_lbl: label; (* Where to branch after GC *)
gc_frame_lbl: label } (* Label of frame descriptor *)
let call_gc_sites = ref ([] : gc_call list)
let emit_call_gc gc =
`{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
`{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
(* Record calls to caml_ml_array_bound_error.
In debug mode, we maintain one call to caml_ml_array_bound_error
per bound check site. Otherwise, we can share a single call. *)
type bound_error_call =
{ bd_lbl: label; (* Entry label *)
bd_frame_lbl: label } (* Label of frame descriptor *)
let bound_error_sites = ref ([] : bound_error_call list)
let bound_error_label dbg =
if !Clflags.debug || !bound_error_sites = [] then begin
let lbl_bound_error = new_label() in
let lbl_frame = record_frame_label Reg.Set.empty dbg in
bound_error_sites :=
{ bd_lbl = lbl_bound_error;
bd_frame_lbl = lbl_frame } :: !bound_error_sites;
lbl_bound_error
end else begin
let bd = List.hd !bound_error_sites in bd.bd_lbl
end
let emit_call_bound_error bd =
`{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
`{emit_label bd.bd_frame_lbl}:\n`
(* Negate a comparison *)
let negate_integer_comparison = function
Isigned cmp -> Isigned(negate_comparison cmp)
| Iunsigned cmp -> Iunsigned(negate_comparison cmp)
(* Names of various instructions *)
let name_for_comparison = function
Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
| Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
let name_for_int_operation = function
(* Use adds,subs,... to enable 16-bit T1 encoding *)
Iadd -> "adds"
| Isub -> "subs"
| Imul -> "mul"
| Imulh -> "smmul"
| Iand -> "ands"
| Ior -> "orrs"
| Ixor -> "eors"
| Ilsl -> "lsls"
| Ilsr -> "lsrs"
| Iasr -> "asrs"
| _ -> assert false
let name_for_shift_operation = function
Ishiftlogicalleft -> "lsl"
| Ishiftlogicalright -> "lsr"
| Ishiftarithmeticright -> "asr"
(* General functional to decompose a non-immediate integer constant
into 8-bit chunks shifted left 0 ... 30 bits. *)
let decompose_intconst n fn =
let i = ref n in
let shift = ref 0 in
let ninstr = ref 0 in
while !i <> 0l do
if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then
shift := !shift + 2
else begin
let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in
i := Int32.sub !i bits;
shift := !shift + 8;
incr ninstr;
fn bits
end
done;
!ninstr
(* Load an integer constant into a register *)
let emit_intconst dst n =
let nr = Int32.lognot n in
if is_immediate n then begin
(* Use movs here to enable 16-bit T1 encoding *)
` movs {emit_reg dst}, #{emit_int32 n}\n`; 1
end else if is_immediate nr then begin
` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1
end else if !arch > ARMv6 then begin
let nl = Int32.logand 0xffffl n in
let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in
if nh = 0l then begin
` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1
end else if Int32.logand nl 0xffl = nl then begin
` movs {emit_reg dst}, #{emit_int32 nl}\n`;
` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
end else begin
` movw {emit_reg dst}, #{emit_int32 nl}\n`;
` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
end
end else begin
let first = ref true in
decompose_intconst n
(fun bits ->
if !first
(* Use movs,adds here to enable 16-bit T1 encoding *)
then ` movs {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n`
else ` adds {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`;
first := false)
end
(* Adjust sp (up or down) by the given byte amount *)
let emit_stack_adjustment n =
if n = 0 then 0 else begin
let instr = if n < 0 then "sub" else "add" in
let ninstr = decompose_intconst (Int32.of_int (abs n))
(fun bits ->
` {emit_string instr} sp, sp, #{emit_int32 bits}\n`) in
cfi_adjust_cfa_offset (-n);
ninstr
end
(* Deallocate the stack frame before a return or tail call *)
let output_epilogue f =
let n = frame_size() in
if n > 0 then begin
let ninstr = emit_stack_adjustment n in
let ninstr = ninstr + f () in
(* reset CFA back cause function body may continue *)
cfi_adjust_cfa_offset n;
ninstr
end else
f ()
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Pending floating-point literals *)
let float_literals = ref ([] : (int64 * label) list)
(* Pending relative references to the global offset table *)
let gotrel_literals = ref ([] : (label * label) list)
(* Pending symbol literals *)
let symbol_literals = ref ([] : (string * label) list)
(* Total space (in words) occupied by pending literals *)
let num_literals = ref 0
(* Label a floating-point literal *)
let float_literal f =
try
List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
num_literals := !num_literals + 2;
float_literals := (f, lbl) :: !float_literals;
lbl
(* Label a GOTREL literal *)
let gotrel_literal l =
let lbl = new_label() in
num_literals := !num_literals + 1;
gotrel_literals := (l, lbl) :: !gotrel_literals;
lbl
(* Label a symbol literal *)
let symbol_literal s =
try
List.assoc s !symbol_literals
with Not_found ->
let lbl = new_label() in
num_literals := !num_literals + 1;
symbol_literals := (s, lbl) :: !symbol_literals;
lbl
(* Emit all pending literals *)
let emit_literals() =
if !float_literals <> [] then begin
` .align 3\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}:`; emit_float64_split_directive ".long" f)
!float_literals;
float_literals := []
end;
if !symbol_literals <> [] then begin
let offset = if !thumb then 4 else 8 in
let suffix = if !Clflags.pic_code then "(GOT)" else "" in
` .align 2\n`;
List.iter
(fun (l, lbl) ->
`{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`)
!gotrel_literals;
List.iter
(fun (s, lbl) ->
`{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`)
!symbol_literals;
gotrel_literals := [];
symbol_literals := []
end;
num_literals := 0
(* Emit code to load the address of a symbol *)
let emit_load_symbol_addr dst s =
if !Clflags.pic_code then begin
let lbl_pic = new_label() in
let lbl_got = gotrel_literal lbl_pic in
let lbl_sym = symbol_literal s in
(* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml),
so use r12 as temporary scratch register unless the destination is
r12, then we use r3 instead. *)
let tmp = if dst.loc = Reg 8 (*r12*)
then phys_reg 3 (*r3*)
else phys_reg 8 (*r12*) in
` ldr {emit_reg tmp}, {emit_label lbl_got}\n`;
` ldr {emit_reg dst}, {emit_label lbl_sym}\n`;
`{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`;
` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`;
4
end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin
` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`;
` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`;
2
end else begin
let lbl = symbol_literal s in
` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`;
1
end
(* Output the assembly code for an instruction *)
let emit_instr i =
emit_debug_info i.dbg;
match i.desc with
Lend -> 0
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc = dst.loc then 0 else begin
begin match (src, dst) with
{loc = Reg _; typ = Float}, {loc = Reg _} ->
` fcpyd {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _}, {loc = Reg _} ->
` mov {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _; typ = Float}, _ ->
` fstd {emit_reg src}, {emit_stack dst}\n`
| {loc = Reg _}, _ ->
` str {emit_reg src}, {emit_stack dst}\n`
| {typ = Float}, _ ->
` fldd {emit_reg dst}, {emit_stack src}\n`
| _ ->
` ldr {emit_reg dst}, {emit_stack src}\n`
end; 1
end
| Lop(Iconst_int n | Iconst_blockheader n) ->
emit_intconst i.res.(0) (Nativeint.to_int32 n)
| Lop(Iconst_float f) when !fpu = Soft ->
let high_bits = Int64.to_int32 (Int64.shift_right_logical f 32)
and low_bits = Int64.to_int32 f in
if is_immediate low_bits || is_immediate high_bits then begin
let ninstr_low = emit_intconst i.res.(0) low_bits
and ninstr_high = emit_intconst i.res.(1) high_bits in
ninstr_low + ninstr_high
end else begin
let lbl = float_literal f in
` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
2
end
| Lop(Iconst_float f) when !fpu = VFPv2 ->
let lbl = float_literal f in
` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`;
1
| Lop(Iconst_float f) ->
let encode imm =
let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
let ex = Int64.to_int (Int64.shift_right_logical imm 52) in
let ex = (ex land 0x7ff) - 1023 in
let mn = Int64.logand imm 0xfffffffffffffL in
if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4
then
None
else begin
let mn = Int64.to_int (Int64.shift_right_logical mn 48) in
if mn land 0x0f <> mn then
None
else
let ex = ((ex + 3) land 0x07) lxor 0x04 in
Some((sg lsl 7) lor (ex lsl 4) lor mn)
end in
begin match encode f with
None ->
let lbl = float_literal f in
` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`
| Some imm8 ->
` fconstd {emit_reg i.res.(0)}, #{emit_int imm8}\n`
end; 1
| Lop(Iconst_symbol s) ->
emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
if !arch >= ARMv5 then begin
` blx {emit_reg i.arg.(0)}\n`;
`{record_frame i.live i.dbg}\n`; 1
end else begin
` mov lr, pc\n`;
` bx {emit_reg i.arg.(0)}\n`;
`{record_frame i.live i.dbg}\n`; 2
end
| Lop(Icall_imm s) ->
` {emit_call s}\n`;
`{record_frame i.live i.dbg}\n`; 1
| Lop(Itailcall_ind) ->
output_epilogue begin fun () ->
if !contains_calls then
` ldr lr, [sp, #{emit_int (-4)}]\n`;
` bx {emit_reg i.arg.(0)}\n`; 2
end
| Lop(Itailcall_imm s) ->
if s = !function_name then begin
` b {emit_label !tailrec_entry_point}\n`; 1
end else begin
output_epilogue begin fun () ->
if !contains_calls then
` ldr lr, [sp, #{emit_int (-4)}]\n`;
` {emit_jump s}\n`; 2
end
end
| Lop(Iextcall(s, false)) ->
` {emit_call s}\n`; 1
| Lop(Iextcall(s, true)) ->
let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in
` {emit_call "caml_c_call"}\n`;
`{record_frame i.live i.dbg}\n`;
1 + ninstr
| Lop(Istackoffset n) ->
assert (n mod 8 = 0);
let ninstr = emit_stack_adjustment (-n) in
stack_offset := !stack_offset + n;
ninstr
| Lop(Iload(Single, addr)) when !fpu >= VFPv2 ->
` flds s14, {emit_addressing addr i.arg 0}\n`;
` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
| Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
(* Use LDM or LDRD if possible *)
begin match i.res.(0), i.res.(1), addr with
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
when rt < rt2 ->
` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1
| {loc = Reg rt}, {loc = Reg rt2}, addr
when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1
| _ ->
let addr' = offset_addressing addr 4 in
if i.res.(0).loc <> i.arg.(0).loc then begin
` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
end else begin
` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
end; 2
end
| Lop(Iload(size, addr)) ->
let r = i.res.(0) in
let instr =
match size with
Byte_unsigned -> "ldrb"
| Byte_signed -> "ldrsb"
| Sixteen_unsigned -> "ldrh"
| Sixteen_signed -> "ldrsh"
| Double
| Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
| Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 ->
` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
| Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft ->
(* Use STM or STRD if possible *)
begin match i.arg.(0), i.arg.(1), addr with
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
when rt < rt2 ->
` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1
| {loc = Reg rt}, {loc = Reg rt2}, addr
when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1
| _ ->
let addr' = offset_addressing addr 4 in
` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
end
| Lop(Istore(size, addr, _)) ->
let r = i.arg.(0) in
let instr =
match size with
Byte_unsigned
| Byte_signed -> "strb"
| Sixteen_unsigned
| Sixteen_signed -> "strh"
| Double
| Double_u -> "fstd"
| _ (* 32-bit quantities *) -> "str" in
` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1
| Lop(Ialloc n) ->
let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
let lbl_redo = new_label() in
`{emit_label lbl_redo}:`;
let ninstr = decompose_intconst
(Int32.of_int n)
(fun i ->
` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
` cmp alloc_ptr, alloc_limit\n`;
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
let lbl_call_gc = new_label() in
` bcc {emit_label lbl_call_gc}\n`;
call_gc_sites :=
{ gc_lbl = lbl_call_gc;
gc_return_lbl = lbl_redo;
gc_frame_lbl = lbl_frame } :: !call_gc_sites;
3 + ninstr
end else begin
let ninstr =
begin match n with
8 -> ` {emit_call "caml_alloc1"}\n`; 1
| 12 -> ` {emit_call "caml_alloc2"}\n`; 1
| 16 -> ` {emit_call "caml_alloc3"}\n`; 1
| _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in
` {emit_call "caml_allocN"}\n`; 1 + ninstr
end in
`{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
1 + ninstr
end
| Lop(Iintop(Icomp cmp)) ->
let compthen = name_for_comparison cmp in
let compelse = name_for_comparison (negate_integer_comparison cmp) in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` ite {emit_string compthen}\n`;
` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
| Lop(Iintop_imm(Icomp cmp, n)) ->
let compthen = name_for_comparison cmp in
let compelse = name_for_comparison (negate_integer_comparison cmp) in
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` ite {emit_string compthen}\n`;
` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
| Lop(Iintop Icheckbound) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` bls {emit_label lbl}\n`; 2
| Lop(Iintop_imm(Icheckbound, n)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` bls {emit_label lbl}\n`; 2
| Lop(Ispecific(Ishiftcheckbound(shiftop, n))) ->
let lbl = bound_error_label i.dbg in
let op = name_for_shift_operation shiftop in
` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, {emit_string op} #{emit_int n}\n`;
` bcs {emit_label lbl}\n`; 2
| Lop(Iintop Imulh) when !arch < ARMv6 ->
` smull r12, {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
| Lop(Ispecific Imulhadd) ->
` smmla {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
| Lop(Iabsf | Inegf as op) when !fpu = Soft ->
let instr = (match op with
Iabsf -> "bic"
| Inegf -> "eor"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1
| Lop(Iabsf | Inegf | Ispecific Isqrtf as op) ->
let instr = (match op with
Iabsf -> "fabsd"
| Inegf -> "fnegd"
| Ispecific Isqrtf -> "fsqrtd"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
| Lop(Ifloatofint) ->
` fmsr s14, {emit_reg i.arg.(0)}\n`;
` fsitod {emit_reg i.res.(0)}, s14\n`; 2
| Lop(Iintoffloat) ->
` ftosizd s14, {emit_reg i.arg.(0)}\n`;
` fmrs {emit_reg i.res.(0)}, s14\n`; 2
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
let instr = (match op with
Iaddf -> "faddd"
| Isubf -> "fsubd"
| Imulf -> "fmuld"
| Idivf -> "fdivd"
| Ispecific Inegmulf -> "fnmuld"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
1
| Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
let instr = (match op with
Imuladdf -> "fmacd"
| Inegmuladdf -> "fnmacd"
| Imulsubf -> "fmscd"
| Inegmulsubf -> "fnmscd"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`;
1
| Lop(Ispecific(Ishiftarith(op, shiftop, n))) ->
let instr = (match op with
Ishiftadd -> "add"
| Ishiftsub -> "sub"
| Ishiftsubrev -> "rsb"
| Ishiftand -> "and"
| Ishiftor -> "orr"
| Ishiftxor -> "eor") in
let op = name_for_shift_operation shiftop in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_string op} #{emit_int n}\n`; 1
| Lop(Ispecific(Irevsubimm n)) ->
` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
| Lop(Ispecific(Imuladd | Imulsub as op)) ->
let instr = (match op with
Imuladd -> "mla"
| Imulsub -> "mls"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
| Lop(Ispecific(Ibswap size)) ->
begin match size with
16 ->
` rev16 {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
` movt {emit_reg i.res.(0)}, #0\n`; 2
| 32 ->
` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
| _ ->
assert false
end
| Lreloadretaddr ->
let n = frame_size() in
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
| Lreturn ->
output_epilogue begin fun () ->
` bx lr\n`; 1
end
| Llabel lbl ->
`{emit_label lbl}:\n`; 0
| Lbranch lbl ->
` b {emit_label lbl}\n`; 1
| Lcondbranch(tst, lbl) ->
begin match tst with
Itruetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
` bne {emit_label lbl}\n`; 2
| Ifalsetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
` beq {emit_label lbl}\n`; 2
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
` b{emit_string comp} {emit_label lbl}\n`; 2
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
` b{emit_string comp} {emit_label lbl}\n`; 2
| Ifloattest(cmp, neg) ->
let comp = (match (cmp, neg) with
(Ceq, false) | (Cne, true) -> "eq"
| (Cne, false) | (Ceq, true) -> "ne"
| (Clt, false) -> "cc"
| (Clt, true) -> "cs"
| (Cle, false) -> "ls"
| (Cle, true) -> "hi"
| (Cgt, false) -> "gt"
| (Cgt, true) -> "le"
| (Cge, false) -> "ge"
| (Cge, true) -> "lt") in
` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` fmstat\n`;
` b{emit_string comp} {emit_label lbl}\n`; 3
| Ioddtest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
` bne {emit_label lbl}\n`; 2
| Ieventest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
` beq {emit_label lbl}\n`; 2
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
| Some lbl -> ` blt {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` beq {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` bgt {emit_label lbl}\n`
end;
4
| Lswitch jumptbl ->
if !arch > ARMv6 && !thumb then begin
(* The Thumb-2 TBH instruction supports only forward branches,
so we need to generate appropriate trampolines for all labels
that appear before this switch instruction (PR#5623) *)
let tramtbl = Array.copy jumptbl in
` tbh [pc, {emit_reg i.arg.(0)}, lsl #1]\n`;
for j = 0 to Array.length tramtbl - 1 do
let rec label i =
match i.desc with
Lend -> new_label()
| Llabel lbl when lbl = tramtbl.(j) -> lbl
| _ -> label i.next in
tramtbl.(j) <- label i.next;
` .short ({emit_label tramtbl.(j)}-.)/2+{emit_int j}\n`
done;
(* Generate the necessary trampolines *)
for j = 0 to Array.length tramtbl - 1 do
if tramtbl.(j) <> jumptbl.(j) then
`{emit_label tramtbl.(j)}: b {emit_label jumptbl.(j)}\n`
done
end else if not !Clflags.pic_code then begin
` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
` nop\n`;
for j = 0 to Array.length jumptbl - 1 do
` .word {emit_label jumptbl.(j)}\n`
done
end else begin
(* Slightly slower, but position-independent *)
` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`;
` nop\n`;
for j = 0 to Array.length jumptbl - 1 do
` b {emit_label jumptbl.(j)}\n`
done
end;
2 + Array.length jumptbl
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`; 1
| Lpushtrap ->
stack_offset := !stack_offset + 8;
` push \{trap_ptr, lr}\n`;
cfi_adjust_cfa_offset 8;
` mov trap_ptr, sp\n`; 2
| Lpoptrap ->
` pop \{trap_ptr, lr}\n`;
cfi_adjust_cfa_offset (-8);
stack_offset := !stack_offset - 8; 1
| Lraise k ->
begin match !Clflags.debug, k with
| true, (Lambda.Raise_regular | Lambda.Raise_reraise) ->
` {emit_call "caml_raise_exn"}\n`;
`{record_frame Reg.Set.empty i.dbg}\n`; 1
| false, _
| true, Lambda.Raise_notrace ->
` mov sp, trap_ptr\n`;
` pop \{trap_ptr, pc}\n`; 2
end
(* Emission of an instruction sequence *)
let rec emit_all ninstr i =
if i.desc = Lend then () else begin
let n = emit_instr i in
let ninstr' = ninstr + n in
(* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
let limit = (if !fpu >= VFPv2 && !float_literals <> []
then 127
else 511) in
let limit = limit - !num_literals in
if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin
emit_literals();
emit_all 0 i.next
end else if !num_literals != 0 && ninstr' >= limit then begin
let lbl = new_label() in
` b {emit_label lbl}\n`;
emit_literals();
`{emit_label lbl}:\n`;
emit_all 0 i.next
end else
emit_all ninstr' i.next
end
(* Emission of the profiling prelude *)
let emit_profile() =
match Config.system with
"linux_eabi" | "linux_eabihf" | "netbsd" ->
` push \{lr}\n`;
` {emit_call "__gnu_mcount_nc"}\n`
| _ -> ()
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
float_literals := [];
gotrel_literals := [];
symbol_literals := [];
stack_offset := 0;
call_gc_sites := [];
bound_error_sites := [];
` .text\n`;
` .align 2\n`;
` .globl {emit_symbol fundecl.fun_name}\n`;
if !arch > ARMv6 && !thumb then
` .thumb\n`
else
` .arm\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
emit_debug_info fundecl.fun_dbg;
cfi_startproc();
if !Clflags.gprofile then emit_profile();
let n = frame_size() in
if n > 0 then begin
ignore(emit_stack_adjustment (-n));
if !contains_calls then begin
cfi_offset ~reg:14 (* lr *) ~offset:(-4);
` str lr, [sp, #{emit_int(n - 4)}]\n`
end
end;
`{emit_label !tailrec_entry_point}:\n`;
emit_all 0 fundecl.fun_body;
emit_literals();
List.iter emit_call_gc !call_gc_sites;
List.iter emit_call_bound_error !bound_error_sites;
cfi_endproc();
` .type {emit_symbol fundecl.fun_name}, %function\n`;
` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`
(* Emission of data *)
let emit_item = function
Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
| Cdefine_symbol s -> `{emit_symbol s}:\n`
| Cdefine_label lbl -> `{emit_data_label lbl}:\n`
| Cint8 n -> ` .byte {emit_int n}\n`
| Cint16 n -> ` .short {emit_int n}\n`
| Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
| Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
| Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f)
| Cdouble f -> emit_float64_split_directive ".long" (Int64.bits_of_float f)
| Csymbol_address s -> ` .word {emit_symbol s}\n`
| Clabel_address lbl -> ` .word {emit_data_label lbl}\n`
| Cstring s -> emit_string_directive " .ascii " s
| Cskip n -> if n > 0 then ` .space {emit_int n}\n`
| Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
reset_debug_info();
` .file \"\"\n`; (* PR#7037 *)
` .syntax unified\n`;
begin match !arch with
| ARMv4 -> ` .arch armv4t\n`
| ARMv5 -> ` .arch armv5t\n`
| ARMv5TE -> ` .arch armv5te\n`
| ARMv6 -> ` .arch armv6\n`
| ARMv6T2 -> ` .arch armv6t2\n`
| ARMv7 -> ` .arch armv7-a\n`
end;
begin match !fpu with
Soft -> ` .fpu softvfp\n`
| VFPv2 -> ` .fpu vfpv2\n`
| VFPv3_D16 -> ` .fpu vfpv3-d16\n`
| VFPv3 -> ` .fpu vfpv3\n`
end;
`trap_ptr .req r8\n`;
`alloc_ptr .req r10\n`;
`alloc_limit .req r11\n`;
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
emit_frames
{ efa_label = (fun lbl ->
` .type {emit_label lbl}, %function\n`;
` .word {emit_label lbl}\n`);
efa_16 = (fun n -> ` .short {emit_int n}\n`);
efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
efa_word = (fun n -> ` .word {emit_int n}\n`);
efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
efa_label_rel = (fun lbl ofs ->
` .word {emit_label lbl} - . + {emit_int32 ofs}\n`);
efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
efa_string = (fun s -> emit_string_directive " .asciz " s) };
` .type {emit_symbol lbl}, %object\n`;
` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
begin match Config.system with
"linux_eabihf" | "linux_eabi" | "netbsd" ->
(* Mark stack as non-executable *)
` .section .note.GNU-stack,\"\",%progbits\n`
| _ -> ()
end