ocaml/asmcomp/arm64/emit.mlp

992 lines
36 KiB
Plaintext

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2012 Benedikt Meurer. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Emission of ARM assembly code, 64-bit mode *)
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Tradeoff between code size and code speed *)
let fastcode_flag = ref true
(* Names for special regs *)
let reg_trap_ptr = phys_reg 23
let reg_alloc_ptr = phys_reg 24
let reg_alloc_limit = phys_reg 25
let reg_tmp1 = phys_reg 26
let reg_tmp2 = phys_reg 27
let reg_x15 = phys_reg 15
(* Output a label *)
let emit_label lbl =
emit_string ".L"; emit_int lbl
let emit_data_label lbl =
emit_string ".Ld"; emit_int lbl
(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
(* Output a pseudo-register *)
let emit_reg = function
{loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit.emit_reg"
(* Likewise, but with the 32-bit name of the register *)
let int_reg_name_w =
[| "w0"; "w1"; "w2"; "w3"; "w4"; "w5"; "w6"; "w7";
"w8"; "w9"; "w10"; "w11"; "w12"; "w13"; "w14"; "w15";
"w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25";
"w26"; "w27"; "w28"; "w16"; "w17" |]
let emit_wreg = function
{loc = Reg r} -> emit_string int_reg_name_w.(r)
| _ -> fatal_error "Emit.emit_wreg"
(* Layout of the stack frame *)
let stack_offset = ref 0
let frame_size () =
let sz =
!stack_offset +
8 * num_stack_slots.(0) +
8 * num_stack_slots.(1) +
(if !contains_calls then 8 else 0)
in Misc.align sz 16
let slot_offset loc cl =
match loc with
Incoming n ->
assert (n >= 0);
frame_size() + n
| Local n ->
!stack_offset +
(if cl = 0
then n * 8
else num_stack_slots.(0) * 8 + n * 8)
| Outgoing n ->
assert (n >= 0);
n
(* Output a stack reference *)
let emit_stack r =
match r.loc with
| Stack s ->
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit.emit_stack"
(* Output an addressing mode *)
let emit_symbol_offset s ofs =
emit_symbol s;
if ofs > 0 then `+{emit_int ofs}`
else if ofs < 0 then `-{emit_int (-ofs)}`
else ()
let emit_addressing addr r =
match addr with
| Iindexed ofs ->
`[{emit_reg r}, #{emit_int ofs}]`
| Ibased(s, ofs) ->
`[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]`
(* Record live pointers at call points *)
let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
| {typ = Val; loc = Reg r} ->
live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Val; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| {typ = Addr} as r ->
Misc.fatal_error ("bad GC root " ^ Reg.name r)
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset;
fd_debuginfo = dbg } :: !frame_descriptors;
lbl
let record_frame live dbg =
let lbl = record_frame_label live dbg in `{emit_label lbl}:`
(* Record calls to the GC -- we've moved them out of the way *)
type gc_call =
{ gc_lbl: label; (* Entry label *)
gc_return_lbl: label; (* Where to branch after GC *)
gc_frame_lbl: label } (* Label of frame descriptor *)
let call_gc_sites = ref ([] : gc_call list)
let emit_call_gc gc =
`{emit_label gc.gc_lbl}: bl {emit_symbol "caml_call_gc"}\n`;
`{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
(* Record calls to caml_ml_array_bound_error.
In debug mode, we maintain one call to caml_ml_array_bound_error
per bound check site. Otherwise, we can share a single call. *)
type bound_error_call =
{ bd_lbl: label; (* Entry label *)
bd_frame_lbl: label } (* Label of frame descriptor *)
let bound_error_sites = ref ([] : bound_error_call list)
let bound_error_label dbg =
if !Clflags.debug || !bound_error_sites = [] then begin
let lbl_bound_error = new_label() in
let lbl_frame = record_frame_label Reg.Set.empty dbg in
bound_error_sites :=
{ bd_lbl = lbl_bound_error;
bd_frame_lbl = lbl_frame } :: !bound_error_sites;
lbl_bound_error
end else begin
let bd = List.hd !bound_error_sites in bd.bd_lbl
end
let emit_call_bound_error bd =
`{emit_label bd.bd_lbl}: bl {emit_symbol "caml_ml_array_bound_error"}\n`;
`{emit_label bd.bd_frame_lbl}:\n`
(* Names of various instructions *)
let name_for_comparison = function
| Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
| Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
let name_for_int_operation = function
| Iadd -> "add"
| Isub -> "sub"
| Imul -> "mul"
| Idiv -> "sdiv"
| Iand -> "and"
| Ior -> "orr"
| Ixor -> "eor"
| Ilsl -> "lsl"
| Ilsr -> "lsr"
| Iasr -> "asr"
| _ -> assert false
(* Load an integer constant into a register *)
let emit_intconst dst n =
let rec emit_pos first shift =
if shift < 0 then begin
if first then ` mov {emit_reg dst}, xzr\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then emit_pos first (shift - 16) else begin
if first then
` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_pos false (shift - 16)
end
end
and emit_neg first shift =
if shift < 0 then begin
if first then ` movn {emit_reg dst}, #0\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then emit_neg first (shift - 16) else begin
if first then
` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_neg false (shift - 16)
end
end
in
if n < 0n then emit_neg true 48 else emit_pos true 48
let num_instructions_for_intconst n =
let num_instructions = ref 0 in
let rec count_pos first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then count_pos first (shift - 16) else begin
incr num_instructions;
count_pos false (shift - 16)
end
end
and count_neg first shift =
if shift < 0 then begin
if first then incr num_instructions
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then count_neg first (shift - 16) else begin
incr num_instructions;
count_neg false (shift - 16)
end
end
in
if n < 0n then count_neg true 48 else count_pos true 48;
!num_instructions
(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
"a normalized binary floating point encoding with 1 sign bit, 4
bits of fraction and a 3-bit exponent" *)
let is_immediate_float bits =
let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
(* Adjust sp (up or down) by the given byte amount *)
let emit_stack_adjustment n =
let instr = if n < 0 then "sub" else "add" in
let m = abs n in
assert (m < 0x1_000_000);
let ml = m land 0xFFF and mh = m land 0xFFF_000 in
if mh <> 0 then ` {emit_string instr} sp, sp, #{emit_int mh}\n`;
if ml <> 0 then ` {emit_string instr} sp, sp, #{emit_int ml}\n`;
if n <> 0 then cfi_adjust_cfa_offset (-n)
(* Deallocate the stack frame and reload the return address
before a return or tail call *)
let output_epilogue f =
let n = frame_size() in
if !contains_calls then
` ldr x30, [sp, #{emit_int (n-8)}]\n`;
if n > 0 then
emit_stack_adjustment n;
f();
(* reset CFA back because function body may continue *)
if n > 0 then cfi_adjust_cfa_offset n
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Pending floating-point literals *)
let float_literals = ref ([] : (int64 * label) list)
(* Label a floating-point literal *)
let float_literal f =
try
List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
float_literals := (f, lbl) :: !float_literals;
lbl
(* Emit all pending literals *)
let emit_literals() =
if !float_literals <> [] then begin
` .align 3\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}:`; emit_float64_directive ".quad" f)
!float_literals;
float_literals := []
end
(* Emit code to load the address of a symbol *)
let emit_load_symbol_addr dst s =
if (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit s then begin
` adrp {emit_reg dst}, {emit_symbol s}\n`;
` add {emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n`
end else begin
` adrp {emit_reg dst}, :got:{emit_symbol s}\n`;
` ldr {emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n`
end
(* The following functions are used for calculating the sizes of the
call GC and bounds check points emitted out-of-line from the function
body. See branch_relaxation.mli. *)
let num_call_gc_and_check_bound_points instr =
let rec loop instr ((call_gc, check_bound) as totals) =
match instr.desc with
| Lend -> totals
| Lop (Ialloc _) when !fastcode_flag ->
loop instr.next (call_gc + 1, check_bound)
| Lop (Iintop Icheckbound)
| Lop (Iintop_imm (Icheckbound, _))
| Lop (Ispecific (Ishiftcheckbound _)) ->
let check_bound =
(* When not in debug mode, there is at most one check-bound point. *)
if not !Clflags.debug then 1
else check_bound + 1
in
loop instr.next (call_gc, check_bound)
(* The following four should never be seen, since this function is run
before branch relaxation. *)
| Lop (Ispecific (Ifar_alloc _))
| Lop (Ispecific Ifar_intop_checkbound)
| Lop (Ispecific (Ifar_intop_imm_checkbound _))
| Lop (Ispecific (Ifar_shiftcheckbound _)) -> assert false
| _ -> loop instr.next totals
in
loop instr (0, 0)
let max_out_of_line_code_offset instr ~num_call_gc ~num_check_bound =
if num_call_gc < 1 && num_check_bound < 1 then 0
else begin
let size_of_call_gc = 2 in
let size_of_check_bound = 1 in
let size_of_last_thing =
(* Call-GC points come before check-bound points. *)
if num_check_bound >= 1 then size_of_check_bound else size_of_call_gc
in
let total_size =
size_of_call_gc*num_call_gc + size_of_check_bound*num_check_bound
in
let max_offset = total_size - size_of_last_thing in
assert (max_offset >= 0);
max_offset
end
module BR = Branch_relaxation.Make (struct
(* CR-someday mshinwell: B and BL have +/- 128Mb ranges; for the moment we
assume we will never exceed this. It would seem to be most likely to
occur for branches between functions; in this case, the linker should be
able to insert veneers anyway. (See section 4.6.7 of the document
"ELF for the ARM 64-bit architecture (AArch64)".) *)
type distance = int
module Cond_branch = struct
type t = TB | CB | Bcc
let all = [TB; CB; Bcc]
(* AArch64 instructions are 32 bits wide, so [distance] in this module
means units of 32-bit words. *)
let max_displacement = function
| TB -> 32 * 1024 / 4 (* +/- 32Kb *)
| CB | Bcc -> 1 * 1024 * 1024 / 4 (* +/- 1Mb *)
let classify_instr = function
| Lop (Ialloc _)
| Lop (Iintop Icheckbound)
| Lop (Iintop_imm (Icheckbound, _))
| Lop (Ispecific (Ishiftcheckbound _)) -> Some Bcc
(* The various "far" variants in [specific_operation] don't need to
return [Some] here, since their code sequences never contain any
conditional branches that might need relaxing. *)
| Lcondbranch (Itruetest, _)
| Lcondbranch (Ifalsetest, _) -> Some CB
| Lcondbranch (Iinttest _, _)
| Lcondbranch (Iinttest_imm _, _)
| Lcondbranch (Ifloattest _, _) -> Some Bcc
| Lcondbranch (Ioddtest, _)
| Lcondbranch (Ieventest, _) -> Some TB
| Lcondbranch3 _ -> Some Bcc
| _ -> None
end
let offset_pc_at_branch = 0
let epilogue_size () =
if !contains_calls then 3 else 2
let instr_size = function
| Lend -> 0
| Lop (Imove | Ispill | Ireload) -> 1
| Lop (Iconst_int n | Iconst_blockheader n) ->
num_instructions_for_intconst n
| Lop (Iconst_float _) -> 2
| Lop (Iconst_symbol _) -> 2
| Lop (Icall_ind) -> 1
| Lop (Icall_imm _) -> 1
| Lop (Itailcall_ind) -> epilogue_size ()
| Lop (Itailcall_imm s) ->
if s = !function_name then 1 else epilogue_size ()
| Lop (Iextcall (_, false)) -> 1
| Lop (Iextcall (_, true)) -> 3
| Lop (Istackoffset _) -> 2
| Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) ->
let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in
based + begin match size with Single -> 2 | _ -> 1 end
| Lop (Ialloc _) when !fastcode_flag -> 4
| Lop (Ispecific (Ifar_alloc _)) when !fastcode_flag -> 5
| Lop (Ialloc num_words) | Lop (Ispecific (Ifar_alloc num_words)) ->
begin match num_words with
| 16 | 24 | 32 -> 1
| _ -> 1 + num_instructions_for_intconst (Nativeint.of_int num_words)
end
| Lop (Iintop (Icomp _)) -> 2
| Lop (Iintop_imm (Icomp _, _)) -> 2
| Lop (Iintop Icheckbound) -> 2
| Lop (Ispecific Ifar_intop_checkbound) -> 3
| Lop (Iintop_imm (Icheckbound, _)) -> 2
| Lop (Ispecific (Ifar_intop_imm_checkbound _)) -> 3
| Lop (Ispecific (Ishiftcheckbound _)) -> 2
| Lop (Ispecific (Ifar_shiftcheckbound _)) -> 3
| Lop (Iintop Imod) -> 2
| Lop (Iintop Imulh) -> 1
| Lop (Iintop _) -> 1
| Lop (Iintop_imm _) -> 1
| Lop (Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf) -> 1
| Lop (Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf) -> 1
| Lop (Ispecific (Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)) -> 1
| Lop (Ispecific (Ishiftarith _)) -> 1
| Lop (Ispecific (Imuladd | Imulsub)) -> 1
| Lop (Ispecific (Ibswap 16)) -> 2
| Lop (Ispecific (Ibswap _)) -> 1
| Lreloadretaddr -> 0
| Lreturn -> epilogue_size ()
| Llabel _ -> 0
| Lbranch _ -> 1
| Lcondbranch (tst, _) ->
begin match tst with
| Itruetest -> 1
| Ifalsetest -> 1
| Iinttest _ -> 2
| Iinttest_imm _ -> 2
| Ifloattest _ -> 2
| Ioddtest -> 1
| Ieventest -> 1
end
| Lcondbranch3 (lbl0, lbl1, lbl2) ->
1 + begin match lbl0 with None -> 0 | Some _ -> 1 end
+ begin match lbl1 with None -> 0 | Some _ -> 1 end
+ begin match lbl2 with None -> 0 | Some _ -> 1 end
| Lswitch jumptbl -> 3 + Array.length jumptbl
| Lsetuptrap _ -> 2
| Lpushtrap -> 3
| Lpoptrap -> 1
| Lraise k ->
begin match !Clflags.debug, k with
| true, (Lambda.Raise_regular | Lambda.Raise_reraise) -> 1
| false, _
| true, Lambda.Raise_notrace -> 4
end
let relax_allocation ~num_words =
Lop (Ispecific (Ifar_alloc num_words))
let relax_intop_checkbound () =
Lop (Ispecific Ifar_intop_checkbound)
let relax_intop_imm_checkbound ~bound =
Lop (Ispecific (Ifar_intop_imm_checkbound bound))
let relax_specific_op = function
| Ishiftcheckbound shift -> Lop (Ispecific (Ifar_shiftcheckbound shift))
| _ -> assert false
end)
(* Output the assembly code for allocation. *)
let assembly_code_for_allocation i ~n ~far =
let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
let lbl_redo = new_label() in
let lbl_call_gc = new_label() in
`{emit_label lbl_redo}:`;
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`;
if not far then begin
` b.lo {emit_label lbl_call_gc}\n`
end else begin
let lbl = new_label () in
` b.cs {emit_label lbl}\n`;
` b {emit_label lbl_call_gc}\n`;
`{emit_label lbl}:\n`
end;
call_gc_sites :=
{ gc_lbl = lbl_call_gc;
gc_return_lbl = lbl_redo;
gc_frame_lbl = lbl_frame } :: !call_gc_sites
end else begin
begin match n with
| 16 -> ` bl {emit_symbol "caml_alloc1"}\n`
| 24 -> ` bl {emit_symbol "caml_alloc2"}\n`
| 32 -> ` bl {emit_symbol "caml_alloc3"}\n`
| _ -> emit_intconst reg_x15 (Nativeint.of_int n);
` bl {emit_symbol "caml_allocN"}\n`
end;
`{emit_label lbl_frame}: add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`
end
(* Output the assembly code for an instruction *)
let emit_instr i =
emit_debug_info i.dbg;
match i.desc with
| Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
match (src, dst) with
| {loc = Reg _; typ = Float}, {loc = Reg _} ->
` fmov {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _}, {loc = Reg _} ->
` mov {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _}, {loc = Stack _} ->
` str {emit_reg src}, {emit_stack dst}\n`
| {loc = Stack _}, {loc = Reg _} ->
` ldr {emit_reg dst}, {emit_stack src}\n`
| _ ->
assert false
end
| Lop(Iconst_int n | Iconst_blockheader n) ->
emit_intconst i.res.(0) n
| Lop(Iconst_float f) ->
if f = 0L then
` fmov {emit_reg i.res.(0)}, xzr\n`
else if is_immediate_float f then
` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" f}\n`
else begin
let lbl = float_literal f in
` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n`
end
| Lop(Iconst_symbol s) ->
emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
` blr {emit_reg i.arg.(0)}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Icall_imm s) ->
` bl {emit_symbol s}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Itailcall_ind) ->
output_epilogue (fun () -> ` br {emit_reg i.arg.(0)}\n`)
| Lop(Itailcall_imm s) ->
if s = !function_name then
` b {emit_label !tailrec_entry_point}\n`
else
output_epilogue (fun () -> ` b {emit_symbol s}\n`)
| Lop(Iextcall(s, false)) ->
` bl {emit_symbol s}\n`
| Lop(Iextcall(s, true)) ->
emit_load_symbol_addr reg_x15 s;
` bl {emit_symbol "caml_c_call"}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Istackoffset n) ->
assert (n mod 16 = 0);
emit_stack_adjustment (-n);
stack_offset := !stack_offset + n
| Lop(Iload(size, addr)) ->
let dst = i.res.(0) in
let base =
match addr with
| Iindexed ofs -> i.arg.(0)
| Ibased(s, ofs) ->
` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
reg_tmp1 in
begin match size with
| Byte_unsigned ->
` ldrb {emit_wreg dst}, {emit_addressing addr base}\n`
| Byte_signed ->
` ldrsb {emit_reg dst}, {emit_addressing addr base}\n`
| Sixteen_unsigned ->
` ldrh {emit_wreg dst}, {emit_addressing addr base}\n`
| Sixteen_signed ->
` ldrsh {emit_reg dst}, {emit_addressing addr base}\n`
| Thirtytwo_unsigned ->
` ldr {emit_wreg dst}, {emit_addressing addr base}\n`
| Thirtytwo_signed ->
` ldrsw {emit_reg dst}, {emit_addressing addr base}\n`
| Single ->
` ldr s7, {emit_addressing addr base}\n`;
` fcvt {emit_reg dst}, s7\n`
| Word_int | Word_val | Double | Double_u ->
` ldr {emit_reg dst}, {emit_addressing addr base}\n`
end
| Lop(Istore(size, addr, _)) ->
let src = i.arg.(0) in
let base =
match addr with
| Iindexed ofs -> i.arg.(1)
| Ibased(s, ofs) ->
` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
reg_tmp1 in
begin match size with
| Byte_unsigned | Byte_signed ->
` strb {emit_wreg src}, {emit_addressing addr base}\n`
| Sixteen_unsigned | Sixteen_signed ->
` strh {emit_wreg src}, {emit_addressing addr base}\n`
| Thirtytwo_unsigned | Thirtytwo_signed ->
` str {emit_wreg src}, {emit_addressing addr base}\n`
| Single ->
` fcvt s7, {emit_reg src}\n`;
` str s7, {emit_addressing addr base}\n`;
| Word_int | Word_val | Double | Double_u ->
` str {emit_reg src}, {emit_addressing addr base}\n`
end
| Lop(Ialloc n) ->
assembly_code_for_allocation i ~n ~far:false
| Lop(Ispecific (Ifar_alloc n)) ->
assembly_code_for_allocation i ~n ~far:true
| Lop(Iintop(Icomp cmp)) ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
| Lop(Iintop_imm(Icomp cmp, n)) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
| Lop(Iintop Icheckbound) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` b.ls {emit_label lbl}\n`
| Lop(Ispecific Ifar_intop_checkbound) ->
let lbl = bound_error_label i.dbg in
let lbl2 = new_label () in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` b.hi {emit_label lbl2}\n`;
` b {emit_label lbl}\n`;
`{emit_label lbl2}:\n`;
| Lop(Iintop_imm(Icheckbound, n)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` b.ls {emit_label lbl}\n`
| Lop(Ispecific(Ifar_intop_imm_checkbound bound)) ->
let lbl = bound_error_label i.dbg in
let lbl2 = new_label () in
` cmp {emit_reg i.arg.(0)}, #{emit_int bound}\n`;
` b.hi {emit_label lbl2}\n`;
` b {emit_label lbl}\n`;
`{emit_label lbl2}:\n`;
| Lop(Ispecific(Ishiftcheckbound shift)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
` b.cs {emit_label lbl}\n`
| Lop(Ispecific(Ifar_shiftcheckbound shift)) ->
let lbl = bound_error_label i.dbg in
let lbl2 = new_label () in
` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
` b.lo {emit_label lbl2}\n`;
` b {emit_label lbl}\n`;
`{emit_label lbl2}:\n`;
| Lop(Iintop Imod) ->
` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop Imulh) ->
` smulh {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`
| Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) ->
let instr = (match op with
| Ifloatofint -> "scvtf"
| Iintoffloat -> "fcvtzs"
| Iabsf -> "fabs"
| Inegf -> "fneg"
| Ispecific Isqrtf -> "fsqrt"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
let instr = (match op with
| Iaddf -> "fadd"
| Isubf -> "fsub"
| Imulf -> "fmul"
| Idivf -> "fdiv"
| Ispecific Inegmulf -> "fnmul"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
let instr = (match op with
| Imuladdf -> "fmadd"
| Inegmuladdf -> "fnmadd"
| Imulsubf -> "fmsub"
| Inegmulsubf -> "fnmsub"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
| Lop(Ispecific(Ishiftarith(op, shift))) ->
let instr = (match op with
Ishiftadd -> "add"
| Ishiftsub -> "sub") in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
if shift >= 0
then `, lsl #{emit_int shift}\n`
else `, asr #{emit_int (-shift)}\n`
| Lop(Ispecific(Imuladd | Imulsub as op)) ->
let instr = (match op with
Imuladd -> "madd"
| Imulsub -> "msub"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
| Lop(Ispecific(Ibswap size)) ->
begin match size with
| 16 ->
` rev16 {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`;
` ubfm {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #15\n`
| 32 ->
` rev {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`
| 64 ->
` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| _ ->
assert false
end
| Lreloadretaddr ->
()
| Lreturn ->
output_epilogue (fun () -> ` ret\n`)
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
` b {emit_label lbl}\n`
| Lcondbranch(tst, lbl) ->
begin match tst with
| Itruetest ->
` cbnz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
| Ifalsetest ->
` cbz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
` b.{emit_string comp} {emit_label lbl}\n`
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
` b.{emit_string comp} {emit_label lbl}\n`
| Ifloattest(cmp, neg) ->
let comp = (match (cmp, neg) with
| (Ceq, false) | (Cne, true) -> "eq"
| (Cne, false) | (Ceq, true) -> "ne"
| (Clt, false) -> "cc"
| (Clt, true) -> "cs"
| (Cle, false) -> "ls"
| (Cle, true) -> "hi"
| (Cgt, false) -> "gt"
| (Cgt, true) -> "le"
| (Cge, false) -> "ge"
| (Cge, true) -> "lt") in
` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` b.{emit_string comp} {emit_label lbl}\n`
| Ioddtest ->
` tbnz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
| Ieventest ->
` tbz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
| Some lbl -> ` b.lt {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` b.eq {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` b.gt {emit_label lbl}\n`
end
| Lswitch jumptbl ->
let lbltbl = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`;
` br {emit_reg reg_tmp1}\n`;
`{emit_label lbltbl}:`;
for j = 0 to Array.length jumptbl - 1 do
` b {emit_label jumptbl.(j)}\n`
done
(* Alternative:
let lbltbl = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
` ldr {emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`;
` add {emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`;
` br {emit_reg reg_tmp1}\n`;
`{emit_label lbltbl}:\n`;
for j = 0 to Array.length jumptbl - 1 do
` .word {emit_label jumptbl.(j)} - {emit_label lbltbl}\n`
done
*)
| Lsetuptrap lbl ->
let lblnext = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lblnext}\n`;
` b {emit_label lbl}\n`;
`{emit_label lblnext}:\n`
| Lpushtrap ->
stack_offset := !stack_offset + 16;
` str {emit_reg reg_trap_ptr}, [sp, -16]!\n`;
` str {emit_reg reg_tmp1}, [sp, #8]\n`;
cfi_adjust_cfa_offset 16;
` mov {emit_reg reg_trap_ptr}, sp\n`
| Lpoptrap ->
` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
cfi_adjust_cfa_offset (-16);
stack_offset := !stack_offset - 16
| Lraise k ->
begin match !Clflags.debug, k with
| true, (Lambda.Raise_regular | Lambda.Raise_reraise) ->
` bl {emit_symbol "caml_raise_exn"}\n`;
`{record_frame Reg.Set.empty i.dbg}\n`
| false, _
| true, Lambda.Raise_notrace ->
` mov sp, {emit_reg reg_trap_ptr}\n`;
` ldr {emit_reg reg_tmp1}, [sp, #8]\n`;
` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
` br {emit_reg reg_tmp1}\n`
end
(* Emission of an instruction sequence *)
let rec emit_all i =
if i.desc = Lend then () else (emit_instr i; emit_all i.next)
(* Emission of the profiling prelude *)
let emit_profile() = () (* TODO *)
(*
match Config.system with
"linux_eabi" | "linux_eabihf" | "netbsd" ->
` push \{lr}\n`;
` {emit_call "__gnu_mcount_nc"}\n`
| _ -> ()
*)
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
float_literals := [];
stack_offset := 0;
call_gc_sites := [];
bound_error_sites := [];
` .text\n`;
` .align 3\n`;
` .globl {emit_symbol fundecl.fun_name}\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
emit_debug_info fundecl.fun_dbg;
cfi_startproc();
if !Clflags.gprofile then emit_profile();
let n = frame_size() in
if n > 0 then
emit_stack_adjustment (-n);
if !contains_calls then begin
cfi_offset ~reg:30 (* return address *) ~offset:(-8);
` str x30, [sp, #{emit_int (n-8)}]\n`
end;
`{emit_label !tailrec_entry_point}:\n`;
let num_call_gc, num_check_bound =
num_call_gc_and_check_bound_points fundecl.fun_body
in
let max_out_of_line_code_offset =
max_out_of_line_code_offset fundecl.fun_body ~num_call_gc
~num_check_bound
in
BR.relax fundecl.fun_body ~max_out_of_line_code_offset;
emit_all fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
List.iter emit_call_bound_error !bound_error_sites;
assert (List.length !call_gc_sites = num_call_gc);
assert (List.length !bound_error_sites = num_check_bound);
cfi_endproc();
` .type {emit_symbol fundecl.fun_name}, %function\n`;
` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`;
emit_literals()
(* Emission of data *)
let emit_item = function
| Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
| Cdefine_symbol s -> `{emit_symbol s}:\n`
| Cdefine_label lbl -> `{emit_data_label lbl}:\n`
| Cint8 n -> ` .byte {emit_int n}\n`
| Cint16 n -> ` .short {emit_int n}\n`
| Cint32 n -> ` .long {emit_nativeint n}\n`
| Cint n -> ` .quad {emit_nativeint n}\n`
| Csingle f -> emit_float32_directive ".long" (Int32.bits_of_float f)
| Cdouble f -> emit_float64_directive ".quad" (Int64.bits_of_float f)
| Csymbol_address s -> ` .quad {emit_symbol s}\n`
| Clabel_address lbl -> ` .quad {emit_data_label lbl}\n`
| Cstring s -> emit_string_directive " .ascii " s
| Cskip n -> if n > 0 then ` .space {emit_int n}\n`
| Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
` .align 3\n`;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
reset_debug_info();
` .file \"\"\n`; (* PR#7037 *)
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
emit_frames
{ efa_label = (fun lbl ->
` .type {emit_label lbl}, %function\n`;
` .quad {emit_label lbl}\n`);
efa_16 = (fun n -> ` .short {emit_int n}\n`);
efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
efa_word = (fun n -> ` .quad {emit_int n}\n`);
efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
efa_label_rel = (fun lbl ofs ->
` .long {emit_label lbl} - . + {emit_int32 ofs}\n`);
efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
efa_string = (fun s -> emit_string_directive " .asciz " s) };
` .type {emit_symbol lbl}, %object\n`;
` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
begin match Config.system with
| "linux" ->
(* Mark stack as non-executable *)
` .section .note.GNU-stack,\"\",%progbits\n`
| _ -> ()
end