Port to the ARM 64-bits (AArch64) architecture (experimental).

Merge of branch branches/arm64.


git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@13909 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Xavier Leroy 2013-07-18 16:09:20 +00:00
parent 257818bdf6
commit 055d5c0379
19 changed files with 2060 additions and 6 deletions

View File

@ -3,6 +3,9 @@ Next version:
(Changes that can break existing programs are marked with a "*")
Compilers:
- Experimental native code generator for AArch64 (ARM 64 bits)
Bug fixes:
- PR#4719: Sys.executable_name is wrong if executable name contains dots (Windows)

146
asmcomp/arm64/arch.ml Normal file
View File

@ -0,0 +1,146 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
(* reserved. This file is distributed under the terms of the Q *)
(* Public License version 1.0. *)
(* *)
(***********************************************************************)
let command_line_options = []
(* Specific operations for the ARM processor, 64-bit mode *)
open Format
let command_line_options = []
(* Addressing modes *)
type addressing_mode =
| Iindexed of int (* reg + displ *)
| Ibased of string * int (* global var + displ *)
(* We do not support the reg + shifted reg addressing mode, because
what we really need is reg + shifted reg + displ,
and this is decomposed in two instructions (reg + shifted reg -> tmp,
then addressing tmp + displ). *)
(* Specific operations *)
type specific_operation =
| Ishiftarith of arith_operation * int
| Ishiftcheckbound of int
| Imuladd (* multiply and add *)
| Imulsub (* multiply and subtract *)
| Inegmulf (* floating-point negate and multiply *)
| Imuladdf (* floating-point multiply and add *)
| Inegmuladdf (* floating-point negate, multiply and add *)
| Imulsubf (* floating-point multiply and subtract *)
| Inegmulsubf (* floating-point negate, multiply and subtract *)
| Isqrtf (* floating-point square root *)
| Ibswap of int (* endianess conversion *)
and arith_operation =
Ishiftadd
| Ishiftsub
(* Sizes, endianness *)
let big_endian = false
let size_addr = 8
let size_int = 8
let size_float = 8
let allow_unaligned_access = false
(* Behavior of division *)
let division_crashes_on_overflow = false
(* Operations on addressing modes *)
let identity_addressing = Iindexed 0
let offset_addressing addr delta =
match addr with
| Iindexed n -> Iindexed(n + delta)
| Ibased(s, n) -> Ibased(s, n + delta)
let num_args_addressing = function
| Iindexed n -> 1
| Ibased(s, n) -> 0
(* Printing operations and addressing modes *)
let print_addressing printreg addr ppf arg =
match addr with
| Iindexed n ->
printreg ppf arg.(0);
if n <> 0 then fprintf ppf " + %i" n
| Ibased(s, 0) ->
fprintf ppf "\"%s\"" s
| Ibased(s, n) ->
fprintf ppf "\"%s\" + %i" s n
let print_specific_operation printreg op ppf arg =
match op with
| Ishiftarith(op, shift) ->
let op_name = function
| Ishiftadd -> "+"
| Ishiftsub -> "-" in
let shift_mark =
if shift >= 0
then sprintf "<< %i" shift
else sprintf ">> %i" (-shift) in
fprintf ppf "%a %s %a %s"
printreg arg.(0) (op_name op) printreg arg.(1) shift_mark
| Ishiftcheckbound n ->
fprintf ppf "check %a >> %i > %a" printreg arg.(0) n printreg arg.(1)
| Imuladd ->
fprintf ppf "(%a * %a) + %a"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Imulsub ->
fprintf ppf "-(%a * %a) + %a"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Inegmulf ->
fprintf ppf "-f (%a *f %a)"
printreg arg.(0)
printreg arg.(1)
| Imuladdf ->
fprintf ppf "%a +f (%a *f %a)"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Inegmuladdf ->
fprintf ppf "(-f %a) -f (%a *f %a)"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Imulsubf ->
fprintf ppf "%a -f (%a *f %a)"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Inegmulsubf ->
fprintf ppf "(-f %a) +f (%a *f %a)"
printreg arg.(0)
printreg arg.(1)
printreg arg.(2)
| Isqrtf ->
fprintf ppf "sqrtf %a"
printreg arg.(0)
| Ibswap n ->
fprintf ppf "bswap%i %a" n
printreg arg.(0)

742
asmcomp/arm64/emit.mlp Normal file
View File

@ -0,0 +1,742 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
(* reserved. This file is distributed under the terms of the Q *)
(* Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Emission of ARM assembly code, 64-bit mode *)
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Tradeoff between code size and code speed *)
let fastcode_flag = ref true
(* Names for special regs *)
let reg_trap_ptr = phys_reg 23
let reg_alloc_ptr = phys_reg 24
let reg_alloc_limit = phys_reg 25
let reg_tmp1 = phys_reg 26
let reg_tmp2 = phys_reg 27
let reg_x15 = phys_reg 15
(* Output a label *)
let emit_label lbl =
emit_string ".L"; emit_int lbl
let emit_data_label lbl =
emit_string ".Ld"; emit_int lbl
(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
(* Output a pseudo-register *)
let emit_reg = function
{loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit.emit_reg"
(* Likewise, but with the 32-bit name of the register *)
let int_reg_name_w =
[| "w0"; "w1"; "w2"; "w3"; "w4"; "w5"; "w6"; "w7";
"w8"; "w9"; "w10"; "w11"; "w12"; "w13"; "w14"; "w15";
"w19"; "w20"; "w21"; "w22"; "w23"; "w24"; "w25";
"w26"; "w27"; "w28"; "w16"; "w17" |]
let emit_wreg = function
{loc = Reg r} -> emit_string int_reg_name_w.(r)
| _ -> fatal_error "Emit.emit_wreg"
(* Layout of the stack frame *)
let stack_offset = ref 0
let frame_size () =
let sz =
!stack_offset +
8 * num_stack_slots.(0) +
8 * num_stack_slots.(1) +
(if !contains_calls then 8 else 0)
in Misc.align sz 16
let slot_offset loc cl =
match loc with
Incoming n ->
assert (n >= 0);
frame_size() + n
| Local n ->
!stack_offset +
(if cl = 0
then n * 8
else num_stack_slots.(0) * 8 + n * 8)
| Outgoing n ->
assert (n >= 0);
n
(* Output a stack reference *)
let emit_stack r =
match r.loc with
| Stack s ->
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
| _ -> fatal_error "Emit.emit_stack"
(* Output an addressing mode *)
let emit_symbol_offset s ofs =
emit_symbol s;
if ofs > 0 then `+{emit_int ofs}`
else if ofs < 0 then `-{emit_int (-ofs)}`
else ()
let emit_addressing addr r =
match addr with
| Iindexed ofs ->
`[{emit_reg r}, #{emit_int ofs}]`
| Ibased(s, ofs) ->
`[{emit_reg r}, #:lo12:{emit_symbol_offset s ofs}]`
(* Record live pointers at call points *)
let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset;
fd_debuginfo = dbg } :: !frame_descriptors;
lbl
let record_frame live dbg =
let lbl = record_frame_label live dbg in `{emit_label lbl}:`
(* Record calls to the GC -- we've moved them out of the way *)
type gc_call =
{ gc_lbl: label; (* Entry label *)
gc_return_lbl: label; (* Where to branch after GC *)
gc_frame_lbl: label } (* Label of frame descriptor *)
let call_gc_sites = ref ([] : gc_call list)
let emit_call_gc gc =
`{emit_label gc.gc_lbl}: bl {emit_symbol "caml_call_gc"}\n`;
`{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
(* Record calls to caml_ml_array_bound_error.
In debug mode, we maintain one call to caml_ml_array_bound_error
per bound check site. Otherwise, we can share a single call. *)
type bound_error_call =
{ bd_lbl: label; (* Entry label *)
bd_frame_lbl: label } (* Label of frame descriptor *)
let bound_error_sites = ref ([] : bound_error_call list)
let bound_error_label dbg =
if !Clflags.debug || !bound_error_sites = [] then begin
let lbl_bound_error = new_label() in
let lbl_frame = record_frame_label Reg.Set.empty dbg in
bound_error_sites :=
{ bd_lbl = lbl_bound_error;
bd_frame_lbl = lbl_frame } :: !bound_error_sites;
lbl_bound_error
end else begin
let bd = List.hd !bound_error_sites in bd.bd_lbl
end
let emit_call_bound_error bd =
`{emit_label bd.bd_lbl}: bl {emit_symbol "caml_ml_array_bound_error"}\n`;
`{emit_label bd.bd_frame_lbl}:\n`
(* Names of various instructions *)
let name_for_comparison = function
| Isigned Ceq -> "eq" | Isigned Cne -> "ne" | Isigned Cle -> "le"
| Isigned Cge -> "ge" | Isigned Clt -> "lt" | Isigned Cgt -> "gt"
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
let name_for_int_operation = function
| Iadd -> "add"
| Isub -> "sub"
| Imul -> "mul"
| Idiv -> "sdiv"
| Iand -> "and"
| Ior -> "orr"
| Ixor -> "eor"
| Ilsl -> "lsl"
| Ilsr -> "lsr"
| Iasr -> "asr"
| _ -> assert false
(* Load an integer constant into a register *)
let emit_intconst dst n =
let rec emit_pos first shift =
if shift < 0 then begin
if first then ` mov {emit_reg dst}, xzr\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0n then emit_pos first (shift - 16) else begin
if first then
` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_pos false (shift - 16)
end
end
and emit_neg first shift =
if shift < 0 then begin
if first then ` movn {emit_reg dst}, #0\n`
end else begin
let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in
if s = 0xFFFFn then emit_neg first (shift - 16) else begin
if first then
` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n`
else
` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`;
emit_neg false (shift - 16)
end
end
in
if n < 0n then emit_neg true 48 else emit_pos true 48
(* Recognize float constants appropriate for FMOV dst, #fpimm instruction:
"a normalized binary floating point encoding with 1 sign bit, 4
bits of fraction and a 3-bit exponent" *)
let is_immediate_float bits =
let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
(* Adjust sp (up or down) by the given byte amount *)
let emit_stack_adjustment n =
let instr = if n < 0 then "sub" else "add" in
let m = abs n in
assert (m < 0x1_000_000);
let ml = m land 0xFFF and mh = m land 0xFFF_000 in
if mh <> 0 then ` {emit_string instr} sp, sp, #{emit_int mh}\n`;
if ml <> 0 then ` {emit_string instr} sp, sp, #{emit_int ml}\n`;
if n <> 0 then cfi_adjust_cfa_offset (-n)
(* Deallocate the stack frame and reload the return address
before a return or tail call *)
let output_epilogue f =
let n = frame_size() in
if !contains_calls then
` ldr x30, [sp, #{emit_int (n-8)}]\n`;
if n > 0 then
emit_stack_adjustment n;
f();
(* reset CFA back because function body may continue *)
if n > 0 then cfi_adjust_cfa_offset n
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Pending floating-point literals *)
let float_literals = ref ([] : (int64 * label) list)
(* Label a floating-point literal *)
let float_literal f =
try
List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
float_literals := (f, lbl) :: !float_literals;
lbl
(* Emit all pending literals *)
let emit_literals() =
if !float_literals <> [] then begin
` .align 3\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}: .quad `; emit_printf "0x%Lx\n" f)
!float_literals;
float_literals := []
end
(* Emit code to load the address of a symbol *)
let emit_load_symbol_addr dst s =
if (not !Clflags.dlcode) || Compilenv.symbol_in_current_unit s then begin
` adrp {emit_reg dst}, {emit_symbol s}\n`;
` add {emit_reg dst}, {emit_reg dst}, #:lo12:{emit_symbol s}\n`
end else begin
` adrp {emit_reg dst}, :got:{emit_symbol s}\n`;
` ldr {emit_reg dst}, [{emit_reg dst}, #:got_lo12:{emit_symbol s}]\n`
end
(* Output the assembly code for an instruction *)
let emit_instr i =
emit_debug_info i.dbg;
match i.desc with
| Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
match (src, dst) with
| {loc = Reg _; typ = Float}, {loc = Reg _} ->
` fmov {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _}, {loc = Reg _} ->
` mov {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg _}, {loc = Stack _} ->
` str {emit_reg src}, {emit_stack dst}\n`
| {loc = Stack _}, {loc = Reg _} ->
` ldr {emit_reg dst}, {emit_stack src}\n`
| _ ->
assert false
end
| Lop(Iconst_int n) ->
emit_intconst i.res.(0) n
| Lop(Iconst_float f) ->
let b = Int64.bits_of_float(float_of_string f) in
if b = 0L then
` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n`
else if is_immediate_float b then
` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n`
else begin
let lbl = float_literal b in
` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(0)}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}] /* {emit_string f} */\n`
end
| Lop(Iconst_symbol s) ->
emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
` blr {emit_reg i.arg.(0)}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Icall_imm s) ->
` bl {emit_symbol s}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Itailcall_ind) ->
output_epilogue (fun () -> ` br {emit_reg i.arg.(0)}\n`)
| Lop(Itailcall_imm s) ->
if s = !function_name then
` b {emit_label !tailrec_entry_point}\n`
else
output_epilogue (fun () -> ` b {emit_symbol s}\n`)
| Lop(Iextcall(s, false)) ->
` bl {emit_symbol s}\n`
| Lop(Iextcall(s, true)) ->
emit_load_symbol_addr reg_x15 s;
` bl {emit_symbol "caml_c_call"}\n`;
`{record_frame i.live i.dbg}\n`
| Lop(Istackoffset n) ->
assert (n mod 16 = 0);
emit_stack_adjustment (-n);
stack_offset := !stack_offset + n
| Lop(Iload(size, addr)) ->
let dst = i.res.(0) in
let base =
match addr with
| Iindexed ofs -> i.arg.(0)
| Ibased(s, ofs) ->
` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
reg_tmp1 in
begin match size with
| Byte_unsigned ->
` ldrb {emit_wreg dst}, {emit_addressing addr base}\n`
| Byte_signed ->
` ldrsb {emit_reg dst}, {emit_addressing addr base}\n`
| Sixteen_unsigned ->
` ldrh {emit_wreg dst}, {emit_addressing addr base}\n`
| Sixteen_signed ->
` ldrsh {emit_reg dst}, {emit_addressing addr base}\n`
| Thirtytwo_unsigned ->
` ldr {emit_wreg dst}, {emit_addressing addr base}\n`
| Thirtytwo_signed ->
` ldrsw {emit_reg dst}, {emit_addressing addr base}\n`
| Single ->
` ldr s7, {emit_addressing addr base}\n`;
` fcvt {emit_reg dst}, s7\n`
| Word | Double | Double_u ->
` ldr {emit_reg dst}, {emit_addressing addr base}\n`
end
| Lop(Istore(size, addr)) ->
let src = i.arg.(0) in
let base =
match addr with
| Iindexed ofs -> i.arg.(1)
| Ibased(s, ofs) ->
` adrp {emit_reg reg_tmp1}, {emit_symbol_offset s ofs}\n`;
reg_tmp1 in
begin match size with
| Byte_unsigned | Byte_signed ->
` strb {emit_wreg src}, {emit_addressing addr base}\n`
| Sixteen_unsigned | Sixteen_signed ->
` strh {emit_wreg src}, {emit_addressing addr base}\n`
| Thirtytwo_unsigned | Thirtytwo_signed ->
` str {emit_wreg src}, {emit_addressing addr base}\n`
| Single ->
` fcvt s7, {emit_reg src}\n`;
` str s7, {emit_addressing addr base}\n`;
| Word | Double | Double_u ->
` str {emit_reg src}, {emit_addressing addr base}\n`
end
| Lop(Ialloc n) ->
let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
let lbl_redo = new_label() in
let lbl_call_gc = new_label() in
`{emit_label lbl_redo}:`;
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int n}\n`;
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`;
` b.lo {emit_label lbl_call_gc}\n`;
call_gc_sites :=
{ gc_lbl = lbl_call_gc;
gc_return_lbl = lbl_redo;
gc_frame_lbl = lbl_frame } :: !call_gc_sites
end else begin
begin match n with
| 16 -> ` bl {emit_symbol "caml_alloc1"}\n`
| 24 -> ` bl {emit_symbol "caml_alloc2"}\n`
| 32 -> ` bl {emit_symbol "caml_alloc3"}\n`
| _ -> emit_intconst reg_x15 (Nativeint.of_int n);
` bl {emit_symbol "caml_allocN"}\n`
end;
`{emit_label lbl_frame}: add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`
end
| Lop(Iintop(Icomp cmp)) ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
| Lop(Iintop_imm(Icomp cmp, n)) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n`
| Lop(Iintop Icheckbound) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` b.ls {emit_label lbl}\n`
| Lop(Iintop_imm(Icheckbound, n)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
` b.ls {emit_label lbl}\n`
| Lop(Ispecific(Ishiftcheckbound shift)) ->
let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
` b.cs {emit_label lbl}\n`
| Lop(Iintop Imod) ->
` sdiv {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` msub {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
` asr {emit_reg i.res.(0)}, {emit_reg reg_tmp1}, {emit_int l}\n`
| Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
` asr {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, #63\n`;
` add {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsr {emit_int (64-l)}\n`;
` asr {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_int l}\n`;
` sub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp1}, lsl {emit_int l}\n`
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`
| Lop(Ifloatofint | Iintoffloat | Iabsf | Inegf | Ispecific Isqrtf as op) ->
let instr = (match op with
| Ifloatofint -> "scvtf"
| Iintoffloat -> "fcvtzs"
| Iabsf -> "fabs"
| Inegf -> "fneg"
| Ispecific Isqrtf -> "fsqrt"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
let instr = (match op with
| Iaddf -> "fadd"
| Isubf -> "fsub"
| Imulf -> "fmul"
| Idivf -> "fdiv"
| Ispecific Inegmulf -> "fnmul"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
let instr = (match op with
| Imuladdf -> "fmadd"
| Inegmuladdf -> "fnmadd"
| Imulsubf -> "fmsub"
| Inegmulsubf -> "fnmsub"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
| Lop(Ispecific(Ishiftarith(op, shift))) ->
let instr = (match op with
Ishiftadd -> "add"
| Ishiftsub -> "sub") in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
if shift >= 0
then `, lsl #{emit_int shift}\n`
else `, asr #{emit_int (-shift)}\n`
| Lop(Ispecific(Imuladd | Imulsub as op)) ->
let instr = (match op with
Imuladd -> "madd"
| Imulsub -> "msub"
| _ -> assert false) in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`
| Lop(Ispecific(Ibswap size)) ->
begin match size with
| 16 ->
` rev16 {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`;
` ubfm {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #0, #16\n`
| 32 ->
` rev {emit_wreg i.res.(0)}, {emit_wreg i.arg.(0)}\n`
| 64 ->
` rev {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| _ ->
assert false
end
| Lreloadretaddr ->
()
| Lreturn ->
output_epilogue (fun () -> ` ret\n`)
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
` b {emit_label lbl}\n`
| Lcondbranch(tst, lbl) ->
begin match tst with
| Itruetest ->
` cbnz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
| Ifalsetest ->
` cbz {emit_reg i.arg.(0)}, {emit_label lbl}\n`
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
` b.{emit_string comp} {emit_label lbl}\n`
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
` b.{emit_string comp} {emit_label lbl}\n`
| Ifloattest(cmp, neg) ->
let comp = (match (cmp, neg) with
| (Ceq, false) | (Cne, true) -> "eq"
| (Cne, false) | (Ceq, true) -> "ne"
| (Clt, false) -> "cc"
| (Clt, true) -> "cs"
| (Cle, false) -> "ls"
| (Cle, true) -> "hi"
| (Cgt, false) -> "gt"
| (Cgt, true) -> "le"
| (Cge, false) -> "ge"
| (Cge, true) -> "lt") in
` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` b.{emit_string comp} {emit_label lbl}\n`
| Ioddtest ->
` tbnz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
| Ieventest ->
` tbz {emit_reg i.arg.(0)}, #0, {emit_label lbl}\n`
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
| Some lbl -> ` b.lt {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` b.eq {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` b.gt {emit_label lbl}\n`
end
| Lswitch jumptbl ->
let lbltbl = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
` add {emit_reg reg_tmp1}, {emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2\n`;
` br {emit_reg reg_tmp1}\n`;
`{emit_label lbltbl}:`;
for j = 0 to Array.length jumptbl - 1 do
` b {emit_label jumptbl.(j)}\n`
done
(* Alternative:
let lbltbl = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lbltbl}\n`;
` ldr {emit_wreg reg_tmp2}, [{emit_reg reg_tmp1}, {emit_reg i.arg.(0)}, lsl #2]\n`;
` add {emit_reg reg_tmp1}, {emit_wreg reg_tmp2}, sxtb\n`;
` br {emit_reg reg_tmp1}\n`;
`{emit_label lbltbl}:\n`;
for j = 0 to Array.length jumptbl - 1 do
` .word {emit_label jumptbl.(j)} - {emit_label lbltbl}\n`
done
*)
| Lsetuptrap lbl ->
let lblnext = new_label() in
` adr {emit_reg reg_tmp1}, {emit_label lblnext}\n`;
` b {emit_label lbl}\n`;
`{emit_label lblnext}:\n`
| Lpushtrap ->
stack_offset := !stack_offset + 16;
` str {emit_reg reg_trap_ptr}, [sp, -16]!\n`;
` str {emit_reg reg_tmp1}, [sp, #8]\n`;
cfi_adjust_cfa_offset 16;
` mov {emit_reg reg_trap_ptr}, sp\n`
| Lpoptrap ->
` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
cfi_adjust_cfa_offset (-16);
stack_offset := !stack_offset - 16
| Lraise ->
if !Clflags.debug then begin
` bl {emit_symbol "caml_raise_exn"}\n`;
`{record_frame Reg.Set.empty i.dbg}\n`
end else begin
` mov sp, {emit_reg reg_trap_ptr}\n`;
` ldr {emit_reg reg_tmp1}, [sp, #8]\n`;
` ldr {emit_reg reg_trap_ptr}, [sp], 16\n`;
` br {emit_reg reg_tmp1}\n`
end
(* Emission of an instruction sequence *)
let rec emit_all i =
if i.desc = Lend then () else (emit_instr i; emit_all i.next)
(* Emission of the profiling prelude *)
let emit_profile() = () (* TODO *)
(*
match Config.system with
"linux_eabi" | "linux_eabihf" ->
` push \{lr}\n`;
` {emit_call "__gnu_mcount_nc"}\n`
| _ -> ()
*)
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
float_literals := [];
stack_offset := 0;
call_gc_sites := [];
bound_error_sites := [];
` .text\n`;
` .align 2\n`;
` .globl {emit_symbol fundecl.fun_name}\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
emit_debug_info fundecl.fun_dbg;
cfi_startproc();
if !Clflags.gprofile then emit_profile();
let n = frame_size() in
if n > 0 then
emit_stack_adjustment (-n);
if !contains_calls then
` str x30, [sp, #{emit_int (n-8)}]\n`;
`{emit_label !tailrec_entry_point}:\n`;
emit_all fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
List.iter emit_call_bound_error !bound_error_sites;
cfi_endproc();
` .type {emit_symbol fundecl.fun_name}, %function\n`;
` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`;
emit_literals()
(* Emission of data *)
let emit_item = function
| Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
| Cdefine_symbol s -> `{emit_symbol s}:\n`
| Cdefine_label lbl -> `{emit_data_label lbl}:\n`
| Cint8 n -> ` .byte {emit_int n}\n`
| Cint16 n -> ` .short {emit_int n}\n`
| Cint32 n -> ` .long {emit_nativeint n}\n`
| Cint n -> ` .quad {emit_nativeint n}\n`
| Csingle f -> emit_float32_directive ".long" f
| Cdouble f -> emit_float64_directive ".quad" f
| Csymbol_address s -> ` .quad {emit_symbol s}\n`
| Clabel_address lbl -> ` .quad {emit_data_label lbl}\n`
| Cstring s -> emit_string_directive " .ascii " s
| Cskip n -> if n > 0 then ` .space {emit_int n}\n`
| Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
reset_debug_info();
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
emit_frames
{ efa_label = (fun lbl ->
` .type {emit_label lbl}, %function\n`;
` .quad {emit_label lbl}\n`);
efa_16 = (fun n -> ` .short {emit_int n}\n`);
efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
efa_word = (fun n -> ` .quad {emit_int n}\n`);
efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
efa_label_rel = (fun lbl ofs ->
` .long {emit_label lbl} - . + {emit_int32 ofs}\n`);
efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
efa_string = (fun s -> emit_string_directive " .asciz " s) };
` .type {emit_symbol lbl}, %object\n`;
` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
begin match Config.system with
| "linux" ->
(* Mark stack as non-executable *)
` .section .note.GNU-stack,\"\",%progbits\n`
| _ -> ()
end

212
asmcomp/arm64/proc.ml Normal file
View File

@ -0,0 +1,212 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
(* reserved. This file is distributed under the terms of the Q *)
(* Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Description of the ARM processor in 64-bit mode *)
open Misc
open Cmm
open Reg
open Arch
open Mach
(* Instruction selection *)
let word_addressed = false
(* Registers available for register allocation *)
(* Integer register map:
x0 - x15 general purpose (caller-save)
x16, x17 temporaries (used by call veeners)
x18 platform register (reserved)
x19 - x25 general purpose (callee-save)
x26 trap pointer
x27 alloc pointer
x28 alloc limit
x29 frame pointer
x30 return address
sp / xzr stack pointer / zero register
Floating-point register map:
d0 - d7 general purpose (caller-save)
d8 - d15 general purpose (callee-save)
d16 - d31 generat purpose (caller-save)
*)
let int_reg_name =
[| "x0"; "x1"; "x2"; "x3"; "x4"; "x5"; "x6"; "x7";
"x8"; "x9"; "x10"; "x11"; "x12"; "x13"; "x14"; "x15";
"x19"; "x20"; "x21"; "x22"; "x23"; "x24"; "x25";
"x26"; "x27"; "x28"; "x16"; "x17" |]
let float_reg_name =
[| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
"d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
"d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
"d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
let num_register_classes = 2
let register_class r =
match r.typ with
| (Int | Addr) -> 0
| Float -> 1
let num_available_registers =
[| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)
let first_available_register =
[| 0; 100 |]
let register_name r =
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
let rotate_registers = true
(* Representation of hard registers by pseudo-registers *)
let hard_int_reg =
let v = Array.create 28 Reg.dummy in
for i = 0 to 27 do
v.(i) <- Reg.at_location Int (Reg i)
done;
v
let hard_float_reg =
let v = Array.create 32 Reg.dummy in
for i = 0 to 31 do
v.(i) <- Reg.at_location Float (Reg(100 + i))
done;
v
let all_phys_regs =
Array.append hard_int_reg hard_float_reg
let phys_reg n =
if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
let reg_x15 = phys_reg 15
let reg_d7 = phys_reg 107
let stack_slot slot ty =
Reg.at_location ty (Stack slot)
(* Calling conventions *)
let calling_conventions
first_int last_int first_float last_float make_stack arg =
let loc = Array.create (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
for i = 0 to Array.length arg - 1 do
match arg.(i).typ with
Int | Addr as ty ->
if !int <= last_int then begin
loc.(i) <- phys_reg !int;
incr int
end else begin
loc.(i) <- stack_slot (make_stack !ofs) ty;
ofs := !ofs + size_int
end
| Float ->
if !float <= last_float then begin
loc.(i) <- phys_reg !float;
incr float
end else begin
loc.(i) <- stack_slot (make_stack !ofs) Float;
ofs := !ofs + size_float
end
done;
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
(* OCaml calling convention:
first integer args in r0...r15
first float args in d0...d15
remaining args on stack.
Return values in r0...r15 or d0...d15. *)
let loc_arguments arg =
calling_conventions 0 15 100 115 outgoing arg
let loc_parameters arg =
let (loc, _) = calling_conventions 0 15 100 115 incoming arg in loc
let loc_results res =
let (loc, _) = calling_conventions 0 15 100 115 not_supported res in loc
(* C calling convention:
first integer args in r0...r7
first float args in d0...d7
remaining args on stack.
Return values in r0...r1 or d0. *)
let loc_external_arguments arg =
calling_conventions 0 7 100 107 outgoing arg
let loc_external_results res =
let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
let loc_exn_bucket = phys_reg 0
(* Registers destroyed by operations *)
let destroyed_at_c_call =
(* x19-x28, d8-d15 preserved *)
Array.of_list (List.map phys_reg
[0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;
100;101;102;103;104;105;106;107;
116;117;118;119;120;121;122;123;
124;125;126;127;128;129;130;131])
let destroyed_at_oper = function
| Iop(Icall_ind | Icall_imm _) | Iop(Iextcall(_, true)) ->
all_phys_regs
| Iop(Iextcall(_, false)) ->
destroyed_at_c_call
| Iop(Ialloc _) ->
[| reg_x15 |]
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
[| reg_d7 |] (* d7 / s7 destroyed *)
| _ -> [||]
let destroyed_at_raise = all_phys_regs
(* Maximal register pressure *)
let safe_register_pressure = function
| Iextcall(_, _) -> 8
| Ialloc _ -> 25
| _ -> 26
let max_register_pressure = function
| Iextcall(_, _) -> [| 10; 8 |]
| Ialloc _ -> [| 25; 32 |]
| Iintoffloat | Ifloatofint
| Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
| _ -> [| 26; 32 |]
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
let contains_calls = ref false
(* Calling the assembler *)
let assemble_file infile outfile =
Ccomp.command (Config.asm ^ " -o " ^
Filename.quote outfile ^ " " ^ Filename.quote infile)
let init () = ()

16
asmcomp/arm64/reload.ml Normal file
View File

@ -0,0 +1,16 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Reloading for the ARM 64 bits *)
let fundecl f =
(new Reloadgen.reload_generic)#fundecl f

View File

@ -0,0 +1,18 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
let _ = let module M = Schedgen in () (* to create a dependency *)
(* Scheduling is turned off because the processor schedules dynamically
much better than what we could do. *)
let fundecl f = f

265
asmcomp/arm64/selection.ml Normal file
View File

@ -0,0 +1,265 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
(* reserved. This file is distributed under the terms of the Q *)
(* Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Instruction selection for the ARM processor *)
open Arch
open Cmm
open Mach
let is_offset chunk n =
(n >= -256 && n <= 255) (* 9 bits signed unscaled *)
|| (n >= 0 &&
match chunk with (* 12 bits unsigned, scaled by chunk size *)
| Byte_unsigned | Byte_signed ->
n < 0x1000
| Sixteen_unsigned | Sixteen_signed ->
n land 1 = 0 && n lsr 1 < 0x1000
| Thirtytwo_unsigned | Thirtytwo_signed | Single ->
n land 3 = 0 && n lsr 2 < 0x1000
| Word | Double | Double_u ->
n land 7 = 0 && n lsr 3 < 0x1000)
(* An automaton to recognize ( 0+1+0* | 1+0+1* )
0 1 0
/ \ / \ / \
\ / \ / \ /
-0--> [1] --1--> [2] --0--> [3]
/
[0]
\
-1--> [4] --0--> [5] --1--> [6]
/ \ / \ / \
\ / \ / \ /
1 0 1
The accepting states are 2, 3, 5 and 6. *)
let auto_table = [| (* accepting?, next on 0, next on 1 *)
(* state 0 *) (false, 1, 4);
(* state 1 *) (false, 1, 2);
(* state 2 *) (true, 3, 2);
(* state 3 *) (true, 3, 7);
(* state 4 *) (false, 5, 4);
(* state 5 *) (true, 5, 6);
(* state 6 *) (true, 7, 6);
(* state 7 *) (false, 7, 7) (* error state *)
|]
let rec run_automata nbits state input =
let (acc, next0, next1) = auto_table.(state) in
if nbits <= 0
then acc
else run_automata (nbits - 1)
(if input land 1 = 0 then next0 else next1)
(input asr 1)
(* We are very conservative wrt what ARM64 supports: we don't support
repetitions of a 000111000 or 1110000111 pattern, just a single
pattern of this kind. *)
let is_logical_immediate n =
n <> 0 && n <> -1 && run_automata 64 0 n
let is_intconst = function
Cconst_int _ -> true
| _ -> false
let inline_ops =
[ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap";
"caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ]
let use_direct_addressing symb =
(not !Clflags.dlcode) || Compilenv.symbol_in_current_unit symb
(* Instruction selection *)
class selector = object(self)
inherit Selectgen.selector_generic as super
method is_immediate n =
let mn = -n in
n land 0xFFF = n || n land 0xFFF_000 = n
|| mn land 0xFFF = mn || mn land 0xFFF_000 = mn
method! is_simple_expr = function
(* inlined floating-point ops are simple if their arguments are *)
| Cop(Cextcall(fn, _, _, _), args) when List.mem fn inline_ops ->
List.for_all self#is_simple_expr args
| e -> super#is_simple_expr e
method select_addressing chunk = function
| Cop(Cadda, [Cconst_symbol s; Cconst_int n])
when use_direct_addressing s ->
(Ibased(s, n), Ctuple [])
| Cop(Cadda, [arg; Cconst_int n])
when is_offset chunk n ->
(Iindexed n, arg)
| Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int n])])
when is_offset chunk n ->
(Iindexed n, Cop(Cadda, [arg1; arg2]))
| Cconst_symbol s
when use_direct_addressing s ->
(Ibased(s, 0), Ctuple [])
| arg ->
(Iindexed 0, arg)
method! select_operation op args =
match op with
(* Integer addition *)
| Caddi | Cadda ->
begin match args with
(* Add immediate *)
| [arg; Cconst_int n] | [Cconst_int n; arg] when self#is_immediate n ->
((if n >= 0 then Iintop_imm(Iadd, n) else Iintop_imm(Isub, -n)),
[arg])
(* Shift-add *)
| [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftadd, n)), [arg1; arg2])
| [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftadd, -n)), [arg1; arg2])
| [Cop(Clsl, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftadd, n)), [arg2; arg1])
| [Cop(Casr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftadd, -n)), [arg2; arg1])
(* Multiply-add *)
| [arg1; Cop(Cmuli, args2)] | [Cop(Cmuli, args2); arg1] ->
begin match self#select_operation Cmuli args2 with
| (Iintop_imm(Ilsl, l), [arg3]) ->
(Ispecific(Ishiftarith(Ishiftadd, l)), [arg1; arg3])
| (Iintop Imul, [arg3; arg4]) ->
(Ispecific Imuladd, [arg3; arg4; arg1])
| _ ->
super#select_operation op args
end
| _ ->
super#select_operation op args
end
(* Integer subtraction *)
| Csubi | Csuba ->
begin match args with
(* Sub immediate *)
| [arg; Cconst_int n] when self#is_immediate n ->
((if n >= 0 then Iintop_imm(Isub, n) else Iintop_imm(Iadd, -n)),
[arg])
(* Shift-sub *)
| [arg1; Cop(Clsl, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftsub, n)), [arg1; arg2])
| [arg1; Cop(Casr, [arg2; Cconst_int n])] when n > 0 && n < 64 ->
(Ispecific(Ishiftarith(Ishiftsub, -n)), [arg1; arg2])
(* Multiply-sub *)
| [arg1; Cop(Cmuli, args2)] ->
begin match self#select_operation Cmuli args2 with
| (Iintop_imm(Ilsl, l), [arg3]) ->
(Ispecific(Ishiftarith(Ishiftsub, l)), [arg1; arg3])
| (Iintop Imul, [arg3; arg4]) ->
(Ispecific Imulsub, [arg3; arg4; arg1])
| _ ->
super#select_operation op args
end
| _ ->
super#select_operation op args
end
(* Checkbounds *)
| Ccheckbound _ ->
begin match args with
| [Cop(Clsr, [arg1; Cconst_int n]); arg2] when n > 0 && n < 64 ->
(Ispecific(Ishiftcheckbound n), [arg1; arg2])
| _ ->
super#select_operation op args
end
(* Integer multiplication *)
(* ARM does not support immediate operands for multiplication *)
| Cmuli ->
begin match args with
| [arg; Cconst_int n] | [Cconst_int n; arg] ->
let l = Misc.log2 n in
if n = 1 lsl l
then (Iintop_imm(Ilsl, l), [arg])
else (Iintop Imul, args)
| _ ->
(Iintop Imul, args)
end
(* Division and modulus *)
(* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
| Cdivi ->
begin match args with
| [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
((if n = 1 then Imove else Iintop_imm(Idiv, n)), [arg])
| _ ->
(Iintop Idiv, args)
end
| Cmodi ->
begin match args with
| [arg; Cconst_int n] when n = 1 lsl Misc.log2 n ->
((if n = 1 then Iconst_int 0n else Iintop_imm(Imod, n)), [arg])
| _ ->
(Iintop Imod, args)
end
(* Bitwise logical operations have a different range of immediate
operands than the other instructions *)
| Cand -> self#select_logical Iand args
| Cor -> self#select_logical Ior args
| Cxor -> self#select_logical Ixor args
(* Recognize floating-point negate and multiply *)
| Cnegf ->
begin match args with
| [Cop(Cmulf, args)] -> (Ispecific Inegmulf, args)
| _ -> super#select_operation op args
end
(* Recognize floating-point multiply and add/sub *)
| Caddf ->
begin match args with
| [arg; Cop(Cmulf, args)] | [Cop(Cmulf, args); arg] ->
(Ispecific Imuladdf, arg :: args)
| _ ->
super#select_operation op args
end
| Csubf ->
begin match args with
| [arg; Cop(Cmulf, args)] ->
(Ispecific Imulsubf, arg :: args)
| [Cop(Cmulf, args); arg] ->
(Ispecific Inegmulsubf, arg :: args)
| _ ->
super#select_operation op args
end
(* Recognize floating-point square root *)
| Cextcall("sqrt", _, _, _) ->
(Ispecific Isqrtf, args)
(* Recognize bswap instructions *)
| Cextcall("caml_bswap16_direct", _, _, _) ->
(Ispecific(Ibswap 16), args)
| Cextcall("caml_int32_direct_bswap", _, _, _) ->
(Ispecific(Ibswap 32), args)
| Cextcall(("caml_int64_direct_bswap"|"caml_nativeint_direct_bswap"),
_, _, _) ->
(Ispecific (Ibswap 64), args)
(* Other operations are regular *)
| _ ->
super#select_operation op args
method select_logical op = function
| [arg; Cconst_int n] when is_logical_immediate n ->
(Iintop_imm(op, n), [arg])
| [Cconst_int n; arg] when is_logical_immediate n ->
(Iintop_imm(op, n), [arg])
| args ->
(Iintop op, args)
end
let fundecl f = (new selector)#emit_fundecl f

View File

@ -83,6 +83,15 @@ let make_symbol ?(unitname = current_unit.ui_symbol) idopt =
| None -> prefix
| Some id -> prefix ^ "__" ^ id
let symbol_in_current_unit name =
let prefix = "caml" ^ current_unit.ui_symbol in
name = prefix ||
(let lp = String.length prefix in
String.length name >= 2 + lp
&& String.sub name 0 lp = prefix
&& name.[lp] = '_'
&& name.[lp + 1] = '_')
let read_unit_info filename =
let ic = open_in_bin filename in
try

View File

@ -31,6 +31,10 @@ val make_symbol: ?unitname:string -> string option -> string
corresponds to symbol [id] in the compilation unit [u]
(or the current unit). *)
val symbol_in_current_unit: string -> bool
(* Return true if the given asm symbol belongs to the
current compilation unit, false otherwise. *)
val symbol_for_global: Ident.t -> string
(* Return the asm symbol that refers to the given global identifier *)

535
asmrun/arm64.S Normal file
View File

@ -0,0 +1,535 @@
/***********************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
/* */
/* Copyright 2013 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License, with */
/* the special exception on linking described in file ../LICENSE. */
/* */
/***********************************************************************/
/* Asm part of the runtime system, ARM processor, 64-bit mode */
/* Must be preprocessed by cpp */
/* Special registers */
#define TRAP_PTR x26
#define ALLOC_PTR x27
#define ALLOC_LIMIT x28
#define ARG x15
#define TMP x16
#define TMP2 x17
/* Support for CFI directives */
#if defined(ASM_CFI_SUPPORTED)
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
#define CFI_ADJUST(n) .cfi_adjust_cfa_offset n
#else
#define CFI_STARTPROC
#define CFI_ENDPROC
#define CFI_ADJUST(n)
#endif
/* Support for profiling with gprof */
#define PROFILE
/* Macros to load and store global variables. Destroy TMP2 */
#if defined(__PIC__)
#define ADDRGLOBAL(reg,symb) \
adrp TMP2, :got:symb; \
ldr reg, [TMP2, #:got_lo12:symb]
#define LOADGLOBAL(reg,symb) \
ADDRGLOBAL(TMP2,symb); \
ldr reg, [TMP2]
#define STOREGLOBAL(reg,symb) \
ADDRGLOBAL(TMP2,symb); \
str reg, [TMP2]
#else
#define ADDRGLOBAL(reg,symb) \
adrp reg, symb; \
add reg, reg, #:lo12:symb
#define LOADGLOBAL(reg,symb) \
adrp TMP2, symb; \
ldr reg, [TMP2, #:lo12:symb]
#define STOREGLOBAL(reg,symb) \
adrp TMP2, symb; \
str reg, [TMP2, #:lo12:symb]
#endif
/* Allocation functions and GC interface */
.globl caml_system__code_begin
caml_system__code_begin:
.align 2
.globl caml_call_gc
caml_call_gc:
CFI_STARTPROC
PROFILE
/* Record return address */
STOREGLOBAL(x30, caml_last_return_address)
.Lcaml_call_gc:
/* Record lowest stack address */
mov TMP, sp
STOREGLOBAL(TMP, caml_bottom_of_stack)
/* Set up stack space, saving return address and frame pointer */
/* (2 regs RA/GP, 24 allocatable int regs, 24 caller-save float regs) * 8 */
stp x29, x30, [sp, -400]!
CFI_ADJUST(400)
add x29, sp, #0
/* Save allocatable integer registers on the stack, in the order
given in proc.ml */
stp x0, x1, [sp, 16]
stp x2, x3, [sp, 32]
stp x4, x5, [sp, 48]
stp x6, x7, [sp, 64]
stp x8, x9, [sp, 80]
stp x10, x11, [sp, 96]
stp x12, x13, [sp, 112]
stp x14, x15, [sp, 128]
stp x19, x20, [sp, 144]
stp x21, x22, [sp, 160]
stp x23, x24, [sp, 176]
str x25, [sp, 192]
/* Save caller-save floating-point registers on the stack
(callee-saves are preserved by caml_garbage_collection) */
stp d0, d1, [sp, 208]
stp d2, d3, [sp, 224]
stp d4, d5, [sp, 240]
stp d6, d7, [sp, 256]
stp d16, d17, [sp, 272]
stp d18, d19, [sp, 288]
stp d20, d21, [sp, 304]
stp d22, d23, [sp, 320]
stp d24, d25, [sp, 336]
stp d26, d27, [sp, 352]
stp d28, d29, [sp, 368]
stp d30, d31, [sp, 384]
/* Store pointer to saved integer registers in caml_gc_regs */
add TMP, sp, #16
STOREGLOBAL(TMP, caml_gc_regs)
/* Save current allocation pointer for debugging purposes */
STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
/* Save trap pointer in case an exception is raised during GC */
STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
/* Call the garbage collector */
bl caml_garbage_collection
/* Restore registers */
ldp x0, x1, [sp, 16]
ldp x2, x3, [sp, 32]
ldp x4, x5, [sp, 48]
ldp x6, x7, [sp, 64]
ldp x8, x9, [sp, 80]
ldp x10, x11, [sp, 96]
ldp x12, x13, [sp, 112]
ldp x14, x15, [sp, 128]
ldp x19, x20, [sp, 144]
ldp x21, x22, [sp, 160]
ldp x23, x24, [sp, 176]
ldr x25, [sp, 192]
ldp d0, d1, [sp, 208]
ldp d2, d3, [sp, 224]
ldp d4, d5, [sp, 240]
ldp d6, d7, [sp, 256]
ldp d16, d17, [sp, 272]
ldp d18, d19, [sp, 288]
ldp d20, d21, [sp, 304]
ldp d22, d23, [sp, 320]
ldp d24, d25, [sp, 336]
ldp d26, d27, [sp, 352]
ldp d28, d29, [sp, 368]
ldp d30, d31, [sp, 384]
/* Reload new allocation pointer and allocation limit */
LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
/* Free stack space and return to caller */
ldp x29, x30, [sp], 400
ret
CFI_ENDPROC
.type caml_call_gc, %function
.size caml_call_gc, .-caml_call_gc
.align 2
.globl caml_alloc1
caml_alloc1:
CFI_STARTPROC
PROFILE
1: sub ALLOC_PTR, ALLOC_PTR, #16
cmp ALLOC_PTR, ALLOC_LIMIT
b.lo 2f
ret
2: stp x29, x30, [sp, -16]!
CFI_ADJUST(16)
add x29, sp, #0
/* Record return address */
STOREGLOBAL(x30, caml_last_return_address)
/* Call GC */
bl .Lcaml_call_gc
/* Restore return address */
ldp x29, x30, [sp], 16
CFI_ADJUST(-16)
/* Try again */
b 1b
CFI_ENDPROC
.type caml_alloc1, %function
.size caml_alloc1, .-caml_alloc1
.align 2
.globl caml_alloc2
caml_alloc2:
CFI_STARTPROC
PROFILE
1: sub ALLOC_PTR, ALLOC_PTR, #24
cmp ALLOC_PTR, ALLOC_LIMIT
b.lo 2f
ret
2: stp x29, x30, [sp, -16]!
CFI_ADJUST(16)
add x29, sp, #0
/* Record return address */
STOREGLOBAL(x30, caml_last_return_address)
/* Call GC */
bl .Lcaml_call_gc
/* Restore return address */
ldp x29, x30, [sp], 16
CFI_ADJUST(-16)
/* Try again */
b 1b
CFI_ENDPROC
.type caml_alloc2, %function
.size caml_alloc2, .-caml_alloc2
.align 2
.globl caml_alloc3
caml_alloc3:
CFI_STARTPROC
PROFILE
1: sub ALLOC_PTR, ALLOC_PTR, #32
cmp ALLOC_PTR, ALLOC_LIMIT
b.lo 2f
ret
2: stp x29, x30, [sp, -16]!
CFI_ADJUST(16)
add x29, sp, #0
/* Record return address */
STOREGLOBAL(x30, caml_last_return_address)
/* Call GC */
bl .Lcaml_call_gc
/* Restore return address */
ldp x29, x30, [sp], 16
CFI_ADJUST(-16)
/* Try again */
b 1b
CFI_ENDPROC
.type caml_alloc2, %function
.size caml_alloc2, .-caml_alloc2
.align 2
.globl caml_allocN
caml_allocN:
CFI_STARTPROC
PROFILE
1: sub ALLOC_PTR, ALLOC_PTR, ARG
cmp ALLOC_PTR, ALLOC_LIMIT
b.lo 2f
ret
2: stp x29, x30, [sp, -16]!
CFI_ADJUST(16)
add x29, sp, #0
/* Record return address */
STOREGLOBAL(x30, caml_last_return_address)
/* Call GC. This preserves ARG */
bl .Lcaml_call_gc
/* Restore return address */
ldp x29, x30, [sp], 16
CFI_ADJUST(-16)
/* Try again */
b 1b
CFI_ENDPROC
.type caml_allocN, %function
.size caml_allocN, .-caml_allocN
/* Call a C function from OCaml */
/* Function to call is in ARG */
.align 2
.globl caml_c_call
caml_c_call:
CFI_STARTPROC
PROFILE
/* Preserve return address in callee-save register x19 */
mov x19, x30
/* Record lowest stack address and return address */
STOREGLOBAL(x30, caml_last_return_address)
add TMP, sp, #0
STOREGLOBAL(TMP, caml_bottom_of_stack)
/* Make the exception handler alloc ptr available to the C code */
STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
/* Call the function */
blr ARG
/* Reload alloc ptr and alloc limit */
LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
/* Return */
ret x19
CFI_ENDPROC
.type caml_c_call, %function
.size caml_c_call, .-caml_c_call
/* Start the OCaml program */
.align 2
.globl caml_start_program
caml_start_program:
CFI_STARTPROC
PROFILE
ADDRGLOBAL(ARG, caml_program)
/* Code shared with caml_callback* */
/* Address of OCaml code to call is in ARG */
/* Arguments to the OCaml code are in x0...x7 */
.Ljump_to_caml:
/* Set up stack frame and save callee-save registers */
stp x29, x30, [sp, -160]!
CFI_ADJUST(160)
add x29, sp, #0
stp x19, x20, [sp, 16]
stp x21, x22, [sp, 32]
stp x23, x24, [sp, 48]
stp x25, x26, [sp, 64]
stp x27, x28, [sp, 80]
stp d8, d9, [sp, 96]
stp d10, d11, [sp, 112]
stp d12, d13, [sp, 128]
stp d14, d15, [sp, 144]
/* Setup a callback link on the stack */
LOADGLOBAL(x8, caml_bottom_of_stack)
LOADGLOBAL(x9, caml_last_return_address)
LOADGLOBAL(x10, caml_gc_regs)
stp x8, x9, [sp, -32]! /* 16-byte alignment */
CFI_ADJUST(32)
str x10, [sp, 16]
/* Setup a trap frame to catch exceptions escaping the OCaml code */
LOADGLOBAL(x8, caml_exception_pointer)
adr x9, .Ltrap_handler
stp x8, x9, [sp, -16]!
CFI_ADJUST(16)
add TRAP_PTR, sp, #0
/* Reload allocation pointers */
LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
/* Call the OCaml code */
blr ARG
.Lcaml_retaddr:
/* Pop the trap frame, restoring caml_exception_pointer */
ldr x8, [sp], 16
CFI_ADJUST(-16)
STOREGLOBAL(x8, caml_exception_pointer)
/* Pop the callback link, restoring the global variables */
.Lreturn_result:
ldr x10, [sp, 16]
ldp x8, x9, [sp], 32
CFI_ADJUST(-32)
STOREGLOBAL(x8, caml_bottom_of_stack)
STOREGLOBAL(x9, caml_last_return_address)
STOREGLOBAL(x10, caml_gc_regs)
/* Update allocation pointer */
STOREGLOBAL(ALLOC_PTR, caml_young_ptr)
/* Reload callee-save registers and return address */
ldp x19, x20, [sp, 16]
ldp x21, x22, [sp, 32]
ldp x23, x24, [sp, 48]
ldp x25, x26, [sp, 64]
ldp x27, x28, [sp, 80]
ldp d8, d9, [sp, 96]
ldp d10, d11, [sp, 112]
ldp d12, d13, [sp, 128]
ldp d14, d15, [sp, 144]
ldp x29, x30, [sp], 160
CFI_ADJUST(-160)
/* Return to C caller */
ret
CFI_ENDPROC
.type .Lcaml_retaddr, %function
.size .Lcaml_retaddr, .-.Lcaml_retaddr
.type caml_start_program, %function
.size caml_start_program, .-caml_start_program
/* The trap handler */
.align 2
.Ltrap_handler:
CFI_STARTPROC
/* Save exception pointer */
STOREGLOBAL(TRAP_PTR, caml_exception_pointer)
/* Encode exception bucket as an exception result */
orr x0, x0, #2
/* Return it */
b .Lreturn_result
CFI_ENDPROC
.type .Ltrap_handler, %function
.size .Ltrap_handler, .-.Ltrap_handler
/* Raise an exception from OCaml */
.align 2
.globl caml_raise_exn
caml_raise_exn:
CFI_STARTPROC
PROFILE
/* Test if backtrace is active */
LOADGLOBAL(TMP, caml_backtrace_active)
cbnz TMP, 2f
1: /* Cut stack at current trap handler */
mov sp, TRAP_PTR
/* Pop previous handler and jump to it */
ldr TMP, [sp, 8]
ldr TRAP_PTR, [sp], 16
br TMP
2: /* Preserve exception bucket in callee-save register x19 */
mov x19, x0
/* Stash the backtrace */
/* arg1: exn bucket, already in x0 */
mov x1, x30 /* arg2: pc of raise */
add x2, sp, #0 /* arg3: sp of raise */
mov x3, TRAP_PTR /* arg4: sp of handler */
bl caml_stash_backtrace
/* Restore exception bucket and raise */
mov x0, x19
b 1b
CFI_ENDPROC
.type caml_raise_exn, %function
.size caml_raise_exn, .-caml_raise_exn
/* Raise an exception from C */
.align 2
.globl caml_raise_exception
caml_raise_exception:
CFI_STARTPROC
PROFILE
/* Reload trap ptr, alloc ptr and alloc limit */
LOADGLOBAL(TRAP_PTR, caml_exception_pointer)
LOADGLOBAL(ALLOC_PTR, caml_young_ptr)
LOADGLOBAL(ALLOC_LIMIT, caml_young_limit)
/* Test if backtrace is active */
LOADGLOBAL(TMP, caml_backtrace_active)
cbnz TMP, 2f
1: /* Cut stack at current trap handler */
mov sp, TRAP_PTR
/* Pop previous handler and jump to it */
ldr TMP, [sp, 8]
ldr TRAP_PTR, [sp], 16
br TMP
2: /* Preserve exception bucket in callee-save register x19 */
mov x19, x0
/* Stash the backtrace */
/* arg1: exn bucket, already in x0 */
LOADGLOBAL(x1, caml_last_return_address) /* arg2: pc of raise */
LOADGLOBAL(x2, caml_bottom_of_stack) /* arg3: sp of raise */
mov x3, TRAP_PTR /* arg4: sp of handler */
bl caml_stash_backtrace
/* Restore exception bucket and raise */
mov x0, x19
b 1b
CFI_ENDPROC
.type caml_raise_exception, %function
.size caml_raise_exception, .-caml_raise_exception
/* Callback from C to OCaml */
.align 2
.globl caml_callback_exn
caml_callback_exn:
CFI_STARTPROC
PROFILE
/* Initial shuffling of arguments (x0 = closure, x1 = first arg) */
mov TMP, x0
mov x0, x1 /* x0 = first arg */
mov x1, TMP /* x1 = closure environment */
ldr ARG, [TMP] /* code pointer */
b .Ljump_to_caml
CFI_ENDPROC
.type caml_callback_exn, %function
.size caml_callback_exn, .-caml_callback_exn
.align 2
.globl caml_callback2_exn
caml_callback2_exn:
CFI_STARTPROC
PROFILE
/* Initial shuffling of arguments (x0 = closure, x1 = arg1, x2 = arg2) */
mov TMP, x0
mov x0, x1 /* x0 = first arg */
mov x1, x2 /* x1 = second arg
mov x2, TMP /* x2 = closure environment */
ADDRGLOBAL(ARG, caml_apply2)
b .Ljump_to_caml
CFI_ENDPROC
.type caml_callback2_exn, %function
.size caml_callback2_exn, .-caml_callback2_exn
.align 2
.globl caml_callback3_exn
caml_callback3_exn:
CFI_STARTPROC
PROFILE
/* Initial shuffling of arguments */
/* (x0 = closure, x1 = arg1, x2 = arg2, x3 = arg3) */
mov TMP, x0
mov x0, x1 /* x0 = first arg */
mov x1, x2 /* x1 = second arg */
mov x2, x3 /* x2 = third arg */
mov x3, TMP /* x3 = closure environment */
ADDRGLOBAL(ARG, caml_apply3)
b .Ljump_to_caml
CFI_ENDPROC
.type caml_callback3_exn, %function
.size caml_callback3_exn, .-caml_callback3_exn
.align 2
.globl caml_ml_array_bound_error
caml_ml_array_bound_error:
CFI_STARTPROC
PROFILE
/* Load address of [caml_array_bound_error] in ARG */
ADDRGLOBAL(ARG, caml_array_bound_error)
/* Call that function */
b caml_c_call
CFI_ENDPROC
.type caml_ml_array_bound_error, %function
.size caml_ml_array_bound_error, .-caml_ml_array_bound_error
.globl caml_system__code_end
caml_system__code_end:
/* GC roots for callback */
.data
.align 3
.globl caml_system__frametable
caml_system__frametable:
.quad 1 /* one descriptor */
.quad .Lcaml_retaddr /* return address into callback */
.short -1 /* negative frame size => use callback link */
.short 0 /* no roots */
.align 3
.type caml_system__frametable, %object
.size caml_system__frametable, .-caml_system__frametable

View File

@ -92,6 +92,25 @@
#define CONTEXT_YOUNG_PTR (context->uc_mcontext.arm_r8)
#define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
/****************** ARM64, Linux */
#elif defined(TARGET_arm64) && defined(SYS_linux)
#include <sys/ucontext.h>
#define DECLARE_SIGNAL_HANDLER(name) \
static void name(int sig, siginfo_t * info, ucontext_t * context)
#define SET_SIGACT(sigact,name) \
sigact.sa_sigaction = (void (*)(int,siginfo_t *,void *)) (name); \
sigact.sa_flags = SA_SIGINFO
typedef unsigned long context_reg;
#define CONTEXT_PC (context->uc_mcontext.pc)
#define CONTEXT_EXCEPTION_POINTER (context->uc_mcontext.regs[26])
#define CONTEXT_YOUNG_PTR (context->uc_mcontext.regs[27])
#define CONTEXT_FAULTING_ADDRESS ((char *) context->uc_mcontext.fault_address)
/****************** AMD64, Solaris x86 */
#elif defined(TARGET_amd64) && defined (SYS_solaris)

View File

@ -56,6 +56,11 @@
#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
#endif
#ifdef TARGET_arm64
#define Saved_return_address(sp) *((intnat *)((sp) - 8))
#define Callback_link(sp) ((struct caml_context *)((sp) + 16))
#endif
/* Structure of OCaml callback contexts */
struct caml_context {

View File

@ -173,6 +173,12 @@ sp is a local copy of the global variable caml_extern_sp. */
#define SP_REG asm("%r14")
#define ACCU_REG asm("%r13")
#endif
#ifdef __aarch64__
#define PC_REG asm("%x19")
#define SP_REG asm("%x20")
#define ACCU_REG asm("%x21")
#define JUMPTBL_BASE_REG asm("%x22")
#endif
#endif
/* Division and modulus madness */

5
configure vendored
View File

@ -795,6 +795,7 @@ if test $withsharedlibs = "yes"; then
x86_64-*-netbsd*) natdynlink=true;;
i386-*-gnu0.3) natdynlink=true;;
arm*-*-linux*) natdynlink=true;;
aarch64-*-linux*) natdynlink=true;;
esac
fi
@ -854,6 +855,7 @@ case "$target" in
x86_64-*-openbsd*) arch=amd64; system=openbsd;;
x86_64-*-darwin*) arch=amd64; system=macosx;;
x86_64-*-mingw*) arch=amd64; system=mingw;;
aarch64-*-linux*) arch=arm64; system=linux;;
esac
# Some platforms exist both in 32-bit and 64-bit variants, not distinguished
@ -915,7 +917,7 @@ case "$arch,$system" in
*gcc*) aspp="${TOOLPREF}gcc -c";;
*) aspp="${TOOLPREF}as -P";;
esac;;
amd64,*|arm,*|i386,*|power,bsd|sparc,*)
amd64,*|arm,*|arm64,*|i386,*|power,bsd|sparc,*)
as="${TOOLPREF}as"
aspp="${TOOLPREF}gcc -c";;
esac
@ -1347,6 +1349,7 @@ case "$arch" in
fi;;
power) bng_arch=ppc; bng_asm_level=1;;
amd64) bng_arch=amd64; bng_asm_level=1;;
arm64) bng_arch=arm64; bng_asm_level=1;;
*) bng_arch=generic; bng_asm_level=0;;
esac

View File

@ -23,12 +23,10 @@
#include "bng_amd64.c"
#elif defined(BNG_ARCH_ppc)
#include "bng_ppc.c"
#elif defined (BNG_ARCH_alpha)
#include "bng_alpha.c"
#elif defined (BNG_ARCH_sparc)
#include "bng_sparc.c"
#elif defined (BNG_ARCH_mips)
#include "bng_mips.c"
#elif defined (BNG_ARCH_arm64)
#include "bng_arm64.c"
#endif
#endif

20
otherlibs/num/bng_arm64.c Normal file
View File

@ -0,0 +1,20 @@
/***********************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
/* */
/* Copyright 2013 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License, with */
/* the special exception on linking described in file ../../LICENSE. */
/* */
/***********************************************************************/
/* Code specific for the ARM 64 (AArch64) architecture */
#define BngMult(resh,resl,arg1,arg2) \
asm("mul %0, %2, %3 \n\t" \
"umulh %1, %2, %3" \
: "=&r" (resl), "=&r" (resh) \
: "r" (arg1), "r" (arg2))

View File

@ -126,7 +126,7 @@ parsecmm.mli parsecmm.ml: parsecmm.mly
lexcmm.ml: lexcmm.mll
@$(OCAMLLEX) -q lexcmm.mll
CASES=fib tak quicksort quicksort2 soli \
CASES=fib tak quicksort quicksort2 soli integr \
arith checkbound tagged-fib tagged-integr tagged-quicksort tagged-tak
ARGS_fib=-DINT_INT -DFUN=fib main.c
ARGS_tak=-DUNIT_INT -DFUN=takmain main.c

View File

@ -0,0 +1,52 @@
/***********************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Gallium, INRIA Rocquencourt */
/* */
/* Copyright 2013 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the Q Public License version 1.0. */
/* */
/***********************************************************************/
.globl call_gen_code
.align 2
call_gen_code:
/* Set up stack frame and save callee-save registers */
stp x29, x30, [sp, -160]!
add x29, sp, #0
stp x19, x20, [sp, 16]
stp x21, x22, [sp, 32]
stp x23, x24, [sp, 48]
stp x25, x26, [sp, 64]
stp x27, x28, [sp, 80]
stp d8, d9, [sp, 96]
stp d10, d11, [sp, 112]
stp d12, d13, [sp, 128]
stp d14, d15, [sp, 144]
/* Shuffle arguments */
mov x8, x0
mov x0, x1
mov x1, x2
mov x2, x3
mov x3, x4
/* Call generated asm */
blr x8
/* Reload callee-save registers and return address */
ldp x19, x20, [sp, 16]
ldp x21, x22, [sp, 32]
ldp x23, x24, [sp, 48]
ldp x25, x26, [sp, 64]
ldp x27, x28, [sp, 80]
ldp d8, d9, [sp, 96]
ldp d10, d11, [sp, 112]
ldp d12, d13, [sp, 128]
ldp d14, d15, [sp, 144]
ldp x29, x30, [sp], 160
ret
.globl caml_c_call
.align 2
caml_c_call:
br x15

View File

@ -13,6 +13,7 @@
open Clflags
let compile_file filename =
Clflags.dlcode := false;
Compilenv.reset "test";
Emit.begin_assembly();
let ic = open_in filename in