363 lines
12 KiB
OCaml
363 lines
12 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Benedikt Meurer, University of Siegen *)
|
|
(* *)
|
|
(* Copyright 1998 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. *)
|
|
(* Copyright 2012 Benedikt Meurer. *)
|
|
(* *)
|
|
(* All rights reserved. This file is distributed under the terms of *)
|
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
|
(* special exception on linking described in the file LICENSE. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
(* Description of the ARM processor *)
|
|
|
|
open Misc
|
|
open Cmm
|
|
open Reg
|
|
open Arch
|
|
open Mach
|
|
|
|
(* Instruction selection *)
|
|
|
|
let word_addressed = false
|
|
|
|
(* Registers available for register allocation *)
|
|
|
|
(* Integer register map:
|
|
r0 - r3 general purpose (not preserved)
|
|
r4 - r7 general purpose (preserved)
|
|
r8 trap pointer (preserved)
|
|
r9 platform register, usually reserved
|
|
r10 allocation pointer (preserved)
|
|
r11 domain state pointer (preserved)
|
|
r12 intra-procedural scratch register (not preserved)
|
|
r13 stack pointer
|
|
r14 return address
|
|
r15 program counter
|
|
Floating-point register map (VFPv{2,3}):
|
|
d0 - d7 general purpose (not preserved)
|
|
d8 - d15 general purpose (preserved)
|
|
d16 - d31 general purpose (not preserved), VFPv3 only
|
|
*)
|
|
|
|
let int_reg_name =
|
|
[| "r0"; "r1"; "r2"; "r3"; "r4"; "r5"; "r6"; "r7"; "r12" |]
|
|
|
|
let float_reg_name =
|
|
[| "d0"; "d1"; "d2"; "d3"; "d4"; "d5"; "d6"; "d7";
|
|
"d8"; "d9"; "d10"; "d11"; "d12"; "d13"; "d14"; "d15";
|
|
"d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
|
|
"d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
|
|
|
|
(* We have three register classes:
|
|
0 for integer registers
|
|
1 for VFPv2 and VFPv3-D16
|
|
2 for VFPv3
|
|
This way we can choose between VFPv2/VFPv3-D16 and VFPv3
|
|
at (ocamlopt) runtime using command line switches.
|
|
*)
|
|
|
|
let num_register_classes = 3
|
|
|
|
let register_class r =
|
|
match (r.typ, !fpu) with
|
|
| (Val | Int | Addr), _ -> 0
|
|
| Float, VFPv2 -> 1
|
|
| Float, VFPv3_D16 -> 1
|
|
| Float, _ -> 2
|
|
|
|
let num_available_registers =
|
|
[| 9; 16; 32 |]
|
|
|
|
let first_available_register =
|
|
[| 0; 100; 100 |]
|
|
|
|
let register_name r =
|
|
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
|
|
|
|
let rotate_registers = true
|
|
|
|
(* Representation of hard registers by pseudo-registers *)
|
|
|
|
let hard_int_reg =
|
|
let v = Array.make 9 Reg.dummy in
|
|
for i = 0 to 8 do
|
|
v.(i) <- Reg.at_location Int (Reg i)
|
|
done;
|
|
v
|
|
|
|
let hard_float_reg =
|
|
let v = Array.make 32 Reg.dummy in
|
|
for i = 0 to 31 do
|
|
v.(i) <- Reg.at_location Float (Reg(100 + i))
|
|
done;
|
|
v
|
|
|
|
let all_phys_regs =
|
|
Array.append hard_int_reg hard_float_reg
|
|
|
|
let phys_reg n =
|
|
if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
|
|
|
|
let stack_slot slot ty =
|
|
Reg.at_location ty (Stack slot)
|
|
|
|
(* Calling conventions *)
|
|
|
|
let loc_int last_int make_stack int ofs =
|
|
if !int <= last_int then begin
|
|
let l = phys_reg !int in
|
|
incr int; l
|
|
end else begin
|
|
let l = stack_slot (make_stack !ofs) Int in
|
|
ofs := !ofs + size_int; l
|
|
end
|
|
|
|
let loc_float last_float make_stack float ofs =
|
|
assert (abi = EABI_HF);
|
|
assert (!fpu >= VFPv2);
|
|
if !float <= last_float then begin
|
|
let l = phys_reg !float in
|
|
incr float; l
|
|
end else begin
|
|
ofs := Misc.align !ofs size_float;
|
|
let l = stack_slot (make_stack !ofs) Float in
|
|
ofs := !ofs + size_float; l
|
|
end
|
|
|
|
let loc_int_pair last_int make_stack int ofs =
|
|
(* 64-bit quantities split across two registers must either be in a
|
|
consecutive pair of registers where the lowest numbered is an
|
|
even-numbered register; or in a stack slot that is 8-byte aligned. *)
|
|
int := Misc.align !int 2;
|
|
if !int <= last_int - 1 then begin
|
|
let reg_lower = phys_reg !int in
|
|
let reg_upper = phys_reg (1 + !int) in
|
|
int := !int + 2;
|
|
[| reg_lower; reg_upper |]
|
|
end else begin
|
|
let size_int64 = size_int * 2 in
|
|
ofs := Misc.align !ofs size_int64;
|
|
let stack_lower = stack_slot (make_stack !ofs) Int in
|
|
let stack_upper = stack_slot (make_stack (size_int + !ofs)) Int in
|
|
ofs := !ofs + size_int64;
|
|
[| stack_lower; stack_upper |]
|
|
end
|
|
|
|
let calling_conventions first_int last_int first_float last_float make_stack
|
|
arg =
|
|
let loc = Array.make (Array.length arg) Reg.dummy in
|
|
let int = ref first_int in
|
|
let float = ref first_float in
|
|
let ofs = ref 0 in
|
|
for i = 0 to Array.length arg - 1 do
|
|
match arg.(i) with
|
|
| Val | Int | Addr ->
|
|
loc.(i) <- loc_int last_int make_stack int ofs
|
|
| Float ->
|
|
loc.(i) <- loc_float last_float make_stack float ofs
|
|
done;
|
|
(loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
|
|
|
|
let incoming ofs = Incoming ofs
|
|
let outgoing ofs = Outgoing ofs
|
|
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
|
|
|
|
(* OCaml calling convention:
|
|
first integer args in r0...r7
|
|
first float args in d0...d15 (EABI+VFP)
|
|
remaining args on stack.
|
|
Return values in r0...r7 or d0...d15. *)
|
|
|
|
let max_arguments_for_tailcalls = 8
|
|
|
|
let loc_arguments arg =
|
|
calling_conventions 0 7 100 115 outgoing arg
|
|
|
|
let loc_parameters arg =
|
|
let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
|
|
|
|
let loc_results res =
|
|
let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
|
|
|
|
(* C calling convention:
|
|
first integer args in r0...r3
|
|
first 64-bit integer args in r0-r1, r2-r3
|
|
first float args in d0...d7 (EABI+VFP)
|
|
first float args in r0-r1, r2-r3 (soft FP)
|
|
remaining args on stack.
|
|
Return values in r0, r0-r1, or d0. *)
|
|
|
|
let external_calling_conventions first_int last_int first_float last_float
|
|
make_stack ty_args =
|
|
let loc = Array.make (List.length ty_args) [| Reg.dummy |] in
|
|
let int = ref first_int in
|
|
let float = ref first_float in
|
|
let ofs = ref 0 in
|
|
List.iteri
|
|
(fun i ty_arg ->
|
|
match ty_arg with
|
|
| XInt | XInt32 ->
|
|
loc.(i) <- [| loc_int last_int make_stack int ofs |]
|
|
| XInt64 ->
|
|
loc.(i) <- loc_int_pair last_int make_stack int ofs
|
|
| XFloat ->
|
|
loc.(i) <-
|
|
(if abi = EABI_HF
|
|
then [| loc_float last_float make_stack float ofs |]
|
|
else loc_int_pair last_int make_stack int ofs))
|
|
ty_args;
|
|
(loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
|
|
|
|
let loc_external_arguments ty_args =
|
|
external_calling_conventions 0 3 100 107 outgoing ty_args
|
|
|
|
let loc_external_results res =
|
|
let (loc, _) = calling_conventions 0 1 100 100 not_supported res
|
|
in loc
|
|
|
|
let loc_exn_bucket = phys_reg 0
|
|
|
|
(* See "DWARF for the ARM architecture" available from developer.arm.com. *)
|
|
|
|
let int_dwarf_reg_numbers =
|
|
[| 0; 1; 2; 3; 4; 5; 6; 7; 12 |]
|
|
|
|
let float_dwarf_reg_numbers_legacy =
|
|
[| 64; 65; 66; 67; 68; 69; 70; 71;
|
|
72; 73; 74; 75; 76; 77; 78; 79;
|
|
80; 81; 82; 83; 84; 85; 86; 87;
|
|
88; 89; 90; 91; 92; 93; 94; 95;
|
|
|]
|
|
|
|
let float_dwarf_reg_numbers =
|
|
[| 256; 257; 258; 259; 260; 261; 262; 263;
|
|
264; 265; 266; 267; 268; 269; 270; 271;
|
|
272; 273; 274; 275; 276; 277; 278; 279;
|
|
280; 281; 282; 283; 284; 285; 286; 287;
|
|
|]
|
|
|
|
let dwarf_register_numbers ~reg_class =
|
|
match reg_class with
|
|
| 0 -> int_dwarf_reg_numbers
|
|
| 1 ->
|
|
(* Section 3.1 note 4 says that the "new" VFPv3 register numberings
|
|
(as per [float_dwarf_reg_numbers]) should be used for VFPv2 as well.
|
|
However we believe that for <= ARMv6 we should use the legacy VFPv2
|
|
numberings. *)
|
|
if !arch <= ARMv6 then float_dwarf_reg_numbers_legacy
|
|
else float_dwarf_reg_numbers
|
|
| 2 -> float_dwarf_reg_numbers
|
|
| _ -> Misc.fatal_errorf "Bad register class %d" reg_class
|
|
|
|
let stack_ptr_dwarf_register_number = 13
|
|
|
|
(* Volatile registers: none *)
|
|
|
|
let regs_are_volatile _rs = false
|
|
|
|
(* Registers destroyed by operations *)
|
|
|
|
let destroyed_at_alloc = (* r0-r6, d0-d15 preserved *)
|
|
Array.of_list (List.map
|
|
phys_reg
|
|
[7;8;
|
|
116;117;118;119;120;121;122;123;
|
|
124;125;126;127;128;129;130;131])
|
|
|
|
let destroyed_at_c_call =
|
|
Array.of_list (List.map
|
|
phys_reg
|
|
(match abi with
|
|
EABI -> (* r4-r7 preserved *)
|
|
[0;1;2;3;8;
|
|
100;101;102;103;104;105;106;107;
|
|
108;109;110;111;112;113;114;115;
|
|
116;117;118;119;120;121;122;123;
|
|
124;125;126;127;128;129;130;131]
|
|
| EABI_HF -> (* r4-r7, d8-d15 preserved *)
|
|
[0;1;2;3;8;
|
|
100;101;102;103;104;105;106;107;
|
|
116;117;118;119;120;121;122;123;
|
|
124;125;126;127;128;129;130;131]))
|
|
|
|
let destroyed_at_oper = function
|
|
Iop(Icall_ind _ | Icall_imm _)
|
|
| Iop(Iextcall { alloc = true; _ }) ->
|
|
all_phys_regs
|
|
| Iop(Iextcall { alloc = false; _}) ->
|
|
destroyed_at_c_call
|
|
| Iop(Ialloc _) ->
|
|
destroyed_at_alloc
|
|
| Iop(Iconst_symbol _) when !Clflags.pic_code ->
|
|
[| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *)
|
|
| Iop(Iintop Imulh) when !arch < ARMv6 ->
|
|
[| phys_reg 8 |] (* r12 destroyed *)
|
|
| Iop(Iintop (Icomp _) | Iintop_imm(Icomp _, _))
|
|
when !arch >= ARMv8 && !thumb ->
|
|
[| phys_reg 3 |] (* r3 destroyed *)
|
|
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
|
|
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
|
|
| _ -> [||]
|
|
|
|
let destroyed_at_raise = all_phys_regs
|
|
|
|
(* lr is destroyed at [Lreloadretaddr], but lr is not used for register
|
|
allocation, and thus does not need to (and indeed cannot) occur here. *)
|
|
let destroyed_at_reloadretaddr = [| |]
|
|
|
|
(* Maximal register pressure *)
|
|
|
|
let safe_register_pressure = function
|
|
Iextcall _ -> if abi = EABI then 0 else 4
|
|
| Ialloc _ -> if abi = EABI then 0 else 7
|
|
| Iconst_symbol _ when !Clflags.pic_code -> 7
|
|
| Iintop Imulh when !arch < ARMv6 -> 8
|
|
| _ -> 9
|
|
|
|
let max_register_pressure = function
|
|
Iextcall _ -> if abi = EABI then [| 4; 0; 0 |] else [| 4; 8; 8 |]
|
|
| Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
|
|
| Iconst_symbol _ when !Clflags.pic_code -> [| 7; 16; 32 |]
|
|
| Iintoffloat | Ifloatofint
|
|
| Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
|
|
| Iintop Imulh when !arch < ARMv6 -> [| 8; 16; 32 |]
|
|
| _ -> [| 9; 16; 32 |]
|
|
|
|
(* Pure operations (without any side effect besides updating their result
|
|
registers). *)
|
|
|
|
let op_is_pure = function
|
|
| Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _
|
|
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
|
|
| Iintop(Icheckbound _) | Iintop_imm(Icheckbound _, _)
|
|
| Ispecific(Ishiftcheckbound _) -> false
|
|
| _ -> true
|
|
|
|
(* Layout of the stack *)
|
|
|
|
let frame_required fd =
|
|
let num_stack_slots = fd.fun_num_stack_slots in
|
|
fd.fun_contains_calls
|
|
|| num_stack_slots.(0) > 0
|
|
|| num_stack_slots.(1) > 0
|
|
|| num_stack_slots.(2) > 0
|
|
|
|
let prologue_required fd =
|
|
frame_required fd
|
|
|
|
(* Calling the assembler *)
|
|
|
|
let assemble_file infile outfile =
|
|
Ccomp.command (Config.asm ^ " " ^
|
|
(String.concat " " (Misc.debug_prefix_map_flags ())) ^
|
|
" -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile)
|
|
|
|
|
|
let init () = ()
|