Merge pull request #275 from xavierleroy/zsystem

IBM z Systems port of ocamlopt

This is a port of the OCaml native-code compiler to IBM's z Systems architecture under Linux.

z Systems (https://en.wikipedia.org/wiki/IBM_System_z), also known as "s390x" in the GNU/Linux world, is IBM's line of mainframe computers. They are supported by several Linux distributions: RHEL, Suse, Debian.

The OCaml port was developed by Bill O'Farrell at IBM Toronto, with help from Tristan Amini, based on OCaml 4.02.1. I upgraded the port to the current OCaml trunk and performed some simplifiications and fixes. A CLA was signed to cover the reuse of Bill O'Farrell's code.
master
Xavier Leroy 2015-11-19 14:28:37 +01:00
commit 4640bd0c10
18 changed files with 1826 additions and 18 deletions

View File

@ -101,6 +101,8 @@ Compilers:
(Mark Shinwell)
- GPR#270: Make [transl_exception_constructor] generate [Immutable] blocks
(Mark Shinwell)
- GPR#275: native-code generator for IBM z System running Linux
(Bill O'Farrell, Tristan Amini, Xavier Leroy)
- GPR#282: relax short-paths safety check in presence of module aliases, take
penalty into account while building the printing map.
(Thomas Refis, Leo White)

39
asmcomp/s390x/CSE.ml Normal file
View File

@ -0,0 +1,39 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Bill O'Farrell, IBM *)
(* *)
(* Copyright 2015 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2015 IBM (Bill O'Farrell with *)
(* help from Tristan Amini). All rights reserved. This file is *)
(* distributed under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* CSE for the Z Processor *)
open Arch
open Mach
open CSEgen
class cse = object (self)
inherit cse_generic as super
method! class_of_operation op =
match op with
| Ispecific(Imultaddf | Imultsubf) -> Op_pure
| _ -> super#class_of_operation op
method! is_cheap_operation op =
match op with
| Iconst_int n | Iconst_blockheader n ->
n >= -0x8000_0000n && n <= 0x7FFF_FFFFn
| _ -> false
end
let fundecl f =
(new cse)#fundecl f

86
asmcomp/s390x/arch.ml Normal file
View File

@ -0,0 +1,86 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Bill O'Farrell, IBM *)
(* *)
(* Copyright 2015 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2015 IBM (Bill O'Farrell with *)
(* help from Tristan Amini). All rights reserved. This file is *)
(* distributed under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Specific operations for the Z processor *)
open Format
(* Machine-specific command-line options *)
let pic_code = ref true
let command_line_options =
[ "-fPIC", Arg.Set pic_code,
" Generate position-independent machine code (default)";
"-fno-PIC", Arg.Clear pic_code,
" Generate position-dependent machine code" ]
(* Specific operations *)
type specific_operation =
Imultaddf (* multiply and add *)
| Imultsubf (* multiply and subtract *)
(* Addressing modes *)
type addressing_mode =
| Iindexed of int (* reg + displ *)
| Iindexed2 of int (* reg + reg + displ *)
(* Sizes, endianness *)
let big_endian = true
let size_addr = 8
let size_int = size_addr
let size_float = 8
let allow_unaligned_access = false
(* Behavior of division *)
let division_crashes_on_overflow = true
(* Operations on addressing modes *)
let identity_addressing = Iindexed 0
let offset_addressing addr delta =
match addr with
| Iindexed n -> Iindexed(n + delta)
| Iindexed2 n -> Iindexed2(n + delta)
let num_args_addressing = function
| Iindexed n -> 1
| Iindexed2 n -> 2
(* Printing operations and addressing modes *)
let print_addressing printreg addr ppf arg =
match addr with
| Iindexed n ->
let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
fprintf ppf "%a%s" printreg arg.(0) idx
| Iindexed2 n ->
let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
fprintf ppf "%a + %a%s" printreg arg.(0) printreg arg.(1) idx
let print_specific_operation printreg op ppf arg =
match op with
| Imultaddf ->
fprintf ppf "%a *f %a +f %a"
printreg arg.(0) printreg arg.(1) printreg arg.(2)
| Imultsubf ->
fprintf ppf "%a *f %a -f %a"
printreg arg.(0) printreg arg.(1) printreg arg.(2)

767
asmcomp/s390x/emit.mlp Normal file
View File

@ -0,0 +1,767 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Emission of Linux on Z 64-bit assembly code *)
module StringSet =
Set.Make(struct type t = string let compare (x:t) y = compare x y end)
open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux
(* Layout of the stack. The stack is kept 8-aligned. *)
let stack_offset = ref 0
let frame_size () =
let size =
!stack_offset + (* Trap frame, outgoing parameters *)
size_int * num_stack_slots.(0) + (* Local int variables *)
size_float * num_stack_slots.(1) + (* Local float variables *)
(if !contains_calls then size_addr else 0) in (* The return address *)
Misc.align size 8
let slot_offset loc cls =
match loc with
Local n ->
if cls = 0
then !stack_offset + num_stack_slots.(1) * size_float + n * size_int
else !stack_offset + n * size_float
| Incoming n -> frame_size() + n
| Outgoing n -> n
(* Output a symbol *)
let emit_symbol s = Emitaux.emit_symbol '.' s
(* Output function call *)
let emit_call s =
if !pic_code then
`brasl %r14, {emit_symbol s}@PLT`
else
`brasl %r14, {emit_symbol s}`
(* Output a label *)
let label_prefix = ".L"
let emit_label lbl =
emit_string label_prefix; emit_int lbl
let emit_data_label lbl =
emit_string label_prefix; emit_string "d"; emit_int lbl
(* Section switching *)
let data_space = " .section \".data\"\n"
let code_space = " .section \".text\"\n"
let rodata_space = " .section \".rodata\"\n"
(* Output a pseudo-register *)
let emit_reg r =
match r.loc with
| Reg r -> emit_string (register_name r)
| _ -> fatal_error "Emit.emit_reg"
let emit_gpr r = emit_string "%r"; emit_int r
let emit_fpr r = emit_string "%f"; emit_int r
(* Special registers *)
let reg_f15 = phys_reg 115
(* Output a stack reference *)
let emit_stack r =
match r.loc with
Stack s ->
let ofs = slot_offset s (register_class r) in `{emit_int ofs}(%r15)`
| _ -> fatal_error "Emit.emit_stack"
(* Output a load or store operation *)
let emit_load_store instr addressing_mode addr n arg =
match addressing_mode with
| Iindexed ofs ->
` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
| Iindexed2 ofs ->
` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
(* Adjust the stack pointer down by N.
Choose the shortest instruction possible for the value of N. *)
let emit_stack_adjust n =
let n = -n in
if n = 0 then ()
else if n >= 0 && n < 4096 then
` la %r15, {emit_int n}(%r15)\n`
else if n >= -0x80000 && n < 0x80000 then
` lay %r15, {emit_int n}(%r15)\n`
else
` agfi %r15, {emit_int n}\n`
(* Emit a 'add immediate' *)
let emit_addimm res arg n =
if n >= 0 && n < 4096 then
` la {emit_reg res}, {emit_int n}({emit_reg arg})\n`
else if n >= -0x80000 && n < 0x80000 then
` lay {emit_reg res}, {emit_int n}({emit_reg arg})\n`
else begin
if arg.loc <> res.loc then
` lgr {emit_reg res}, {emit_reg arg}\n`;
` agfi {emit_reg res}, {emit_int n}\n`
end
(* After a comparison, extract the result as 0 or 1 *)
(* The locgr instruction is not available in the z10 architecture,
so this code is currently unused. *)
(*
let emit_set_comp cmp res =
` lghi %r1, 1\n`;
` lghi {emit_reg res}, 0\n`;
begin match cmp with
Ceq -> ` locgre {emit_reg res}, %r1\n`
| Cne -> ` locgrne {emit_reg res}, %r1\n`
| Cgt -> ` locgrh {emit_reg res}, %r1\n`
| Cle -> ` locgrnh {emit_reg res}, %r1\n`
| Clt -> ` locgrl {emit_reg res}, %r1\n`
| Cge -> ` locgrnl {emit_reg res}, %r1\n`
end
*)
(* Record live pointers at call points *)
let record_frame live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
| {typ = Val; loc = Reg r} ->
live_offset := (r lsl 1) + 1 :: !live_offset
| {typ = Val; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| {typ = Addr} as r ->
Misc.fatal_error ("bad GC root " ^ Reg.name r)
| _ -> ())
live;
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
fd_live_offset = !live_offset;
fd_debuginfo = dbg } :: !frame_descriptors;
lbl
(* Record calls to caml_call_gc, emitted out of line. *)
type gc_call =
{ gc_lbl: label; (* Entry label *)
gc_return_lbl: label; (* Where to branch after GC *)
gc_frame_lbl: label } (* Label of frame descriptor *)
let call_gc_sites = ref ([] : gc_call list)
let emit_call_gc gc =
`{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
`{emit_label gc.gc_frame_lbl}: brcl 15, {emit_label gc.gc_return_lbl}\n`
(* Record calls to caml_ml_array_bound_error, emitted out of line. *)
type bound_error_call =
{ bd_lbl: label; (* Entry label *)
bd_frame: label } (* Label of frame descriptor *)
let bound_error_sites = ref ([] : bound_error_call list)
let bound_error_call = ref 0
let bound_error_label dbg =
if !Clflags.debug then begin
let lbl_bound_error = new_label() in
let lbl_frame = record_frame Reg.Set.empty dbg in
bound_error_sites :=
{ bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
lbl_bound_error
end else begin
if !bound_error_call = 0 then bound_error_call := new_label();
!bound_error_call
end
let emit_call_bound_error bd =
`{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
`{emit_label bd.bd_frame}:\n`
let emit_call_bound_errors () =
List.iter emit_call_bound_error !bound_error_sites;
if !bound_error_call > 0 then
`{emit_label !bound_error_call}: {emit_call "caml_ml_array_bound_error"}\n`
(* Record floating-point and large integer literals *)
let float_literals = ref ([] : (int64 * int) list)
let int_literals = ref ([] : (nativeint * int) list)
(* Masks for conditional branches after comparisons *)
let branch_for_comparison = function
Ceq -> 8 | Cne -> 7
| Cle -> 12 | Cgt -> 2
| Cge -> 10 | Clt -> 4
let name_for_int_comparison = function
Isigned cmp -> ("cgr", branch_for_comparison cmp)
| Iunsigned cmp -> ("clgr", branch_for_comparison cmp)
let name_for_int_comparison_imm = function
Isigned cmp -> ("cgfi", branch_for_comparison cmp)
| Iunsigned cmp -> ("clgfi", branch_for_comparison cmp)
(* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = unordered*)
let branch_for_float_comparison cmp neg =
match cmp with
Ceq -> if neg then 7 else 8
| Cne -> if neg then 8 else 7
| Cle -> if neg then 3 else 12
| Cgt -> if neg then 13 else 2
| Cge -> if neg then 5 else 10
| Clt -> if neg then 11 else 4
(* Names for various instructions *)
let name_for_intop = function
Iadd -> "agr"
| Isub -> "sgr"
| Imul -> "msgr"
| Iand -> "ngr"
| Ior -> "ogr"
| Ixor -> "xgr"
| _ -> Misc.fatal_error "Emit.Intop"
let name_for_floatop1 = function
Inegf -> "lcdbr"
| Iabsf -> "lpdbr"
| _ -> Misc.fatal_error "Emit.Iopf1"
let name_for_floatop2 = function
Iaddf -> "adbr"
| Isubf -> "sdbr"
| Imulf -> "mdbr"
| Idivf -> "ddbr"
| _ -> Misc.fatal_error "Emit.Iopf2"
let name_for_specific = function
Imultaddf -> "madbr"
| Imultsubf -> "msdbr"
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Output the assembly code for an instruction *)
let emit_instr i =
emit_debug_info i.dbg;
match i.desc with
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc <> dst.loc then begin
match (src, dst) with
{loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
` lgr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
` ldr {emit_reg dst}, {emit_reg src}\n`
| {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Stack sd} ->
` stg {emit_reg src}, {emit_stack dst}\n`
| {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
` std {emit_reg src}, {emit_stack dst}\n`
| {loc = Stack ss; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
` lg {emit_reg dst}, {emit_stack src}\n`
| {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
` ldy {emit_reg dst}, {emit_stack src}\n`
| (_, _) ->
fatal_error "Emit: Imove"
end
| Lop(Iconst_int n | Iconst_blockheader n) ->
if n >= -0x8000n && n <= 0x7FFFn then begin
` lghi {emit_reg i.res.(0)}, {emit_nativeint n}\n`;
end else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
` lgfi {emit_reg i.res.(0)}, {emit_nativeint n}\n`;
end else begin
let lbl = new_label() in
int_literals := (n, lbl) :: !int_literals;
` lgrl {emit_reg i.res.(0)}, {emit_label lbl}\n`;
end
| Lop(Iconst_float f) ->
let lbl = new_label() in
float_literals := (f, lbl) :: !float_literals;
` larl %r1, {emit_label lbl}\n`;
` ld {emit_reg i.res.(0)}, 0(%r1)\n`
| Lop(Iconst_symbol s) ->
if !pic_code then
` lgrl {emit_reg i.res.(0)}, {emit_symbol s}@GOTENT\n`
else
` larl {emit_reg i.res.(0)}, {emit_symbol s}\n`;
| Lop(Icall_ind) ->
` basr %r14, {emit_reg i.arg.(0)}\n`;
let lbl = record_frame i.live i.dbg in
`{emit_label lbl}:\n`
| Lop(Icall_imm s) ->
if !pic_code then
` brasl %r14, {emit_symbol s}@PLT\n`
else
` brasl %r14, {emit_symbol s}\n`;
let lbl = record_frame i.live i.dbg in
`{emit_label lbl}:\n`;
| Lop(Itailcall_ind) ->
let n = frame_size() in
if !contains_calls then
` lg %r14, {emit_int(n - size_addr)}(%r15)\n`;
emit_stack_adjust (-n);
` br {emit_reg i.arg.(0)}\n`
| Lop(Itailcall_imm s) ->
if s = !function_name then
` brcl 15, {emit_label !tailrec_entry_point}\n`
else begin
let n = frame_size() in
if !contains_calls then
` lg %r14, {emit_int(n - size_addr)}(%r15)\n`;
emit_stack_adjust (-n);
if !pic_code then
` brcl 15, {emit_symbol s}@PLT\n`
else
` brcl 15, {emit_symbol s}\n`
end
| Lop(Iextcall(s, alloc)) ->
if alloc then begin
if !pic_code then begin
` lgrl %r7, {emit_symbol s}@GOTENT\n`;
` brasl %r14, {emit_symbol "caml_c_call"}@PLT\n`
end else begin
` larl %r7, {emit_symbol s}\n`;
` brasl %r14, {emit_symbol "caml_c_call"}\n`
end;
let lbl = record_frame i.live i.dbg in
`{emit_label lbl}:\n`;
end else begin
if !pic_code then
` brasl %r14, {emit_symbol s}@PLT\n`
else
` brasl %r14, {emit_symbol s}\n`
end
| Lop(Istackoffset n) ->
emit_stack_adjust n;
stack_offset := !stack_offset + n
| Lop(Iload(chunk, addr)) ->
let loadinstr =
match chunk with
Byte_unsigned -> "llgc"
| Byte_signed -> "lgb"
| Sixteen_unsigned -> "llgh"
| Sixteen_signed -> "lgh"
| Thirtytwo_unsigned -> "llgf"
| Thirtytwo_signed -> "lgf"
| Word_int | Word_val -> "lg"
| Single -> "ley"
| Double | Double_u -> "ldy" in
emit_load_store loadinstr addr i.arg 0 i.res.(0);
if chunk = Single then
` ldebr {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
| Lop(Istore(Single, addr, _)) ->
` ledbr %f15, {emit_reg i.arg.(0)}\n`;
emit_load_store "stey" addr i.arg 1 reg_f15
| Lop(Istore(chunk, addr, _)) ->
let storeinstr =
match chunk with
Byte_unsigned | Byte_signed -> "stcy"
| Sixteen_unsigned | Sixteen_signed -> "sthy"
| Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
| Word_int | Word_val -> "stg"
| Single -> assert false
| Double | Double_u -> "stdy" in
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
| Lop(Ialloc n) ->
let lbl_redo = new_label() in
let lbl_call_gc = new_label() in
let lbl_frame = record_frame i.live i.dbg in
call_gc_sites :=
{ gc_lbl = lbl_call_gc;
gc_return_lbl = lbl_redo;
gc_frame_lbl = lbl_frame } :: !call_gc_sites;
`{emit_label lbl_redo}:`;
` lay %r11, {emit_int(-n)}(%r11)\n`;
` clgr %r11, %r10\n`;
` brcl 4, {emit_label lbl_call_gc}\n`; (* less than *)
` la {emit_reg i.res.(0)}, 8(%r11)\n`
| Lop(Iintop Imulh) ->
(* Hacker's Delight section 8.3:
mul-high-signed(a, b) = mul-high-unsigned(a, b)
- a if b < 0
- b if a < 0
or, without branches,
mul-high-signed(a, b) = mul-high-unsigned(a, b)
- (a & (b >>s 63))
- (b & (a >>s 63))
*)
` lgr %r1, {emit_reg i.arg.(0)}\n`;
` mlgr %r0, {emit_reg i.arg.(1)}\n`;
(* r0:r1 is 128-bit unsigned product; r0 is the high bits *)
` srag %r1, {emit_reg i.arg.(0)}, 63\n`;
` ngr %r1, {emit_reg i.arg.(1)}\n`;
` sgr %r0, %r1\n`;
` srag %r1, {emit_reg i.arg.(1)}, 63\n`;
` ngr %r1, {emit_reg i.arg.(0)}\n`;
` sgr %r0, %r1\n`;
` lgr {emit_reg i.res.(0)}, %r0\n`
| Lop(Iintop Imod) ->
` lgr %r1, {emit_reg i.arg.(0)}\n`;
` dsgr %r0, {emit_reg i.arg.(1)}\n`;
` lgr {emit_reg i.res.(0)}, %r0\n`
| Lop(Iintop Idiv) ->
` lgr %r1, {emit_reg i.arg.(0)}\n`;
` dsgr %r0, {emit_reg i.arg.(1)}\n`;
` lgr {emit_reg i.res.(0)}, %r1\n`
| Lop(Iintop Ilsl) ->
` sllg {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
| Lop(Iintop Ilsr) ->
` srlg {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
| Lop(Iintop Iasr) ->
` srag {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
| Lop(Iintop(Icomp cmp)) ->
let lbl = new_label() in
let (comp, mask) = name_for_int_comparison cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` lghi {emit_reg i.res.(0)}, 1\n`;
` brc {emit_int mask}, {emit_label lbl}\n`;
` lghi {emit_reg i.res.(0)}, 0\n`;
`{emit_label lbl}:\n`
| Lop(Iintop Icheckbound) ->
let lbl = bound_error_label i.dbg in
` clgr {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` brcl 12, {emit_label lbl}\n` (* branch if unsigned le *)
| Lop(Iintop op) ->
assert (i.arg.(0).loc = i.res.(0).loc);
let instr = name_for_intop op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
| Lop(Iintop_imm(Iadd, n)) ->
emit_addimm i.res.(0) i.arg.(0) n
| Lop(Iintop_imm(Isub, n)) ->
emit_addimm i.res.(0) i.arg.(0) (-n)
| Lop(Iintop_imm(Icomp cmp, n)) ->
let lbl = new_label() in
let (comp, mask) = name_for_int_comparison_imm cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_int n}\n`;
` lghi {emit_reg i.res.(0)}, 1\n`;
` brc {emit_int mask}, {emit_label lbl}\n`;
` lghi {emit_reg i.res.(0)}, 0\n`;
`{emit_label lbl}:\n`
| Lop(Iintop_imm(Icheckbound, n)) ->
let lbl = bound_error_label i.dbg in
if n >= 0 then begin
` clgfi {emit_reg i.arg.(0)}, {emit_int n}\n`;
` brcl 12, {emit_label lbl}\n` (* branch if unsigned le *)
end else begin
` brcl 15, {emit_label lbl}\n` (* branch always *)
end
| Lop(Iintop_imm(Ilsl, n)) ->
` sllg {emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
| Lop(Iintop_imm(Ilsr, n)) ->
` srlg {emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
| Lop(Iintop_imm(Iasr, n)) ->
` srag {emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
| Lop(Iintop_imm(Iand, n)) ->
assert (i.arg.(0).loc = i.res.(0).loc);
` nilf {emit_reg i.res.(0)}, {emit_int (n land 0xFFFF_FFFF)}\n`
| Lop(Iintop_imm(Ior, n)) ->
assert (i.arg.(0).loc = i.res.(0).loc);
` oilf {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Iintop_imm(Ixor, n)) ->
assert (i.arg.(0).loc = i.res.(0).loc);
` xilf {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Iintop_imm(Imul, n)) ->
assert (i.arg.(0).loc = i.res.(0).loc);
` msgfi {emit_reg i.res.(0)}, {emit_int n}\n`
| Lop(Iintop_imm((Imulh | Idiv | Imod), _)) ->
assert false
| Lop(Inegf | Iabsf as op) ->
let instr = name_for_floatop1 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
assert (i.arg.(0).loc = i.res.(0).loc);
let instr = name_for_floatop2 op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
| Lop(Ifloatofint) ->
` cdgbr {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop(Iintoffloat) ->
(* rounding method #5 = round toward 0 *)
` cgdbr {emit_reg i.res.(0)}, 5, {emit_reg i.arg.(0)}\n`
| Lop(Ispecific sop) ->
assert (i.arg.(2).loc = i.res.(0).loc);
let instr = name_for_specific sop in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
| Lreloadretaddr ->
let n = frame_size() in
` lg %r14, {emit_int(n - size_addr)}(%r15)\n`
| Lreturn ->
let n = frame_size() in
emit_stack_adjust (-n);
` br %r14\n`
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
` brcl 15,{emit_label lbl}\n`
| Lcondbranch(tst, lbl) ->
begin match tst with
Itruetest ->
` cgfi {emit_reg i.arg.(0)}, 0\n`;
` brcl 7, {emit_label lbl}\n`
| Ifalsetest ->
` cgfi {emit_reg i.arg.(0)}, 0\n`;
` brcl 8, {emit_label lbl}\n`
| Iinttest cmp ->
let (comp, mask) = name_for_int_comparison cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
` brcl {emit_int mask}, {emit_label lbl}\n`
| Iinttest_imm(cmp, n) ->
let (comp, mask) = name_for_int_comparison_imm cmp in
` {emit_string comp} {emit_reg i.arg.(0)}, {emit_int n}\n`;
` brcl {emit_int mask}, {emit_label lbl}\n`
| Ifloattest(cmp, neg) ->
` cdbr {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let mask = branch_for_float_comparison cmp neg in
` brcl {emit_int mask}, {emit_label lbl}\n`
| Ioddtest ->
` tmll {emit_reg i.arg.(0)}, 1\n`;
` brcl 1, {emit_label lbl}\n`
| Ieventest ->
` tmll {emit_reg i.arg.(0)}, 1\n`;
` brcl 8, {emit_label lbl}\n`
end
| Lcondbranch3(lbl0, lbl1, lbl2) ->
` cgfi {emit_reg i.arg.(0)}, 1\n`;
begin match lbl0 with
None -> ()
| Some lbl -> ` brcl 4, {emit_label lbl}\n`
end;
begin match lbl1 with
None -> ()
| Some lbl -> ` brcl 8, {emit_label lbl}\n`
end;
begin match lbl2 with
None -> ()
| Some lbl -> ` brcl 2, {emit_label lbl}\n`
end
| Lswitch jumptbl ->
let lbl = new_label() in
` larl %r0, {emit_label lbl}\n`;
` sllg %r1, {emit_reg i.arg.(0)}, 2(%r0)\n`;
` agr %r1, %r0\n`;
` lgf %r1, 0(%r1)\n`;
` agr %r1, %r0\n`;
` br %r1\n`;
emit_string rodata_space;
` .align 8\n`;
`{emit_label lbl}:`;
for i = 0 to Array.length jumptbl - 1 do
` .long {emit_label jumptbl.(i)} - {emit_label lbl}\n`
done;
emit_string code_space
| Lsetuptrap lbl ->
` brasl %r14, {emit_label lbl}\n`;
| Lpushtrap ->
stack_offset := !stack_offset + 16;
emit_stack_adjust 16;
` stg %r14, 0(%r15)\n`;
` stg %r13, {emit_int size_addr}(%r15)\n`;
` lgr %r13, %r15\n`
| Lpoptrap ->
` lg %r13, {emit_int size_addr}(%r15)\n`;
emit_stack_adjust (-16);
stack_offset := !stack_offset - 16
| Lraise k ->
begin match !Clflags.debug, k with
| true, Lambda.Raise_regular ->
` brasl %r14, {emit_symbol "caml_raise_exn"}\n`;
let lbl = record_frame Reg.Set.empty i.dbg in
`{emit_label lbl}:\n`
| true, Lambda.Raise_reraise ->
` brasl %r14, {emit_symbol "caml_reraise_exn"}\n`;
let lbl = record_frame Reg.Set.empty i.dbg in
`{emit_label lbl}:\n`
| false, _
| true, Lambda.Raise_notrace ->
` lg %r1, 0(%r13)\n`;
` lgr %r15, %r13\n`;
` lg %r13, {emit_int size_addr}(%r15)\n`;
emit_stack_adjust (-16);
` br %r1\n`
end
(* Emit a sequence of instructions *)
let rec emit_all i =
match i with
{desc = Lend} -> ()
| _ ->
emit_instr i;
emit_all i.next
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
tailrec_entry_point := new_label();
stack_offset := 0;
call_gc_sites := [];
bound_error_sites := [];
bound_error_call := 0;
float_literals := [];
int_literals := [];
` .globl {emit_symbol fundecl.fun_name}\n`;
emit_debug_info fundecl.fun_dbg;
` .type {emit_symbol fundecl.fun_name}, @function\n`;
emit_string code_space;
` .align 8\n`;
`{emit_symbol fundecl.fun_name}:\n`;
let n = frame_size() in
emit_stack_adjust n;
if !contains_calls then
` stg %r14, {emit_int(n - size_addr)}(%r15)\n`;
`{emit_label !tailrec_entry_point}:\n`;
emit_all fundecl.fun_body;
(* Emit the glue code to call the GC *)
List.iter emit_call_gc !call_gc_sites;
(* Emit the glue code to handle bound errors *)
emit_call_bound_errors();
(* Emit the numeric literals *)
if !float_literals <> [] || !int_literals <> [] then begin
emit_string rodata_space;
` .align 8\n`;
List.iter
(fun (f, lbl) ->
`{emit_label lbl}:`;
emit_float64_directive ".quad" f)
!float_literals;
List.iter
(fun (n, lbl) ->
`{emit_label lbl}: .quad {emit_nativeint n}\n`)
!int_literals
end
(* Emission of data *)
let declare_global_data s =
` .globl {emit_symbol s}\n`;
` .type {emit_symbol s}, @object\n`
let emit_item = function
Cglobal_symbol s ->
declare_global_data s
| Cdefine_symbol s ->
`{emit_symbol s}:\n`;
| Cdefine_label lbl ->
`{emit_data_label lbl}:\n`
| Cint8 n ->
` .byte {emit_int n}\n`
| Cint16 n ->
` .short {emit_int n}\n`
| Cint32 n ->
` .long {emit_nativeint n}\n`
| Cint n ->
` .quad {emit_nativeint n}\n`
| Csingle f ->
emit_float32_directive ".long" (Int32.bits_of_float f)
| Cdouble f ->
emit_float64_directive ".quad" (Int64.bits_of_float f)
| Csymbol_address s ->
` .quad {emit_symbol s}\n`
| Clabel_address lbl ->
` .quad {emit_data_label lbl}\n`
| Cstring s ->
emit_bytes_directive " .byte " s
| Cskip n ->
if n > 0 then ` .space {emit_int n}\n`
| Calign n ->
if n < 8 then ` .align 8\n`
else ` .align {emit_int n}\n`
let data l =
emit_string data_space;
` .align 8\n`;
List.iter emit_item l
(* Beginning / end of an assembly file *)
let begin_assembly() =
reset_debug_info();
(* Emit the beginning of the segments *)
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
emit_string data_space;
` .align 8\n`;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
emit_string code_space;
declare_global_data lbl_begin;
`{emit_symbol lbl_begin}:\n`
let end_assembly() =
(* Emit the end of the segments *)
emit_string code_space;
let lbl_end = Compilenv.make_symbol (Some "code_end") in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
` .long 0\n`;
emit_string data_space;
` .align 8\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
declare_global_data lbl_end;
`{emit_symbol lbl_end}:\n`;
` .quad 0\n`;
(* Emit the frame descriptors *)
emit_string rodata_space;
` .align 8\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
declare_global_data lbl;
`{emit_symbol lbl}:\n`;
emit_frames
{ efa_label = (fun l -> ` .quad {emit_label l}\n`);
efa_16 = (fun n -> ` .short {emit_int n}\n`);
efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
efa_word = (fun n -> ` .quad {emit_int n}\n`);
efa_align = (fun n -> ` .align {emit_int n}\n`);
efa_label_rel = (fun lbl ofs ->
` .long ({emit_label lbl} - .) + {emit_int32 ofs}\n`);
efa_def_label = (fun l -> `{emit_label l}:\n`);
efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000"))
}

208
asmcomp/s390x/proc.ml Normal file
View File

@ -0,0 +1,208 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Bill O'Farrell, IBM *)
(* *)
(* Copyright 2015 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2015 IBM (Bill O'Farrell with *)
(* help from Tristan Amini). All rights reserved. This file is *)
(* distributed under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Description of the Z Processor *)
open Misc
open Cmm
open Reg
open Arch
open Mach
(* Instruction selection *)
let word_addressed = false
(* Registers available for register allocation *)
(* Integer register map:
0 temporary, null register for some operations (volatile)
1 temporary (volatile)
2 - 5 function arguments and results (volatile)
6 function arguments and results (persevered by C)
7 - 9 general purpose, preserved by C
10 allocation limit (preserved by C)
11 allocation pointer (preserved by C)
12 general purpose (preserved by C)
13 trap pointer (preserved by C)
14 return address (volatile)
15 stack pointer (preserved by C)
Floating-point register map:
0, 2, 4, 6 function arguments and results (volatile)
1, 3, 5, 7 general purpose (volatile)
8 - 14 general purpose, preserved by C
15 temporary, preserved by C
Note: integer register r12 is used as GOT pointer by some C compilers.
The code generated by OCaml does not need a GOT pointer, using PC-relative
addressing instead for accessing the GOT. This frees r12 as a
general-purpose register. *)
let int_reg_name =
[| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; "%r12" |]
let float_reg_name =
[| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7";
"%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; "%f15" |]
let num_register_classes = 2
let register_class r =
match r.typ with
| Val | Int | Addr -> 0
| Float -> 1
let num_available_registers = [| 9; 15 |]
let first_available_register = [| 0; 100 |]
let register_name r =
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
let rotate_registers = true
(* Representation of hard registers by pseudo-registers *)
let hard_int_reg =
let v = Array.make 9 Reg.dummy in
for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done; v
let hard_float_reg =
let v = Array.make 16 Reg.dummy in
for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
let all_phys_regs =
Array.append hard_int_reg hard_float_reg
let phys_reg n =
if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
let stack_slot slot ty =
Reg.at_location ty (Stack slot)
(* Calling conventions *)
let calling_conventions
first_int last_int first_float last_float make_stack stack_ofs arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref stack_ofs in
for i = 0 to Array.length arg - 1 do
match arg.(i).typ with
| Val | Int | Addr as ty ->
if !int <= last_int then begin
loc.(i) <- phys_reg !int;
incr int
end else begin
loc.(i) <- stack_slot (make_stack !ofs) ty;
ofs := !ofs + size_int
end
| Float ->
if !float <= last_float then begin
loc.(i) <- phys_reg !float;
incr float
end else begin
loc.(i) <- stack_slot (make_stack !ofs) Float;
ofs := !ofs + size_float
end
done;
(loc, Misc.align !ofs 16)
(* Keep stack 16-aligned. *)
let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
let loc_arguments arg =
calling_conventions 0 4 100 103 outgoing 0 arg
let loc_parameters arg =
let (loc, ofs) = calling_conventions 0 4 100 103 incoming 0 arg in loc
let loc_results res =
let (loc, ofs) = calling_conventions 0 4 100 103 not_supported 0 res in loc
(* C calling conventions under SVR4:
use GPR 2-6 and FPR 0,2,4,6 just like ML calling conventions.
Using a float register does not affect the int registers.
Always reserve 160 bytes at bottom of stack, plus whatever is needed
to hold the overflow arguments. *)
let loc_external_arguments arg =
let arg =
Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg in
let (loc, ofs) =
calling_conventions 0 4 100 103 outgoing 160 arg in
(Array.map (fun reg -> [|reg|]) loc, ofs)
let extcall_use_push = false
(* Results are in GPR 2 and FPR 0 *)
let loc_external_results res =
let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc
(* Exceptions are in GPR 2 *)
let loc_exn_bucket = phys_reg 0
(* Volatile registers: none *)
let regs_are_volatile rs = false
(* Registers destroyed by operations *)
let destroyed_at_c_call =
Array.of_list(List.map phys_reg
[0; 1; 2; 3; 4;
100; 101; 102; 103; 104; 105; 106; 107])
let destroyed_at_oper = function
Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
| Iop(Iextcall(_, false)) -> destroyed_at_c_call
| _ -> [||]
let destroyed_at_raise = all_phys_regs
(* Maximal register pressure *)
let safe_register_pressure = function
Iextcall(_, _) -> 4
| _ -> 9
let max_register_pressure = function
Iextcall(_, _) -> [| 4; 7 |]
| _ -> [| 9; 15 |]
(* Pure operations (without any side effect besides updating their result
registers). *)
let op_is_pure = function
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
| Ispecific(Imultaddf | Imultsubf) -> true
| _ -> true
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
let contains_calls = ref false
(* Calling the assembler *)
let assemble_file infile outfile =
Ccomp.command (Config.asm ^ " -o " ^
Filename.quote outfile ^ " " ^ Filename.quote infile)
let init () = ()

47
asmcomp/s390x/reload.ml Normal file
View File

@ -0,0 +1,47 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Reloading for the Z Processor *)
open Arch
open Mach
class reload = object (self)
inherit Reloadgen.reload_generic as super
(* For 2-address instructions, reloading must make sure that the
temporary result register is the same as the appropriate
argument register. *)
method! reload_operation op arg res =
match op with
(* Two-address binary operations: arg.(0) and res.(0) must be the same *)
| Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) | Iaddf|Isubf|Imulf|Idivf ->
let res = self#makereg res.(0) in
([|res; self#makereg arg.(1)|], [|res|])
(* Three-address ternary operations: arg.(2) and res.(0) must be the same *)
| Ispecific(Imultaddf|Imultsubf) ->
let res = self#makereg res.(0) in
([|self#makereg arg.(0); self#makereg arg.(1); res|], [|res|])
(* One-address unary operations: arg.(0) and res.(0) must be the same *)
| Iintop_imm((Imul|Iand|Ior|Ixor), _) ->
let res = self#makereg res.(0) in
([|res|], [|res|])
(* Other instructions are regular *)
| _ ->
super#reload_operation op arg res
end
let fundecl f =
(new reload)#fundecl f

View File

@ -0,0 +1,60 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Bill O'Farrell, IBM *)
(* *)
(* Copyright 2015 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2015 IBM (Bill O'Farrell with *)
(* help from Tristan Amini). All rights reserved. This file is *)
(* distributed under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Instruction scheduling for the Z processor *)
open Arch
open Mach
(* The z10 processor is in-order, dual-issue. It could benefit from some
basic-block scheduling, although precise latency information
is not available.
The z196 and later are out-of-order processors. Basic-block
scheduling probably makes no difference. *)
class scheduler = object
inherit Schedgen.scheduler_generic
(* Latencies (in cycles). Wild guesses. We multiply all latencies by 2
to favor dual-issue. *)
method oper_latency = function
Ireload -> 4
| Iload(_, _) -> 4
| Iconst_float _ -> 4 (* turned into a load *)
| Iintop(Imul) -> 10
| Iintop_imm(Imul, _) -> 10
| Iaddf | Isubf | Imulf -> 8
| Idivf -> 40
| Ispecific(Imultaddf | Imultsubf) -> 8
| _ -> 2
method reload_retaddr_latency = 4
(* Issue cycles. Rough approximations. *)
method oper_issue_cycles = function
| Ialloc _ -> 4
| Iintop(Imulh) -> 15
| Iintop(Idiv|Imod) -> 20
| Iintop(Icomp _) -> 4
| Iintop_imm(Icomp _, _) -> 4
| _ -> 1
method reload_retaddr_issue_cycles = 1
end
let fundecl f = (new scheduler)#schedule_fundecl f

114
asmcomp/s390x/selection.ml Normal file
View File

@ -0,0 +1,114 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Bill O'Farrell, IBM *)
(* *)
(* Copyright 2015 Institut National de Recherche en Informatique *)
(* et en Automatique. Copyright 2015 IBM (Bill O'Farrell with *)
(* help from Tristan Amini). All rights reserved. This file is *)
(* distributed under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Instruction selection for the Z processor *)
open Cmm
open Arch
open Mach
(* Recognition of addressing modes *)
exception Use_default
type addressing_expr =
| Alinear of expression
| Aadd of expression * expression
let rec select_addr = function
| Cop((Caddi | Cadda | Caddv), [arg; Cconst_int m]) ->
let (a, n) = select_addr arg in (a, n + m)
| Cop((Caddi | Cadda | Caddv), [Cconst_int m; arg]) ->
let (a, n) = select_addr arg in (a, n + m)
| Cop((Caddi | Cadda | Caddv), [arg1; arg2]) ->
begin match (select_addr arg1, select_addr arg2) with
((Alinear e1, n1), (Alinear e2, n2)) ->
(Aadd(e1, e2), n1 + n2)
| _ ->
(Aadd(arg1, arg2), 0)
end
| exp ->
(Alinear exp, 0)
(* Instruction selection *)
let pseudoregs_for_operation op arg res =
match op with
(* Two-address binary operations: arg.(0) and res.(0) must be the same *)
| Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) | Iaddf|Isubf|Imulf|Idivf ->
([|res.(0); arg.(1)|], res)
| Ispecific(sop) ->
( [| arg.(0); arg.(1); res.(0) |], [| res.(0) |])
(* One-address unary operations: arg.(0) and res.(0) must be the same *)
| Iintop_imm((Imul|Iand|Ior|Ixor), _) -> (res, res)
(* Other instructions are regular *)
| _ -> raise Use_default
class selector = object (self)
inherit Selectgen.selector_generic as super
method is_immediate n = (n <= 2147483647) && (n >= -2147483648)
method select_addressing chunk exp =
let (a, d) = select_addr exp in
(* 20-bit signed displacement *)
if d < 0x80000 && d >= -0x80000 then begin
match a with
| Alinear e -> (Iindexed d, e)
| Aadd(e1, e2) -> (Iindexed2 d, Ctuple [e1; e2])
end else
(Iindexed 0, exp)
method! select_operation op args =
match (op, args) with
(* Z does not support immediate operands for multiply high *)
(Cmulhi, _) -> (Iintop Imulh, args)
(* The and, or and xor instructions have a different range of immediate
operands than the other instructions *)
| (Cand, _) -> self#select_logical Iand (-0x1_0000_0000) (-1) args
| (Cor, _) -> self#select_logical Ior 0 0xFFFF_FFFF args
| (Cxor, _) -> self#select_logical Ixor 0 0xFFFF_FFFF args
(* Recognize mult-add and mult-sub instructions *)
| (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
(Ispecific Imultaddf, [arg1; arg2; arg3])
| (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
(Ispecific Imultaddf, [arg1; arg2; arg3])
| (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
(Ispecific Imultsubf, [arg1; arg2; arg3])
| _ ->
super#select_operation op args
method select_logical op lo hi = function
[arg; Cconst_int n] when n >= lo && n <= hi ->
(Iintop_imm(op, n), [arg])
| [Cconst_int n; arg] when n >= lo && n <= hi ->
(Iintop_imm(op, n), [arg])
| args ->
(Iintop op, args)
method! insert_op_debug op dbg rs rd =
try
let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
self#insert_moves rs rsrc;
self#insert_debug (Iop op) dbg rsrc rdst;
self#insert_moves rdst rd;
rd
with Use_default ->
super#insert_op_debug op dbg rs rd
end
let fundecl f = (new selector)#emit_fundecl f

353
asmrun/s390x.S Normal file
View File

@ -0,0 +1,353 @@
/***********************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* Bill O'Farrell, IBM */
/* */
/* Copyright 2015 Institut National de Recherche en Informatique et */
/* en Automatique. Copyright 2015 IBM (Bill O'Farrell with help from */
/* Tristan Amini). All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License, with */
/* the special exception on linking described in file ../LICENSE. */
/* */
/***********************************************************************/
#define Addrglobal(reg,glob) \
larl reg, glob
#define Loadglobal(reg,glob) \
lgrl reg, glob
#define Storeglobal(reg,glob) \
stgrl reg, glob
#define Loadglobal32(reg,glob) \
lgfrl reg, glob
#define Storeglobal32(reg,glob) \
strl reg, glob
.section ".text"
/* Invoke the garbage collector. */
.globl caml_system__code_begin
caml_system__code_begin:
.globl caml_call_gc
.type caml_call_gc, @function
caml_call_gc:
/* Set up stack frame */
#define FRAMESIZE (16*8 + 16*8)
lay %r15, -FRAMESIZE(%r15)
/* Record return address into OCaml code */
Storeglobal(%r14, caml_last_return_address)
/* Record lowest stack address */
lay %r0, FRAMESIZE(%r15)
Storeglobal(%r0, caml_bottom_of_stack)
/* Record pointer to register array */
lay %r0, (8*16)(%r15)
Storeglobal(%r0, caml_gc_regs)
/* Save current allocation pointer for debugging purposes */
Storeglobal(%r11, caml_young_ptr)
/* Save exception pointer (if e.g. a sighandler raises) */
Storeglobal(%r13, caml_exception_pointer)
/* Save all registers used by the code generator */
stmg %r2,%r9, (8*16)(%r15)
stg %r12, (8*16 + 8*8)(%r15)
std %f0, 0(%r15)
std %f1, 8(%r15)
std %f2, 16(%r15)
std %f3, 24(%r15)
std %f4, 32(%r15)
std %f5, 40(%r15)
std %f6, 48(%r15)
std %f7, 56(%r15)
std %f8, 64(%r15)
std %f9, 72(%r15)
std %f10, 80(%r15)
std %f11, 88(%r15)
std %f12, 96(%r15)
std %f13, 108(%r15)
std %f14, 112(%r15)
std %f15, 120(%r15)
/* Call the GC */
lay %r15, -160(%r15)
stg %r15, 0(%r15)
brasl %r14, caml_garbage_collection@PLT
lay %r15, 160(%r15)
/* Reload new allocation pointer and allocation limit */
Loadglobal(%r11, caml_young_ptr)
Loadglobal(%r10, caml_young_limit)
/* Restore all regs used by the code generator */
lmg %r2,%r9, (8*16)(%r15)
lg %r12, (8*16 + 8*8)(%r15)
ld %f0, 0(%r15)
ld %f1, 8(%r15)
ld %f2, 16(%r15)
ld %f3, 24(%r15)
ld %f4, 32(%r15)
ld %f5, 40(%r15)
ld %f6, 48(%r15)
ld %f7, 56(%r15)
ld %f8, 64(%r15)
ld %f9, 72(%r15)
ld %f10, 80(%r15)
ld %f11, 88(%r15)
ld %f12, 96(%r15)
ld %f13, 108(%r15)
ld %f14, 112(%r15)
ld %f15, 120(%r15)
/* Return to caller */
Loadglobal(%r1, caml_last_return_address)
/* Deallocate stack frame */
lay %r15, FRAMESIZE(%r15)
/* Return */
br %r1
/* Call a C function from OCaml */
.globl caml_c_call
.type caml_c_call, @function
caml_c_call:
Storeglobal(%r15, caml_bottom_of_stack)
/* Save return address */
ldgr %f15, %r14
/* Get ready to call C function (address in r7) */
/* Record lowest stack address and return address */
Storeglobal(%r14, caml_last_return_address)
/* Make the exception handler and alloc ptr available to the C code */
Storeglobal(%r11, caml_young_ptr)
Storeglobal(%r13, caml_exception_pointer)
/* Call the function */
basr %r14, %r7
/* restore return address */
lgdr %r14,%f15
/* Reload allocation pointer and allocation limit*/
Loadglobal(%r11, caml_young_ptr)
Loadglobal(%r10, caml_young_limit)
/* Say we are back into OCaml code */
lgfi %r0, 0
Storeglobal(%r0, caml_last_return_address)
/* Return to caller */
br %r14
/* Raise an exception from OCaml */
.globl caml_raise_exn
.type caml_raise_exn, @function
caml_raise_exn:
Loadglobal32(%r0, caml_backtrace_active)
cgfi %r0, 0
jne .L110
.L111:
/* Pop trap frame */
lg %r1, 0(%r13)
lgr %r15, %r13
lg %r13, 8(13)
agfi %r15, 16
/* Branch to handler */
br %r1
.L110:
lgfi %r0, 0
Storeglobal32(%r0, caml_backtrace_pos)
.L114:
ldgr %f15, %r2 /* preserve exn bucket in callee-save reg */
/* arg1: exception bucket, already in r3 */
lgr %r3,%r14 /* arg2: PC of raise */
lgr %r4, %r15 /* arg3: SP of raise */
lgr %r5, %r13 /* arg4: SP of handler */
agfi %r15, -160 /* reserve stack space for C call */
brasl %r14, caml_stash_backtrace@PLT
agfi %r15, 160
lgdr %r2,%f15 /* restore exn bucket */
j .L111 /* raise the exn */
.globl caml_reraise_exn
.type caml_reraise_exn, @function
caml_reraise_exn:
Loadglobal32(%r0, caml_backtrace_active)
cgfi %r0, 0
jne .L114
/* Pop trap frame */
lg %r1, 0(%r13)
lgr %r15, %r13
lg %r13, 8(%r13)
agfi %r15, 16
/* Branch to handler */
br %r1;
/* Raise an exception from C */
.globl caml_raise_exception
.type caml_raise_exception, @function
caml_raise_exception:
Loadglobal32(0, caml_backtrace_active)
cgfi %r0, 0
jne .L112
.L113:
/* Reload OCaml global registers */
Loadglobal(%r15, caml_exception_pointer)
Loadglobal(%r11, caml_young_ptr)
Loadglobal(%r10, caml_young_limit)
/* Say we are back into OCaml code */
lgfi %r0, 0
Storeglobal(%r0, caml_last_return_address)
/* Pop trap frame */
lg %r1, 0(%r15)
lg %r13, 8(%r15)
agfi %r15, 16
/* Branch to handler */
br %r1;
.L112:
ldgr %f15,%r2 /* preserve exn bucket in callee-save reg */
/* arg1: exception bucket, already in r2 */
Loadglobal(%r3, caml_last_return_address) /* arg2: PC of raise */
Loadglobal(%r4, caml_bottom_of_stack) /* arg3: SP of raise */
Loadglobal(%r5, caml_exception_pointer) /* arg4: SP of handler */
/* reserve stack space for C call */
lay %r15, -160(%r15)
brasl %r14, caml_stash_backtrace@PLT
lay %r15, 160(%r15)
lgdr %r2,%f15 /* restore exn bucket */ /* restore exn bucket */
j .L113 /* raise the exn */
/* Start the OCaml program */
.globl caml_start_program
.type caml_start_program, @function
caml_start_program:
Addrglobal(%r0, caml_program)
/* Code shared between caml_start_program and caml_callback */
.L102:
/* Allocate stack frame */
lay %r15, -144(%r15)
/* Save all callee-save registers + return address */
/* GPR 6..14 at sp + 0 ... sp + 64
FPR 10..15 at sp + 72 ... sp + 128 */
stmg %r6,%r14, 0(%r15)
std %f8, 72(%r15)
std %f9, 80(%r15)
std %f10, 88(%r15)
std %f11, 96(%r15)
std %f12, 104(%r15)
std %f13, 112(%r15)
std %f14, 120(%r15)
std %f15, 128(%r15)
/* Set up a callback link */
lay %r15, -32(%r15)
Loadglobal(%r1, caml_bottom_of_stack)
stg %r1, 0(%r15)
Loadglobal(%r1, caml_last_return_address)
stg %r1, 8(%r15)
Loadglobal(%r1, caml_gc_regs)
stg %r1, 16(%r15)
/* Build an exception handler to catch exceptions escaping out of OCaml */
brasl %r14, .L103
j .L104
.L103:
lay %r15, -16(%r15)
stg %r14, 0(%r15)
Loadglobal(%r1, caml_exception_pointer)
stg %r1, 8(%r15)
lgr %r13, %r15
/* Reload allocation pointers */
Loadglobal(%r11, caml_young_ptr)
Loadglobal(%r10, caml_young_limit)
/* Call the OCaml code */
lgr %r1,%r0
basr %r14, %r1
.L105:
/* Pop the trap frame, restoring caml_exception_pointer */
lg %r0, 8(%r15)
Storeglobal(%r0, caml_exception_pointer)
la %r15, 16(%r15)
/* Pop the callback link, restoring the global variables */
.L106:
lg %r5, 0(%r15)
lg %r6, 8(%r15)
lg %r1, 16(%r15)
Storeglobal(%r5, caml_bottom_of_stack)
Storeglobal(%r6, caml_last_return_address)
Storeglobal(%r1, caml_gc_regs)
la %r15, 32(%r15)
/* Update allocation pointer */
Storeglobal(%r11, caml_young_ptr)
/* Restore registers */
lmg %r6,%r14, 0(%r15)
ld %f8, 72(%r15)
ld %f9, 80(%r15)
ld %f10, 88(%r15)
ld %f11, 96(%r15)
ld %f12, 104(%r15)
ld %f13, 112(%r15)
ld %f14, 120(%r15)
ld %f15, 128(%r15)
/* Return */
lay %r15, 144(%r15)
br %r14
/* The trap handler: */
.L104:
/* Update caml_exception_pointer */
Storeglobal(%r13, caml_exception_pointer)
/* Encode exception bucket as an exception result and return it */
oill %r2, 2
j .L106
/* Callback from C to OCaml */
.globl caml_callback_exn
.type caml_callback_exn, @function
caml_callback_exn:
/* Initial shuffling of arguments */
lgr %r0, %r2 /* Closure */
lgr %r2, %r3 /* Argument */
lgr %r3, %r0
lg %r0, 0(%r3) /* Code pointer */
j .L102
.globl caml_callback2_exn
.type caml_callback2_exn, @function
caml_callback2_exn:
lgr %r0, %r2 /* Closure */
lgr %r2, %r3 /* First argument */
lgr %r3, %r4 /* Second argument */
lgr %r4, %r0
Addrglobal(%r0, caml_apply2)
j .L102
.globl caml_callback3_exn
.type caml_callback3_exn, @function
caml_callback3_exn:
lgr %r0, %r2 /* Closure */
lgr %r2, %r3 /* First argument */
lgr %r3, %r4 /* Second argument */
lgr %r4, %r5 /* Third argument */
lgr %r5, %r0
Addrglobal(%r0, caml_apply3)
j .L102
.globl caml_ml_array_bound_error
.type caml_ml_array_bound_error, @function
caml_ml_array_bound_error:
lay %r15, -160(%r15) /* Reserve stack space for C call */
larl %r7, caml_array_bound_error
j caml_c_call
.globl caml_system__code_end
caml_system__code_end:
/* Frame table */
.section ".data"
.align 8
.globl caml_system__frametable
.type caml_system__frametable, @object
caml_system__frametable:
.quad 1 /* one descriptor */
.quad .L105 /* return address into callback */
.short -1 /* negative size count => use callback link */
.short 0 /* no roots here */
.align 8

View File

@ -144,7 +144,7 @@ int caml_set_signal_action(int signo, int action)
/* Machine- and OS-dependent handling of bound check trap */
#if defined(TARGET_power) || (defined(TARGET_sparc) && defined(SYS_solaris))
#if defined(TARGET_power) || defined(TARGET_s390x) || (defined(TARGET_sparc) && defined(SYS_solaris))
DECLARE_SIGNAL_HANDLER(trap_handler)
{
#if defined(SYS_solaris)
@ -267,6 +267,14 @@ void caml_init_signals(void)
}
#endif
#if defined(TARGET_s390x)
{ struct sigaction act;
sigemptyset(&act.sa_mask);
SET_SIGACT(act, trap_handler);
sigaction(SIGFPE, &act, NULL);
}
#endif
/* Stack overflow handling */
#ifdef HAS_STACK_OVERFLOW_DETECTION
{

View File

@ -284,6 +284,23 @@
#define CONTEXT_YOUNG_PTR (context->regs->gpr[31])
#define CONTEXT_SP (context->regs->gpr[1])
/****************** s390x, ELF (Linux) */
#elif defined(TARGET_s390x) && defined(SYS_elf)
#define DECLARE_SIGNAL_HANDLER(name) \
static void name(int sig, struct sigcontext * context)
#define SET_SIGACT(sigact,name) \
sigact.sa_handler = (void (*)(int)) (name); \
sigact.sa_flags = 0
typedef unsigned long context_reg;
#define CONTEXT_PC (context->sregs->regs.psw.addr)
#define CONTEXT_EXCEPTION_POINTER (context->sregs->regs.gprs[13])
#define CONTEXT_YOUNG_LIMIT (context->sregs->regs.gprs[10])
#define CONTEXT_YOUNG_PTR (context->sregs->regs.gprs[11])
#define CONTEXT_SP (context->sregs->regs.gprs[15])
/****************** PowerPC, BSD */
#elif defined(TARGET_power) && (defined(SYS_bsd) || defined(SYS_bsd_elf))

View File

@ -50,6 +50,12 @@
#define Mark_scanned(sp, retaddr) Saved_return_address(sp) = (retaddr) | 1
#endif
#ifdef TARGET_s390x
#define Saved_return_address(sp) *((intnat *)((sp) - SIZEOF_PTR))
#define Trap_frame_size 16
#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
#endif
#ifdef TARGET_arm
#define Saved_return_address(sp) *((intnat *)((sp) - 4))
#define Callback_link(sp) ((struct caml_context *)((sp) + 8))

4
configure vendored
View File

@ -763,6 +763,7 @@ if test $with_sharedlibs = "yes"; then
natdynlink=true
fi;;
x86_64-*-darwin*) natdynlink=true;;
s390x*-*-linux*) natdynlink=true;;
powerpc*-*-linux*) natdynlink=true;;
sparc*-*-linux*) natdynlink=true;;
i686-*-kfreebsd*) natdynlink=true;;
@ -823,6 +824,7 @@ case "$target" in
system=elf;;
powerpc-*-netbsd*) arch=power; model=ppc; system=elf;;
powerpc-*-openbsd*) arch=power; model=ppc; system=bsd_elf;;
s390x*-*-linux*) arch=s390x; model=z10; system=elf;;
armv6*-*-linux-gnueabihf) arch=arm; model=armv6; system=linux_eabihf;;
arm*-*-linux-gnueabihf) arch=arm; system=linux_eabihf;;
armv7*-*-linux-gnueabi) arch=arm; model=armv7; system=linux_eabi;;
@ -899,6 +901,8 @@ case "$arch,$system" in
as="${TOOLPREF}as -mppc"
aspp="${TOOLPREF}gcc -m32 -c"
fi;;
s390x,elf) as="${TOOLPREF}as -m 64 -march=$model"
aspp="${TOOLPREF}gcc -c -Wa,-march=$model";;
sparc,solaris) as="${TOOLPREF}as"
case "$cc" in
*gcc*) aspp="${TOOLPREF}gcc -c";;

View File

@ -214,4 +214,5 @@
(floataset d 37 (-f f))
(floataset d 38 (absf f))
(addraset r 116 (mulh x y))
)))))))

View File

@ -55,6 +55,7 @@ let keyword_table =
"let", LET;
"load", LOAD;
"mod", MODI;
"mulh", MULH;
"or", OR;
"proj", PROJ;
"raise", RAISE Lambda.Raise_regular;

View File

@ -61,6 +61,7 @@ double F, G;
extern void call_gen_code(void (*)(void));
extern void testarith(void);
static intnat mulhs(intnat x, intnat y);
void do_test(void)
{
@ -252,29 +253,55 @@ void do_test(void)
FLOATTEST(D[37], - F);
FLOATTEST(D[38], fabs(F));
INTTEST(R[116], mulhs(X, Y));
}
#ifdef __i386__
#ifdef __FreeBSD__
#include <floatingpoint.h>
#endif
#endif
/* Multiply-high signed. Hacker's Delight section 8.2 */
void init_ieee_floats(void)
#define HALFSIZE (4 * sizeof(intnat))
#define HALFMASK (((intnat)1 << HALFSIZE) - 1)
static intnat mulhs(intnat u, intnat v)
{
#ifdef __i386__
#ifdef __FreeBSD__
fpsetmask(0);
#endif
#endif
uintnat u0, v0, w0;
intnat u1, v1, w1, w2, t;
u0 = u & HALFMASK; u1 = u >> HALFSIZE;
v0 = v & HALFMASK; v1 = v >> HALFSIZE;
w0 = u0*v0;
t = u1*v0 + (w0 >> HALFSIZE);
w1 = t & HALFMASK;
w2 = t >> HALFSIZE;
w1 = u0*v1 + w1;
return u1*v1 + w2 + (w1 >> HALFSIZE);
}
/* A simple linear congruential PRNG */
#ifdef ARCH_SIXTYFOUR
#define RAND_A 6364136223846793005ULL
#define RAND_C 1442695040888963407ULL
#else
#define RAND_A 214013U
#define RAND_C 2531011U
#endif
static intnat rnd(void)
{
static uintnat seed = 0;
seed = seed * RAND_A + RAND_C;
return (intnat) seed;
}
/* Test harness */
#define NUM_RANDOM_ITERATIONS 1000000
int main(int argc, char **argv)
{
int i;
double weird[4];
init_ieee_floats();
if (argc >= 5) {
X = atoi(argv[1]);
Y = atoi(argv[2]);
@ -283,12 +310,14 @@ int main(int argc, char **argv)
do_test();
return 0;
}
printf("Testing -2...2\n");
for(Y = -2; Y <= 2; Y++) {
for (X = -2; X <= 2; X++) {
F = X; G = Y; do_test();
}
}
if (!(argc >= 2 && strcmp(argv[1], "noinf"))) {
printf("Testing special FP values\n");
weird[0] = 0.0;
weird[1] = 1.0 / weird[0]; /* +infty */
weird[2] = -1.0 / weird[0]; /* -infty */
@ -299,13 +328,13 @@ int main(int argc, char **argv)
}
}
}
while(1) {
X = (rand() & 0x1FFFFFFF) - 0x10000000;
Y = (rand() & 0x1FFFFFFF) - 0x10000000;
printf("Testing %d random values\n", NUM_RANDOM_ITERATIONS);
for (i = 0; i < NUM_RANDOM_ITERATIONS; i++) {
X = rnd();
Y = rnd();
F = X / 1e3;
G = Y / 1e3;
do_test();
printf("."); fflush(stdout);
}
return 0;
}

View File

@ -102,6 +102,7 @@ let access_array base numelt size =
%token LTI
%token MODI
%token MULF
%token MULH
%token MULI
%token NEA
%token NEF
@ -289,6 +290,7 @@ binaryop:
| GTF { Ccmpf Cgt }
| GEF { Ccmpf Cge }
| CHECKBOUND { Ccheckbound Debuginfo.none }
| MULH { Cmulhi }
;
sequence:
expr sequence { Csequence($1, $2) }

View File

@ -0,0 +1,64 @@
#define ALIGN 8
#define CALL_GEN_CODE call_gen_code
#define CAML_C_CALL caml_c_call
#define CAML_NEGF_MASK caml_negf_mask
#define CAML_ABSF_MASK caml_absf_mask
.section ".text"
.globl CALL_GEN_CODE
.type CALL_GEN_CODE, @function
.align ALIGN
CALL_GEN_CODE:
/* Stack space */
lay %r15, -144(%r15)
/* Save registers */
stmg %r6,%r14, 0(%r15)
std %f8, 72(%r15)
std %f9, 80(%r15)
std %f10, 88(%r15)
std %f11, 96(%r15)
std %f12, 104(%r15)
std %f13, 112(%r15)
std %f14, 120(%r15)
std %f15, 128(%r15)
/* Shuffle args */
lgr %r1, %r2
lgr %r2, %r3
lgr %r3, %r4
lgr %r4, %r5
/* Function call */
basr %r14, %r1
/* Restore registers */
lmg %r6,%r14, 0(%r15)
ld %f8, 72(%r15)
ld %f9, 80(%r15)
ld %f10, 88(%r15)
ld %f11, 96(%r15)
ld %f12, 104(%r15)
ld %f13, 112(%r15)
ld %f14, 120(%r15)
ld %f15, 128(%r15)
/* Return */
lay %r15, 144(%r15)
br %r14
.globl CAML_C_CALL
.type CAML_C_CALL, @function
.align ALIGN
CAML_C_CALL:
br %r7
.section ".rodata"
.global CAML_NEGF_MASK
.align ALIGN
CAML_NEGF_MASK:
.quad 0x8000000000000000, 0
.global CAML_ABSF_MASK
.align ALIGN
CAML_ABSF_MASK:
.quad 0x7FFFFFFFFFFFFFFF, 0
.comm young_limit, 8