Merge pull request #275 from xavierleroy/zsystem

IBM z Systems port of ocamlopt This is a port of the OCaml native-code compiler to IBM's z Systems architecture under Linux. z Systems (https://en.wikipedia.org/wiki/IBM_System_z), also known as "s390x" in the GNU/Linux world, is IBM's line of mainframe computers. They are supported by several Linux distributions: RHEL, Suse, Debian. The OCaml port was developed by Bill O'Farrell at IBM Toronto, with help from Tristan Amini, based on OCaml 4.02.1. I upgraded the port to the current OCaml trunk and performed some simplifiications and fixes. A CLA was signed to cover the reuse of Bill O'Farrell's code.
2015-11-19 14:28:37 +01:00 · 2015-11-19 14:28:37 +01:00 · 4640bd0c10
parent e896e9071a 32def141e5
commit 4640bd0c10
18 changed files with 1826 additions and 18 deletions
--- a/2
+++ b/2
@ -101,6 +101,8 @@ Compilers:
  (Mark Shinwell)
 - GPR#270: Make [transl_exception_constructor] generate [Immutable] blocks
  (Mark Shinwell)
+- GPR#275: native-code generator for IBM z System running Linux
+  (Bill O'Farrell, Tristan Amini, Xavier Leroy)
 - GPR#282: relax short-paths safety check in presence of module aliases, take
  penalty into account while building the printing map.
  (Thomas Refis, Leo White)
--- a/asmcomp/s390x/CSE.ml
+++ b/asmcomp/s390x/CSE.ml
@ -0,0 +1,39 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the Z Processor *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Imultaddf | Imultsubf) -> Op_pure
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n ->
+      n >= -0x8000_0000n && n <= 0x7FFF_FFFFn
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
--- a/asmcomp/s390x/arch.ml
+++ b/asmcomp/s390x/arch.ml
@ -0,0 +1,86 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Specific operations for the Z processor *)
+
+open Format
+
+(* Machine-specific command-line options *)
+
+let pic_code = ref true
+
+let command_line_options =
+  [ "-fPIC", Arg.Set pic_code,
+      " Generate position-independent machine code (default)";
+    "-fno-PIC", Arg.Clear pic_code,
+      " Generate position-dependent machine code" ]
+
+(* Specific operations *)
+
+type specific_operation =
+    Imultaddf                           (* multiply and add *)
+  | Imultsubf                           (* multiply and subtract *)
+
+(* Addressing modes *)
+
+type addressing_mode =
+  | Iindexed of int                     (* reg + displ *)
+  | Iindexed2 of int                    (* reg + reg + displ *)
+
+(* Sizes, endianness *)
+
+let big_endian = true
+
+let size_addr = 8
+let size_int = size_addr
+let size_float = 8
+
+let allow_unaligned_access = false
+
+(* Behavior of division *)
+
+let division_crashes_on_overflow = true
+
+(* Operations on addressing modes *)
+
+let identity_addressing = Iindexed 0
+
+let offset_addressing addr delta =
+  match addr with
+  | Iindexed n -> Iindexed(n + delta)
+  | Iindexed2 n -> Iindexed2(n + delta)
+
+let num_args_addressing = function
+  | Iindexed n -> 1
+  | Iindexed2 n -> 2
+
+(* Printing operations and addressing modes *)
+
+let print_addressing printreg addr ppf arg =
+  match addr with
+  | Iindexed n ->
+      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
+      fprintf ppf "%a%s" printreg arg.(0) idx
+  | Iindexed2 n ->
+      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
+      fprintf ppf "%a + %a%s" printreg arg.(0) printreg arg.(1) idx
+
+let print_specific_operation printreg op ppf arg =
+  match op with
+  | Imultaddf ->
+      fprintf ppf "%a *f %a +f %a"
+        printreg arg.(0) printreg arg.(1) printreg arg.(2)
+  | Imultsubf ->
+      fprintf ppf "%a *f %a -f %a"
+        printreg arg.(0) printreg arg.(1) printreg arg.(2)
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@ -0,0 +1,767 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Emission of Linux on Z 64-bit assembly code *)
+
+module StringSet =
+  Set.Make(struct type t = string let compare (x:t) y = compare x y end)
+
+open Misc
+open Cmm
+open Arch
+open Proc
+open Reg
+open Mach
+open Linearize
+open Emitaux
+
+(* Layout of the stack.  The stack is kept 8-aligned. *)
+
+let stack_offset = ref 0
+
+let frame_size () =
+  let size =
+    !stack_offset +                     (* Trap frame, outgoing parameters *)
+    size_int * num_stack_slots.(0) +    (* Local int variables *)
+    size_float * num_stack_slots.(1) +  (* Local float variables *)
+    (if !contains_calls then size_addr else 0) in (* The return address *)
+  Misc.align size 8
+
+let slot_offset loc cls =
+  match loc with
+    Local n ->
+      if cls = 0
+      then !stack_offset + num_stack_slots.(1) * size_float + n * size_int
+      else !stack_offset + n * size_float
+  | Incoming n -> frame_size() + n
+  | Outgoing n -> n
+
+(* Output a symbol *)
+
+let emit_symbol s = Emitaux.emit_symbol '.' s
+
+(* Output function call *)
+
+let emit_call s =
+   if !pic_code then
+    `brasl	%r14, {emit_symbol s}@PLT`
+   else
+    `brasl	%r14, {emit_symbol s}`
+
+(* Output a label *)
+
+let label_prefix = ".L"
+
+let emit_label lbl =
+  emit_string label_prefix; emit_int lbl
+
+let emit_data_label lbl =
+  emit_string label_prefix; emit_string "d"; emit_int lbl
+
+(* Section switching *)
+
+let data_space = "	.section \".data\"\n"
+
+let code_space = "	.section \".text\"\n"
+
+let rodata_space = "	.section \".rodata\"\n"
+
+(* Output a pseudo-register *)
+
+let emit_reg r =
+  match r.loc with
+  | Reg r -> emit_string (register_name r)
+  | _ -> fatal_error "Emit.emit_reg"
+
+
+let emit_gpr r = emit_string "%r"; emit_int r
+
+let emit_fpr r = emit_string "%f"; emit_int r
+
+(* Special registers *)
+
+let reg_f15 = phys_reg 115  
+
+(* Output a stack reference *)
+
+let emit_stack r =
+  match r.loc with
+    Stack s ->
+      let ofs = slot_offset s (register_class r) in `{emit_int ofs}(%r15)`
+  | _ -> fatal_error "Emit.emit_stack"
+
+
+(* Output a load or store operation *)
+
+let emit_load_store instr addressing_mode addr n arg =
+  match addressing_mode with
+  | Iindexed ofs ->
+      `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
+  | Iindexed2 ofs ->
+      `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
+
+(* Adjust the stack pointer down by N.  
+   Choose the shortest instruction possible for the value of N. *)
+
+let emit_stack_adjust n =
+  let n = -n in
+  if n = 0 then ()
+  else if n >= 0 && n < 4096 then
+    `	la	%r15, {emit_int n}(%r15)\n`
+  else if n >= -0x80000 && n < 0x80000 then
+    `	lay	%r15, {emit_int n}(%r15)\n`
+  else
+    `	agfi	%r15, {emit_int n}\n`
+
+(* Emit a 'add immediate' *)
+
+let emit_addimm res arg n =
+  if n >= 0 && n < 4096 then
+    `	la	{emit_reg res}, {emit_int n}({emit_reg arg})\n`
+  else if n >= -0x80000 && n < 0x80000 then
+    `	lay	{emit_reg res}, {emit_int n}({emit_reg arg})\n`
+  else begin
+    if arg.loc <> res.loc then
+      `	lgr	{emit_reg res}, {emit_reg arg}\n`;
+    `	agfi	{emit_reg res}, {emit_int n}\n`
+  end
+
+(* After a comparison, extract the result as 0 or 1 *)
+(* The locgr instruction is not available in the z10 architecture,
+   so this code is currently unused. *)
+(*
+let emit_set_comp cmp res =
+    `	lghi	%r1, 1\n`;
+    `	lghi	{emit_reg res}, 0\n`;
+  begin match cmp with
+      Ceq -> `	locgre	{emit_reg res}, %r1\n`
+    | Cne -> `	locgrne	{emit_reg res}, %r1\n`
+    | Cgt -> `	locgrh	{emit_reg res}, %r1\n`
+    | Cle -> `	locgrnh	{emit_reg res}, %r1\n`
+    | Clt -> `	locgrl	{emit_reg res}, %r1\n`
+    | Cge -> `	locgrnl	{emit_reg res}, %r1\n`
+  end
+*)
+
+(* Record live pointers at call points *)
+
+let record_frame live dbg =
+  let lbl = new_label() in
+  let live_offset = ref [] in
+  Reg.Set.iter
+    (function
+      | {typ = Val; loc = Reg r} ->
+          live_offset := (r lsl 1) + 1 :: !live_offset
+      | {typ = Val; loc = Stack s} as reg ->
+          live_offset := slot_offset s (register_class reg) :: !live_offset
+      | {typ = Addr} as r ->
+          Misc.fatal_error ("bad GC root " ^ Reg.name r)
+      | _ -> ())
+    live;
+  frame_descriptors :=
+    { fd_lbl = lbl;
+      fd_frame_size = frame_size();
+      fd_live_offset = !live_offset;
+      fd_debuginfo = dbg } :: !frame_descriptors;
+  lbl
+
+(* Record calls to caml_call_gc, emitted out of line. *)
+
+type gc_call =
+  { gc_lbl: label;                      (* Entry label *)
+    gc_return_lbl: label;               (* Where to branch after GC *)
+    gc_frame_lbl: label }               (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc gc =
+  `{emit_label gc.gc_lbl}:	{emit_call "caml_call_gc"}\n`;
+  `{emit_label gc.gc_frame_lbl}:	brcl	15, {emit_label gc.gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error, emitted out of line. *)
+
+type bound_error_call =
+  { bd_lbl: label;                      (* Entry label *)
+    bd_frame: label }                   (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+let bound_error_call = ref 0
+
+let bound_error_label dbg =
+  if !Clflags.debug then begin
+    let lbl_bound_error = new_label() in
+    let lbl_frame = record_frame Reg.Set.empty dbg in
+    bound_error_sites :=
+     { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
+   lbl_bound_error
+ end else begin
+   if !bound_error_call = 0 then bound_error_call := new_label();
+   !bound_error_call
+ end
+
+let emit_call_bound_error bd =
+  `{emit_label bd.bd_lbl}:	{emit_call "caml_ml_array_bound_error"}\n`;
+  `{emit_label bd.bd_frame}:\n`
+
+let emit_call_bound_errors () =
+  List.iter emit_call_bound_error !bound_error_sites;
+  if !bound_error_call > 0 then
+    `{emit_label !bound_error_call}:	{emit_call "caml_ml_array_bound_error"}\n`
+
+(* Record floating-point and large integer literals *)
+
+let float_literals = ref ([] : (int64 * int) list)
+let int_literals = ref ([] : (nativeint * int) list)
+
+(* Masks for conditional branches after comparisons *)
+
+let branch_for_comparison = function
+    Ceq -> 8  | Cne -> 7
+  | Cle -> 12 | Cgt -> 2
+  | Cge -> 10 | Clt -> 4
+
+let name_for_int_comparison = function
+    Isigned cmp -> ("cgr", branch_for_comparison cmp)
+  | Iunsigned cmp -> ("clgr", branch_for_comparison cmp)
+  
+let name_for_int_comparison_imm = function
+    Isigned cmp -> ("cgfi", branch_for_comparison cmp)
+  | Iunsigned cmp -> ("clgfi", branch_for_comparison cmp)
+
+(* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = unordered*)
+let branch_for_float_comparison cmp neg =
+   match cmp with
+     Ceq -> if neg then 7  else 8
+   | Cne -> if neg then 8  else 7
+   | Cle -> if neg then 3  else 12
+   | Cgt -> if neg then 13 else 2
+   | Cge -> if neg then 5  else 10
+   | Clt -> if neg then 11 else 4
+
+(* Names for various instructions *)
+
+let name_for_intop = function
+    Iadd  -> "agr"
+  | Isub  -> "sgr"
+  | Imul  -> "msgr"
+  | Iand  -> "ngr"
+  | Ior   -> "ogr"
+  | Ixor  -> "xgr"
+  | _ -> Misc.fatal_error "Emit.Intop"
+
+let name_for_floatop1 = function
+    Inegf -> "lcdbr"
+  | Iabsf -> "lpdbr"
+  | _ -> Misc.fatal_error "Emit.Iopf1"
+
+let name_for_floatop2 = function
+    Iaddf -> "adbr"
+  | Isubf -> "sdbr"
+  | Imulf -> "mdbr"
+  | Idivf -> "ddbr"
+  | _ -> Misc.fatal_error "Emit.Iopf2"
+
+let name_for_specific = function
+    Imultaddf -> "madbr"
+  | Imultsubf -> "msdbr"
+
+(* Name of current function *)
+let function_name = ref ""
+(* Entry point for tail recursive calls *)
+let tailrec_entry_point = ref 0
+
+(* Output the assembly code for an instruction *)
+
+let emit_instr i =
+    emit_debug_info i.dbg;
+    match i.desc with
+      Lend -> ()
+    | Lop(Imove | Ispill | Ireload) ->
+        let src = i.arg.(0) and dst = i.res.(0) in
+        if src.loc <> dst.loc then begin
+           match (src, dst) with
+              {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
+                `	lgr	{emit_reg dst}, {emit_reg src}\n`
+            | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
+                `	ldr	{emit_reg dst}, {emit_reg src}\n`
+            | {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Stack sd} ->
+                `	stg	{emit_reg src}, {emit_stack dst}\n`
+            | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
+                `	std	{emit_reg src}, {emit_stack dst}\n`
+            | {loc = Stack ss; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
+                `	lg	{emit_reg dst}, {emit_stack src}\n`
+            | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
+                `	ldy	{emit_reg dst}, {emit_stack src}\n`
+            | (_, _) ->
+                fatal_error "Emit: Imove"
+        end
+    | Lop(Iconst_int n | Iconst_blockheader n) ->
+        if n >= -0x8000n && n <= 0x7FFFn then begin
+          `	lghi	{emit_reg i.res.(0)}, {emit_nativeint n}\n`;
+        end else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
+          `	lgfi	{emit_reg i.res.(0)}, {emit_nativeint n}\n`;
+        end else begin
+          let lbl = new_label() in
+          int_literals := (n, lbl) :: !int_literals;
+          `	lgrl	{emit_reg i.res.(0)}, {emit_label lbl}\n`;
+          end
+    | Lop(Iconst_float f) ->
+        let lbl = new_label() in
+        float_literals := (f, lbl) :: !float_literals;
+        `	larl	%r1, {emit_label lbl}\n`;
+        `	ld	{emit_reg i.res.(0)}, 0(%r1)\n`
+     | Lop(Iconst_symbol s) ->
+        if !pic_code then
+        `	lgrl	{emit_reg i.res.(0)}, {emit_symbol s}@GOTENT\n`
+        else
+        `	larl	{emit_reg i.res.(0)}, {emit_symbol s}\n`;
+    | Lop(Icall_ind) ->
+        `	basr	%r14, {emit_reg i.arg.(0)}\n`;
+        let lbl = record_frame i.live i.dbg in
+         `{emit_label lbl}:\n`
+
+    | Lop(Icall_imm s) ->
+        if !pic_code then
+        `	brasl	%r14, {emit_symbol s}@PLT\n`
+        else
+        `	brasl	%r14, {emit_symbol s}\n`;
+        let lbl = record_frame i.live i.dbg in
+         `{emit_label lbl}:\n`;
+    | Lop(Itailcall_ind) ->
+        let n = frame_size() in
+        if !contains_calls then
+          `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+        emit_stack_adjust (-n);
+        `	br	{emit_reg i.arg.(0)}\n`
+    | Lop(Itailcall_imm s) ->
+        if s = !function_name then
+          `	brcl	15, {emit_label !tailrec_entry_point}\n`
+        else begin
+          let n = frame_size() in
+          if !contains_calls then
+            `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+          emit_stack_adjust (-n);
+          if !pic_code then
+            `	brcl	15, {emit_symbol s}@PLT\n`
+          else
+            `	brcl	15, {emit_symbol s}\n`
+        end
+              
+     | Lop(Iextcall(s, alloc)) ->
+        if alloc then begin
+          if !pic_code then begin
+          `	lgrl	%r7, {emit_symbol s}@GOTENT\n`;
+          `	brasl	%r14, {emit_symbol "caml_c_call"}@PLT\n`
+          end else begin
+          `	larl	%r7, {emit_symbol s}\n`;
+          `	brasl	%r14, {emit_symbol "caml_c_call"}\n`
+          end;
+          let lbl = record_frame i.live i.dbg in
+           `{emit_label lbl}:\n`;
+        end else begin
+          if !pic_code then
+          `	brasl	%r14, {emit_symbol s}@PLT\n`
+          else
+          `	brasl	%r14, {emit_symbol s}\n`
+       end
+
+     | Lop(Istackoffset n) ->
+        emit_stack_adjust n;
+        stack_offset := !stack_offset + n
+
+     | Lop(Iload(chunk, addr)) ->
+        let loadinstr =
+          match chunk with
+            Byte_unsigned -> "llgc"
+          | Byte_signed -> "lgb"
+          | Sixteen_unsigned -> "llgh"
+          | Sixteen_signed -> "lgh"
+          | Thirtytwo_unsigned -> "llgf"
+          | Thirtytwo_signed -> "lgf"
+          | Word_int | Word_val -> "lg"
+          | Single -> "ley"
+          | Double | Double_u -> "ldy" in
+        emit_load_store loadinstr addr i.arg 0 i.res.(0);
+        if chunk = Single then
+          `	ldebr	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+
+    | Lop(Istore(Single, addr, _)) ->
+        `	ledbr	%f15, {emit_reg i.arg.(0)}\n`;
+        emit_load_store "stey" addr i.arg 1 reg_f15
+    | Lop(Istore(chunk, addr, _)) ->
+        let storeinstr =
+          match chunk with
+            Byte_unsigned | Byte_signed -> "stcy"
+          | Sixteen_unsigned | Sixteen_signed -> "sthy"
+          | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
+          | Word_int | Word_val -> "stg"
+          | Single -> assert false
+          | Double | Double_u -> "stdy" in
+        emit_load_store storeinstr addr i.arg 1 i.arg.(0)
+
+    | Lop(Ialloc n) ->
+        let lbl_redo = new_label() in
+        let lbl_call_gc = new_label() in
+        let lbl_frame = record_frame i.live i.dbg in
+        call_gc_sites :=
+          { gc_lbl = lbl_call_gc;
+            gc_return_lbl = lbl_redo;
+            gc_frame_lbl = lbl_frame } :: !call_gc_sites;
+        `{emit_label lbl_redo}:`;
+        `	lay	%r11, {emit_int(-n)}(%r11)\n`;
+        `	clgr	%r11, %r10\n`;
+        `	brcl	4, {emit_label lbl_call_gc}\n`;  (* less than *)
+        `	la	{emit_reg i.res.(0)}, 8(%r11)\n`
+
+    | Lop(Iintop Imulh) ->
+       (* Hacker's Delight section 8.3:
+            mul-high-signed(a, b) = mul-high-unsigned(a, b)
+                                    - a  if b < 0
+                                    - b  if a < 0
+          or, without branches,
+            mul-high-signed(a, b) = mul-high-unsigned(a, b)
+                                    - (a & (b >>s 63))
+                                    - (b & (a >>s 63))
+       *)
+       `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+       `	mlgr	%r0, {emit_reg i.arg.(1)}\n`;
+         (* r0:r1 is 128-bit unsigned product; r0 is the high bits *)
+       `	srag	%r1, {emit_reg i.arg.(0)}, 63\n`;
+       `	ngr	%r1, {emit_reg i.arg.(1)}\n`;
+       `	sgr	%r0, %r1\n`;
+       `	srag	%r1, {emit_reg i.arg.(1)}, 63\n`;
+       `	ngr	%r1, {emit_reg i.arg.(0)}\n`;
+       `	sgr	%r0, %r1\n`;
+       `	lgr	{emit_reg i.res.(0)}, %r0\n`
+    | Lop(Iintop Imod) ->
+        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr	%r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr	{emit_reg i.res.(0)}, %r0\n`
+    | Lop(Iintop Idiv) ->
+        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr	%r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr	{emit_reg i.res.(0)}, %r1\n`
+    | Lop(Iintop Ilsl) ->
+        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
+    | Lop(Iintop Ilsr) ->
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
+    | Lop(Iintop Iasr) ->
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
+    | Lop(Iintop(Icomp cmp)) ->
+        let lbl = new_label() in
+        let (comp, mask) = name_for_int_comparison cmp in
+        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 1\n`;
+        `	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 0\n`;
+        `{emit_label lbl}:\n`
+    | Lop(Iintop Icheckbound) ->
+        let lbl = bound_error_label i.dbg in
+        `	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+        `	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
+    | Lop(Iintop op) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        let instr = name_for_intop op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lop(Iintop_imm(Iadd, n)) ->
+        emit_addimm i.res.(0) i.arg.(0) n
+    | Lop(Iintop_imm(Isub, n)) ->
+        emit_addimm i.res.(0) i.arg.(0) (-n)
+    | Lop(Iintop_imm(Icomp cmp, n)) ->
+        let lbl = new_label() in
+        let (comp, mask) = name_for_int_comparison_imm cmp in
+        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 1\n`;
+        `	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 0\n`;
+        `{emit_label lbl}:\n`
+    | Lop(Iintop_imm(Icheckbound, n)) ->
+       let lbl = bound_error_label i.dbg in 
+       if n >= 0 then begin
+        `	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
+        `	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
+       end else begin
+        `	brcl	15, {emit_label lbl}\n`  (* branch always *)
+       end
+    | Lop(Iintop_imm(Ilsl, n)) ->
+        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
+    | Lop(Iintop_imm(Ilsr, n)) ->
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
+    | Lop(Iintop_imm(Iasr, n)) ->
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
+    | Lop(Iintop_imm(Iand, n)) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	nilf	{emit_reg i.res.(0)}, {emit_int (n land 0xFFFF_FFFF)}\n`
+    | Lop(Iintop_imm(Ior, n)) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	oilf	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm(Ixor, n)) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	xilf	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm(Imul, n)) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+          `	msgfi	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm((Imulh | Idiv | Imod), _)) ->
+        assert false
+    | Lop(Inegf | Iabsf as op) ->
+        let instr = name_for_floatop1 op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+    | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        let instr = name_for_floatop2 op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
+    | Lop(Ifloatofint) ->
+          `	cdgbr	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+    | Lop(Iintoffloat) ->
+        (* rounding method #5 = round toward 0 *)
+        `	cgdbr	{emit_reg i.res.(0)}, 5, {emit_reg i.arg.(0)}\n`
+    | Lop(Ispecific sop) ->
+        assert (i.arg.(2).loc = i.res.(0).loc);
+        let instr = name_for_specific sop in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lreloadretaddr ->
+        let n = frame_size() in
+        `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`
+    | Lreturn ->
+        let n = frame_size() in
+        emit_stack_adjust (-n);
+        `	br	%r14\n`
+    | Llabel lbl ->
+        `{emit_label lbl}:\n`
+    | Lbranch lbl ->
+        `	brcl	15,{emit_label lbl}\n`
+    | Lcondbranch(tst, lbl) ->
+        begin match tst with
+          Itruetest ->
+            `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
+            `	brcl	7, {emit_label lbl}\n`
+        | Ifalsetest ->
+            `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
+            `	brcl	8, {emit_label lbl}\n`
+        | Iinttest cmp ->
+            let (comp, mask) = name_for_int_comparison cmp in
+            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
+        | Iinttest_imm(cmp, n) ->
+            let (comp, mask) = name_for_int_comparison_imm cmp in
+            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
+        | Ifloattest(cmp, neg) ->
+            `	cdbr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            let mask = branch_for_float_comparison cmp neg in
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
+        | Ioddtest ->
+            `	tmll	{emit_reg i.arg.(0)}, 1\n`;
+            `	brcl	1, {emit_label lbl}\n`
+        | Ieventest ->
+            `	tmll	{emit_reg i.arg.(0)}, 1\n`;
+            `	brcl	8, {emit_label lbl}\n`
+        end
+    | Lcondbranch3(lbl0, lbl1, lbl2) ->
+        `	cgfi	{emit_reg i.arg.(0)}, 1\n`;
+        begin match lbl0 with
+          None -> ()
+        | Some lbl -> `	brcl	4, {emit_label lbl}\n`
+        end;
+        begin match lbl1 with
+          None -> ()
+        | Some lbl -> `	brcl	8, {emit_label lbl}\n`
+        end;
+        begin match lbl2 with
+          None -> ()
+        | Some lbl -> `	brcl	2, {emit_label lbl}\n`
+        end
+    | Lswitch jumptbl ->
+        let lbl = new_label() in
+        `	larl	%r0, {emit_label lbl}\n`;
+        `	sllg	%r1, {emit_reg i.arg.(0)}, 2(%r0)\n`;
+        `	agr	%r1, %r0\n`;
+        `	lgf	%r1, 0(%r1)\n`;
+        `	agr	%r1, %r0\n`;
+        `	br	%r1\n`;
+        emit_string rodata_space;
+        `	.align	8\n`;
+        `{emit_label lbl}:`;
+        for i = 0 to Array.length jumptbl - 1 do
+          `	.long	{emit_label jumptbl.(i)} - {emit_label lbl}\n`
+        done;
+        emit_string code_space
+    | Lsetuptrap lbl ->
+        `	brasl	%r14, {emit_label lbl}\n`;
+    | Lpushtrap ->
+        stack_offset := !stack_offset + 16;
+        emit_stack_adjust 16;
+        `	stg	%r14, 0(%r15)\n`;
+        `	stg	%r13, {emit_int size_addr}(%r15)\n`;
+        `	lgr	%r13, %r15\n`
+    | Lpoptrap ->
+        `	lg	%r13, {emit_int size_addr}(%r15)\n`;
+        emit_stack_adjust (-16);
+        stack_offset := !stack_offset - 16
+    | Lraise k ->
+        begin match !Clflags.debug, k with
+        | true, Lambda.Raise_regular ->
+          `	brasl	%r14, {emit_symbol "caml_raise_exn"}\n`;
+          let lbl = record_frame Reg.Set.empty i.dbg in
+          `{emit_label lbl}:\n`
+        | true, Lambda.Raise_reraise ->
+          `	brasl	%r14, {emit_symbol "caml_reraise_exn"}\n`;
+          let lbl = record_frame Reg.Set.empty i.dbg in
+          `{emit_label lbl}:\n`
+        | false, _
+        | true, Lambda.Raise_notrace ->
+          `	lg	%r1, 0(%r13)\n`;
+          `	lgr	%r15, %r13\n`;
+          `	lg	%r13, {emit_int size_addr}(%r15)\n`;
+          emit_stack_adjust (-16);
+          `	br	%r1\n`
+        end
+
+
+(* Emit a sequence of instructions *)
+
+let rec emit_all i =
+  match i with
+    {desc = Lend} -> ()
+  | _ ->
+      emit_instr i;
+      emit_all i.next
+
+(* Emission of a function declaration *)
+
+let fundecl fundecl =
+  function_name := fundecl.fun_name;
+  tailrec_entry_point := new_label();
+  stack_offset := 0;
+  call_gc_sites := [];
+  bound_error_sites := [];
+  bound_error_call := 0;
+  float_literals := [];
+  int_literals := [];
+  `	.globl	{emit_symbol fundecl.fun_name}\n`;
+  emit_debug_info fundecl.fun_dbg;
+  `	.type	{emit_symbol fundecl.fun_name}, @function\n`;
+  emit_string code_space;
+  `	.align	8\n`;
+  `{emit_symbol fundecl.fun_name}:\n`;
+  let n = frame_size() in
+  emit_stack_adjust n;
+  if !contains_calls then
+    `	stg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+  `{emit_label !tailrec_entry_point}:\n`;
+  emit_all fundecl.fun_body;
+  (* Emit the glue code to call the GC *)
+  List.iter emit_call_gc !call_gc_sites;
+  (* Emit the glue code to handle bound errors *)
+  emit_call_bound_errors();
+  (* Emit the numeric literals *)
+  if !float_literals <> [] || !int_literals <> [] then begin
+    emit_string rodata_space;
+    `	.align	8\n`;
+    List.iter
+      (fun (f, lbl) ->
+        `{emit_label lbl}:`;
+        emit_float64_directive ".quad" f)
+      !float_literals;
+    List.iter
+      (fun (n, lbl) ->
+        `{emit_label lbl}:	.quad	{emit_nativeint n}\n`)
+      !int_literals
+  end
+
+(* Emission of data *)
+
+let declare_global_data s =
+  `	.globl	{emit_symbol s}\n`;
+  `	.type	{emit_symbol s}, @object\n`
+
+let emit_item = function
+    Cglobal_symbol s ->
+      declare_global_data s
+  | Cdefine_symbol s ->
+      `{emit_symbol s}:\n`;
+  | Cdefine_label lbl ->
+      `{emit_data_label lbl}:\n`
+  | Cint8 n ->
+      `	.byte	{emit_int n}\n`
+  | Cint16 n ->
+      `	.short	{emit_int n}\n`
+  | Cint32 n ->
+      `	.long	{emit_nativeint n}\n`
+  | Cint n ->
+      `	.quad	{emit_nativeint n}\n`
+  | Csingle f ->
+      emit_float32_directive ".long" (Int32.bits_of_float f)
+  | Cdouble f ->
+      emit_float64_directive ".quad" (Int64.bits_of_float f)
+  | Csymbol_address s ->
+      `	.quad	{emit_symbol s}\n`
+  | Clabel_address lbl ->
+      `	.quad	{emit_data_label lbl}\n`
+  | Cstring s ->
+      emit_bytes_directive "	.byte	" s
+  | Cskip n ->
+      if n > 0 then `	.space	{emit_int n}\n`
+  | Calign n ->
+      if n < 8 then `	.align	8\n`
+               else `	.align	{emit_int n}\n`
+
+let data l =
+  emit_string data_space;
+  `	.align	8\n`;
+  List.iter emit_item l
+
+(* Beginning / end of an assembly file *)
+
+let begin_assembly() =
+  reset_debug_info();
+  (* Emit the beginning of the segments *)
+  let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
+  emit_string data_space;
+  `	.align	8\n`;
+  declare_global_data lbl_begin;
+  `{emit_symbol lbl_begin}:\n`;
+  let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
+  emit_string code_space;
+  declare_global_data lbl_begin;
+  `{emit_symbol lbl_begin}:\n`
+
+let end_assembly() =
+  (* Emit the end of the segments *)
+  emit_string code_space;
+  let lbl_end = Compilenv.make_symbol (Some "code_end") in
+  declare_global_data lbl_end;
+  `{emit_symbol lbl_end}:\n`;
+  `	.long	0\n`;
+  emit_string data_space;
+  `	.align	8\n`;
+  let lbl_end = Compilenv.make_symbol (Some "data_end") in
+  declare_global_data lbl_end;
+  `{emit_symbol lbl_end}:\n`;
+  `	.quad	0\n`;
+  (* Emit the frame descriptors *)
+  emit_string rodata_space;
+  `	.align	8\n`;
+  let lbl = Compilenv.make_symbol (Some "frametable") in
+  declare_global_data lbl;
+  `{emit_symbol lbl}:\n`;
+  emit_frames
+    { efa_label = (fun l -> `	.quad	{emit_label l}\n`);
+      efa_16 = (fun n -> `	.short	{emit_int n}\n`);
+      efa_32 = (fun n -> `	.long	{emit_int32 n}\n`);
+      efa_word = (fun n -> `	.quad	{emit_int n}\n`);
+      efa_align = (fun n -> `	.align	{emit_int n}\n`);
+      efa_label_rel = (fun lbl ofs ->
+                           `	.long	({emit_label lbl} - .) + {emit_int32 ofs}\n`);
+      efa_def_label = (fun l -> `{emit_label l}:\n`);
+      efa_string = (fun s -> emit_bytes_directive "	.byte	" (s ^ "\000"))
+     }
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@ -0,0 +1,208 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Description of the Z Processor *)
+
+open Misc
+open Cmm
+open Reg
+open Arch
+open Mach
+
+(* Instruction selection *)
+
+let word_addressed = false
+
+(* Registers available for register allocation *)
+
+(* Integer register map:
+    0                   temporary, null register for some operations (volatile)
+    1                   temporary (volatile)
+    2 - 5               function arguments and results (volatile)
+    6                   function arguments and results (persevered by C)
+    7 - 9               general purpose, preserved by C
+    10                  allocation limit (preserved by C)
+    11                  allocation pointer (preserved by C)
+    12                  general purpose  (preserved by C)
+    13                  trap pointer (preserved by C)
+    14                  return address (volatile)
+    15                  stack pointer (preserved by C)
+  Floating-point register map:
+    0, 2, 4, 6          function arguments and results (volatile)
+    1, 3, 5, 7          general purpose (volatile)
+    8 - 14              general purpose, preserved by C
+    15                  temporary, preserved by C
+
+Note: integer register r12 is used as GOT pointer by some C compilers.
+The code generated by OCaml does not need a GOT pointer, using PC-relative
+addressing instead for accessing the GOT.  This frees r12 as a
+general-purpose register. *)
+
+let int_reg_name =
+    [| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; "%r12" |]
+
+let float_reg_name =
+    [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7";
+       "%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; "%f15" |]
+
+let num_register_classes = 2
+
+let register_class r =
+  match r.typ with
+  | Val | Int | Addr -> 0
+  | Float -> 1
+
+let num_available_registers = [| 9; 15 |]
+
+let first_available_register = [| 0; 100 |]
+
+let register_name r =
+  if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
+
+let rotate_registers = true
+
+(* Representation of hard registers by pseudo-registers *)
+
+let hard_int_reg =
+  let v = Array.make 9 Reg.dummy in
+  for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done; v
+
+let hard_float_reg =
+  let v = Array.make 16 Reg.dummy in
+  for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
+
+let all_phys_regs =
+  Array.append hard_int_reg hard_float_reg
+
+let phys_reg n =
+  if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
+
+let stack_slot slot ty =
+  Reg.at_location ty (Stack slot)
+
+(* Calling conventions *)
+
+let calling_conventions
+    first_int last_int first_float last_float make_stack stack_ofs arg =
+  let loc = Array.make (Array.length arg) Reg.dummy in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref stack_ofs in
+  for i = 0 to Array.length arg - 1 do
+    match arg.(i).typ with
+    | Val | Int | Addr as ty ->
+        if !int <= last_int then begin
+          loc.(i) <- phys_reg !int;
+          incr int
+        end else begin
+          loc.(i) <- stack_slot (make_stack !ofs) ty;
+          ofs := !ofs + size_int
+        end
+    | Float ->
+        if !float <= last_float then begin
+          loc.(i) <- phys_reg !float;
+          incr float
+        end else begin
+          loc.(i) <- stack_slot (make_stack !ofs) Float;
+          ofs := !ofs + size_float
+        end
+  done;
+  (loc, Misc.align !ofs 16)
+  (* Keep stack 16-aligned. *)
+
+let incoming ofs = Incoming ofs
+let outgoing ofs = Outgoing ofs
+let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
+
+let loc_arguments arg =
+  calling_conventions 0 4 100 103 outgoing 0 arg
+let loc_parameters arg =
+  let (loc, ofs) = calling_conventions 0 4 100 103 incoming 0 arg in loc
+let loc_results res =
+  let (loc, ofs) = calling_conventions 0 4 100 103 not_supported 0 res in loc
+
+(*   C calling conventions under SVR4:
+     use GPR 2-6 and FPR 0,2,4,6 just like ML calling conventions.
+     Using a float register does not affect the int registers.
+     Always reserve 160 bytes at bottom of stack, plus whatever is needed
+     to hold the overflow arguments. *)
+
+let loc_external_arguments arg =
+  let arg =
+    Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg in
+  let (loc, ofs) =
+    calling_conventions 0 4 100 103 outgoing 160 arg in
+  (Array.map (fun reg -> [|reg|]) loc, ofs)
+
+let extcall_use_push = false
+
+(* Results are in GPR 2 and FPR 0 *)
+
+let loc_external_results res =
+  let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc
+
+(* Exceptions are in GPR 2 *)
+
+let loc_exn_bucket = phys_reg 0
+
+(* Volatile registers: none *)
+
+let regs_are_volatile rs = false
+
+(* Registers destroyed by operations *)
+
+let destroyed_at_c_call =
+  Array.of_list(List.map phys_reg
+    [0; 1; 2; 3; 4;
+     100; 101; 102; 103; 104; 105; 106; 107])
+
+let destroyed_at_oper = function
+    Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
+  | Iop(Iextcall(_, false)) -> destroyed_at_c_call
+  | _ -> [||]
+
+let destroyed_at_raise = all_phys_regs
+
+(* Maximal register pressure *)
+
+let safe_register_pressure = function
+    Iextcall(_, _) -> 4
+  | _ -> 9
+
+let max_register_pressure = function
+    Iextcall(_, _) -> [| 4; 7 |]
+  | _ -> [| 9; 15 |]
+
+(* Pure operations (without any side effect besides updating their result
+   registers). *)
+
+let op_is_pure = function
+  | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
+  | Ispecific(Imultaddf | Imultsubf) -> true
+  | _ -> true
+
+(* Layout of the stack *)
+
+let num_stack_slots = [| 0; 0 |]
+let contains_calls = ref false
+
+(* Calling the assembler *)
+
+let assemble_file infile outfile =
+  Ccomp.command (Config.asm ^ " -o " ^
+                 Filename.quote outfile ^ " " ^ Filename.quote infile)
+
+let init () = ()
--- a/asmcomp/s390x/reload.ml
+++ b/asmcomp/s390x/reload.ml
@ -0,0 +1,47 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Reloading for the Z Processor *)
+
+open Arch
+open Mach
+
+class reload = object (self)
+
+inherit Reloadgen.reload_generic as super
+
+(* For 2-address instructions, reloading must make sure that the
+   temporary result register is the same as the appropriate
+   argument register. *)
+
+method! reload_operation op arg res =
+  match op with
+  (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
+  | Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
+      let res = self#makereg res.(0) in
+      ([|res; self#makereg arg.(1)|], [|res|])
+  (* Three-address ternary operations: arg.(2) and res.(0) must be the same *)
+  | Ispecific(Imultaddf|Imultsubf) ->
+      let res = self#makereg res.(0) in
+      ([|self#makereg arg.(0); self#makereg arg.(1); res|], [|res|])
+  (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+  |  Iintop_imm((Imul|Iand|Ior|Ixor), _) ->
+      let res = self#makereg res.(0) in
+      ([|res|], [|res|])
+  (* Other instructions are regular *)
+  | _ ->
+      super#reload_operation op arg res
+
+end
+
+let fundecl f =
+  (new reload)#fundecl f
--- a/asmcomp/s390x/scheduling.ml
+++ b/asmcomp/s390x/scheduling.ml
@ -0,0 +1,60 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Instruction scheduling for the Z processor *)
+
+open Arch
+open Mach
+
+(* The z10 processor is in-order, dual-issue.  It could benefit from some
+   basic-block scheduling, although precise latency information
+   is not available.
+   The z196 and later are out-of-order processors.  Basic-block
+   scheduling probably makes no difference. *)
+
+class scheduler = object
+
+inherit Schedgen.scheduler_generic
+
+(* Latencies (in cycles). Wild guesses.  We multiply all latencies by 2
+   to favor dual-issue. *)
+
+method oper_latency = function
+    Ireload -> 4
+  | Iload(_, _) -> 4
+  | Iconst_float _ -> 4 (* turned into a load *)
+  | Iintop(Imul) -> 10
+  | Iintop_imm(Imul, _) -> 10
+  | Iaddf | Isubf | Imulf -> 8
+  | Idivf -> 40
+  | Ispecific(Imultaddf | Imultsubf) -> 8
+  | _ -> 2
+
+method reload_retaddr_latency = 4
+
+(* Issue cycles.  Rough approximations. *)
+
+method oper_issue_cycles = function
+  | Ialloc _ -> 4
+  | Iintop(Imulh) -> 15
+  | Iintop(Idiv|Imod) -> 20
+  | Iintop(Icomp _) -> 4
+  | Iintop_imm(Icomp _, _) -> 4
+  | _ -> 1
+
+method reload_retaddr_issue_cycles = 1
+
+end
+
+let fundecl f = (new scheduler)#schedule_fundecl f
--- a/asmcomp/s390x/selection.ml
+++ b/asmcomp/s390x/selection.ml
@ -0,0 +1,114 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Instruction selection for the Z processor *)
+
+open Cmm
+open Arch
+open Mach
+
+(* Recognition of addressing modes *)
+
+exception Use_default
+
+type addressing_expr =
+  | Alinear of expression
+  | Aadd of expression * expression
+
+let rec select_addr = function
+  | Cop((Caddi | Cadda | Caddv), [arg; Cconst_int m]) ->
+      let (a, n) = select_addr arg in (a, n + m)
+  | Cop((Caddi | Cadda | Caddv), [Cconst_int m; arg]) ->
+      let (a, n) = select_addr arg in (a, n + m)
+  | Cop((Caddi | Cadda | Caddv), [arg1; arg2]) ->
+      begin match (select_addr arg1, select_addr arg2) with
+          ((Alinear e1, n1), (Alinear e2, n2)) ->
+              (Aadd(e1, e2), n1 + n2)
+        | _ ->
+              (Aadd(arg1, arg2), 0)
+      end
+  | exp ->
+      (Alinear exp, 0)
+
+(* Instruction selection *)
+
+let pseudoregs_for_operation op arg res =
+  match op with
+  (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
+  | Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
+      ([|res.(0); arg.(1)|], res)
+  | Ispecific(sop) ->
+    ( [| arg.(0); arg.(1); res.(0) |], [| res.(0) |])
+  (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+  |  Iintop_imm((Imul|Iand|Ior|Ixor), _) -> (res, res)
+  (* Other instructions are regular *)
+  | _ -> raise Use_default
+
+class selector = object (self)
+
+inherit Selectgen.selector_generic as super
+
+method is_immediate n = (n <= 2147483647) && (n >= -2147483648)
+
+method select_addressing chunk exp =
+  let (a, d) = select_addr exp in
+  (* 20-bit signed displacement *)
+  if d < 0x80000 && d >= -0x80000 then begin
+    match a with
+    | Alinear e -> (Iindexed d, e)
+    | Aadd(e1, e2) -> (Iindexed2 d, Ctuple [e1; e2])
+  end else
+    (Iindexed 0, exp)
+
+method! select_operation op args =
+  match (op, args) with
+  (* Z does not support immediate operands for multiply high *)
+    (Cmulhi, _) -> (Iintop Imulh, args)
+  (* The and, or and xor instructions have a different range of immediate
+     operands than the other instructions *)
+  | (Cand, _) -> self#select_logical Iand (-0x1_0000_0000) (-1) args
+  | (Cor, _) -> self#select_logical Ior 0 0xFFFF_FFFF args
+  | (Cxor, _) -> self#select_logical Ixor  0 0xFFFF_FFFF args
+  (* Recognize mult-add and mult-sub instructions *)
+  | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
+      (Ispecific Imultaddf, [arg1; arg2; arg3])
+  | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
+      (Ispecific Imultaddf, [arg1; arg2; arg3])
+  | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
+      (Ispecific Imultsubf, [arg1; arg2; arg3])
+  | _ ->
+      super#select_operation op args
+
+method select_logical op lo hi = function
+    [arg; Cconst_int n] when n >= lo && n <= hi ->
+      (Iintop_imm(op, n), [arg])
+  | [Cconst_int n; arg] when n >= lo && n <= hi ->
+      (Iintop_imm(op, n), [arg])
+  | args ->
+      (Iintop op, args)
+
+
+method! insert_op_debug op dbg rs rd =
+  try
+    let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
+    self#insert_moves rs rsrc;
+    self#insert_debug (Iop op) dbg rsrc rdst;
+    self#insert_moves rdst rd;
+    rd
+  with Use_default ->
+    super#insert_op_debug op dbg rs rd
+
+end
+
+let fundecl f = (new selector)#emit_fundecl f
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@ -0,0 +1,353 @@
+/***********************************************************************/
+/*                                                                     */
+/*                                OCaml                                */
+/*                                                                     */
+/*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
+/*                          Bill O'Farrell, IBM                        */
+/*                                                                     */
+/*  Copyright 2015 Institut National de Recherche en Informatique et   */
+/*  en Automatique. Copyright 2015 IBM (Bill O'Farrell with help from  */
+/*  Tristan Amini). All rights reserved.  This file is distributed     */
+/*  under the terms of the GNU Library General Public License, with    */
+/*  the special exception on linking described in file ../LICENSE.     */
+/*                                                                     */
+/***********************************************************************/
+#define Addrglobal(reg,glob) \
+        larl    reg, glob
+#define Loadglobal(reg,glob) \
+        lgrl    reg, glob
+#define Storeglobal(reg,glob) \
+        stgrl   reg, glob
+#define Loadglobal32(reg,glob) \
+        lgfrl   reg, glob
+#define Storeglobal32(reg,glob) \
+        strl   reg, glob
+
+
+        .section ".text"
+
+/* Invoke the garbage collector. */
+
+        .globl  caml_system__code_begin
+caml_system__code_begin:
+
+        .globl  caml_call_gc
+        .type   caml_call_gc, @function
+caml_call_gc:
+    /* Set up stack frame */
+#define FRAMESIZE (16*8 + 16*8)
+        lay	%r15, -FRAMESIZE(%r15)
+    /* Record return address into OCaml code */
+        Storeglobal(%r14, caml_last_return_address)
+    /* Record lowest stack address */
+        lay	%r0, FRAMESIZE(%r15)
+        Storeglobal(%r0, caml_bottom_of_stack)
+    /* Record pointer to register array */
+        lay	%r0, (8*16)(%r15)
+        Storeglobal(%r0, caml_gc_regs)
+    /* Save current allocation pointer for debugging purposes */
+        Storeglobal(%r11, caml_young_ptr)
+    /* Save exception pointer (if e.g. a sighandler raises) */
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Save all registers used by the code generator */
+        stmg    %r2,%r9, (8*16)(%r15)
+	stg	%r12, (8*16 + 8*8)(%r15)
+        std     %f0, 0(%r15)
+        std     %f1, 8(%r15)
+        std     %f2, 16(%r15)
+        std     %f3, 24(%r15)
+        std     %f4, 32(%r15)
+        std     %f5, 40(%r15)
+        std     %f6, 48(%r15)
+        std     %f7, 56(%r15)
+        std     %f8, 64(%r15)
+        std     %f9, 72(%r15)
+        std     %f10, 80(%r15)
+        std     %f11, 88(%r15)
+        std     %f12, 96(%r15)
+        std     %f13, 108(%r15)
+        std     %f14, 112(%r15)
+        std     %f15, 120(%r15)
+    /* Call the GC */
+        lay %r15, -160(%r15)
+        stg     %r15, 0(%r15)
+        brasl   %r14, caml_garbage_collection@PLT
+        lay %r15, 160(%r15)
+    /* Reload new allocation pointer and allocation limit */
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Restore all regs used by the code generator */
+        lmg     %r2,%r9, (8*16)(%r15)
+	lg	%r12, (8*16 + 8*8)(%r15)
+        ld      %f0, 0(%r15)
+        ld      %f1, 8(%r15)
+        ld      %f2, 16(%r15)
+        ld      %f3, 24(%r15)
+        ld      %f4, 32(%r15)
+        ld      %f5, 40(%r15)
+        ld      %f6, 48(%r15)
+        ld      %f7, 56(%r15)
+        ld      %f8, 64(%r15)
+        ld      %f9, 72(%r15)
+        ld      %f10, 80(%r15)
+        ld      %f11, 88(%r15)
+        ld      %f12, 96(%r15)
+        ld      %f13, 108(%r15)
+        ld      %f14, 112(%r15)
+        ld      %f15, 120(%r15)
+    /* Return to caller */
+        Loadglobal(%r1, caml_last_return_address)
+    /* Deallocate stack frame */
+        lay	%r15, FRAMESIZE(%r15)
+    /* Return */
+        br    %r1
+
+/* Call a C function from OCaml */
+
+        .globl  caml_c_call
+        .type   caml_c_call, @function
+caml_c_call:
+        Storeglobal(%r15, caml_bottom_of_stack)
+    /* Save return address */
+        ldgr	%f15, %r14
+    /* Get ready to call C function (address in r7) */
+    /* Record lowest stack address and return address */
+        Storeglobal(%r14, caml_last_return_address)
+    /* Make the exception handler and alloc ptr available to the C code */
+        Storeglobal(%r11, caml_young_ptr)
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Call the function */
+        basr %r14, %r7
+    /* restore return address */
+        lgdr	%r14,%f15
+    /* Reload allocation pointer and allocation limit*/
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Say we are back into OCaml code */
+        lgfi   %r0, 0
+        Storeglobal(%r0, caml_last_return_address)
+
+    /* Return to caller */
+        br %r14
+
+/* Raise an exception from OCaml */
+        .globl  caml_raise_exn
+        .type   caml_raise_exn, @function
+caml_raise_exn:
+        Loadglobal32(%r0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne     .L110
+.L111:
+    /* Pop trap frame */
+        lg	%r1, 0(%r13)
+        lgr     %r15, %r13
+        lg     %r13, 8(13)
+        agfi   %r15, 16
+    /* Branch to handler */
+        br	%r1
+.L110:
+        lgfi      %r0, 0
+        Storeglobal32(%r0, caml_backtrace_pos)
+.L114:
+        ldgr	%f15, %r2	/* preserve exn bucket in callee-save reg */
+                                /* arg1: exception bucket, already in r3 */
+        lgr     %r3,%r14        /* arg2: PC of raise */
+        lgr     %r4, %r15       /* arg3: SP of raise */
+        lgr     %r5, %r13           /* arg4: SP of handler */
+        agfi    %r15, -160       /* reserve stack space for C call */
+        brasl   %r14, caml_stash_backtrace@PLT
+        agfi    %r15, 160 
+        lgdr	%r2,%f15        /* restore exn bucket */
+        j       .L111           /* raise the exn */
+
+        .globl  caml_reraise_exn
+        .type   caml_reraise_exn, @function
+caml_reraise_exn:
+        Loadglobal32(%r0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne    .L114
+    /* Pop trap frame */
+        lg     %r1, 0(%r13)
+        lgr    %r15, %r13
+        lg    %r13, 8(%r13)
+        agfi   %r15, 16
+    /* Branch to handler */
+        br     %r1;
+
+/* Raise an exception from C */
+
+        .globl  caml_raise_exception
+        .type   caml_raise_exception, @function
+caml_raise_exception:
+        Loadglobal32(0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne    .L112
+.L113:
+    /* Reload OCaml global registers */
+        Loadglobal(%r15, caml_exception_pointer)
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Say we are back into OCaml code */
+        lgfi   %r0, 0
+        Storeglobal(%r0, caml_last_return_address)
+    /* Pop trap frame */
+        lg	%r1, 0(%r15)
+        lg	%r13, 8(%r15)
+        agfi	%r15, 16
+    /* Branch to handler */
+        br	%r1;
+.L112:
+        ldgr	%f15,%r2        /* preserve exn bucket in callee-save reg */
+                                /* arg1: exception bucket, already in r2 */
+        Loadglobal(%r3, caml_last_return_address) /* arg2: PC of raise */
+        Loadglobal(%r4, caml_bottom_of_stack)     /* arg3: SP of raise */
+        Loadglobal(%r5, caml_exception_pointer)   /* arg4: SP of handler */
+             /* reserve stack space for C call */
+        lay %r15, -160(%r15)
+        brasl   %r14, caml_stash_backtrace@PLT
+        lay %r15, 160(%r15)
+        lgdr	%r2,%f15        /* restore exn bucket */        /* restore exn bucket */
+        j       .L113           /* raise the exn */
+
+/* Start the OCaml program */
+
+        .globl  caml_start_program
+        .type   caml_start_program, @function
+caml_start_program:
+        Addrglobal(%r0, caml_program)
+
+/* Code shared between caml_start_program and caml_callback */
+.L102:
+    /* Allocate stack frame */
+        lay	%r15, -144(%r15)
+    /* Save all callee-save registers + return address */
+    /* GPR 6..14 at sp + 0 ... sp + 64
+       FPR 10..15 at sp + 72 ... sp + 128 */
+        stmg    %r6,%r14, 0(%r15)
+        std     %f8, 72(%r15)
+        std     %f9, 80(%r15)
+        std     %f10, 88(%r15)
+        std     %f11, 96(%r15)
+        std     %f12, 104(%r15)
+        std     %f13, 112(%r15)
+        std     %f14, 120(%r15)
+        std     %f15, 128(%r15)
+
+    /* Set up a callback link */
+        lay     %r15, -32(%r15)
+        Loadglobal(%r1, caml_bottom_of_stack)
+        stg     %r1, 0(%r15)
+        Loadglobal(%r1, caml_last_return_address)
+        stg     %r1, 8(%r15)
+        Loadglobal(%r1, caml_gc_regs)
+        stg     %r1, 16(%r15)
+    /* Build an exception handler to catch exceptions escaping out of OCaml */
+        brasl   %r14, .L103
+        j       .L104
+.L103:
+	lay	%r15, -16(%r15)
+        stg     %r14, 0(%r15)
+        Loadglobal(%r1, caml_exception_pointer)
+        stg     %r1, 8(%r15)
+        lgr     %r13, %r15
+    /* Reload allocation pointers */
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Call the OCaml code */
+        lgr %r1,%r0
+        basr %r14, %r1
+.L105:
+    /* Pop the trap frame, restoring caml_exception_pointer */
+        lg     %r0, 8(%r15)
+        Storeglobal(%r0, caml_exception_pointer)
+        la	%r15, 16(%r15)
+    /* Pop the callback link, restoring the global variables */
+.L106:
+        lg      %r5, 0(%r15)
+        lg      %r6, 8(%r15)
+        lg      %r1, 16(%r15)
+        Storeglobal(%r5, caml_bottom_of_stack)
+        Storeglobal(%r6, caml_last_return_address)
+        Storeglobal(%r1, caml_gc_regs)
+        la	%r15, 32(%r15)
+
+    /* Update allocation pointer */
+        Storeglobal(%r11, caml_young_ptr)
+
+	/* Restore registers */
+        lmg    %r6,%r14, 0(%r15)
+        ld     %f8, 72(%r15)
+        ld     %f9, 80(%r15)
+        ld     %f10, 88(%r15)
+        ld     %f11, 96(%r15)
+        ld     %f12, 104(%r15)
+        ld     %f13, 112(%r15)
+        ld     %f14, 120(%r15)
+        ld     %f15, 128(%r15)
+
+    /* Return */
+	lay	%r15, 144(%r15)
+	br	%r14
+
+    /* The trap handler: */
+.L104:
+    /* Update caml_exception_pointer */
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Encode exception bucket as an exception result and return it */
+        oill     %r2,  2
+        j       .L106
+
+/* Callback from C to OCaml */
+
+        .globl  caml_callback_exn
+        .type   caml_callback_exn, @function
+caml_callback_exn:
+    /* Initial shuffling of arguments */
+        lgr     %r0, %r2            /* Closure */
+        lgr     %r2, %r3            /* Argument */
+        lgr     %r3, %r0
+        lg      %r0, 0(%r3)        /* Code pointer */
+        j       .L102
+
+        .globl  caml_callback2_exn
+        .type   caml_callback2_exn, @function
+caml_callback2_exn:
+        lgr      %r0, %r2            /* Closure */
+        lgr      %r2, %r3            /* First argument */
+        lgr      %r3, %r4            /* Second argument */
+        lgr      %r4, %r0
+        Addrglobal(%r0, caml_apply2)
+        j       .L102
+
+        .globl  caml_callback3_exn
+        .type   caml_callback3_exn, @function
+caml_callback3_exn:
+        lgr      %r0, %r2            /* Closure */
+        lgr      %r2, %r3            /* First argument */
+        lgr      %r3, %r4            /* Second argument */
+        lgr      %r4, %r5            /* Third argument */
+        lgr      %r5, %r0
+        Addrglobal(%r0, caml_apply3)
+        j        .L102
+
+	.globl	caml_ml_array_bound_error
+	.type	caml_ml_array_bound_error, @function
+caml_ml_array_bound_error:
+	lay	%r15, -160(%r15)    /* Reserve stack space for C call */
+	larl	%r7, caml_array_bound_error
+	j	caml_c_call
+        .globl  caml_system__code_end
+caml_system__code_end:
+
+/* Frame table */
+
+        .section ".data"
+        .align 8
+        .globl  caml_system__frametable
+        .type   caml_system__frametable, @object
+caml_system__frametable:
+        .quad   1               /* one descriptor */
+        .quad   .L105           /* return address into callback */
+        .short  -1              /* negative size count => use callback link */
+        .short  0               /* no roots here */
+	.align	8
--- a/asmrun/signals_asm.c
+++ b/asmrun/signals_asm.c
@ -144,7 +144,7 @@ int caml_set_signal_action(int signo, int action)

 /* Machine- and OS-dependent handling of bound check trap */

-#if defined(TARGET_power) || (defined(TARGET_sparc) && defined(SYS_solaris))
+#if defined(TARGET_power) || defined(TARGET_s390x) || (defined(TARGET_sparc) && defined(SYS_solaris))
 DECLARE_SIGNAL_HANDLER(trap_handler)
 {
 #if defined(SYS_solaris)
@ -267,6 +267,14 @@ void caml_init_signals(void)
  }
 #endif

+#if defined(TARGET_s390x)
+  { struct sigaction act;
+    sigemptyset(&act.sa_mask);
+    SET_SIGACT(act, trap_handler);
+    sigaction(SIGFPE, &act, NULL);
+  }
+#endif
+
  /* Stack overflow handling */
 #ifdef HAS_STACK_OVERFLOW_DETECTION
  {
--- a/asmrun/signals_osdep.h
+++ b/asmrun/signals_osdep.h
@ -283,6 +283,23 @@
  #define CONTEXT_YOUNG_LIMIT (context->regs->gpr[30])
  #define CONTEXT_YOUNG_PTR (context->regs->gpr[31])
  #define CONTEXT_SP (context->regs->gpr[1])
+  
+/****************** s390x, ELF (Linux) */
+#elif defined(TARGET_s390x) && defined(SYS_elf)
+
+  #define DECLARE_SIGNAL_HANDLER(name) \
+    static void name(int sig, struct sigcontext * context)
+
+  #define SET_SIGACT(sigact,name) \
+     sigact.sa_handler = (void (*)(int)) (name); \
+     sigact.sa_flags = 0
+
+  typedef unsigned long context_reg;
+  #define CONTEXT_PC (context->sregs->regs.psw.addr)
+  #define CONTEXT_EXCEPTION_POINTER (context->sregs->regs.gprs[13])
+  #define CONTEXT_YOUNG_LIMIT (context->sregs->regs.gprs[10])
+  #define CONTEXT_YOUNG_PTR (context->sregs->regs.gprs[11])
+  #define CONTEXT_SP (context->sregs->regs.gprs[15])

 /****************** PowerPC, BSD */

--- a/asmrun/stack.h
+++ b/asmrun/stack.h
@ -50,6 +50,12 @@
 #define Mark_scanned(sp, retaddr) Saved_return_address(sp) = (retaddr) | 1
 #endif

+#ifdef TARGET_s390x
+#define Saved_return_address(sp) *((intnat *)((sp) - SIZEOF_PTR))
+#define Trap_frame_size 16
+#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
+#endif
+
 #ifdef TARGET_arm
 #define Saved_return_address(sp) *((intnat *)((sp) - 4))
 #define Callback_link(sp) ((struct caml_context *)((sp) + 8))
--- a/4
+++ b/4
@ -763,6 +763,7 @@ if test $with_sharedlibs = "yes"; then
        natdynlink=true
      fi;;
    x86_64-*-darwin*)             natdynlink=true;;
+    s390x*-*-linux*)              natdynlink=true;;
    powerpc*-*-linux*)            natdynlink=true;;
    sparc*-*-linux*)              natdynlink=true;;
    i686-*-kfreebsd*)             natdynlink=true;;
@ -823,6 +824,7 @@ case "$target" in
                                system=elf;;
  powerpc-*-netbsd*)            arch=power; model=ppc; system=elf;;
  powerpc-*-openbsd*)           arch=power; model=ppc; system=bsd_elf;;
+  s390x*-*-linux*)              arch=s390x; model=z10; system=elf;;
  armv6*-*-linux-gnueabihf)     arch=arm; model=armv6; system=linux_eabihf;;
  arm*-*-linux-gnueabihf)       arch=arm; system=linux_eabihf;;
  armv7*-*-linux-gnueabi)       arch=arm; model=armv7; system=linux_eabi;;
@ -899,6 +901,8 @@ case "$arch,$system" in
                    as="${TOOLPREF}as -mppc"
                    aspp="${TOOLPREF}gcc -m32 -c"
                  fi;;
+  s390x,elf)      as="${TOOLPREF}as -m 64 -march=$model"
+                  aspp="${TOOLPREF}gcc -c -Wa,-march=$model";;
  sparc,solaris)  as="${TOOLPREF}as"
                  case "$cc" in
                    *gcc*) aspp="${TOOLPREF}gcc -c";;
--- a/testsuite/tests/asmcomp/arith.cmm
+++ b/testsuite/tests/asmcomp/arith.cmm
@ -214,4 +214,5 @@
      (floataset d 37 (-f f))
      (floataset d 38 (absf f))

+      (addraset r 116 (mulh x y))
 )))))))
--- a/testsuite/tests/asmcomp/lexcmm.mll
+++ b/testsuite/tests/asmcomp/lexcmm.mll
@ -55,6 +55,7 @@ let keyword_table =
    "let", LET;
    "load", LOAD;
    "mod", MODI;
+    "mulh", MULH;
    "or", OR;
    "proj", PROJ;
    "raise", RAISE Lambda.Raise_regular;
--- a/testsuite/tests/asmcomp/mainarith.c
+++ b/testsuite/tests/asmcomp/mainarith.c
@ -61,6 +61,7 @@ double F, G;

 extern void call_gen_code(void (*)(void));
 extern void testarith(void);
+static intnat mulhs(intnat x, intnat y);

 void do_test(void)
 {
@ -252,29 +253,55 @@ void do_test(void)

      FLOATTEST(D[37], - F);
      FLOATTEST(D[38], fabs(F));
+
+      INTTEST(R[116], mulhs(X, Y));
 }

-#ifdef __i386__
-#ifdef __FreeBSD__
-#include <floatingpoint.h>
-#endif
-#endif
+/* Multiply-high signed.  Hacker's Delight section 8.2 */

-void init_ieee_floats(void)
+#define HALFSIZE (4 * sizeof(intnat))
+#define HALFMASK (((intnat)1 << HALFSIZE) - 1)
+
+static intnat mulhs(intnat u, intnat v)
 {
-#ifdef __i386__
-#ifdef __FreeBSD__
-  fpsetmask(0);
-#endif
-#endif
+  uintnat u0, v0, w0;
+  intnat u1, v1, w1, w2, t;
+  u0 = u & HALFMASK; u1 = u >> HALFSIZE;
+  v0 = v & HALFMASK; v1 = v >> HALFSIZE;
+  w0 = u0*v0;
+  t = u1*v0 + (w0 >> HALFSIZE);
+  w1 = t & HALFMASK;
+  w2 = t >> HALFSIZE;
+  w1 = u0*v1 + w1;
+  return u1*v1 + w2 + (w1 >> HALFSIZE);
 }

+/* A simple linear congruential PRNG */
+
+#ifdef ARCH_SIXTYFOUR
+#define RAND_A 6364136223846793005ULL
+#define RAND_C 1442695040888963407ULL
+#else
+#define RAND_A 214013U
+#define RAND_C 2531011U 
+#endif
+
+static intnat rnd(void)
+{
+  static uintnat seed = 0;
+  seed = seed * RAND_A + RAND_C;
+  return (intnat) seed;
+}
+
+/* Test harness */
+
+#define NUM_RANDOM_ITERATIONS 1000000
+
 int main(int argc, char **argv)
 {
+  int i;
  double weird[4];

-  init_ieee_floats();
-
  if (argc >= 5) {
    X = atoi(argv[1]);
    Y = atoi(argv[2]);
@ -283,12 +310,14 @@ int main(int argc, char **argv)
    do_test();
    return 0;
  }
+  printf("Testing -2...2\n");
  for(Y = -2; Y <= 2; Y++) {
    for (X = -2; X <= 2; X++) {
      F = X; G = Y; do_test();
    }
  }
  if (!(argc >= 2 && strcmp(argv[1], "noinf"))) {
+    printf("Testing special FP values\n");
    weird[0] = 0.0;
    weird[1] = 1.0 / weird[0];         /* +infty */
    weird[2] = -1.0 / weird[0];        /* -infty */
@ -299,13 +328,13 @@ int main(int argc, char **argv)
      }
    }
  }
-  while(1) {
-    X = (rand() & 0x1FFFFFFF) - 0x10000000;
-    Y = (rand() & 0x1FFFFFFF) - 0x10000000;
+  printf("Testing %d random values\n", NUM_RANDOM_ITERATIONS);
+  for (i = 0; i < NUM_RANDOM_ITERATIONS; i++) {
+    X = rnd();
+    Y = rnd();
    F = X / 1e3;
    G = Y / 1e3;
    do_test();
-    printf("."); fflush(stdout);
  }
  return 0;
 }
--- a/testsuite/tests/asmcomp/parsecmm.mly
+++ b/testsuite/tests/asmcomp/parsecmm.mly
@ -102,6 +102,7 @@ let access_array base numelt size =
 %token LTI
 %token MODI
 %token MULF
+%token MULH
 %token MULI
 %token NEA
 %token NEF
@ -289,6 +290,7 @@ binaryop:
  | GTF                         { Ccmpf Cgt }
  | GEF                         { Ccmpf Cge }
  | CHECKBOUND                  { Ccheckbound Debuginfo.none }
+  | MULH                        { Cmulhi }
 ;
 sequence:
    expr sequence               { Csequence($1, $2) }
--- a/testsuite/tests/asmcomp/s390x.S
+++ b/testsuite/tests/asmcomp/s390x.S
@ -0,0 +1,64 @@
+#define ALIGN 8
+
+#define CALL_GEN_CODE call_gen_code
+#define CAML_C_CALL caml_c_call
+#define CAML_NEGF_MASK caml_negf_mask
+#define CAML_ABSF_MASK caml_absf_mask
+
+	.section ".text"
+	
+	.globl	CALL_GEN_CODE
+	.type	CALL_GEN_CODE, @function
+	.align	ALIGN
+CALL_GEN_CODE:
+	/* Stack space */
+	lay	%r15, -144(%r15)
+	/* Save registers */
+        stmg    %r6,%r14, 0(%r15)
+        std     %f8, 72(%r15)
+        std     %f9, 80(%r15)
+        std     %f10, 88(%r15)
+        std     %f11, 96(%r15)
+        std     %f12, 104(%r15)
+        std     %f13, 112(%r15)
+        std     %f14, 120(%r15)
+        std     %f15, 128(%r15)
+	/* Shuffle args */
+	lgr	%r1, %r2
+	lgr	%r2, %r3
+	lgr	%r3, %r4
+	lgr	%r4, %r5
+	/* Function call */
+	basr	%r14, %r1
+	/* Restore registers */
+        lmg    %r6,%r14, 0(%r15)
+        ld     %f8, 72(%r15)
+        ld     %f9, 80(%r15)
+        ld     %f10, 88(%r15)
+        ld     %f11, 96(%r15)
+        ld     %f12, 104(%r15)
+        ld     %f13, 112(%r15)
+        ld     %f14, 120(%r15)
+        ld     %f15, 128(%r15)
+	/* Return */
+	lay	%r15, 144(%r15)
+	br	%r14
+
+	.globl	CAML_C_CALL
+	.type	CAML_C_CALL, @function
+	.align	ALIGN
+CAML_C_CALL:
+        br	%r7
+	
+	.section ".rodata"
+
+	.global	CAML_NEGF_MASK
+	.align	ALIGN
+CAML_NEGF_MASK:
+	.quad	0x8000000000000000, 0
+	.global  CAML_ABSF_MASK
+        .align  ALIGN
+CAML_ABSF_MASK:
+        .quad   0x7FFFFFFFFFFFFFFF, 0
+
+        .comm   young_limit, 8