From 58db11e0514daaaf25b5543c3ad48587ec6e1b40 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Wed, 28 Oct 2015 15:41:31 +0100
Subject: [PATCH 01/27] IBM zSystem port of OCaml

Import of Bill O'Farrell's port, rebased from 4.02.1 to trunk.
---
 asmcomp/s390x/CSE.ml            |  38 ++
 asmcomp/s390x/arch.ml           |  94 ++++
 asmcomp/s390x/emit.mlp          | 846 ++++++++++++++++++++++++++++++++
 asmcomp/s390x/proc.ml           | 203 ++++++++
 asmcomp/s390x/reload.ml         |  16 +
 asmcomp/s390x/scheduling.ml     |  64 +++
 asmcomp/s390x/selection.ml      | 118 +++++
 asmrun/s390x.S                  | 392 +++++++++++++++
 asmrun/signals_asm.c            |  10 +-
 asmrun/signals_osdep.h          |  17 +
 asmrun/stack.h                  |  10 +
 configure                       |   4 +
 testsuite/tests/asmcomp/s390x.S |  57 +++
 13 files changed, 1868 insertions(+), 1 deletion(-)
 create mode 100644 asmcomp/s390x/CSE.ml
 create mode 100644 asmcomp/s390x/arch.ml
 create mode 100644 asmcomp/s390x/emit.mlp
 create mode 100644 asmcomp/s390x/proc.ml
 create mode 100644 asmcomp/s390x/reload.ml
 create mode 100644 asmcomp/s390x/scheduling.ml
 create mode 100644 asmcomp/s390x/selection.ml
 create mode 100644 asmrun/s390x.S
 create mode 100644 testsuite/tests/asmcomp/s390x.S

diff --git a/asmcomp/s390x/CSE.ml b/asmcomp/s390x/CSE.ml
new file mode 100644
index 000000000..12a65fbf2
--- /dev/null
+++ b/asmcomp/s390x/CSE.ml
@@ -0,0 +1,38 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* CSE for the Z Processor *)
+
+open Arch
+open Mach
+open CSEgen
+
+class cse = object (self)
+
+inherit cse_generic as super
+
+method! class_of_operation op =
+  match op with
+  | Ispecific(Imultaddf | Imultsubf) -> Op_pure
+  | _ -> super#class_of_operation op
+
+method! is_cheap_operation op =
+  match op with
+  | Iconst_int n | Iconst_blockheader n -> n >= -0x8000_0000n && n <= 0x7FFF_FFFFn
+  | _ -> false
+
+end
+
+let fundecl f =
+  (new cse)#fundecl f
diff --git a/asmcomp/s390x/arch.ml b/asmcomp/s390x/arch.ml
new file mode 100644
index 000000000..b3a14dc78
--- /dev/null
+++ b/asmcomp/s390x/arch.ml
@@ -0,0 +1,94 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Specific operations for the Z processor *)
+
+open Format
+
+(* Machine-specific command-line options *)
+
+let pic_code = ref true
+
+let command_line_options =
+  [ "-fPIC", Arg.Set pic_code,
+      " Generate position-independent machine code (default)";
+    "-fno-PIC", Arg.Clear pic_code,
+      " Generate position-dependent machine code" ]
+
+(* Specific operations *)
+
+type specific_operation =
+    Imultaddf                           (* multiply and add *)
+  | Imultsubf                           (* multiply and subtract *)
+
+(* Addressing modes *)
+
+type addressing_mode =
+    Ibased of string * int              (* symbol + displ *)
+  | Iindexed of int                     (* reg + displ *)
+  | Iindexed2                           (* reg + reg *)
+
+(* Sizes, endianness *)
+
+let big_endian = true
+
+let s390x =
+  match Config.model with "s390x" -> true | _ -> false
+
+let size_addr = 8
+let size_int = size_addr
+let size_float = 8
+
+let allow_unaligned_access = false
+
+(* Behavior of division *)
+
+let division_crashes_on_overflow = true
+
+(* Operations on addressing modes *)
+
+let identity_addressing = Iindexed 0
+
+let offset_addressing addr delta =
+  match addr with
+    Ibased(s, n) -> Ibased(s, n + delta)
+  | Iindexed n -> Iindexed(n + delta)
+  | Iindexed2 -> assert false
+
+let num_args_addressing = function
+    Ibased(s, n) -> 0
+  | Iindexed n -> 1
+  | Iindexed2 -> 2
+
+(* Printing operations and addressing modes *)
+
+let print_addressing printreg addr ppf arg =
+  match addr with
+  | Ibased(s, n) ->
+      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
+      fprintf ppf "\"%s\"%s" s idx
+  | Iindexed n ->
+      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
+      fprintf ppf "%a%s" printreg arg.(0) idx
+  | Iindexed2 ->
+      fprintf ppf "%a + %a" printreg arg.(0) printreg arg.(1)
+
+let print_specific_operation printreg op ppf arg =
+  match op with
+  | Imultaddf ->
+      fprintf ppf "%a *f %a +f %a"
+        printreg arg.(0) printreg arg.(1) printreg arg.(2)
+  | Imultsubf ->
+      fprintf ppf "%a *f %a -f %a"
+        printreg arg.(0) printreg arg.(1) printreg arg.(2)
diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
new file mode 100644
index 000000000..114c0e9ce
--- /dev/null
+++ b/asmcomp/s390x/emit.mlp
@@ -0,0 +1,846 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Emission of Linux on Z 64-bit assembly code *)
+
+module StringSet =
+  Set.Make(struct type t = string let compare (x:t) y = compare x y end)
+
+open Misc
+open Cmm
+open Arch
+open Proc
+open Reg
+open Mach
+open Linearize
+open Emitaux
+
+(* Layout of the stack.  The stack is kept 8-aligned. *)
+
+let stack_offset = ref 0
+
+let frame_size () =
+  let size =
+    !stack_offset +                     (* Trap frame, outgoing parameters *)
+    size_int * num_stack_slots.(0) +    (* Local int variables *)
+    size_float * num_stack_slots.(1) +  (* Local float variables *)
+    size_addr + 			(* Slot for storing and restoring GOT pointer *)
+    (if !contains_calls then size_addr else 0) in (* The return address *)
+  Misc.align size 8
+
+let slot_offset loc cls =
+  match loc with
+    Local n ->
+      if cls = 0
+      then !stack_offset + num_stack_slots.(1) * size_float + n * size_int
+      else !stack_offset + n * size_float
+  | Incoming n -> frame_size() + n
+  | Outgoing n -> n
+
+(* Output a symbol *)
+
+let emit_symbol =
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" -> (fun s -> Emitaux.emit_symbol '.' s)
+  | _ -> assert false
+(* Output function call *)
+
+let emit_call s =
+   if !pic_code then
+    `brasl      %r14, {emit_symbol s}@PLT`
+   else
+    `brasl      %r14, {emit_symbol s}`
+(* Output a label *)
+
+let label_prefix =
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" -> ".L"
+  | _ -> assert false
+
+let emit_label lbl =
+  emit_string label_prefix; emit_int lbl
+
+let emit_data_label lbl =
+  emit_string label_prefix; emit_string "d"; emit_int lbl
+
+(* Section switching *)
+
+let data_space =
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" -> "	.section \".data\"\n"
+  | _ -> assert false
+
+let code_space =
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" -> "	.section \".text\"\n"
+  | _ -> assert false
+
+let rodata_space =
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" -> "	.section \".rodata\"\n"
+  | _ -> assert false
+
+
+let datag = ".quad"
+
+(* Output a pseudo-register *)
+
+let emit_reg r =
+  match r with
+     {loc = Reg rs; typ = (Int | Addr)} -> emit_string "%r"; emit_string (register_name rs)
+  |  {loc = Reg rs; typ = Float} -> emit_string "%f"; emit_string (register_name rs)
+  | _ -> fatal_error "Emit.emit_reg"
+
+
+let emit_gpr r = emit_string "%r"; emit_int r
+
+let emit_fpr r = emit_string "%f"; emit_int r
+
+  
+
+(* Output a stack reference *)
+
+let emit_stack r =
+  match r.loc with
+    Stack s ->
+      let ofs = slot_offset s (register_class r) in `{emit_int ofs}({emit_gpr 15})`
+  | _ -> fatal_error "Emit.emit_stack"
+
+
+(* Output a load or store operation *)
+
+let emit_symbol_offset (s, d) =
+  emit_symbol s;
+  if d > 0 then `+`;
+  if d <> 0 then emit_int d
+
+let emit_load_store instr addressing_mode addr n arg =
+if (compare instr "stey") = 0 then begin
+  `	ledbr	{emit_fpr 15}, {emit_reg arg}\n`;
+  match addressing_mode with
+    Ibased(s, d) ->
+    if !pic_code then begin
+      `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
+      `	{emit_string instr}	{emit_fpr 15},{emit_int d}({emit_gpr 1})\n`
+    end else begin
+      `	larl	{emit_gpr 1}, {emit_symbol_offset (s,d)}\n`;
+      `	{emit_string instr}	{emit_fpr 15},0({emit_gpr 1})\n`
+    end
+  | Iindexed ofs ->
+      `	{emit_string instr}	{emit_fpr 15}, {emit_int ofs}({emit_reg addr.(n)})\n`
+  | Iindexed2 ->
+      `	{emit_string instr}	{emit_fpr 15}, 0({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
+end else begin
+  match addressing_mode with
+    Ibased(s, d) ->
+    if !pic_code then begin
+      `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
+      `	{emit_string instr}	{emit_reg arg},{emit_int d}({emit_gpr 1})\n`
+    end else begin
+      `	larl	{emit_gpr 1}, {emit_symbol_offset (s,d)}\n`;
+      `	{emit_string instr}	{emit_reg arg},0({emit_gpr 1})\n`
+    end
+  | Iindexed ofs ->
+      `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
+  | Iindexed2 ->
+      `	{emit_string instr}	{emit_reg arg}, 0({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
+end
+
+(* After a comparison, extract the result as 0 or 1 *)
+let emit_set_comp cmp res =
+    `	lgfi	{emit_gpr 1}, 1\n`;
+    `	lgfi	{emit_reg res}, 0\n`;
+  begin match cmp with
+      Ceq -> `	locgre	{emit_reg res}, {emit_gpr 1}\n`
+    | Cne -> `	locgrne	{emit_reg res}, {emit_gpr 1}\n`
+    | Cgt -> `	locgrh	{emit_reg res}, {emit_gpr 1}\n`
+    | Cle -> `	locgrnh	{emit_reg res}, {emit_gpr 1}\n`
+    | Clt -> `	locgrl	{emit_reg res}, {emit_gpr 1}\n`
+    | Cge -> `	locgrnl	{emit_reg res}, {emit_gpr 1}\n`
+  end
+
+let nativelow n = 
+   let m = Nativeint.logand n (Nativeint.of_int 0xFFFFFFFF) in
+      if (Nativeint.compare m  0x7FFF_FFFFn) > 0 
+      then let k = Nativeint.logand m 0x7FFFFFFFn in 
+         Nativeint.sub k 2147483648n
+      else m
+      
+let nativehigh n = 
+    let m = Nativeint.shift_right n 32 in
+      if (Nativeint.compare m  0x7FFF_FFFFn) > 0 
+      then let k = Nativeint.logand m 0x7FFFFFFFn in 
+         Nativeint.sub k 2147483648n
+      else m
+
+(* Record live pointers at call points *)
+
+let record_frame live dbg =
+  let lbl = new_label() in
+  let live_offset = ref [] in
+  Reg.Set.iter
+    (function
+        {typ = Addr; loc = Reg r} ->
+          live_offset := (r lsl 1) + 1 :: !live_offset
+      | {typ = Addr; loc = Stack s} as reg ->
+          live_offset := slot_offset s (register_class reg) :: !live_offset
+      | _ -> ())
+    live;
+  frame_descriptors :=
+    { fd_lbl = lbl;
+      fd_frame_size = frame_size();
+      fd_live_offset = !live_offset;
+      fd_debuginfo = dbg } :: !frame_descriptors;
+  lbl
+type bound_error_call =
+  { bd_lbl: label;                      (* Entry label *)
+    bd_frame: label }                   (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+let bound_error_call = ref 0
+
+let bound_error_label dbg =
+  if !Clflags.debug then begin
+    let lbl_bound_error = new_label() in
+    let lbl_frame = record_frame Reg.Set.empty dbg in
+    bound_error_sites :=
+     { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
+   lbl_bound_error
+ end else begin
+   if !bound_error_call = 0 then bound_error_call := new_label();
+   !bound_error_call
+ end
+
+let emit_call_bound_error bd =
+  `{emit_label bd.bd_lbl}:	{emit_call "caml_ml_array_bound_error"}\n`;
+  `{emit_label bd.bd_frame}:\n`
+
+let emit_call_bound_errors () =
+  List.iter emit_call_bound_error !bound_error_sites;
+  if !bound_error_call > 0 then
+    `{emit_label !bound_error_call}:	{emit_call "caml_ml_array_bound_error"}\n`
+(* Record floating-point and large integer literals *)
+
+let float_literals = ref ([] : (int64 * int) list)
+let int_literals = ref ([] : (nativeint * int) list)
+
+let pic_externals = false
+
+let external_functions = ref StringSet.empty
+
+(* Names for conditional branches after comparisons *)
+
+let branch_for_comparison = function
+    Ceq -> "brcl	8," | Cne -> "brcl	7,"
+  | Cle -> "brcl	12," | Cgt -> "brcl	2,"
+  | Cge -> "brcl	10," | Clt -> "brcl	4,"
+
+let name_for_int_comparison = function
+    Isigned cmp -> ("cgr", branch_for_comparison cmp)
+  | Iunsigned cmp -> ("clgr", branch_for_comparison cmp)
+  
+let name_for_int_comparison_imm = function
+    Isigned cmp -> ("cgfi", branch_for_comparison cmp)
+  | Iunsigned cmp -> ("clgfi", branch_for_comparison cmp)
+
+(* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = unordered*)
+let name_for_float_comparison cmp neg =
+   match cmp with
+     Ceq -> if neg then "brcl	7," else "brcl	8,"
+   | Cne -> if neg then "brcl	8,"  else "brcl	7,"
+   | Cle -> if neg then "brcl	3,"  else "brcl	12,"
+   | Cgt -> if neg then "brcl	13," else "brcl	2,"
+   | Cge -> if neg then "brcl	5,"  else "brcl	10,"
+   | Clt -> if neg then "brcl	11," else "brcl	4,"
+
+(* Names for various instructions *)
+
+let name_for_intop = function
+    Iadd  -> "agr"
+  | Imul  -> "msgr"
+  | Iand  -> "ngr"
+  | Ior   -> "ogr"
+  | Ixor  -> "xgr"
+  | _ -> Misc.fatal_error "Emit.Intop"
+
+let name_for_intop_imm = function
+  | _ -> Misc.fatal_error "Emit.Intop_imm"
+
+let name_for_floatop1 = function
+    Inegf -> "lcdbr"
+  | Iabsf -> "lpdbr"
+  | _ -> Misc.fatal_error "Emit.Iopf1"
+
+let name_for_floatop2 = function
+    Iaddf -> "adbr"
+  | Isubf -> "sdbr"
+  | Imulf -> "mdbr"
+  | Idivf -> "ddbr"
+  | _ -> Misc.fatal_error "Emit.Iopf2"
+
+let name_for_specific = function
+    Imultaddf -> "madbr"
+  | Imultsubf -> "msdbr"
+
+(* Name of current function *)
+let function_name = ref ""
+(* Entry point for tail recursive calls *)
+let tailrec_entry_point = ref 0
+(* Names of functions defined in the current file *)
+let defined_functions = ref StringSet.empty
+(* Label of glue code for calling the GC *)
+let call_gc_label = ref 0
+
+
+(* Output the assembly code for an instruction *)
+
+let emit_instr i =
+    emit_debug_info i.dbg;
+    match i.desc with
+      Lend -> ()
+    | Lop(Imove | Ispill | Ireload) ->
+        let src = i.arg.(0) and dst = i.res.(0) in
+        if src.loc <> dst.loc then begin
+           match (src, dst) with
+              {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
+                `	lgr     {emit_reg dst}, {emit_reg src}\n`
+            | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
+                `	ldr     {emit_reg dst}, {emit_reg src}\n`
+            | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
+                `	stg     {emit_reg src}, {emit_stack dst}\n`
+            | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
+                `	std     {emit_reg src}, {emit_stack dst}\n`
+            | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
+                `	lg      {emit_reg dst}, {emit_stack src}\n`
+            | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
+                `	ldy      {emit_reg dst}, {emit_stack src}\n`
+            | (_, _) ->
+                fatal_error "Emit: Imove"
+        end
+    | Lop(Iconst_int n | Iconst_blockheader n) ->
+        if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
+          `	lgfi	{emit_reg i.res.(0)}, {emit_nativeint n}\n`;
+        end else begin
+          let lbl = new_label() in
+          int_literals := (n, lbl) :: !int_literals;
+          `	lgrl	{emit_reg i.res.(0)}, {emit_label lbl}\n`;
+          end
+    | Lop(Iconst_float f) ->
+        let lbl = new_label() in
+        float_literals := (Int64.bits_of_float f, lbl) :: !float_literals;
+        `	larl {emit_gpr 1}, {emit_label lbl}\n`;
+        `	ld  {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`
+     | Lop(Iconst_symbol s) ->
+        if !pic_code then
+        `	lg	{emit_reg i.res.(0)}, {emit_symbol s}@GOT({emit_gpr 12})\n`
+        else
+        `	larl {emit_reg i.res.(0)}, {emit_symbol s}\n`;
+    | Lop(Icall_ind) ->
+        `	basr	{emit_gpr 14}, {emit_reg i.arg.(0)}\n`;
+        let lbl = record_frame i.live i.dbg in
+	 `{emit_label lbl}:\n`
+
+    | Lop(Icall_imm s) ->
+        if !pic_code then
+        `	brasl	{emit_gpr 14}, {emit_symbol s}@PLT\n`
+        else
+        `	brasl   {emit_gpr 14}, {emit_symbol s}\n`;
+        let lbl = record_frame i.live i.dbg in
+	 `{emit_label lbl}:\n`;
+    | Lop(Itailcall_ind) ->
+        let n = frame_size() in
+        if !contains_calls then begin
+          `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
+          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+        end else begin
+          if n > 0 then
+            `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+        end;
+        `	br	{emit_reg i.arg.(0)}\n`
+    | Lop(Itailcall_imm s) ->
+        if s = !function_name then
+          `	brcl	15,{emit_label !tailrec_entry_point}\n`
+        else begin
+          let n = frame_size() in
+          if !contains_calls then begin
+          `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
+          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+          end else begin
+            if n > 0 then
+              `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+          end;
+          `	brcl	15,{emit_symbol s}\n`
+        end
+              
+     | Lop(Iextcall(s, alloc)) ->
+        if alloc then begin
+          if !pic_code then begin
+          `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
+          `	brasl	{emit_gpr 14}, {emit_symbol "caml_c_call"}@PLT\n`
+          end else begin
+          `	larl {emit_gpr 1}, {emit_symbol s}\n`;
+          `	brasl	{emit_gpr 14}, {emit_symbol "caml_c_call"}\n`
+          end;
+          let lbl = record_frame i.live i.dbg in
+	   `{emit_label lbl}:\n`;
+        end else begin
+          `	lay     {emit_gpr 15}, {emit_int (-160)} ({emit_gpr 15})\n`;
+          if !pic_code then
+          `	brasl   {emit_gpr 14}, {emit_symbol s}@PLT\n`
+          else
+          `	brasl   {emit_gpr 14}, {emit_symbol s}\n`;
+          `	lay     {emit_gpr 15}, {emit_int (160)} ({emit_gpr 15})\n`
+       end
+
+      | Lop(Istackoffset n) ->
+        `	lay     {emit_gpr 15}, {emit_int (-n)} ({emit_gpr 15})\n`;
+        stack_offset := !stack_offset + n
+    | Lop(Iload(chunk, addr)) ->
+        let loadinstr =
+          match chunk with
+            Byte_unsigned -> "llgc"
+          | Byte_signed -> "lgb"
+          | Sixteen_unsigned -> "llgh"
+          | Sixteen_signed -> "lgh"
+          | Thirtytwo_unsigned -> "llgf"
+          | Thirtytwo_signed -> "lgf"
+	  | Word -> "lg"
+          | Single -> "ley"
+          | Double | Double_u -> "ldy" in
+        emit_load_store loadinstr addr i.arg 0 i.res.(0);
+        if chunk = Single then
+          `	ldebr	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+    | Lop(Istore(chunk, addr, _)) ->
+        let storeinstr =
+          match chunk with
+            Byte_unsigned | Byte_signed -> "stcy"
+          | Sixteen_unsigned | Sixteen_signed -> "sthy"
+	  | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
+	  | Word -> "stg"
+          | Single -> "stey"
+          | Double | Double_u -> "stdy" in
+             emit_load_store storeinstr addr i.arg 1 i.arg.(0)
+        
+    | Lop(Ialloc n) ->
+        if !call_gc_label = 0 then call_gc_label := new_label();
+        let lbl = new_label() in
+        `	agfi	{emit_gpr 11}, {emit_int(-n)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 11}\n`;
+        `	agfi	{emit_reg i.res.(0)}, {emit_int size_addr}\n`;
+        `	clgr	{emit_gpr 11}, {emit_gpr 10}\n`;
+        `	jnl	{emit_label lbl}\n`;
+        `	brasl   {emit_gpr 14}, {emit_label !call_gc_label}\n`;
+        let fr_lbl = record_frame i.live Debuginfo.none in
+	 `{emit_label fr_lbl}:\n`;
+        `	{emit_label lbl}:\n`;
+        `	larl	{emit_gpr 12},_GLOBAL_OFFSET_TABLE_\n`
+    | Lop(Iintop Isub) ->
+        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
+        `	sgr	{emit_gpr 1}, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 1}\n`
+    | Lop(Iintop Imulh) ->
+       let lbl1 = new_label() in
+        `	lpgr	{emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
+        `	lpgr	{emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
+        `	mlgr	{emit_gpr 0}, {emit_gpr 0}\n`;
+        `	ldgr	{emit_fpr 15}, {emit_gpr 1}\n`;
+        `	lgr	{emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
+        `	xgr	{emit_gpr 1}, {emit_reg i.arg.(1)}\n`;
+        `	cgfi	{emit_gpr 1}, 0\n`;
+        `	jnl	{emit_label lbl1}\n`;
+        `	lgdr	{emit_gpr 1},{emit_fpr 15}\n`;
+        `	xilf	{emit_gpr 0},0xFFFFFFFF\n`;
+        `	xihf	{emit_gpr 0},0xFFFFFFFF\n`;
+        `	xilf	{emit_gpr 1},0xFFFFFFFF\n`;
+        `	xihf	{emit_gpr 1},0xFFFFFFFF\n`;
+        `	algfi	{emit_gpr 1},1\n`;
+        `	lgfi	{emit_gpr 1},0\n`;
+        `	alcgr	{emit_gpr 0},{emit_gpr 1}\n`;
+        `	{emit_label lbl1}:\n`;
+        `	lgr	{emit_reg i.res.(0)}, {emit_gpr 0}\n`
+    | Lop(Iintop Imod) ->
+        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
+        `	dsgr    {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 0}\n`
+    | Lop(Iintop Idiv) ->
+        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
+        `	dsgr    {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 1}\n`
+    | Lop(Iintop Ilsl) ->
+        `	sllg    {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+    | Lop(Iintop Ilsr) ->
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+    | Lop(Iintop Iasr) ->
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+    | Lop(Iintop(Icomp cmp)) ->
+        begin match cmp with
+          Isigned c ->
+            `	cgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            emit_set_comp c i.res.(0)
+        | Iunsigned c ->
+            `	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            emit_set_comp c i.res.(0)
+        end
+    | Lop(Iintop Icheckbound) ->
+        let lbl = bound_error_label i.dbg in
+	`	cgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+	`	jle	{emit_label lbl}\n`;
+	(* Check for negative index *)
+	`	cgfi	{emit_reg i.arg.(1)}, (0)\n`;
+	`	jl	{emit_label lbl}\n`
+    | Lop(Iintop op) ->
+        let instr = name_for_intop op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
+    | Lop(Iintop_imm(Isub, n)) ->
+        `	agfi	{emit_reg i.res.(0)}, {emit_int(-n)}\n`
+    | Lop(Iintop_imm(Icomp cmp, n)) ->
+        begin match cmp with
+          Isigned c ->
+            `	cgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+            emit_set_comp c i.res.(0)
+        | Iunsigned c ->
+            `	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+            emit_set_comp c i.res.(0)
+        end
+    | Lop(Iintop_imm(Icheckbound, n)) ->
+       let lbl = bound_error_label i.dbg in 
+	`	cgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
+	`	jle	{emit_label lbl}\n`; 
+	`	lgfi	{emit_gpr 1}, {emit_int n}\n`;
+	`	cgfi	{emit_gpr 1}, (0)\n`; 
+	`	jl	{emit_label lbl}\n`
+    | Lop(Iintop_imm(Ilsl, n)) ->
+        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+    | Lop(Iintop_imm(Ilsr, n)) ->
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+    | Lop(Iintop_imm(Iasr, n)) ->
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+    | Lop(Iintop_imm(Iand, n)) ->
+          let lbl = new_label() in
+          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
+          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
+          `	ngr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+    | Lop(Iintop_imm(Ior, n)) ->
+          let lbl = new_label() in
+          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
+          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
+          `	ogr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+    | Lop(Iintop_imm(Ixor, n)) ->
+          let lbl = new_label() in
+          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
+          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
+          `	xgr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+    | Lop(Iintop_imm(Imul, n)) ->
+          `	msgfi	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm(Iadd, n)) ->
+          `	lgr     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
+          `	agfi	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm(op, n)) ->
+        let instr = name_for_intop_imm op in
+        `	lgr     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Inegf | Iabsf as op) ->
+        let instr = name_for_floatop1 op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+    | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
+        let instr = name_for_floatop2 op in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
+    | Lop(Ifloatofint) ->
+	  `	cdgbr	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+    | Lop(Iintoffloat) ->
+        `	cgdbr	{emit_reg i.res.(0)}, 0, {emit_reg i.arg.(0)}\n`
+    | Lop(Ispecific sop) ->
+        let instr = name_for_specific sop in
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lreloadretaddr ->
+        let n = frame_size() in
+        `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`
+    | Lreturn ->
+        let n = frame_size() in
+        if n > 0 then
+          if !contains_calls then
+            `	lg	{emit_gpr 12}, {emit_int(n - (size_addr+size_addr))}({emit_gpr 15})\n`
+          else
+            `	lg	{emit_gpr 12}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
+          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+          `	nill	{emit_gpr 14}, 0xFFFE\n`;
+          `	br	{emit_gpr 14}\n`
+    | Llabel lbl ->
+        `{emit_label lbl}:\n`
+    | Lbranch lbl ->
+        `	brcl	15,{emit_label lbl}\n`
+    | Lcondbranch(tst, lbl) ->
+        begin match tst with
+          Itruetest ->
+            `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
+            `	brcl	7,	{emit_label lbl}\n`
+        | Ifalsetest ->
+            `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
+            `	brcl	8,	{emit_label lbl}\n`
+        | Iinttest cmp ->
+            let (comp, branch) = name_for_int_comparison cmp in
+            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            `	{emit_string branch}	{emit_label lbl}\n`
+        | Iinttest_imm(cmp, n) ->
+            let (comp, branch) = name_for_int_comparison_imm cmp in
+            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+            `	{emit_string branch}	{emit_label lbl}\n`
+        | Ifloattest(cmp, neg) ->
+            `	cdbr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+            let branch = name_for_float_comparison cmp neg in
+            `	{emit_string branch}	{emit_label lbl}\n`
+        | Ioddtest ->
+            `	lgfi	{emit_gpr 0}, 1\n`;
+            `	ngr	{emit_gpr 0}, {emit_reg i.arg.(0)}\n`;
+            `	brcl	4,	{emit_label lbl}\n`
+        | Ieventest ->
+            `	lgfi	{emit_gpr 0}, 1\n`;
+            `	ngr	{emit_gpr 0}, {emit_reg i.arg.(0)}\n`;
+            `	brcl	8,{emit_label lbl}\n`
+        end
+    | Lcondbranch3(lbl0, lbl1, lbl2) ->
+        `	cgfi	{emit_reg i.arg.(0)}, 1\n`;
+        begin match lbl0 with
+          None -> ()
+        | Some lbl -> `	brcl	4,{emit_label lbl}\n`
+        end;
+        begin match lbl1 with
+          None -> ()
+        | Some lbl -> `	brcl	8,	{emit_label lbl}\n`
+        end;
+        begin match lbl2 with
+          None -> ()
+        | Some lbl -> `	brcl	2,	{emit_label lbl}\n`
+        end
+    | Lswitch jumptbl ->
+        let lbl = new_label() in
+        `	larl	{emit_gpr 0}, {emit_label lbl}\n`;
+        `	sllg	{emit_gpr 1}, {emit_reg i.arg.(0)}, 2(%r0)\n`;
+        `	agr	{emit_gpr 1}, {emit_gpr 0}\n`;
+        `	lgf	{emit_gpr 1}, 0({emit_gpr 1})\n`;
+        `	agr	{emit_gpr 1}, {emit_gpr 0}\n`;
+        `	br	{emit_gpr 1}\n`;
+        emit_string rodata_space;
+        `	.align	8\n`;
+        `{emit_label lbl}:`;
+        for i = 0 to Array.length jumptbl - 1 do
+          `	.long	{emit_label jumptbl.(i)} - {emit_label lbl}\n`
+        done;
+        emit_string code_space
+    | Lsetuptrap lbl ->
+        `	brasl   {emit_gpr 14}, {emit_label lbl}\n`;
+    | Lpushtrap ->
+        stack_offset := !stack_offset + 16;
+        `	agfi	{emit_gpr 15}, -16\n`;
+        `	stg     {emit_gpr 14}, 0({emit_gpr 15})\n`;
+        `	stg     {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
+        `	lgr     {emit_gpr 13}, {emit_gpr 15}\n`
+    | Lpoptrap ->
+        `	lg      {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
+        `	agfi    {emit_gpr 15}, 16\n`;
+        stack_offset := !stack_offset - 16
+    | Lraise k ->
+        begin match !Clflags.debug, k with
+        | true, Lambda.Raise_regular ->
+          `	brasl   {emit_gpr 14}, {emit_symbol "caml_raise_exn"}\n`;
+          let lbl = record_frame Reg.Set.empty i.dbg in
+	   `{emit_label lbl}:\n`
+        | true, Lambda.Raise_reraise ->
+          `	brasl   {emit_gpr 14}, {emit_symbol "caml_reraise_exn"}\n`;
+          let lbl = record_frame Reg.Set.empty i.dbg in
+	   `{emit_label lbl}:\n`
+        | false, _
+        | true, Lambda.Raise_notrace ->
+          `	lg      {emit_gpr 1}, 0({emit_gpr 13})\n`;
+          `	lgr     {emit_gpr 15},{emit_gpr 13}\n`;
+          `	lg      {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
+          `	agfi    {emit_gpr 15}, 16\n`;
+          `	nill	{emit_gpr 1}, 0xFFFE\n`;
+          `	br	{emit_gpr 1}\n`
+        end
+
+
+(* Checks if a pseudo-instruction expands to instructions
+   that do not branch and do not affect CR0 nor R12. *)
+
+(* No branch delay slots needed on Z *)
+let is_simple_instr i = false
+
+
+let no_interference res arg =
+  try
+    for i = 0 to Array.length arg - 1 do
+      for j = 0 to Array.length res - 1 do
+        if arg.(i).loc = res.(j).loc then raise Exit
+      done
+    done;
+    true
+  with Exit ->
+    false
+
+(* Emit a sequence of instructions *)
+
+let rec emit_all i =
+  match i with
+    {desc = Lend} -> ()
+  | _ ->
+      emit_instr i;
+      emit_all i.next
+
+(* Emission of a function declaration *)
+
+let fundecl fundecl =
+  function_name := fundecl.fun_name;
+  defined_functions := StringSet.add fundecl.fun_name !defined_functions;
+  tailrec_entry_point := new_label();
+  stack_offset := 0;
+  call_gc_label := 0;
+  bound_error_sites := [];
+  bound_error_call := 0;
+  float_literals := [];
+  int_literals := [];
+  `	.globl	{emit_symbol fundecl.fun_name}\n`;
+emit_debug_info fundecl.fun_dbg;
+  begin match Config.system with
+  | "elf" | "bsd" | "bsd_elf" ->
+      `	.type	{emit_symbol fundecl.fun_name}, @function\n`
+  | _ -> ()
+  end;
+  emit_string code_space;
+  `	.align	8\n`;
+  `{emit_symbol fundecl.fun_name}:\n`;
+  let n = frame_size() in
+  if !contains_calls then begin
+    `	lay	{emit_gpr 15}, {emit_int(-n)}({emit_gpr 15})\n`;
+    `	stg     {emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
+    `	stg     {emit_gpr 12}, {emit_int(n - (size_addr+size_addr))}({emit_gpr 15})\n`
+  end else begin
+    if n > 0 then
+    `	lay	{emit_gpr 15}, {emit_int(-n)}({emit_gpr 15})\n`;
+    `	stg     {emit_gpr 12}, {emit_int(n - (size_addr))}({emit_gpr 15})\n`
+  end;
+  `	larl	{emit_gpr 12},_GLOBAL_OFFSET_TABLE_\n`;
+  `{emit_label !tailrec_entry_point}:\n`;
+  emit_all fundecl.fun_body;
+  (* Emit the glue code to call the GC *)
+  if !call_gc_label > 0 then begin
+    `{emit_label !call_gc_label}:\n`;
+    `	brcl	15,{emit_symbol "caml_call_gc"}\n`
+  end;
+  emit_call_bound_errors();
+  (* Emit the numeric literals *)
+  if !float_literals <> [] || !int_literals <> [] then begin
+    emit_string rodata_space;
+    `	.align	8\n`;
+    List.iter
+      (fun (f, lbl) ->
+        `{emit_label lbl}:`;
+        emit_float64_directive ".quad" f)
+      !float_literals;
+    List.iter
+      (fun (n, lbl) ->
+        `{emit_label lbl}:	{emit_string datag}	{emit_nativeint n}\n`)
+      !int_literals
+  end
+
+(* Emission of data *)
+
+let declare_global_data s =
+  `	.globl	{emit_symbol s}\n`;
+  match Config.system with
+  | "elf" | "bsd" | "bsd_elf" ->
+    `	.type	{emit_symbol s}, @object\n`
+  | _ -> assert false
+
+let emit_item = function
+    Cglobal_symbol s ->
+      declare_global_data s
+  | Cdefine_symbol s ->
+      `{emit_symbol s}:\n`;
+  | Cdefine_label lbl ->
+      `{emit_data_label lbl}:\n`
+  | Cint8 n ->
+      `	.byte	{emit_int n}\n`
+  | Cint16 n ->
+      `	.short	{emit_int n}\n`
+  | Cint32 n ->
+      `	.long	{emit_nativeint n}\n`
+  | Cint n ->
+      `	{emit_string datag}	{emit_nativeint n}\n`
+  | Csingle f ->
+      emit_float32_directive ".long" (Int32.bits_of_float f)
+  | Cdouble f ->
+      emit_float64_directive ".quad" (Int64.bits_of_float f)
+  | Csymbol_address s ->
+      `	{emit_string datag}	{emit_symbol s}\n`
+  | Clabel_address lbl ->
+      `	{emit_string datag}	{emit_data_label lbl}\n`
+  | Cstring s ->
+      emit_bytes_directive "	.byte	" s
+  | Cskip n ->
+      if n > 0 then `	.space	{emit_int n}\n`
+  | Calign n ->
+      if n < 8 then `	.align	8\n`
+               else `	.align	{emit_int n}\n`
+
+let data l =
+  emit_string data_space;
+  `	.align	8\n`;
+  List.iter emit_item l
+
+(* Beginning / end of an assembly file *)
+
+let begin_assembly() =
+  reset_debug_info();
+  defined_functions := StringSet.empty;
+  external_functions := StringSet.empty;
+  (* Emit the beginning of the segments *)
+  let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
+  emit_string data_space;
+  `	.align	8\n`;
+  declare_global_data lbl_begin;
+  `{emit_symbol lbl_begin}:\n`;
+  let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
+  emit_string code_space;
+  declare_global_data lbl_begin;
+  `{emit_symbol lbl_begin}:\n`
+
+let end_assembly() =
+  (* Emit the end of the segments *)
+  emit_string code_space;
+  let lbl_end = Compilenv.make_symbol (Some "code_end") in
+  declare_global_data lbl_end;
+  `{emit_symbol lbl_end}:\n`;
+  `	.long	0\n`;
+  emit_string data_space;
+  `	.align	8\n`;
+  let lbl_end = Compilenv.make_symbol (Some "data_end") in
+  declare_global_data lbl_end;
+  `{emit_symbol lbl_end}:\n`;
+  `	{emit_string datag}	0\n`;
+  (* Emit the frame descriptors *)
+  emit_string rodata_space;
+  `	.align	8\n`;
+  let lbl = Compilenv.make_symbol (Some "frametable") in
+  declare_global_data lbl;
+  `{emit_symbol lbl}:\n`;
+  emit_frames
+    { efa_label = (fun l -> `	{emit_string datag}	{emit_label l}\n`);
+      efa_16 = (fun n -> `	.short	{emit_int n}\n`);
+      efa_32 = (fun n -> `	.long	{emit_int32 n}\n`);
+      efa_word = (fun n -> `	{emit_string datag}	{emit_int n}\n`);
+      efa_align = (fun n -> `	.align	{emit_int n}\n`);
+      efa_label_rel = (fun lbl ofs ->
+                           `	.long	({emit_label lbl} - .) + {emit_int32 ofs}\n`);
+      efa_def_label = (fun l -> `{emit_label l}:\n`);
+      efa_string = (fun s -> emit_bytes_directive "	.byte	" (s ^ "\000"))
+     }
\ No newline at end of file
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
new file mode 100644
index 000000000..049c7a046
--- /dev/null
+++ b/asmcomp/s390x/proc.ml
@@ -0,0 +1,203 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Description of the Z Processor *)
+
+open Misc
+open Cmm
+open Reg
+open Arch
+open Mach
+
+(* Instruction selection *)
+
+let word_addressed = false
+
+(* Registers available for register allocation *)
+
+(* Integer register map:
+    0                   temporary, null register for some operations (volatile)
+    1                   temporary (volatile)
+    2 - 5               function arguments and results (volatile)
+    6              	function arguments and results (persevered by C)
+    7 - 9		general purpose, preserved by C
+    10                  allocation limit (preserved by C)
+    11                  allocation pointer (preserved by C)
+    12			GOT pointer  (preserved by C)
+    13                  trap pointer (preserved by C)
+    14			return address (volatile)
+    15			stack pointer (preserved by C)
+  Floating-point register map:
+    0, 2, 4, 6		function arguments and results (volatile)
+    1, 3, 5, 7		general purpose (volatile)
+    8 - 14          	general purpose, preserved by C
+    15			temporary, preserved by C
+*)
+
+let int_reg_name =
+    [| "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; |]
+
+let float_reg_name =
+    [| "0"; "2"; "4"; "6"; "1"; "3"; "5"; "7"; 
+       "8"; "9"; "10"; "11"; "12"; "13"; "14"; |]
+
+let num_register_classes = 2
+
+let register_class r =
+  match r.typ with
+    Int -> 0
+  | Addr -> 0
+  | Float -> 1
+
+let num_available_registers = [| 8; 15 |]
+
+let first_available_register = [| 0; 100 |]
+
+let register_name r =
+  if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
+
+let rotate_registers = true
+
+(* Representation of hard registers by pseudo-registers *)
+
+let hard_int_reg =
+  let v = Array.make 8 Reg.dummy in
+  for i = 0 to 7 do v.(i) <- Reg.at_location Int (Reg i) done; v
+
+let hard_float_reg =
+  let v = Array.make 15 Reg.dummy in
+  for i = 0 to 14 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
+
+let all_phys_regs =
+  Array.append hard_int_reg hard_float_reg
+
+let phys_reg n =
+  if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
+
+let stack_slot slot ty =
+  Reg.at_location ty (Stack slot)
+
+(* Calling conventions *)
+
+let calling_conventions
+    first_int last_int first_float last_float make_stack stack_ofs arg =
+  let loc = Array.make (Array.length arg) Reg.dummy in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref stack_ofs in
+  for i = 0 to Array.length arg - 1 do
+    match arg.(i).typ with
+      Int | Addr as ty ->
+        if !int <= last_int then begin
+          loc.(i) <- phys_reg !int;
+          incr int
+        end else begin
+          loc.(i) <- stack_slot (make_stack !ofs) ty;
+          ofs := !ofs + size_int
+        end
+    | Float ->
+        if !float <= last_float then begin
+          loc.(i) <- phys_reg !float;
+          incr float
+        end else begin
+          loc.(i) <- stack_slot (make_stack !ofs) Float;
+          ofs := !ofs + size_float
+        end
+  done;
+  (loc, Misc.align !ofs 16)
+  (* Keep stack 16-aligned. *)
+
+let incoming ofs = Incoming ofs
+let outgoing ofs = Outgoing ofs
+let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
+
+let loc_arguments arg =
+  calling_conventions 0 4 100 103 outgoing 0 arg
+let loc_parameters arg =
+  let (loc, ofs) = calling_conventions 0 4 100 103 incoming 0 arg in loc
+let loc_results res =
+  let (loc, ofs) = calling_conventions 0 4 100 103 not_supported 0 res in loc
+
+(*   C calling conventions under SVR4:
+     use GPR 2-6 and FPR 0,2,4,6 just like ML calling conventions.
+     Using a float register does not affect the int registers.
+     Always reserve 8 bytes at bottom of stack, plus whatever is needed
+     to hold the overflow arguments. *)
+
+let loc_external_arguments = calling_conventions 0 4 100 103 outgoing 0
+
+let extcall_use_push = false
+
+(* Results are in GPR 2 and FPR 0 *)
+
+let loc_external_results res =
+  let (loc, ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc
+
+(* Exceptions are in GPR 2 *)
+
+let loc_exn_bucket = phys_reg 0
+
+(* Volatile registers: none *)
+
+let regs_are_volatile rs = false
+
+(* Registers destroyed by operations *)
+
+let destroyed_at_c_call =
+  Array.of_list(List.map phys_reg
+    [0; 1; 2; 3; 4;
+     100; 101; 102; 103; 104; 105; 106; 107])
+
+let destroyed_at_oper = function
+    Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
+  | Iop(Iextcall(_, false)) -> destroyed_at_c_call
+  | _ -> [||]
+
+let destroyed_at_raise = all_phys_regs
+
+(* Maximal register pressure *)
+
+let safe_register_pressure = function
+    Iextcall(_, _) -> 3
+  | _ -> 8
+
+let max_register_pressure = function
+    Iextcall(_, _) -> [| 3; 7 |]
+  | _ -> [| 8; 15 |]
+
+(* Pure operations (without any side effect besides updating their result
+   registers). *)
+
+let op_is_pure = function
+  | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
+  | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
+  | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
+  | Ispecific(Imultaddf | Imultsubf) -> true
+  | _ -> true
+
+(* Layout of the stack *)
+
+let num_stack_slots = [| 0; 0 |]
+let contains_calls = ref false
+
+(* Calling the assembler *)
+
+let assemble_file infile outfile =
+  Ccomp.command (Config.asm ^ " -o " ^
+                 Filename.quote outfile ^ " " ^ Filename.quote infile)
+
+let init () = ()
+
+
+
diff --git a/asmcomp/s390x/reload.ml b/asmcomp/s390x/reload.ml
new file mode 100644
index 000000000..76c32b7c6
--- /dev/null
+++ b/asmcomp/s390x/reload.ml
@@ -0,0 +1,16 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Reloading for the Z Processor *)
+
+let fundecl f =
+  (new Reloadgen.reload_generic)#fundecl f
diff --git a/asmcomp/s390x/scheduling.ml b/asmcomp/s390x/scheduling.ml
new file mode 100644
index 000000000..874105302
--- /dev/null
+++ b/asmcomp/s390x/scheduling.ml
@@ -0,0 +1,64 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Instruction scheduling for the Z processor*)
+
+open Arch
+open Mach
+
+class scheduler = object
+
+inherit Schedgen.scheduler_generic
+
+(* Latencies (in cycles). Based roughly on the "common model". *)
+
+method oper_latency = function
+    Ireload -> 2
+  | Iload(_, _) -> 2
+  | Iconst_float _ -> 2 (* turned into a load *)
+  | Iconst_symbol _ -> 1
+  | Iintop(Imul) -> 9
+  | Iintop(Imulh) -> 20
+  | Iintop_imm(Imul, _) -> 5
+  | Iintop(Idiv | Imod) -> 36
+  | Iaddf | Isubf -> 4
+  | Imulf -> 5
+  | Idivf -> 33
+  | Ispecific(Imultaddf | Imultsubf) -> 5
+  | _ -> 1
+
+method reload_retaddr_latency = 12
+  (* If we can have that many cycles between the reloadretaddr and the
+     return, we can expect that the blr branch will be completely folded. *)
+
+(* Issue cycles.  Rough approximations. *)
+
+method oper_issue_cycles = function
+    Iconst_float _ | Iconst_symbol _ -> 2
+  | Iload(_, Ibased(_, _)) -> 2
+  | Istore(_, Ibased(_, _), _) -> 2
+  | Ialloc _ -> 4
+  | Iintop(Imod) -> 40 (* assuming full stall *)
+  | Iintop(Icomp _) -> 4
+  | Iintop_imm(Icomp _, _) -> 4
+  | Ifloatofint -> 9
+  | Iintoffloat -> 4
+  | _ -> 1
+
+method reload_retaddr_issue_cycles = 3
+  (* load then stalling mtlr *)
+
+end
+
+let fundecl f = (new scheduler)#schedule_fundecl f
diff --git a/asmcomp/s390x/selection.ml b/asmcomp/s390x/selection.ml
new file mode 100644
index 000000000..406f34881
--- /dev/null
+++ b/asmcomp/s390x/selection.ml
@@ -0,0 +1,118 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Gallium, INRIA Rocquencourt         *)
+(*                          Bill O'Farrell, IBM                        *)
+(*                                                                     *)
+(*    Copyright 2015 Institut National de Recherche en Informatique    *)
+(*    et en Automatique. Copyright 2015 IBM (Bill O'Farrell with       *)
+(*    help from Tristan Amini). All rights reserved.  This file is     *)
+(*    distributed under the terms of the Q Public License version 1.0. *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* Instruction selection for the Z processor *)
+
+open Cmm
+open Arch
+open Mach
+
+(* Recognition of addressing modes *)
+
+exception Use_default
+
+type addressing_expr =
+    Asymbol of string
+  | Alinear of expression
+  | Aadd of expression * expression
+
+let rec select_addr = function
+    Cconst_symbol s ->
+      (Asymbol s, 0)
+  | Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
+      let (a, n) = select_addr arg in (a, n + m)
+  | Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
+      let (a, n) = select_addr arg in (a, n + m)
+  | Cop((Caddi | Cadda), [arg1; arg2]) ->
+      begin match (select_addr arg1, select_addr arg2) with
+          ((Alinear e1, n1), (Alinear e2, n2)) ->
+              (Aadd(e1, e2), n1 + n2)
+        | _ ->
+              (Aadd(arg1, arg2), 0)
+      end
+  | exp ->
+      (Alinear exp, 0)
+
+(* Instruction selection *)
+
+let pseudoregs_for_operation op arg res =
+  match op with
+  (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
+    Iintop(Iadd|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
+      ([|res.(0); arg.(1)|], res)
+    | Ispecific(sop) ->
+    ( [| arg.(0); arg.(1); res.(0) |], [| res.(0) |])
+    (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+    |  Iintop_imm((Isub|Imul|Iand|Ior|Ixor), _) -> (res, res)
+    (* Other instructions are regular *)
+    | _ -> raise Use_default
+
+class selector = object (self)
+
+inherit Selectgen.selector_generic as super
+
+method is_immediate n = (n <= 2147483647) && (n >= -2147483648)
+
+method select_addressing chunk exp =
+  match select_addr exp with
+    (Asymbol s, d) ->
+      (Ibased(s, d), Ctuple [])
+  | (Alinear e, d) ->
+      (Iindexed d, e)
+  | (Aadd(e1, e2), d) ->
+      if d = 0
+      then (Iindexed2, Ctuple[e1; e2])
+      else (Iindexed d, Cop(Cadda, [e1; e2]))
+
+method! select_operation op args =
+  match (op, args) with
+  (* Z does not support immediate operands for multiply high *)
+    (Cmulhi, _) -> (Iintop Imulh, args)
+  (* The and, or and xor instructions have a different range of immediate 
+     operands than the other instructions *)
+  | (Cand, _) -> self#select_logical Iand args
+  | (Cor, _) -> self#select_logical Ior args
+  | (Cxor, _) -> self#select_logical Ixor args
+  (* Recognize mult-add and mult-sub instructions *)
+  | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
+      (Ispecific Imultaddf, [arg1; arg2; arg3])
+  | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2])]) ->
+      (Ispecific Imultaddf, [arg1; arg2; arg3])
+  | (Csubf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
+      (Ispecific Imultsubf, [arg1; arg2; arg3])
+  | _ ->
+      super#select_operation op args
+
+method select_logical op = function
+    [arg; Cconst_int n] when n >= 0 && n <= 0xFFFFFFFF ->
+      (Iintop_imm(op, n), [arg])
+  | [Cconst_int n; arg] when n >= 0 && n <= 0xFFFFFFFF ->
+      (Iintop_imm(op, n), [arg])
+  | args ->
+      (Iintop op, args)
+
+
+method! insert_op_debug op dbg rs rd =
+  try
+    let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
+    self#insert_moves rs rsrc;
+    self#insert_debug (Iop op) dbg rsrc rdst;
+    self#insert_moves rdst rd;
+    rd
+  with Use_default ->
+    super#insert_op_debug op dbg rs rd
+
+end
+
+let fundecl f = (new selector)#emit_fundecl f
diff --git a/asmrun/s390x.S b/asmrun/s390x.S
new file mode 100644
index 000000000..152e2ddf9
--- /dev/null
+++ b/asmrun/s390x.S
@@ -0,0 +1,392 @@
+/***********************************************************************/
+/*                                                                     */
+/*                                OCaml                                */
+/*                                                                     */
+/*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
+/*                          Bill O'Farrell, IBM                        */
+/*                                                                     */
+/*  Copyright 2015 Institut National de Recherche en Informatique et   */
+/*  en Automatique. Copyright 2015 IBM (Bill O'Farrell with help from  */
+/*  Tristan Amini). All rights reserved.  This file is distributed     */
+/*  under the terms of the GNU Library General Public License, with    */
+/*  the special exception on linking described in file ../LICENSE.     */
+/*                                                                     */
+/***********************************************************************/
+#define Addrglobal(reg,glob) \
+        larl    reg, glob
+#define Loadglobal(reg,glob) \
+        lgrl    reg, glob
+#define Storeglobal(reg,glob) \
+        stgrl   reg, glob
+#define Loadglobal32(reg,glob) \
+        lgfrl   reg, glob
+#define Storeglobal32(reg,glob) \
+        strl   reg, glob
+
+
+        .section ".text"
+
+/* Invoke the garbage collector. */
+
+        .globl  caml_system__code_begin
+caml_system__code_begin:
+
+        .globl  caml_call_gc
+        .type   caml_call_gc, @function
+caml_call_gc:
+    /* Set up stack frame */
+#define FRAMESIZE (16*8 + 16*8 + 32)
+        agfi	%r15, -FRAMESIZE
+        stg     %r15, 0(%r15)
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+    /* Record return address into OCaml code */
+        Storeglobal(%r14, caml_last_return_address)
+    /* Record lowest stack address */
+        lgr	%r0, %r15
+        agfi    %r0, FRAMESIZE
+        Storeglobal(%r0, caml_bottom_of_stack)
+    /* Record pointer to register array */
+        lgr	%r0, %r15
+        agfi    %r0, 8*16 + 32
+        Storeglobal(%r0, caml_gc_regs)
+    /* Save current allocation pointer for debugging purposes */
+        Storeglobal(%r11, caml_young_ptr)
+    /* Save exception pointer (if e.g. a sighandler raises) */
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Save all registers used by the code generator */
+        lgr	%r1, %r15
+        agfi    %r1, 8*16 + 32 - 8
+        stmg    %r2,%r9, 8(%r1)
+        lgr	%r1, %r15
+        agfi    %r1, 32 - 8
+        std     %f0, 8(%r1)
+        std     %f1, 16(%r1)
+        std     %f2, 24(%r1)
+        std     %f3, 32(%r1)
+        std     %f4, 40(%r1)
+        std     %f5, 48(%r1)
+        std     %f6, 56(%r1)
+        std     %f7, 64(%r1)
+        std     %f8, 72(%r1)
+        std     %f9, 80(%r1)
+        std     %f10, 88(%r1)
+        std     %f11, 96(%r1)
+        std     %f12, 104(%r1)
+        std     %f13, 112(%r1)
+        std     %f14, 120(%r1)
+        std     %f15, 128(%r1)
+    /* Call the GC */
+        lay %r15, -160(%r15)
+        stg     %r15, 0(%r15)
+        brasl   %r14, caml_garbage_collection@PLT
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        lay %r15, 160(%r15)
+    /* Reload new allocation pointer and allocation limit */
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Restore all regs used by the code generator */
+        lgr	%r1, %r15
+        agfi    %r1, 8*16 + 32 - 8
+        lmg     %r2,%r9, 8(%r1)
+        lgr	%r1, %r15
+        agfi    %r1, 32 - 8
+        ld     %f0, 8(%r1)
+        ld     %f1, 16(%r1)
+        ld     %f2, 24(%r1)
+        ld     %f3, 32(%r1)
+        ld     %f4, 40(%r1)
+        ld     %f5, 48(%r1)
+        ld     %f6, 56(%r1)
+        ld     %f7, 64(%r1)
+        ld     %f8, 72(%r1)
+        ld     %f9, 80(%r1)
+        ld     %f10, 88(%r1)
+        ld     %f11, 96(%r1)
+        ld     %f12, 104(%r1)
+        ld     %f13, 112(%r1)
+        ld     %f14, 120(%r1)
+        ld     %f15, 128(%r1)
+    /* Return to caller, restarting the allocation */
+        Loadglobal(%r1, caml_last_return_address)
+        agfi   %r1, -30     /* Restart the allocation (7 instructions) */
+    /* Say we are back into OCaml code */
+        lgfi   %r0, 0
+        Storeglobal(%r0, caml_last_return_address)
+    /* Deallocate stack frame */
+        agfi	%r15, FRAMESIZE
+    /* Return */
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        br    %r1
+
+/* Call a C function from OCaml */
+
+        .globl  caml_c_call
+        .type   caml_c_call, @function
+caml_c_call:
+        Storeglobal(%r15, caml_bottom_of_stack)
+
+        lay %r15, -160(%r15)
+
+    /* Save return address */
+        ldgr	%f15, %r14
+    /* Get ready to call C function (address in r1) */
+    /* Record lowest stack address and return address */
+        Storeglobal(%r14, caml_last_return_address)
+    /* Make the exception handler and alloc ptr available to the C code */
+        Storeglobal(%r11, caml_young_ptr)
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Call the function */
+
+        basr %r14, %r1;
+    /* restore return address */
+        lgdr	%r14,%f15
+        lay %r15, 160(%r15)
+    /* Reload allocation pointer and allocation limit*/
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Say we are back into OCaml code */
+        lgfi   %r0, 0
+        Storeglobal(%r0, caml_last_return_address)
+
+    /* Return to caller */
+        br %r14;
+
+/* Raise an exception from OCaml */
+        .globl  caml_raise_exn
+        .type   caml_raise_exn, @function
+caml_raise_exn:
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        Loadglobal32(%r0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne     .L110
+.L111:
+    /* Pop trap frame */
+        lg	%r1, 0(%r13)
+        lgr     %r15, %r13
+        lg     %r13, 8(13)
+        agfi   %r15, 16
+    /* Branch to handler */
+        br	%r1;
+.L110:
+        lgfi      %r0, 0
+        Storeglobal32(%r0, caml_backtrace_pos)
+.L114:
+        ldgr	%f15, %r2	/* preserve exn bucket in callee-save reg */
+                                /* arg1: exception bucket, already in r3 */
+        lgr     %r3,%r14        /* arg2: PC of raise */
+        lgr     %r4, %r15       /* arg3: SP of raise */
+        lgr     %r5, %r13           /* arg4: SP of handler */
+        agfi    %r15, -160       /* reserve stack space for C call */
+        brasl   %r14, caml_stash_backtrace@PLT
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        agfi    %r15, 160 
+        lgdr	%r2,%f15        /* restore exn bucket */
+        j       .L111           /* raise the exn */
+
+        .globl  caml_reraise_exn
+        .type   caml_reraise_exn, @function
+caml_reraise_exn:
+	larl  %r12,_GLOBAL_OFFSET_TABLE_
+        Loadglobal32(%r0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne    .L114
+    /* Pop trap frame */
+        lg     %r1, 0(%r13)
+        lgr    %r15, %r13
+        lg    %r13, 8(%r13)
+        agfi   %r15, 16
+    /* Branch to handler */
+        br     %r1;
+
+/* Raise an exception from C */
+
+        .globl  caml_raise_exception
+        .type   caml_raise_exception, @function
+caml_raise_exception:
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        Loadglobal32(0, caml_backtrace_active)
+        cgfi	%r0, 0
+        jne    .L112
+.L113:
+    /* Reload OCaml global registers */
+        Loadglobal(%r15, caml_exception_pointer)
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Say we are back into OCaml code */
+        lgfi   %r0, 0
+        Storeglobal(%r0, caml_last_return_address)
+    /* Pop trap frame */
+        lg	%r1, 0(%r15)
+        lg	%r13, 8(%r15)
+        agfi	%r15, 16
+    /* Branch to handler */
+        br	%r1;
+.L112:
+        ldgr	%f15,%r2        /* preserve exn bucket in callee-save reg */
+                                /* arg1: exception bucket, already in r2 */
+        Loadglobal(%r3, caml_last_return_address) /* arg2: PC of raise */
+        Loadglobal(%r4, caml_bottom_of_stack)     /* arg3: SP of raise */
+        Loadglobal(%r5, caml_exception_pointer)   /* arg4: SP of handler */
+             /* reserve stack space for C call */
+        lay %r15, -160(%r15)
+        brasl   %r14, caml_stash_backtrace@PLT
+        larl  %r12,_GLOBAL_OFFSET_TABLE_
+        lay %r15, 160(%r15)
+        lgdr	%r2,%f15        /* restore exn bucket */        /* restore exn bucket */
+        j       .L113           /* raise the exn */
+
+/* Start the OCaml program */
+
+        .globl  caml_start_program
+        .type   caml_start_program, @function
+caml_start_program:
+        Addrglobal(%r0, caml_program)
+
+/* Code shared between caml_start_program and caml_callback */
+.L102:
+    /* Allocate and link stack frame */
+        stg	%r15, -320(%r15)
+        agfi	%r15, -320
+    /* Save return address */
+        stg     %r14, 8(%r15)
+    /* Save all callee-save registers */
+    /* GPR 14 at sp+16 ... GPR 31 at sp+84
+       FPR 14 at sp+92 ... FPR 31 at sp+228 */
+        lgr	%r1, %r15
+        agfi    %r1, 16-8
+        stmg    %r6,%r13, 8(%r1)
+	stg	%r15, 72(%r1)
+	std     %f0, 80(%r1)
+        std     %f8, 88(%r1)
+        std     %f9, 96(%r1)
+        std     %f10, 104(%r1)
+        std     %f11, 112(%r1)
+        std     %f12, 120(%r1)
+        std     %f13, 128(%r1)
+        std     %f14, 136(%r1)
+        std     %f15, 144(%r1)
+
+    /* Set up a callback link */
+        agfi    %r15, -32
+        Loadglobal(%r1, caml_bottom_of_stack)
+        stg     %r1, 0(%r15)
+        Loadglobal(%r1, caml_last_return_address)
+        stg     %r1, 8(%r15)
+        Loadglobal(%r1, caml_gc_regs)
+        stg     %r1, 16(%r15)
+    /* Build an exception handler to catch exceptions escaping out of OCaml */
+        brasl   %r14, .L103
+        j       .L104
+.L103:
+        agfi    %r15,  -16
+        stg     %r14, 0(%r15)
+        Loadglobal(%r1, caml_exception_pointer)
+        stg      %r1, 8(%r15)
+        lgr     %r13, %r15
+    /* Reload allocation pointers */
+        Loadglobal(%r11, caml_young_ptr)
+        Loadglobal(%r10, caml_young_limit)
+    /* Say we are back into OCaml code */
+        lgfi   %r1, 0
+        Storeglobal(%r1, caml_last_return_address)
+    /* Call the OCaml code */
+        lgr %r1,%r0
+.L105:
+        basr %r14, %r1
+    /* Pop the trap frame, restoring caml_exception_pointer */
+        lg     %r0, 8(%r15)
+        Storeglobal(%r0, caml_exception_pointer)
+        agfi    %r15, 16
+    /* Pop the callback link, restoring the global variables */
+.L106:
+        lg      %r5, 0(%r15)
+        lg      %r6, 8(%r15)
+        nill	%r6, 0xFFFE
+        lg      %r1, 16(%r15)
+        Storeglobal(%r5, caml_bottom_of_stack)
+        Storeglobal(%r6, caml_last_return_address)
+        Storeglobal(%r1, caml_gc_regs)
+        agfi    %r15, 32
+    /* Update allocation pointer */
+        Storeglobal(%r11, caml_young_ptr)
+    /* Restore callee-save registers */
+        lgr	%r1, %r15
+        agfi    %r1, 16-8
+        lmg     %r6,%r13, 8(%r1)
+	ld     %f0, 80(%r1)
+        ld     %f8, 88(%r1)
+        ld     %f9, 96(%r1)
+        ld     %f10, 104(%r1)
+        ld     %f11, 112(%r1)
+        ld     %f12, 120(%r1)
+        ld     %f13, 128(%r1)
+        ld     %f14, 136(%r1)
+        ld     %f15, 144(%r1)
+
+    /* Reload return address */
+        lg     %r1, 8(%r15)
+    /* Return */
+        agfi    %r15,  320
+        br	%r1
+
+    /* The trap handler: */
+.L104:
+    /* Update caml_exception_pointer */
+        Storeglobal(%r13, caml_exception_pointer)
+    /* Encode exception bucket as an exception result and return it */
+        oill     %r2,  2
+        j       .L106
+
+/* Callback from C to OCaml */
+
+        .globl  caml_callback_exn
+        .type   caml_callback_exn, @function
+caml_callback_exn:
+    /* Initial shuffling of arguments */
+        lgr     %r0, %r2            /* Closure */
+        lgr     %r2, %r3            /* Argument */
+        lgr     %r3, %r0
+        lg      %r0, 0(%r3)        /* Code pointer */
+        j       .L102
+
+        .globl  caml_callback2_exn
+        .type   caml_callback2_exn, @function
+caml_callback2_exn:
+        lgr      %r0, %r2            /* Closure */
+        lgr      %r2, %r3            /* First argument */
+        lgr      %r3, %r4            /* Second argument */
+        lgr      %r4, %r0
+        Addrglobal(%r0, caml_apply2)
+        j       .L102
+
+        .globl  caml_callback3_exn
+        .type   caml_callback3_exn, @function
+caml_callback3_exn:
+        lgr      %r0, %r2            /* Closure */
+        lgr      %r2, %r3            /* First argument */
+        lgr      %r3, %r4            /* Second argument */
+        lgr      %r4, %r5            /* Third argument */
+        lgr      %r5, %r0
+        Addrglobal(%r0, caml_apply3)
+        j        .L102
+
+	.globl	caml_ml_array_bound_error
+	.type	caml_ml_array_bound_error, @function
+caml_ml_array_bound_error:
+	larl	%r1, caml_array_bound_error
+	j	caml_c_call
+        .globl  caml_system__code_end
+caml_system__code_end:
+
+/* Frame table */
+
+        .section ".data"
+        .align 8
+        .globl  caml_system__frametable
+        .type   caml_system__frametable, @object
+caml_system__frametable:
+        .quad   1               /* one descriptor */
+        .quad   .L105 + 2       /* return address into callback */
+        .short  -1              /* negative size count => use callback link */
+        .short  0               /* no roots here */
+	.align	8
diff --git a/asmrun/signals_asm.c b/asmrun/signals_asm.c
index f30273bd4..6c9439de5 100644
--- a/asmrun/signals_asm.c
+++ b/asmrun/signals_asm.c
@@ -144,7 +144,7 @@ int caml_set_signal_action(int signo, int action)
 
 /* Machine- and OS-dependent handling of bound check trap */
 
-#if defined(TARGET_power) || (defined(TARGET_sparc) && defined(SYS_solaris))
+#if defined(TARGET_power) || defined(TARGET_s390x) || (defined(TARGET_sparc) && defined(SYS_solaris))
 DECLARE_SIGNAL_HANDLER(trap_handler)
 {
 #if defined(SYS_solaris)
@@ -267,6 +267,14 @@ void caml_init_signals(void)
   }
 #endif
 
+#if defined(TARGET_s390x)
+  { struct sigaction act;
+    sigemptyset(&act.sa_mask);
+    SET_SIGACT(act, trap_handler);
+    sigaction(SIGFPE, &act, NULL);
+  }
+#endif
+
   /* Stack overflow handling */
 #ifdef HAS_STACK_OVERFLOW_DETECTION
   {
diff --git a/asmrun/signals_osdep.h b/asmrun/signals_osdep.h
index 627e3b727..03cdb87a9 100644
--- a/asmrun/signals_osdep.h
+++ b/asmrun/signals_osdep.h
@@ -283,6 +283,23 @@
   #define CONTEXT_YOUNG_LIMIT (context->regs->gpr[30])
   #define CONTEXT_YOUNG_PTR (context->regs->gpr[31])
   #define CONTEXT_SP (context->regs->gpr[1])
+  
+/****************** s390x, ELF (Linux) */
+#elif defined(TARGET_s390x) && defined(SYS_elf)
+
+  #define DECLARE_SIGNAL_HANDLER(name) \
+    static void name(int sig, struct sigcontext * context)
+
+  #define SET_SIGACT(sigact,name) \
+     sigact.sa_handler = (void (*)(int)) (name); \
+     sigact.sa_flags = 0
+
+  typedef unsigned long context_reg;
+  #define CONTEXT_PC (context->sregs->regs.psw.addr)
+  #define CONTEXT_EXCEPTION_POINTER (context->sregs->regs.gprs[13])
+  #define CONTEXT_YOUNG_LIMIT (context->sregs->regs.gprs[10])
+  #define CONTEXT_YOUNG_PTR (context->sregs->regs.gprs[11])
+  #define CONTEXT_SP (context->sregs->regs.gprs[15])
 
 /****************** PowerPC, BSD */
 
diff --git a/asmrun/stack.h b/asmrun/stack.h
index a4f47c7f9..dcf7f45ff 100644
--- a/asmrun/stack.h
+++ b/asmrun/stack.h
@@ -50,6 +50,16 @@
 #define Mark_scanned(sp, retaddr) Saved_return_address(sp) = (retaddr) | 1
 #endif
 
+#ifdef TARGET_s390x
+#define Saved_return_address(sp) *((intnat *)((sp) - SIZEOF_PTR))
+#define Already_scanned(sp, retaddr) ((retaddr) & 1)
+#define Mark_scanned(sp, retaddr) \
+          (*((intnat *)((sp) - SIZEOF_PTR)) = (retaddr) | 1)
+#define Mask_already_scanned(retaddr) ((retaddr) & ~1)
+#define Trap_frame_size 16
+#define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
+#endif
+
 #ifdef TARGET_arm
 #define Saved_return_address(sp) *((intnat *)((sp) - 4))
 #define Callback_link(sp) ((struct caml_context *)((sp) + 8))
diff --git a/configure b/configure
index 2f3788fa3..049d01769 100755
--- a/configure
+++ b/configure
@@ -763,6 +763,7 @@ if test $with_sharedlibs = "yes"; then
         natdynlink=true
       fi;;
     x86_64-*-darwin*)             natdynlink=true;;
+    s390x*-*-linux*)              natdynlink=true;;
     powerpc*-*-linux*)            natdynlink=true;;
     sparc*-*-linux*)              natdynlink=true;;
     i686-*-kfreebsd*)             natdynlink=true;;
@@ -823,6 +824,7 @@ case "$target" in
                                 system=elf;;
   powerpc-*-netbsd*)            arch=power; model=ppc; system=elf;;
   powerpc-*-openbsd*)           arch=power; model=ppc; system=bsd_elf;;
+  s390x*-*-linux*)              arch=s390x; model=z10; system=elf;;
   armv6*-*-linux-gnueabihf)     arch=arm; model=armv6; system=linux_eabihf;;
   arm*-*-linux-gnueabihf)       arch=arm; system=linux_eabihf;;
   armv7*-*-linux-gnueabi)       arch=arm; model=armv7; system=linux_eabi;;
@@ -899,6 +901,8 @@ case "$arch,$system" in
                     as="${TOOLPREF}as -mppc"
                     aspp="${TOOLPREF}gcc -m32 -c"
                   fi;;
+  s390x,elf)      as="${TOOLPREF}as -m 64"
+                  aspp="${TOOLPREF}gcc -c -Wa,-march=$model";;
   sparc,solaris)  as="${TOOLPREF}as"
                   case "$cc" in
                     *gcc*) aspp="${TOOLPREF}gcc -c";;
diff --git a/testsuite/tests/asmcomp/s390x.S b/testsuite/tests/asmcomp/s390x.S
new file mode 100644
index 000000000..519cc38a0
--- /dev/null
+++ b/testsuite/tests/asmcomp/s390x.S
@@ -0,0 +1,57 @@
+#define ALIGN 8
+
+#define CALL_GEN_CODE call_gen_code
+#define CAML_C_CALL caml_c_call
+#define CAML_NEGF_MASK caml_negf_mask
+#define CAML_ABSF_MASK caml_absf_mask
+
+	.section "text"
+	
+	.globl	CALL_GEN_CODE
+	.type	CALL_GEN_CODE, @function
+	.align	ALIGN
+CALL_GEN_CODE:
+	/* Stack space */
+	lay	%r15, -48(%r15)
+	/* Save registers */
+	stg	%r14, 0(%r15)
+	stg	%r7, 8(%r15)
+	stg	%r8, 16(%r15)
+	stg	%r9, 24(%r15)
+	stg	%r10, 32(%r15)
+	stg	%r11, 40(%r15)
+	/* Shuffle args */
+	lgr	%r1, %r2
+	lgr	%r2, %r3
+	lgr	%r3, %r4
+	lgr	%r4, %r5
+	/* Function call */
+	brasl	%r14, %r1
+	/* Restore registers */
+	lg	%r11, 40(%r15)
+	lg	%r10, 32(%r15)
+	lg	%r9, 24(%r15)
+	lg	%r8, 16(%r15)
+	lg	%r7, 8(%r15)
+
+
+	.globl	CAML_C_CALL
+	.type	CAML_C_CALL, @function
+	.align	ALIGN
+CAML_C_CALL:
+	lay	%r15, -160(%r15)
+        brasl %r14, %r1
+	lay	%r15, 160(%r15)
+	
+	.section	.rodata
+
+	.global	CAML_NEGF_MASK
+	.align	ALIGN
+CAML_NEGF_MASK:
+	.quad	0x8000000000000000, 0
+	.global  CAML_ABSF_MASK
+        .align  ALIGN
+CAML_ABSF_MASK:
+        .quad   0x7FFFFFFFFFFFFFFF, 0
+
+        .comm   young_limit, 8
\ No newline at end of file

From b79f88ea73854932265cd6a23e3ebeb5ac35f7b1 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 29 Oct 2015 10:07:30 +0100
Subject: [PATCH 02/27] zSystem port: minimal fixes so that it compiles wrt the
 trunk

Plus a few simplifications:
- emit.mlp: assume target is ELF
- proc.ml & emit.mlp: use asm register names for 'register_name'
---
 asmcomp/s390x/emit.mlp | 56 +++++++++++++++---------------------------
 asmcomp/s390x/proc.ml  | 18 ++++++++------
 2 files changed, 31 insertions(+), 43 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 114c0e9ce..c29adbbbb 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -48,10 +48,8 @@ let slot_offset loc cls =
 
 (* Output a symbol *)
 
-let emit_symbol =
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" -> (fun s -> Emitaux.emit_symbol '.' s)
-  | _ -> assert false
+let emit_symbol s = Emitaux.emit_symbol '.' s
+
 (* Output function call *)
 
 let emit_call s =
@@ -59,12 +57,10 @@ let emit_call s =
     `brasl      %r14, {emit_symbol s}@PLT`
    else
     `brasl      %r14, {emit_symbol s}`
+
 (* Output a label *)
 
-let label_prefix =
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" -> ".L"
-  | _ -> assert false
+let label_prefix = ".L"
 
 let emit_label lbl =
   emit_string label_prefix; emit_int lbl
@@ -74,30 +70,19 @@ let emit_data_label lbl =
 
 (* Section switching *)
 
-let data_space =
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" -> "	.section \".data\"\n"
-  | _ -> assert false
+let data_space = "	.section \".data\"\n"
 
-let code_space =
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" -> "	.section \".text\"\n"
-  | _ -> assert false
-
-let rodata_space =
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" -> "	.section \".rodata\"\n"
-  | _ -> assert false
+let code_space = "	.section \".text\"\n"
 
+let rodata_space = "	.section \".rodata\"\n"
 
 let datag = ".quad"
 
 (* Output a pseudo-register *)
 
 let emit_reg r =
-  match r with
-     {loc = Reg rs; typ = (Int | Addr)} -> emit_string "%r"; emit_string (register_name rs)
-  |  {loc = Reg rs; typ = Float} -> emit_string "%f"; emit_string (register_name rs)
+  match r.loc with
+  | Reg r -> emit_string (register_name r)
   | _ -> fatal_error "Emit.emit_reg"
 
 
@@ -189,10 +174,12 @@ let record_frame live dbg =
   let live_offset = ref [] in
   Reg.Set.iter
     (function
-        {typ = Addr; loc = Reg r} ->
+      | {typ = Val; loc = Reg r} ->
           live_offset := (r lsl 1) + 1 :: !live_offset
-      | {typ = Addr; loc = Stack s} as reg ->
+      | {typ = Val; loc = Stack s} as reg ->
           live_offset := slot_offset s (register_class reg) :: !live_offset
+      | {typ = Addr} as r ->
+          Misc.fatal_error ("bad GC root " ^ Reg.name r)
       | _ -> ())
     live;
   frame_descriptors :=
@@ -311,15 +298,15 @@ let emit_instr i =
         let src = i.arg.(0) and dst = i.res.(0) in
         if src.loc <> dst.loc then begin
            match (src, dst) with
-              {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
+              {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
                 `	lgr     {emit_reg dst}, {emit_reg src}\n`
             | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
                 `	ldr     {emit_reg dst}, {emit_reg src}\n`
-            | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} ->
+            | {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Stack sd} ->
                 `	stg     {emit_reg src}, {emit_stack dst}\n`
             | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
                 `	std     {emit_reg src}, {emit_stack dst}\n`
-            | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} ->
+            | {loc = Stack ss; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
                 `	lg      {emit_reg dst}, {emit_stack src}\n`
             | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
                 `	ldy      {emit_reg dst}, {emit_stack src}\n`
@@ -413,7 +400,7 @@ let emit_instr i =
           | Sixteen_signed -> "lgh"
           | Thirtytwo_unsigned -> "llgf"
           | Thirtytwo_signed -> "lgf"
-	  | Word -> "lg"
+	  | Word_int | Word_val -> "lg"
           | Single -> "ley"
           | Double | Double_u -> "ldy" in
         emit_load_store loadinstr addr i.arg 0 i.res.(0);
@@ -425,7 +412,7 @@ let emit_instr i =
             Byte_unsigned | Byte_signed -> "stcy"
           | Sixteen_unsigned | Sixteen_signed -> "sthy"
 	  | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
-	  | Word -> "stg"
+	  | Word_int | Word_val -> "stg"
           | Single -> "stey"
           | Double | Double_u -> "stdy" in
              emit_load_store storeinstr addr i.arg 1 i.arg.(0)
@@ -756,10 +743,7 @@ emit_debug_info fundecl.fun_dbg;
 
 let declare_global_data s =
   `	.globl	{emit_symbol s}\n`;
-  match Config.system with
-  | "elf" | "bsd" | "bsd_elf" ->
-    `	.type	{emit_symbol s}, @object\n`
-  | _ -> assert false
+  `	.type	{emit_symbol s}, @object\n`
 
 let emit_item = function
     Cglobal_symbol s ->
@@ -843,4 +827,4 @@ let end_assembly() =
                            `	.long	({emit_label lbl} - .) + {emit_int32 ofs}\n`);
       efa_def_label = (fun l -> `{emit_label l}:\n`);
       efa_string = (fun s -> emit_bytes_directive "	.byte	" (s ^ "\000"))
-     }
\ No newline at end of file
+     }
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
index 049c7a046..3f79b76d2 100644
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@@ -46,18 +46,17 @@ let word_addressed = false
 *)
 
 let int_reg_name =
-    [| "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; |]
+    [| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; |]
 
 let float_reg_name =
-    [| "0"; "2"; "4"; "6"; "1"; "3"; "5"; "7"; 
-       "8"; "9"; "10"; "11"; "12"; "13"; "14"; |]
+    [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7"; 
+       "%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; |]
 
 let num_register_classes = 2
 
 let register_class r =
   match r.typ with
-    Int -> 0
-  | Addr -> 0
+  | Val | Int | Addr -> 0
   | Float -> 1
 
 let num_available_registers = [| 8; 15 |]
@@ -98,7 +97,7 @@ let calling_conventions
   let ofs = ref stack_ofs in
   for i = 0 to Array.length arg - 1 do
     match arg.(i).typ with
-      Int | Addr as ty ->
+    | Val | Int | Addr as ty ->
         if !int <= last_int then begin
           loc.(i) <- phys_reg !int;
           incr int
@@ -135,7 +134,12 @@ let loc_results res =
      Always reserve 8 bytes at bottom of stack, plus whatever is needed
      to hold the overflow arguments. *)
 
-let loc_external_arguments = calling_conventions 0 4 100 103 outgoing 0
+let loc_external_arguments arg =
+  let arg =
+    Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg in
+  let (loc, alignment) =
+    calling_conventions 0 4 100 103 outgoing 0 arg in
+  (Array.map (fun reg -> [|reg|]) loc, alignment)
 
 let extcall_use_push = false
 

From f6a0392f57315eea2c9a750c534e40127d9940da Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 29 Oct 2015 05:44:01 -0400
Subject: [PATCH 03/27] zSystem port: do not mark return addresses

Using the low bit of return addresses to mark already-scanned stack frames improves GC time on architectures that ignore this bit in 'return' instructions, like Power.  Otherwise, as is the case for zSystem, clearing up this bit before every 'return' instruction costs too much in running time.

asmrun/stack.h: turn off the marking of return addresses for z
asmcomp/s390x/emit.mlp: suppress clearing of low bit of return addresses
---
 asmcomp/s390x/emit.mlp | 2 --
 asmrun/stack.h         | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index c29adbbbb..5f4b55059 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -559,7 +559,6 @@ let emit_instr i =
           else
             `	lg	{emit_gpr 12}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
           `	agfi	{emit_gpr 15}, {emit_int n}\n`;
-          `	nill	{emit_gpr 14}, 0xFFFE\n`;
           `	br	{emit_gpr 14}\n`
     | Llabel lbl ->
         `{emit_label lbl}:\n`
@@ -651,7 +650,6 @@ let emit_instr i =
           `	lgr     {emit_gpr 15},{emit_gpr 13}\n`;
           `	lg      {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
           `	agfi    {emit_gpr 15}, 16\n`;
-          `	nill	{emit_gpr 1}, 0xFFFE\n`;
           `	br	{emit_gpr 1}\n`
         end
 
diff --git a/asmrun/stack.h b/asmrun/stack.h
index dcf7f45ff..2aad2a7e0 100644
--- a/asmrun/stack.h
+++ b/asmrun/stack.h
@@ -52,10 +52,6 @@
 
 #ifdef TARGET_s390x
 #define Saved_return_address(sp) *((intnat *)((sp) - SIZEOF_PTR))
-#define Already_scanned(sp, retaddr) ((retaddr) & 1)
-#define Mark_scanned(sp, retaddr) \
-          (*((intnat *)((sp) - SIZEOF_PTR)) = (retaddr) | 1)
-#define Mask_already_scanned(retaddr) ((retaddr) & ~1)
 #define Trap_frame_size 16
 #define Callback_link(sp) ((struct caml_context *)((sp) + Trap_frame_size))
 #endif

From bcb696a260148e6277482ba3025c48099fccdc1f Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 29 Oct 2015 10:09:09 -0400
Subject: [PATCH 04/27] zSystem port: revise addressing modes

- Ibased addressing is removed.  The code generated for an Ibased load/store is no better than the code we generate for an Iindexed load/store preceded by a Iconst_symbol instruction that loads the address of the global variable.  Plus, we now get opportunities for CSE of the Iconst_symbol.

- Iindexed2 addressing is extended with a constant displacement, to take full advantage of the ofs(%r1, %r2) addressing mode of the processor.

- During selection instruction, make sure that the constant displacement of Iindexed and Iindexed2 is within range (20 bit signed).
---
 asmcomp/s390x/arch.ml       | 20 ++++++--------------
 asmcomp/s390x/emit.mlp      | 24 ++++--------------------
 asmcomp/s390x/scheduling.ml |  2 --
 asmcomp/s390x/selection.ml  | 26 +++++++++++---------------
 4 files changed, 21 insertions(+), 51 deletions(-)

diff --git a/asmcomp/s390x/arch.ml b/asmcomp/s390x/arch.ml
index b3a14dc78..718062624 100644
--- a/asmcomp/s390x/arch.ml
+++ b/asmcomp/s390x/arch.ml
@@ -35,17 +35,13 @@ type specific_operation =
 (* Addressing modes *)
 
 type addressing_mode =
-    Ibased of string * int              (* symbol + displ *)
   | Iindexed of int                     (* reg + displ *)
-  | Iindexed2                           (* reg + reg *)
+  | Iindexed2 of int                    (* reg + reg + displ *)
 
 (* Sizes, endianness *)
 
 let big_endian = true
 
-let s390x =
-  match Config.model with "s390x" -> true | _ -> false
-
 let size_addr = 8
 let size_int = size_addr
 let size_float = 8
@@ -62,27 +58,23 @@ let identity_addressing = Iindexed 0
 
 let offset_addressing addr delta =
   match addr with
-    Ibased(s, n) -> Ibased(s, n + delta)
   | Iindexed n -> Iindexed(n + delta)
-  | Iindexed2 -> assert false
+  | Iindexed2 n -> Iindexed2(n + delta)
 
 let num_args_addressing = function
-    Ibased(s, n) -> 0
   | Iindexed n -> 1
-  | Iindexed2 -> 2
+  | Iindexed2 n -> 2
 
 (* Printing operations and addressing modes *)
 
 let print_addressing printreg addr ppf arg =
   match addr with
-  | Ibased(s, n) ->
-      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
-      fprintf ppf "\"%s\"%s" s idx
   | Iindexed n ->
       let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
       fprintf ppf "%a%s" printreg arg.(0) idx
-  | Iindexed2 ->
-      fprintf ppf "%a + %a" printreg arg.(0) printreg arg.(1)
+  | Iindexed2 n ->
+      let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
+      fprintf ppf "%a + %a%s" printreg arg.(0) printreg arg.(1) idx
 
 let print_specific_operation printreg op ppf arg =
   match op with
diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 5f4b55059..5aa5dc1c5 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -112,32 +112,16 @@ let emit_load_store instr addressing_mode addr n arg =
 if (compare instr "stey") = 0 then begin
   `	ledbr	{emit_fpr 15}, {emit_reg arg}\n`;
   match addressing_mode with
-    Ibased(s, d) ->
-    if !pic_code then begin
-      `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
-      `	{emit_string instr}	{emit_fpr 15},{emit_int d}({emit_gpr 1})\n`
-    end else begin
-      `	larl	{emit_gpr 1}, {emit_symbol_offset (s,d)}\n`;
-      `	{emit_string instr}	{emit_fpr 15},0({emit_gpr 1})\n`
-    end
   | Iindexed ofs ->
       `	{emit_string instr}	{emit_fpr 15}, {emit_int ofs}({emit_reg addr.(n)})\n`
-  | Iindexed2 ->
-      `	{emit_string instr}	{emit_fpr 15}, 0({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
+  | Iindexed2 ofs ->
+      `	{emit_string instr}	{emit_fpr 15}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
 end else begin
   match addressing_mode with
-    Ibased(s, d) ->
-    if !pic_code then begin
-      `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
-      `	{emit_string instr}	{emit_reg arg},{emit_int d}({emit_gpr 1})\n`
-    end else begin
-      `	larl	{emit_gpr 1}, {emit_symbol_offset (s,d)}\n`;
-      `	{emit_string instr}	{emit_reg arg},0({emit_gpr 1})\n`
-    end
   | Iindexed ofs ->
       `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
-  | Iindexed2 ->
-      `	{emit_string instr}	{emit_reg arg}, 0({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
+  | Iindexed2 ofs ->
+      `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
 end
 
 (* After a comparison, extract the result as 0 or 1 *)
diff --git a/asmcomp/s390x/scheduling.ml b/asmcomp/s390x/scheduling.ml
index 874105302..66bb3ea69 100644
--- a/asmcomp/s390x/scheduling.ml
+++ b/asmcomp/s390x/scheduling.ml
@@ -46,8 +46,6 @@ method reload_retaddr_latency = 12
 
 method oper_issue_cycles = function
     Iconst_float _ | Iconst_symbol _ -> 2
-  | Iload(_, Ibased(_, _)) -> 2
-  | Istore(_, Ibased(_, _), _) -> 2
   | Ialloc _ -> 4
   | Iintop(Imod) -> 40 (* assuming full stall *)
   | Iintop(Icomp _) -> 4
diff --git a/asmcomp/s390x/selection.ml b/asmcomp/s390x/selection.ml
index 406f34881..773ce164c 100644
--- a/asmcomp/s390x/selection.ml
+++ b/asmcomp/s390x/selection.ml
@@ -23,18 +23,15 @@ open Mach
 exception Use_default
 
 type addressing_expr =
-    Asymbol of string
   | Alinear of expression
   | Aadd of expression * expression
 
 let rec select_addr = function
-    Cconst_symbol s ->
-      (Asymbol s, 0)
-  | Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
+  | Cop((Caddi | Cadda | Caddv), [arg; Cconst_int m]) ->
       let (a, n) = select_addr arg in (a, n + m)
-  | Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
+  | Cop((Caddi | Cadda | Caddv), [Cconst_int m; arg]) ->
       let (a, n) = select_addr arg in (a, n + m)
-  | Cop((Caddi | Cadda), [arg1; arg2]) ->
+  | Cop((Caddi | Cadda | Caddv), [arg1; arg2]) ->
       begin match (select_addr arg1, select_addr arg2) with
           ((Alinear e1, n1), (Alinear e2, n2)) ->
               (Aadd(e1, e2), n1 + n2)
@@ -65,15 +62,14 @@ inherit Selectgen.selector_generic as super
 method is_immediate n = (n <= 2147483647) && (n >= -2147483648)
 
 method select_addressing chunk exp =
-  match select_addr exp with
-    (Asymbol s, d) ->
-      (Ibased(s, d), Ctuple [])
-  | (Alinear e, d) ->
-      (Iindexed d, e)
-  | (Aadd(e1, e2), d) ->
-      if d = 0
-      then (Iindexed2, Ctuple[e1; e2])
-      else (Iindexed d, Cop(Cadda, [e1; e2]))
+  let (a, d) = select_addr exp in
+  (* 20-bit signed displacement *)
+  if d < 0x80000 && d >= -0x80000 then begin
+    match a with
+    | Alinear e -> (Iindexed d, e)
+    | Aadd(e1, e2) -> (Iindexed2 d, Ctuple [e1; e2])
+  end else
+    (Iindexed 0, exp)
 
 method! select_operation op args =
   match (op, args) with

From 52742044fa1973f9a09fee383485e58251d58764 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 29 Oct 2015 10:22:55 -0400
Subject: [PATCH 05/27] zSystem port: remove special case "stey" in
 Emit.emit_load_store

Instead, do the binary64->binary32 conversion before, and use emit_load_store with %f15 as source register.
---
 asmcomp/s390x/emit.mlp | 24 +++++++++++-------------
 asmcomp/s390x/proc.ml  |  6 +++---
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 5aa5dc1c5..e1f9aef5d 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -90,7 +90,9 @@ let emit_gpr r = emit_string "%r"; emit_int r
 
 let emit_fpr r = emit_string "%f"; emit_int r
 
-  
+(* Special registers *)
+
+let reg_f15 = phys_reg 115  
 
 (* Output a stack reference *)
 
@@ -109,20 +111,11 @@ let emit_symbol_offset (s, d) =
   if d <> 0 then emit_int d
 
 let emit_load_store instr addressing_mode addr n arg =
-if (compare instr "stey") = 0 then begin
-  `	ledbr	{emit_fpr 15}, {emit_reg arg}\n`;
-  match addressing_mode with
-  | Iindexed ofs ->
-      `	{emit_string instr}	{emit_fpr 15}, {emit_int ofs}({emit_reg addr.(n)})\n`
-  | Iindexed2 ofs ->
-      `	{emit_string instr}	{emit_fpr 15}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
-end else begin
   match addressing_mode with
   | Iindexed ofs ->
       `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n`
   | Iindexed2 ofs ->
       `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
-end
 
 (* After a comparison, extract the result as 0 or 1 *)
 let emit_set_comp cmp res =
@@ -375,7 +368,8 @@ let emit_instr i =
       | Lop(Istackoffset n) ->
         `	lay     {emit_gpr 15}, {emit_int (-n)} ({emit_gpr 15})\n`;
         stack_offset := !stack_offset + n
-    | Lop(Iload(chunk, addr)) ->
+
+     | Lop(Iload(chunk, addr)) ->
         let loadinstr =
           match chunk with
             Byte_unsigned -> "llgc"
@@ -390,6 +384,10 @@ let emit_instr i =
         emit_load_store loadinstr addr i.arg 0 i.res.(0);
         if chunk = Single then
           `	ldebr	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
+
+    | Lop(Istore(Single, addr, _)) ->
+        `	ledbr	{emit_fpr 15}, {emit_reg i.arg.(0)}\n`;
+        emit_load_store "stey" addr i.arg 1 reg_f15
     | Lop(Istore(chunk, addr, _)) ->
         let storeinstr =
           match chunk with
@@ -397,9 +395,9 @@ let emit_instr i =
           | Sixteen_unsigned | Sixteen_signed -> "sthy"
 	  | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
 	  | Word_int | Word_val -> "stg"
-          | Single -> "stey"
+          | Single -> assert false
           | Double | Double_u -> "stdy" in
-             emit_load_store storeinstr addr i.arg 1 i.arg.(0)
+        emit_load_store storeinstr addr i.arg 1 i.arg.(0)
         
     | Lop(Ialloc n) ->
         if !call_gc_label = 0 then call_gc_label := new_label();
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
index 3f79b76d2..ca51e8268 100644
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@@ -50,7 +50,7 @@ let int_reg_name =
 
 let float_reg_name =
     [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7"; 
-       "%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; |]
+       "%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; "%f15" |]
 
 let num_register_classes = 2
 
@@ -75,8 +75,8 @@ let hard_int_reg =
   for i = 0 to 7 do v.(i) <- Reg.at_location Int (Reg i) done; v
 
 let hard_float_reg =
-  let v = Array.make 15 Reg.dummy in
-  for i = 0 to 14 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
+  let v = Array.make 16 Reg.dummy in
+  for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg(100 + i)) done; v
 
 let all_phys_regs =
   Array.append hard_int_reg hard_float_reg

From 862347f0f44f09957a7d16da1af221cb28e1cc87 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 05:53:37 -0400
Subject: [PATCH 06/27] zSystem port: do not use %r12 as GOT pointer

Taking a leaf from recent versions of GCC, in PIC mode, we use a PC-relative load with GOTENT relocation to access the global offset table.  This way, we don't have to save, setup and reload %r12 as GOT pointer in every function.
---
 asmcomp/s390x/emit.mlp | 43 ++++++------------------------------------
 1 file changed, 6 insertions(+), 37 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index e1f9aef5d..5917c6d92 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -33,7 +33,6 @@ let frame_size () =
     !stack_offset +                     (* Trap frame, outgoing parameters *)
     size_int * num_stack_slots.(0) +    (* Local int variables *)
     size_float * num_stack_slots.(1) +  (* Local float variables *)
-    size_addr + 			(* Slot for storing and restoring GOT pointer *)
     (if !contains_calls then size_addr else 0) in (* The return address *)
   Misc.align size 8
 
@@ -105,11 +104,6 @@ let emit_stack r =
 
 (* Output a load or store operation *)
 
-let emit_symbol_offset (s, d) =
-  emit_symbol s;
-  if d > 0 then `+`;
-  if d <> 0 then emit_int d
-
 let emit_load_store instr addressing_mode addr n arg =
   match addressing_mode with
   | Iindexed ofs ->
@@ -130,20 +124,6 @@ let emit_set_comp cmp res =
     | Cge -> `	locgrnl	{emit_reg res}, {emit_gpr 1}\n`
   end
 
-let nativelow n = 
-   let m = Nativeint.logand n (Nativeint.of_int 0xFFFFFFFF) in
-      if (Nativeint.compare m  0x7FFF_FFFFn) > 0 
-      then let k = Nativeint.logand m 0x7FFFFFFFn in 
-         Nativeint.sub k 2147483648n
-      else m
-      
-let nativehigh n = 
-    let m = Nativeint.shift_right n 32 in
-      if (Nativeint.compare m  0x7FFF_FFFFn) > 0 
-      then let k = Nativeint.logand m 0x7FFFFFFFn in 
-         Nativeint.sub k 2147483648n
-      else m
-
 (* Record live pointers at call points *)
 
 let record_frame live dbg =
@@ -305,7 +285,7 @@ let emit_instr i =
         `	ld  {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`
      | Lop(Iconst_symbol s) ->
         if !pic_code then
-        `	lg	{emit_reg i.res.(0)}, {emit_symbol s}@GOT({emit_gpr 12})\n`
+        `	lgrl	{emit_reg i.res.(0)}, {emit_symbol s}@GOTENT\n`
         else
         `	larl {emit_reg i.res.(0)}, {emit_symbol s}\n`;
     | Lop(Icall_ind) ->
@@ -348,7 +328,7 @@ let emit_instr i =
      | Lop(Iextcall(s, alloc)) ->
         if alloc then begin
           if !pic_code then begin
-          `	lg	{emit_gpr 1}, {emit_symbol s}@GOT({emit_gpr 12})\n`;
+          `	lgrl	{emit_gpr 1}, {emit_symbol s}@GOTENT\n`;
           `	brasl	{emit_gpr 14}, {emit_symbol "caml_c_call"}@PLT\n`
           end else begin
           `	larl {emit_gpr 1}, {emit_symbol s}\n`;
@@ -410,8 +390,7 @@ let emit_instr i =
         `	brasl   {emit_gpr 14}, {emit_label !call_gc_label}\n`;
         let fr_lbl = record_frame i.live Debuginfo.none in
 	 `{emit_label fr_lbl}:\n`;
-        `	{emit_label lbl}:\n`;
-        `	larl	{emit_gpr 12},_GLOBAL_OFFSET_TABLE_\n`
+        `	{emit_label lbl}:\n`
     | Lop(Iintop Isub) ->
         `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
         `	sgr	{emit_gpr 1}, {emit_reg i.arg.(1)}\n`;
@@ -536,12 +515,8 @@ let emit_instr i =
     | Lreturn ->
         let n = frame_size() in
         if n > 0 then
-          if !contains_calls then
-            `	lg	{emit_gpr 12}, {emit_int(n - (size_addr+size_addr))}({emit_gpr 15})\n`
-          else
-            `	lg	{emit_gpr 12}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
           `	agfi	{emit_gpr 15}, {emit_int n}\n`;
-          `	br	{emit_gpr 14}\n`
+        `	br	{emit_gpr 14}\n`
     | Llabel lbl ->
         `{emit_label lbl}:\n`
     | Lbranch lbl ->
@@ -686,16 +661,10 @@ emit_debug_info fundecl.fun_dbg;
   `	.align	8\n`;
   `{emit_symbol fundecl.fun_name}:\n`;
   let n = frame_size() in
-  if !contains_calls then begin
+  if n > 0 then
     `	lay	{emit_gpr 15}, {emit_int(-n)}({emit_gpr 15})\n`;
+  if !contains_calls then
     `	stg     {emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
-    `	stg     {emit_gpr 12}, {emit_int(n - (size_addr+size_addr))}({emit_gpr 15})\n`
-  end else begin
-    if n > 0 then
-    `	lay	{emit_gpr 15}, {emit_int(-n)}({emit_gpr 15})\n`;
-    `	stg     {emit_gpr 12}, {emit_int(n - (size_addr))}({emit_gpr 15})\n`
-  end;
-  `	larl	{emit_gpr 12},_GLOBAL_OFFSET_TABLE_\n`;
   `{emit_label !tailrec_entry_point}:\n`;
   emit_all fundecl.fun_body;
   (* Emit the glue code to call the GC *)

From 1e4aedd404410573f63f4c583905783291bd3dcb Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 06:57:10 -0400
Subject: [PATCH 07/27] zSystem port: use %r12 as normal register + extcall
 conventions

Following the previous commit, %r12 becomes usable as a normal register.
However it must be saved in caml_call_gc.

Independently: change Proc.loc_external_arguments to account for the
160 reserved bytes at bottom of stack.  Then, caml_c_call and
emission of code for Iextcall(false) no longer need to account for
those reserved bytes.
---
 asmcomp/s390x/emit.mlp |   4 +-
 asmcomp/s390x/proc.ml  |  32 +++++++-----
 asmrun/s390x.S         | 114 ++++++++++++++++-------------------------
 3 files changed, 64 insertions(+), 86 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 5917c6d92..caf904ab1 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -337,12 +337,10 @@ let emit_instr i =
           let lbl = record_frame i.live i.dbg in
 	   `{emit_label lbl}:\n`;
         end else begin
-          `	lay     {emit_gpr 15}, {emit_int (-160)} ({emit_gpr 15})\n`;
           if !pic_code then
           `	brasl   {emit_gpr 14}, {emit_symbol s}@PLT\n`
           else
-          `	brasl   {emit_gpr 14}, {emit_symbol s}\n`;
-          `	lay     {emit_gpr 15}, {emit_int (160)} ({emit_gpr 15})\n`
+          `	brasl   {emit_gpr 14}, {emit_symbol s}\n`
        end
 
       | Lop(Istackoffset n) ->
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
index ca51e8268..98d8107bc 100644
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@@ -34,7 +34,7 @@ let word_addressed = false
     7 - 9		general purpose, preserved by C
     10                  allocation limit (preserved by C)
     11                  allocation pointer (preserved by C)
-    12			GOT pointer  (preserved by C)
+    12			general purpose  (preserved by C)
     13                  trap pointer (preserved by C)
     14			return address (volatile)
     15			stack pointer (preserved by C)
@@ -43,10 +43,14 @@ let word_addressed = false
     1, 3, 5, 7		general purpose (volatile)
     8 - 14          	general purpose, preserved by C
     15			temporary, preserved by C
-*)
+
+Note: integer register r12 is used as GOT pointer by some C compilers.
+The code generated by OCaml does not need a GOT pointer, using PC-relative
+addressing instead for accessing the GOT.  This frees r12 as a
+general-purpose register. *)
 
 let int_reg_name =
-    [| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; |]
+    [| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; "%r12" |]
 
 let float_reg_name =
     [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7"; 
@@ -59,7 +63,7 @@ let register_class r =
   | Val | Int | Addr -> 0
   | Float -> 1
 
-let num_available_registers = [| 8; 15 |]
+let num_available_registers = [| 9; 15 |]
 
 let first_available_register = [| 0; 100 |]
 
@@ -71,8 +75,8 @@ let rotate_registers = true
 (* Representation of hard registers by pseudo-registers *)
 
 let hard_int_reg =
-  let v = Array.make 8 Reg.dummy in
-  for i = 0 to 7 do v.(i) <- Reg.at_location Int (Reg i) done; v
+  let v = Array.make 9 Reg.dummy in
+  for i = 0 to 8 do v.(i) <- Reg.at_location Int (Reg i) done; v
 
 let hard_float_reg =
   let v = Array.make 16 Reg.dummy in
@@ -131,15 +135,15 @@ let loc_results res =
 (*   C calling conventions under SVR4:
      use GPR 2-6 and FPR 0,2,4,6 just like ML calling conventions.
      Using a float register does not affect the int registers.
-     Always reserve 8 bytes at bottom of stack, plus whatever is needed
+     Always reserve 160 bytes at bottom of stack, plus whatever is needed
      to hold the overflow arguments. *)
 
 let loc_external_arguments arg =
   let arg =
     Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg in
-  let (loc, alignment) =
-    calling_conventions 0 4 100 103 outgoing 0 arg in
-  (Array.map (fun reg -> [|reg|]) loc, alignment)
+  let (loc, ofs) =
+    calling_conventions 0 4 100 103 outgoing 160 arg in
+  (Array.map (fun reg -> [|reg|]) loc, ofs)
 
 let extcall_use_push = false
 
@@ -173,12 +177,12 @@ let destroyed_at_raise = all_phys_regs
 (* Maximal register pressure *)
 
 let safe_register_pressure = function
-    Iextcall(_, _) -> 3
-  | _ -> 8
+    Iextcall(_, _) -> 4
+  | _ -> 9
 
 let max_register_pressure = function
-    Iextcall(_, _) -> [| 3; 7 |]
-  | _ -> [| 8; 15 |]
+    Iextcall(_, _) -> [| 4; 7 |]
+  | _ -> [| 9; 15 |]
 
 (* Pure operations (without any side effect besides updating their result
    registers). *)
diff --git a/asmrun/s390x.S b/asmrun/s390x.S
index 152e2ddf9..d724e1f2d 100644
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@@ -35,87 +35,72 @@ caml_system__code_begin:
         .type   caml_call_gc, @function
 caml_call_gc:
     /* Set up stack frame */
-#define FRAMESIZE (16*8 + 16*8 + 32)
-        agfi	%r15, -FRAMESIZE
-        stg     %r15, 0(%r15)
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
+#define FRAMESIZE (16*8 + 16*8)
+        lay	%r15, -FRAMESIZE(%r15)
     /* Record return address into OCaml code */
         Storeglobal(%r14, caml_last_return_address)
     /* Record lowest stack address */
-        lgr	%r0, %r15
-        agfi    %r0, FRAMESIZE
+        lay	%r0, FRAMESIZE(%r15)
         Storeglobal(%r0, caml_bottom_of_stack)
     /* Record pointer to register array */
-        lgr	%r0, %r15
-        agfi    %r0, 8*16 + 32
+        lay	%r0, (8*16)(%r15)
         Storeglobal(%r0, caml_gc_regs)
     /* Save current allocation pointer for debugging purposes */
         Storeglobal(%r11, caml_young_ptr)
     /* Save exception pointer (if e.g. a sighandler raises) */
         Storeglobal(%r13, caml_exception_pointer)
     /* Save all registers used by the code generator */
-        lgr	%r1, %r15
-        agfi    %r1, 8*16 + 32 - 8
-        stmg    %r2,%r9, 8(%r1)
-        lgr	%r1, %r15
-        agfi    %r1, 32 - 8
-        std     %f0, 8(%r1)
-        std     %f1, 16(%r1)
-        std     %f2, 24(%r1)
-        std     %f3, 32(%r1)
-        std     %f4, 40(%r1)
-        std     %f5, 48(%r1)
-        std     %f6, 56(%r1)
-        std     %f7, 64(%r1)
-        std     %f8, 72(%r1)
-        std     %f9, 80(%r1)
-        std     %f10, 88(%r1)
-        std     %f11, 96(%r1)
-        std     %f12, 104(%r1)
-        std     %f13, 112(%r1)
-        std     %f14, 120(%r1)
-        std     %f15, 128(%r1)
+        stmg    %r2,%r9, (8*16)(%r15)
+	stg	%r12, (8*16 + 8*8)(%r15)
+        std     %f0, 0(%r15)
+        std     %f1, 8(%r15)
+        std     %f2, 16(%r15)
+        std     %f3, 24(%r15)
+        std     %f4, 32(%r15)
+        std     %f5, 40(%r15)
+        std     %f6, 48(%r15)
+        std     %f7, 56(%r15)
+        std     %f8, 64(%r15)
+        std     %f9, 72(%r15)
+        std     %f10, 80(%r15)
+        std     %f11, 88(%r15)
+        std     %f12, 96(%r15)
+        std     %f13, 108(%r15)
+        std     %f14, 112(%r15)
+        std     %f15, 120(%r15)
     /* Call the GC */
         lay %r15, -160(%r15)
         stg     %r15, 0(%r15)
         brasl   %r14, caml_garbage_collection@PLT
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         lay %r15, 160(%r15)
     /* Reload new allocation pointer and allocation limit */
         Loadglobal(%r11, caml_young_ptr)
         Loadglobal(%r10, caml_young_limit)
     /* Restore all regs used by the code generator */
-        lgr	%r1, %r15
-        agfi    %r1, 8*16 + 32 - 8
-        lmg     %r2,%r9, 8(%r1)
-        lgr	%r1, %r15
-        agfi    %r1, 32 - 8
-        ld     %f0, 8(%r1)
-        ld     %f1, 16(%r1)
-        ld     %f2, 24(%r1)
-        ld     %f3, 32(%r1)
-        ld     %f4, 40(%r1)
-        ld     %f5, 48(%r1)
-        ld     %f6, 56(%r1)
-        ld     %f7, 64(%r1)
-        ld     %f8, 72(%r1)
-        ld     %f9, 80(%r1)
-        ld     %f10, 88(%r1)
-        ld     %f11, 96(%r1)
-        ld     %f12, 104(%r1)
-        ld     %f13, 112(%r1)
-        ld     %f14, 120(%r1)
-        ld     %f15, 128(%r1)
+        lmg     %r2,%r9, (8*16)(%r15)
+	lg	%r12, (8*16 + 8*8)(%r15)
+        ld      %f0, 0(%r15)
+        ld      %f1, 8(%r15)
+        ld      %f2, 16(%r15)
+        ld      %f3, 24(%r15)
+        ld      %f4, 32(%r15)
+        ld      %f5, 40(%r15)
+        ld      %f6, 48(%r15)
+        ld      %f7, 56(%r15)
+        ld      %f8, 64(%r15)
+        ld      %f9, 72(%r15)
+        ld      %f10, 80(%r15)
+        ld      %f11, 88(%r15)
+        ld      %f12, 96(%r15)
+        ld      %f13, 108(%r15)
+        ld      %f14, 112(%r15)
+        ld      %f15, 120(%r15)
     /* Return to caller, restarting the allocation */
         Loadglobal(%r1, caml_last_return_address)
         agfi   %r1, -30     /* Restart the allocation (7 instructions) */
-    /* Say we are back into OCaml code */
-        lgfi   %r0, 0
-        Storeglobal(%r0, caml_last_return_address)
     /* Deallocate stack frame */
-        agfi	%r15, FRAMESIZE
+        lay	%r15, FRAMESIZE(%r15)
     /* Return */
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         br    %r1
 
 /* Call a C function from OCaml */
@@ -124,9 +109,6 @@ caml_call_gc:
         .type   caml_c_call, @function
 caml_c_call:
         Storeglobal(%r15, caml_bottom_of_stack)
-
-        lay %r15, -160(%r15)
-
     /* Save return address */
         ldgr	%f15, %r14
     /* Get ready to call C function (address in r1) */
@@ -136,11 +118,9 @@ caml_c_call:
         Storeglobal(%r11, caml_young_ptr)
         Storeglobal(%r13, caml_exception_pointer)
     /* Call the function */
-
-        basr %r14, %r1;
+        basr %r14, %r1
     /* restore return address */
         lgdr	%r14,%f15
-        lay %r15, 160(%r15)
     /* Reload allocation pointer and allocation limit*/
         Loadglobal(%r11, caml_young_ptr)
         Loadglobal(%r10, caml_young_limit)
@@ -149,13 +129,12 @@ caml_c_call:
         Storeglobal(%r0, caml_last_return_address)
 
     /* Return to caller */
-        br %r14;
+        br %r14
 
 /* Raise an exception from OCaml */
         .globl  caml_raise_exn
         .type   caml_raise_exn, @function
 caml_raise_exn:
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         Loadglobal32(%r0, caml_backtrace_active)
         cgfi	%r0, 0
         jne     .L110
@@ -166,7 +145,7 @@ caml_raise_exn:
         lg     %r13, 8(13)
         agfi   %r15, 16
     /* Branch to handler */
-        br	%r1;
+        br	%r1
 .L110:
         lgfi      %r0, 0
         Storeglobal32(%r0, caml_backtrace_pos)
@@ -178,7 +157,6 @@ caml_raise_exn:
         lgr     %r5, %r13           /* arg4: SP of handler */
         agfi    %r15, -160       /* reserve stack space for C call */
         brasl   %r14, caml_stash_backtrace@PLT
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         agfi    %r15, 160 
         lgdr	%r2,%f15        /* restore exn bucket */
         j       .L111           /* raise the exn */
@@ -186,7 +164,6 @@ caml_raise_exn:
         .globl  caml_reraise_exn
         .type   caml_reraise_exn, @function
 caml_reraise_exn:
-	larl  %r12,_GLOBAL_OFFSET_TABLE_
         Loadglobal32(%r0, caml_backtrace_active)
         cgfi	%r0, 0
         jne    .L114
@@ -203,7 +180,6 @@ caml_reraise_exn:
         .globl  caml_raise_exception
         .type   caml_raise_exception, @function
 caml_raise_exception:
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         Loadglobal32(0, caml_backtrace_active)
         cgfi	%r0, 0
         jne    .L112
@@ -230,7 +206,6 @@ caml_raise_exception:
              /* reserve stack space for C call */
         lay %r15, -160(%r15)
         brasl   %r14, caml_stash_backtrace@PLT
-        larl  %r12,_GLOBAL_OFFSET_TABLE_
         lay %r15, 160(%r15)
         lgdr	%r2,%f15        /* restore exn bucket */        /* restore exn bucket */
         j       .L113           /* raise the exn */
@@ -373,6 +348,7 @@ caml_callback3_exn:
 	.globl	caml_ml_array_bound_error
 	.type	caml_ml_array_bound_error, @function
 caml_ml_array_bound_error:
+	lay	%r15, -160(%r15)    /* Reserve stack space for C call */
 	larl	%r1, caml_array_bound_error
 	j	caml_c_call
         .globl  caml_system__code_end

From d6742ea2711d60541fb98334581133ce9b894176 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 08:34:50 -0400
Subject: [PATCH 08/27] zSeries port: hardcode register names (cosmetic)

In emit.mlp, write %rN and %fN directly in `...` strings, instead of going through emit_gpr and emit_fpr.

Justification: for other ports like Power, several concrete asm syntaxes for register names exist, so it makes sense to abstract over them.  This is not the case for z systems under Linux.  Plus, using the concrete syntax directly makes it easier to review emit.mlp.
---
 asmcomp/s390x/emit.mlp | 186 ++++++++++++++++++++---------------------
 1 file changed, 93 insertions(+), 93 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index caf904ab1..bff1f3af8 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -98,7 +98,7 @@ let reg_f15 = phys_reg 115
 let emit_stack r =
   match r.loc with
     Stack s ->
-      let ofs = slot_offset s (register_class r) in `{emit_int ofs}({emit_gpr 15})`
+      let ofs = slot_offset s (register_class r) in `{emit_int ofs}(%r15)`
   | _ -> fatal_error "Emit.emit_stack"
 
 
@@ -113,15 +113,15 @@ let emit_load_store instr addressing_mode addr n arg =
 
 (* After a comparison, extract the result as 0 or 1 *)
 let emit_set_comp cmp res =
-    `	lgfi	{emit_gpr 1}, 1\n`;
+    `	lgfi	%r1, 1\n`;
     `	lgfi	{emit_reg res}, 0\n`;
   begin match cmp with
-      Ceq -> `	locgre	{emit_reg res}, {emit_gpr 1}\n`
-    | Cne -> `	locgrne	{emit_reg res}, {emit_gpr 1}\n`
-    | Cgt -> `	locgrh	{emit_reg res}, {emit_gpr 1}\n`
-    | Cle -> `	locgrnh	{emit_reg res}, {emit_gpr 1}\n`
-    | Clt -> `	locgrl	{emit_reg res}, {emit_gpr 1}\n`
-    | Cge -> `	locgrnl	{emit_reg res}, {emit_gpr 1}\n`
+      Ceq -> `	locgre	{emit_reg res}, %r1\n`
+    | Cne -> `	locgrne	{emit_reg res}, %r1\n`
+    | Cgt -> `	locgrh	{emit_reg res}, %r1\n`
+    | Cle -> `	locgrnh	{emit_reg res}, %r1\n`
+    | Clt -> `	locgrl	{emit_reg res}, %r1\n`
+    | Cge -> `	locgrnl	{emit_reg res}, %r1\n`
   end
 
 (* Record live pointers at call points *)
@@ -281,33 +281,33 @@ let emit_instr i =
     | Lop(Iconst_float f) ->
         let lbl = new_label() in
         float_literals := (Int64.bits_of_float f, lbl) :: !float_literals;
-        `	larl {emit_gpr 1}, {emit_label lbl}\n`;
-        `	ld  {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`
+        `	larl %r1, {emit_label lbl}\n`;
+        `	ld  {emit_reg i.res.(0)}, 0(%r1)\n`
      | Lop(Iconst_symbol s) ->
         if !pic_code then
         `	lgrl	{emit_reg i.res.(0)}, {emit_symbol s}@GOTENT\n`
         else
         `	larl {emit_reg i.res.(0)}, {emit_symbol s}\n`;
     | Lop(Icall_ind) ->
-        `	basr	{emit_gpr 14}, {emit_reg i.arg.(0)}\n`;
+        `	basr	%r14, {emit_reg i.arg.(0)}\n`;
         let lbl = record_frame i.live i.dbg in
 	 `{emit_label lbl}:\n`
 
     | Lop(Icall_imm s) ->
         if !pic_code then
-        `	brasl	{emit_gpr 14}, {emit_symbol s}@PLT\n`
+        `	brasl	%r14, {emit_symbol s}@PLT\n`
         else
-        `	brasl   {emit_gpr 14}, {emit_symbol s}\n`;
+        `	brasl   %r14, {emit_symbol s}\n`;
         let lbl = record_frame i.live i.dbg in
 	 `{emit_label lbl}:\n`;
     | Lop(Itailcall_ind) ->
         let n = frame_size() in
         if !contains_calls then begin
-          `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
-          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+          `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+          `	agfi	%r15, {emit_int n}\n`;
         end else begin
           if n > 0 then
-            `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+            `	agfi	%r15, {emit_int n}\n`;
         end;
         `	br	{emit_reg i.arg.(0)}\n`
     | Lop(Itailcall_imm s) ->
@@ -316,11 +316,11 @@ let emit_instr i =
         else begin
           let n = frame_size() in
           if !contains_calls then begin
-          `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
-          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+          `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+          `	agfi	%r15, {emit_int n}\n`;
           end else begin
             if n > 0 then
-              `	agfi	{emit_gpr 15}, {emit_int n}\n`;
+              `	agfi	%r15, {emit_int n}\n`;
           end;
           `	brcl	15,{emit_symbol s}\n`
         end
@@ -328,23 +328,23 @@ let emit_instr i =
      | Lop(Iextcall(s, alloc)) ->
         if alloc then begin
           if !pic_code then begin
-          `	lgrl	{emit_gpr 1}, {emit_symbol s}@GOTENT\n`;
-          `	brasl	{emit_gpr 14}, {emit_symbol "caml_c_call"}@PLT\n`
+          `	lgrl	%r1, {emit_symbol s}@GOTENT\n`;
+          `	brasl	%r14, {emit_symbol "caml_c_call"}@PLT\n`
           end else begin
-          `	larl {emit_gpr 1}, {emit_symbol s}\n`;
-          `	brasl	{emit_gpr 14}, {emit_symbol "caml_c_call"}\n`
+          `	larl %r1, {emit_symbol s}\n`;
+          `	brasl	%r14, {emit_symbol "caml_c_call"}\n`
           end;
           let lbl = record_frame i.live i.dbg in
 	   `{emit_label lbl}:\n`;
         end else begin
           if !pic_code then
-          `	brasl   {emit_gpr 14}, {emit_symbol s}@PLT\n`
+          `	brasl   %r14, {emit_symbol s}@PLT\n`
           else
-          `	brasl   {emit_gpr 14}, {emit_symbol s}\n`
+          `	brasl   %r14, {emit_symbol s}\n`
        end
 
       | Lop(Istackoffset n) ->
-        `	lay     {emit_gpr 15}, {emit_int (-n)} ({emit_gpr 15})\n`;
+        `	lay     %r15, {emit_int (-n)} (%r15)\n`;
         stack_offset := !stack_offset + n
 
      | Lop(Iload(chunk, addr)) ->
@@ -364,7 +364,7 @@ let emit_instr i =
           `	ldebr	{emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
 
     | Lop(Istore(Single, addr, _)) ->
-        `	ledbr	{emit_fpr 15}, {emit_reg i.arg.(0)}\n`;
+        `	ledbr	%f15, {emit_reg i.arg.(0)}\n`;
         emit_load_store "stey" addr i.arg 1 reg_f15
     | Lop(Istore(chunk, addr, _)) ->
         let storeinstr =
@@ -380,47 +380,47 @@ let emit_instr i =
     | Lop(Ialloc n) ->
         if !call_gc_label = 0 then call_gc_label := new_label();
         let lbl = new_label() in
-        `	agfi	{emit_gpr 11}, {emit_int(-n)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 11}\n`;
+        `	agfi	%r11, {emit_int(-n)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, %r11\n`;
         `	agfi	{emit_reg i.res.(0)}, {emit_int size_addr}\n`;
-        `	clgr	{emit_gpr 11}, {emit_gpr 10}\n`;
+        `	clgr	%r11, %r10\n`;
         `	jnl	{emit_label lbl}\n`;
-        `	brasl   {emit_gpr 14}, {emit_label !call_gc_label}\n`;
+        `	brasl   %r14, {emit_label !call_gc_label}\n`;
         let fr_lbl = record_frame i.live Debuginfo.none in
 	 `{emit_label fr_lbl}:\n`;
         `	{emit_label lbl}:\n`
     | Lop(Iintop Isub) ->
-        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
-        `	sgr	{emit_gpr 1}, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 1}\n`
+        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
+        `	sgr	%r1, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop Imulh) ->
        let lbl1 = new_label() in
-        `	lpgr	{emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
-        `	lpgr	{emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
-        `	mlgr	{emit_gpr 0}, {emit_gpr 0}\n`;
-        `	ldgr	{emit_fpr 15}, {emit_gpr 1}\n`;
-        `	lgr	{emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
-        `	xgr	{emit_gpr 1}, {emit_reg i.arg.(1)}\n`;
-        `	cgfi	{emit_gpr 1}, 0\n`;
+        `	lpgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	lpgr	%r0, {emit_reg i.arg.(1)}\n`;
+        `	mlgr	%r0, %r0\n`;
+        `	ldgr	%f15, %r1\n`;
+        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	xgr	%r1, {emit_reg i.arg.(1)}\n`;
+        `	cgfi	%r1, 0\n`;
         `	jnl	{emit_label lbl1}\n`;
-        `	lgdr	{emit_gpr 1},{emit_fpr 15}\n`;
-        `	xilf	{emit_gpr 0},0xFFFFFFFF\n`;
-        `	xihf	{emit_gpr 0},0xFFFFFFFF\n`;
-        `	xilf	{emit_gpr 1},0xFFFFFFFF\n`;
-        `	xihf	{emit_gpr 1},0xFFFFFFFF\n`;
-        `	algfi	{emit_gpr 1},1\n`;
-        `	lgfi	{emit_gpr 1},0\n`;
-        `	alcgr	{emit_gpr 0},{emit_gpr 1}\n`;
+        `	lgdr	%r1,%f15\n`;
+        `	xilf	%r0,0xFFFFFFFF\n`;
+        `	xihf	%r0,0xFFFFFFFF\n`;
+        `	xilf	%r1,0xFFFFFFFF\n`;
+        `	xihf	%r1,0xFFFFFFFF\n`;
+        `	algfi	%r1,1\n`;
+        `	lgfi	%r1,0\n`;
+        `	alcgr	%r0,%r1\n`;
         `	{emit_label lbl1}:\n`;
-        `	lgr	{emit_reg i.res.(0)}, {emit_gpr 0}\n`
+        `	lgr	{emit_reg i.res.(0)}, %r0\n`
     | Lop(Iintop Imod) ->
-        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
-        `	dsgr    {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 0}\n`
+        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr    %r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, %r0\n`
     | Lop(Iintop Idiv) ->
-        `	lgr     {emit_gpr 1}, {emit_reg i.arg.(0)}\n`;
-        `	dsgr    {emit_gpr 0}, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, {emit_gpr 1}\n`
+        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr    %r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr     {emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop Ilsl) ->
         `	sllg    {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
     | Lop(Iintop Ilsr) ->
@@ -461,8 +461,8 @@ let emit_instr i =
        let lbl = bound_error_label i.dbg in 
 	`	cgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
 	`	jle	{emit_label lbl}\n`; 
-	`	lgfi	{emit_gpr 1}, {emit_int n}\n`;
-	`	cgfi	{emit_gpr 1}, (0)\n`; 
+	`	lgfi	%r1, {emit_int n}\n`;
+	`	cgfi	%r1, (0)\n`; 
 	`	jl	{emit_label lbl}\n`
     | Lop(Iintop_imm(Ilsl, n)) ->
         `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
@@ -473,18 +473,18 @@ let emit_instr i =
     | Lop(Iintop_imm(Iand, n)) ->
           let lbl = new_label() in
           int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
-          `	ngr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+          `	lgrl	%r1, {emit_label lbl}\n`;
+          `	ngr	{emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop_imm(Ior, n)) ->
           let lbl = new_label() in
           int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
-          `	ogr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+          `	lgrl	%r1, {emit_label lbl}\n`;
+          `	ogr	{emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop_imm(Ixor, n)) ->
           let lbl = new_label() in
           int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	{emit_gpr 1}, {emit_label lbl}\n`;
-          `	xgr	{emit_reg i.res.(0)}, {emit_gpr 1}\n`
+          `	lgrl	%r1, {emit_label lbl}\n`;
+          `	xgr	{emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop_imm(Imul, n)) ->
           `	msgfi	{emit_reg i.res.(0)}, {emit_int n}\n`
     | Lop(Iintop_imm(Iadd, n)) ->
@@ -509,12 +509,12 @@ let emit_instr i =
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
     | Lreloadretaddr ->
         let n = frame_size() in
-        `	lg	{emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`
+        `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`
     | Lreturn ->
         let n = frame_size() in
         if n > 0 then
-          `	agfi	{emit_gpr 15}, {emit_int n}\n`;
-        `	br	{emit_gpr 14}\n`
+          `	agfi	%r15, {emit_int n}\n`;
+        `	br	%r14\n`
     | Llabel lbl ->
         `{emit_label lbl}:\n`
     | Lbranch lbl ->
@@ -540,12 +540,12 @@ let emit_instr i =
             let branch = name_for_float_comparison cmp neg in
             `	{emit_string branch}	{emit_label lbl}\n`
         | Ioddtest ->
-            `	lgfi	{emit_gpr 0}, 1\n`;
-            `	ngr	{emit_gpr 0}, {emit_reg i.arg.(0)}\n`;
+            `	lgfi	%r0, 1\n`;
+            `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
             `	brcl	4,	{emit_label lbl}\n`
         | Ieventest ->
-            `	lgfi	{emit_gpr 0}, 1\n`;
-            `	ngr	{emit_gpr 0}, {emit_reg i.arg.(0)}\n`;
+            `	lgfi	%r0, 1\n`;
+            `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
             `	brcl	8,{emit_label lbl}\n`
         end
     | Lcondbranch3(lbl0, lbl1, lbl2) ->
@@ -564,12 +564,12 @@ let emit_instr i =
         end
     | Lswitch jumptbl ->
         let lbl = new_label() in
-        `	larl	{emit_gpr 0}, {emit_label lbl}\n`;
-        `	sllg	{emit_gpr 1}, {emit_reg i.arg.(0)}, 2(%r0)\n`;
-        `	agr	{emit_gpr 1}, {emit_gpr 0}\n`;
-        `	lgf	{emit_gpr 1}, 0({emit_gpr 1})\n`;
-        `	agr	{emit_gpr 1}, {emit_gpr 0}\n`;
-        `	br	{emit_gpr 1}\n`;
+        `	larl	%r0, {emit_label lbl}\n`;
+        `	sllg	%r1, {emit_reg i.arg.(0)}, 2(%r0)\n`;
+        `	agr	%r1, %r0\n`;
+        `	lgf	%r1, 0(%r1)\n`;
+        `	agr	%r1, %r0\n`;
+        `	br	%r1\n`;
         emit_string rodata_space;
         `	.align	8\n`;
         `{emit_label lbl}:`;
@@ -578,34 +578,34 @@ let emit_instr i =
         done;
         emit_string code_space
     | Lsetuptrap lbl ->
-        `	brasl   {emit_gpr 14}, {emit_label lbl}\n`;
+        `	brasl   %r14, {emit_label lbl}\n`;
     | Lpushtrap ->
         stack_offset := !stack_offset + 16;
-        `	agfi	{emit_gpr 15}, -16\n`;
-        `	stg     {emit_gpr 14}, 0({emit_gpr 15})\n`;
-        `	stg     {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
-        `	lgr     {emit_gpr 13}, {emit_gpr 15}\n`
+        `	agfi	%r15, -16\n`;
+        `	stg     %r14, 0(%r15)\n`;
+        `	stg     %r13, {emit_int size_addr}(%r15)\n`;
+        `	lgr     %r13, %r15\n`
     | Lpoptrap ->
-        `	lg      {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
-        `	agfi    {emit_gpr 15}, 16\n`;
+        `	lg      %r13, {emit_int size_addr}(%r15)\n`;
+        `	agfi    %r15, 16\n`;
         stack_offset := !stack_offset - 16
     | Lraise k ->
         begin match !Clflags.debug, k with
         | true, Lambda.Raise_regular ->
-          `	brasl   {emit_gpr 14}, {emit_symbol "caml_raise_exn"}\n`;
+          `	brasl   %r14, {emit_symbol "caml_raise_exn"}\n`;
           let lbl = record_frame Reg.Set.empty i.dbg in
 	   `{emit_label lbl}:\n`
         | true, Lambda.Raise_reraise ->
-          `	brasl   {emit_gpr 14}, {emit_symbol "caml_reraise_exn"}\n`;
+          `	brasl   %r14, {emit_symbol "caml_reraise_exn"}\n`;
           let lbl = record_frame Reg.Set.empty i.dbg in
 	   `{emit_label lbl}:\n`
         | false, _
         | true, Lambda.Raise_notrace ->
-          `	lg      {emit_gpr 1}, 0({emit_gpr 13})\n`;
-          `	lgr     {emit_gpr 15},{emit_gpr 13}\n`;
-          `	lg      {emit_gpr 13}, {emit_int size_addr}({emit_gpr 15})\n`;
-          `	agfi    {emit_gpr 15}, 16\n`;
-          `	br	{emit_gpr 1}\n`
+          `	lg      %r1, 0(%r13)\n`;
+          `	lgr     %r15,%r13\n`;
+          `	lg      %r13, {emit_int size_addr}(%r15)\n`;
+          `	agfi    %r15, 16\n`;
+          `	br	%r1\n`
         end
 
 
@@ -660,9 +660,9 @@ emit_debug_info fundecl.fun_dbg;
   `{emit_symbol fundecl.fun_name}:\n`;
   let n = frame_size() in
   if n > 0 then
-    `	lay	{emit_gpr 15}, {emit_int(-n)}({emit_gpr 15})\n`;
+    `	lay	%r15, {emit_int(-n)}(%r15)\n`;
   if !contains_calls then
-    `	stg     {emit_gpr 14}, {emit_int(n - size_addr)}({emit_gpr 15})\n`;
+    `	stg     %r14, {emit_int(n - size_addr)}(%r15)\n`;
   `{emit_label !tailrec_entry_point}:\n`;
   emit_all fundecl.fun_body;
   (* Emit the glue code to call the GC *)

From ad3a967660c7ccc533c0bea9a481cd32ebf5a9f3 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 09:00:37 -0400
Subject: [PATCH 09/27] zSeries port: factor out adjustments to the stack
 pointer

New function emit_stack_adjust, which chooses the shortest instruction
that performs the required adjustment.

Later, this will be a good place to put cfi_adjust directives.
---
 asmcomp/s390x/emit.mlp | 67 ++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 41 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index bff1f3af8..071dffab7 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -111,6 +111,19 @@ let emit_load_store instr addressing_mode addr n arg =
   | Iindexed2 ofs ->
       `	{emit_string instr}	{emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)},{emit_reg addr.(n+1)})\n`
 
+(* Adjust the stack pointer down by N.  
+   Choose the shortest instruction possible for the value of N. *)
+
+let emit_stack_adjust n =
+  let n = -n in
+  if n = 0 then ()
+  else if n >= 0 && n < 4096 then
+    `	la	%r15, {emit_int n}(%r15)\n`
+  else if n >= -0x80000 && n < 0x80000 then
+    `	lay	%r15, {emit_int n}(%r15)\n`
+  else
+    `	agfi	%r15, {emit_int n}\n`
+
 (* After a comparison, extract the result as 0 or 1 *)
 let emit_set_comp cmp res =
     `	lgfi	%r1, 1\n`;
@@ -302,26 +315,18 @@ let emit_instr i =
 	 `{emit_label lbl}:\n`;
     | Lop(Itailcall_ind) ->
         let n = frame_size() in
-        if !contains_calls then begin
+        if !contains_calls then
           `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
-          `	agfi	%r15, {emit_int n}\n`;
-        end else begin
-          if n > 0 then
-            `	agfi	%r15, {emit_int n}\n`;
-        end;
+	emit_stack_adjust (-n);
         `	br	{emit_reg i.arg.(0)}\n`
     | Lop(Itailcall_imm s) ->
         if s = !function_name then
           `	brcl	15,{emit_label !tailrec_entry_point}\n`
         else begin
           let n = frame_size() in
-          if !contains_calls then begin
-          `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
-          `	agfi	%r15, {emit_int n}\n`;
-          end else begin
-            if n > 0 then
-              `	agfi	%r15, {emit_int n}\n`;
-          end;
+          if !contains_calls then
+            `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
+	  emit_stack_adjust (-n);
           `	brcl	15,{emit_symbol s}\n`
         end
               
@@ -343,8 +348,8 @@ let emit_instr i =
           `	brasl   %r14, {emit_symbol s}\n`
        end
 
-      | Lop(Istackoffset n) ->
-        `	lay     %r15, {emit_int (-n)} (%r15)\n`;
+     | Lop(Istackoffset n) ->
+        emit_stack_adjust n;
         stack_offset := !stack_offset + n
 
      | Lop(Iload(chunk, addr)) ->
@@ -512,8 +517,7 @@ let emit_instr i =
         `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`
     | Lreturn ->
         let n = frame_size() in
-        if n > 0 then
-          `	agfi	%r15, {emit_int n}\n`;
+	emit_stack_adjust (-n);
         `	br	%r14\n`
     | Llabel lbl ->
         `{emit_label lbl}:\n`
@@ -581,13 +585,13 @@ let emit_instr i =
         `	brasl   %r14, {emit_label lbl}\n`;
     | Lpushtrap ->
         stack_offset := !stack_offset + 16;
-        `	agfi	%r15, -16\n`;
+	emit_stack_adjust 16;
         `	stg     %r14, 0(%r15)\n`;
         `	stg     %r13, {emit_int size_addr}(%r15)\n`;
         `	lgr     %r13, %r15\n`
     | Lpoptrap ->
         `	lg      %r13, {emit_int size_addr}(%r15)\n`;
-        `	agfi    %r15, 16\n`;
+	emit_stack_adjust (-16);
         stack_offset := !stack_offset - 16
     | Lraise k ->
         begin match !Clflags.debug, k with
@@ -602,31 +606,13 @@ let emit_instr i =
         | false, _
         | true, Lambda.Raise_notrace ->
           `	lg      %r1, 0(%r13)\n`;
-          `	lgr     %r15,%r13\n`;
+          `	lgr     %r15, %r13\n`;
           `	lg      %r13, {emit_int size_addr}(%r15)\n`;
-          `	agfi    %r15, 16\n`;
+	  emit_stack_adjust (-16);
           `	br	%r1\n`
         end
 
 
-(* Checks if a pseudo-instruction expands to instructions
-   that do not branch and do not affect CR0 nor R12. *)
-
-(* No branch delay slots needed on Z *)
-let is_simple_instr i = false
-
-
-let no_interference res arg =
-  try
-    for i = 0 to Array.length arg - 1 do
-      for j = 0 to Array.length res - 1 do
-        if arg.(i).loc = res.(j).loc then raise Exit
-      done
-    done;
-    true
-  with Exit ->
-    false
-
 (* Emit a sequence of instructions *)
 
 let rec emit_all i =
@@ -659,8 +645,7 @@ emit_debug_info fundecl.fun_dbg;
   `	.align	8\n`;
   `{emit_symbol fundecl.fun_name}:\n`;
   let n = frame_size() in
-  if n > 0 then
-    `	lay	%r15, {emit_int(-n)}(%r15)\n`;
+  emit_stack_adjust n;
   if !contains_calls then
     `	stg     %r14, {emit_int(n - size_addr)}(%r15)\n`;
   `{emit_label !tailrec_entry_point}:\n`;

From 88fd26f63c9c77fb66ff86133a77b20ebeb78461 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 09:26:11 -0400
Subject: [PATCH 10/27] z port: streamline the heap allocation code

Move the cold path (the one that calls the GC when alloc_ptr < alloc_limit)
as much as possible to the end of the function.

Use la and lay to produce shorter code.
---
 asmcomp/s390x/emit.mlp | 82 +++++++++++++++++++++---------------------
 asmrun/s390x.S         |  3 +-
 2 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 071dffab7..eb8a6f745 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -75,8 +75,6 @@ let code_space = "	.section \".text\"\n"
 
 let rodata_space = "	.section \".rodata\"\n"
 
-let datag = ".quad"
-
 (* Output a pseudo-register *)
 
 let emit_reg r =
@@ -158,6 +156,22 @@ let record_frame live dbg =
       fd_live_offset = !live_offset;
       fd_debuginfo = dbg } :: !frame_descriptors;
   lbl
+
+(* Record calls to caml_call_gc, emitted out of line. *)
+
+type gc_call =
+  { gc_lbl: label;                      (* Entry label *)
+    gc_return_lbl: label;               (* Where to branch after GC *)
+    gc_frame_lbl: label }               (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc gc =
+  `{emit_label gc.gc_lbl}:	{emit_call "caml_call_gc"}\n`;
+  `{emit_label gc.gc_frame_lbl}:	brcl 15, {emit_label gc.gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error, emitted out of line. *)
+
 type bound_error_call =
   { bd_lbl: label;                      (* Entry label *)
     bd_frame: label }                   (* Label of frame descriptor *)
@@ -185,15 +199,12 @@ let emit_call_bound_errors () =
   List.iter emit_call_bound_error !bound_error_sites;
   if !bound_error_call > 0 then
     `{emit_label !bound_error_call}:	{emit_call "caml_ml_array_bound_error"}\n`
+
 (* Record floating-point and large integer literals *)
 
 let float_literals = ref ([] : (int64 * int) list)
 let int_literals = ref ([] : (nativeint * int) list)
 
-let pic_externals = false
-
-let external_functions = ref StringSet.empty
-
 (* Names for conditional branches after comparisons *)
 
 let branch_for_comparison = function
@@ -252,11 +263,6 @@ let name_for_specific = function
 let function_name = ref ""
 (* Entry point for tail recursive calls *)
 let tailrec_entry_point = ref 0
-(* Names of functions defined in the current file *)
-let defined_functions = ref StringSet.empty
-(* Label of glue code for calling the GC *)
-let call_gc_label = ref 0
-
 
 (* Output the assembly code for an instruction *)
 
@@ -383,17 +389,18 @@ let emit_instr i =
         emit_load_store storeinstr addr i.arg 1 i.arg.(0)
         
     | Lop(Ialloc n) ->
-        if !call_gc_label = 0 then call_gc_label := new_label();
-        let lbl = new_label() in
-        `	agfi	%r11, {emit_int(-n)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, %r11\n`;
-        `	agfi	{emit_reg i.res.(0)}, {emit_int size_addr}\n`;
+	let lbl_redo = new_label() in
+	let lbl_call_gc = new_label() in
+        let lbl_frame = record_frame i.live i.dbg in
+        call_gc_sites :=
+          { gc_lbl = lbl_call_gc;
+            gc_return_lbl = lbl_redo;
+            gc_frame_lbl = lbl_frame } :: !call_gc_sites;
+	`{emit_label lbl_redo}:`;
+        `	lay	%r11, {emit_int(-n)}(%r11)\n`;
         `	clgr	%r11, %r10\n`;
-        `	jnl	{emit_label lbl}\n`;
-        `	brasl   %r14, {emit_label !call_gc_label}\n`;
-        let fr_lbl = record_frame i.live Debuginfo.none in
-	 `{emit_label fr_lbl}:\n`;
-        `	{emit_label lbl}:\n`
+        `	brcl	4, {emit_label lbl_call_gc}\n`;  (* less than *)
+	`	la	{emit_reg i.res.(0)}, 8(%r11)\n`
     | Lop(Iintop Isub) ->
         `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
         `	sgr	%r1, {emit_reg i.arg.(1)}\n`;
@@ -626,21 +633,16 @@ let rec emit_all i =
 
 let fundecl fundecl =
   function_name := fundecl.fun_name;
-  defined_functions := StringSet.add fundecl.fun_name !defined_functions;
   tailrec_entry_point := new_label();
   stack_offset := 0;
-  call_gc_label := 0;
+  call_gc_sites := [];
   bound_error_sites := [];
   bound_error_call := 0;
   float_literals := [];
   int_literals := [];
   `	.globl	{emit_symbol fundecl.fun_name}\n`;
-emit_debug_info fundecl.fun_dbg;
-  begin match Config.system with
-  | "elf" | "bsd" | "bsd_elf" ->
-      `	.type	{emit_symbol fundecl.fun_name}, @function\n`
-  | _ -> ()
-  end;
+  emit_debug_info fundecl.fun_dbg;
+  `	.type	{emit_symbol fundecl.fun_name}, @function\n`;
   emit_string code_space;
   `	.align	8\n`;
   `{emit_symbol fundecl.fun_name}:\n`;
@@ -651,10 +653,8 @@ emit_debug_info fundecl.fun_dbg;
   `{emit_label !tailrec_entry_point}:\n`;
   emit_all fundecl.fun_body;
   (* Emit the glue code to call the GC *)
-  if !call_gc_label > 0 then begin
-    `{emit_label !call_gc_label}:\n`;
-    `	brcl	15,{emit_symbol "caml_call_gc"}\n`
-  end;
+  List.iter emit_call_gc !call_gc_sites;
+  (* Emit the glue code to handle bound errors *)
   emit_call_bound_errors();
   (* Emit the numeric literals *)
   if !float_literals <> [] || !int_literals <> [] then begin
@@ -667,7 +667,7 @@ emit_debug_info fundecl.fun_dbg;
       !float_literals;
     List.iter
       (fun (n, lbl) ->
-        `{emit_label lbl}:	{emit_string datag}	{emit_nativeint n}\n`)
+        `{emit_label lbl}:	.quad	{emit_nativeint n}\n`)
       !int_literals
   end
 
@@ -691,15 +691,15 @@ let emit_item = function
   | Cint32 n ->
       `	.long	{emit_nativeint n}\n`
   | Cint n ->
-      `	{emit_string datag}	{emit_nativeint n}\n`
+      `	.quad	{emit_nativeint n}\n`
   | Csingle f ->
       emit_float32_directive ".long" (Int32.bits_of_float f)
   | Cdouble f ->
       emit_float64_directive ".quad" (Int64.bits_of_float f)
   | Csymbol_address s ->
-      `	{emit_string datag}	{emit_symbol s}\n`
+      `	.quad	{emit_symbol s}\n`
   | Clabel_address lbl ->
-      `	{emit_string datag}	{emit_data_label lbl}\n`
+      `	.quad	{emit_data_label lbl}\n`
   | Cstring s ->
       emit_bytes_directive "	.byte	" s
   | Cskip n ->
@@ -717,8 +717,6 @@ let data l =
 
 let begin_assembly() =
   reset_debug_info();
-  defined_functions := StringSet.empty;
-  external_functions := StringSet.empty;
   (* Emit the beginning of the segments *)
   let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
   emit_string data_space;
@@ -742,7 +740,7 @@ let end_assembly() =
   let lbl_end = Compilenv.make_symbol (Some "data_end") in
   declare_global_data lbl_end;
   `{emit_symbol lbl_end}:\n`;
-  `	{emit_string datag}	0\n`;
+  `	.quad	0\n`;
   (* Emit the frame descriptors *)
   emit_string rodata_space;
   `	.align	8\n`;
@@ -750,10 +748,10 @@ let end_assembly() =
   declare_global_data lbl;
   `{emit_symbol lbl}:\n`;
   emit_frames
-    { efa_label = (fun l -> `	{emit_string datag}	{emit_label l}\n`);
+    { efa_label = (fun l -> `	.quad	{emit_label l}\n`);
       efa_16 = (fun n -> `	.short	{emit_int n}\n`);
       efa_32 = (fun n -> `	.long	{emit_int32 n}\n`);
-      efa_word = (fun n -> `	{emit_string datag}	{emit_int n}\n`);
+      efa_word = (fun n -> `	.quad	{emit_int n}\n`);
       efa_align = (fun n -> `	.align	{emit_int n}\n`);
       efa_label_rel = (fun lbl ofs ->
                            `	.long	({emit_label lbl} - .) + {emit_int32 ofs}\n`);
diff --git a/asmrun/s390x.S b/asmrun/s390x.S
index d724e1f2d..156207fee 100644
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@@ -95,9 +95,8 @@ caml_call_gc:
         ld      %f13, 108(%r15)
         ld      %f14, 112(%r15)
         ld      %f15, 120(%r15)
-    /* Return to caller, restarting the allocation */
+    /* Return to caller */
         Loadglobal(%r1, caml_last_return_address)
-        agfi   %r1, -30     /* Restart the allocation (7 instructions) */
     /* Deallocate stack frame */
         lay	%r15, FRAMESIZE(%r15)
     /* Return */

From 54ff5d03771a3b472b863066924e62e0f6fca710 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 09:33:42 -0400
Subject: [PATCH 11/27] z port: use lghi when possible to load integer
 constants

lghi is 4 bytes, lgfi is 6.
---
 asmcomp/s390x/emit.mlp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index eb8a6f745..6ed20da77 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -124,8 +124,8 @@ let emit_stack_adjust n =
 
 (* After a comparison, extract the result as 0 or 1 *)
 let emit_set_comp cmp res =
-    `	lgfi	%r1, 1\n`;
-    `	lgfi	{emit_reg res}, 0\n`;
+    `	lghi	%r1, 1\n`;
+    `	lghi	{emit_reg res}, 0\n`;
   begin match cmp with
       Ceq -> `	locgre	{emit_reg res}, %r1\n`
     | Cne -> `	locgrne	{emit_reg res}, %r1\n`
@@ -290,7 +290,9 @@ let emit_instr i =
                 fatal_error "Emit: Imove"
         end
     | Lop(Iconst_int n | Iconst_blockheader n) ->
-        if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
+        if n >= -0x8000n && n <= 0x7FFFn then begin
+          `	lghi	{emit_reg i.res.(0)}, {emit_nativeint n}\n`;
+        end else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin
           `	lgfi	{emit_reg i.res.(0)}, {emit_nativeint n}\n`;
         end else begin
           let lbl = new_label() in
@@ -551,11 +553,11 @@ let emit_instr i =
             let branch = name_for_float_comparison cmp neg in
             `	{emit_string branch}	{emit_label lbl}\n`
         | Ioddtest ->
-            `	lgfi	%r0, 1\n`;
+            `	lghi	%r0, 1\n`;
             `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
             `	brcl	4,	{emit_label lbl}\n`
         | Ieventest ->
-            `	lgfi	%r0, 1\n`;
+            `	lghi	%r0, 1\n`;
             `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
             `	brcl	8,{emit_label lbl}\n`
         end

From b149f661cd63f6bec9da63bc8867c82abf0a22f8 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 09:47:37 -0400
Subject: [PATCH 12/27] z port: use 'test under mask' instruction for even/odd
 conditional branches

This saves one instruction.  These cond branches are heavily used by pattern-matching compilation.
---
 asmcomp/s390x/emit.mlp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 6ed20da77..e33a22dba 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -553,12 +553,10 @@ let emit_instr i =
             let branch = name_for_float_comparison cmp neg in
             `	{emit_string branch}	{emit_label lbl}\n`
         | Ioddtest ->
-            `	lghi	%r0, 1\n`;
-            `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
-            `	brcl	4,	{emit_label lbl}\n`
+            `	tmll	{emit_reg i.arg.(0)}, 1\n`;
+            `	brcl	1, {emit_label lbl}\n`
         | Ieventest ->
-            `	lghi	%r0, 1\n`;
-            `	ngr	%r0, {emit_reg i.arg.(0)}\n`;
+            `	tmll	{emit_reg i.arg.(0)}, 1\n`;
             `	brcl	8,{emit_label lbl}\n`
         end
     | Lcondbranch3(lbl0, lbl1, lbl2) ->

From 39cd68fd0d6c629aed3fa8786fd1c130999f751a Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 10:00:59 -0400
Subject: [PATCH 13/27] z port: use unsigned comparison for Icheckbound
 instructions

This avoids testing the "< 0" case separately.
---
 asmcomp/s390x/emit.mlp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index e33a22dba..16af21a9d 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -452,11 +452,8 @@ let emit_instr i =
         end
     | Lop(Iintop Icheckbound) ->
         let lbl = bound_error_label i.dbg in
-	`	cgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-	`	jle	{emit_label lbl}\n`;
-	(* Check for negative index *)
-	`	cgfi	{emit_reg i.arg.(1)}, (0)\n`;
-	`	jl	{emit_label lbl}\n`
+	`	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+	`	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
     | Lop(Iintop op) ->
         let instr = name_for_intop op in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
@@ -473,11 +470,12 @@ let emit_instr i =
         end
     | Lop(Iintop_imm(Icheckbound, n)) ->
        let lbl = bound_error_label i.dbg in 
-	`	cgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
-	`	jle	{emit_label lbl}\n`; 
-	`	lgfi	%r1, {emit_int n}\n`;
-	`	cgfi	%r1, (0)\n`; 
-	`	jl	{emit_label lbl}\n`
+       if n >= 0 then begin
+	`	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
+	`	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
+       end else begin
+	`	brcl	15, {emit_label lbl}\n`  (* branch always *)
+       end
     | Lop(Iintop_imm(Ilsl, n)) ->
         `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
     | Lop(Iintop_imm(Ilsr, n)) ->

From 88fb625050e74795cd853181da9459761c631e3e Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 11:02:43 -0400
Subject: [PATCH 14/27] z port: revise two-address instructions and integer
 immediate operations

Use la/lay when possible for add immediate and sub immediate,
because these instructions support the case result <> argument.

Use 'and/or/xor immediate over low 32 bits' instructions.
Do this only if the top 32 bits of the constant are 0 (or/xor) or -1 (and).
---
 asmcomp/s390x/emit.mlp     | 56 +++++++++++++++++++-------------------
 asmcomp/s390x/selection.ml | 24 ++++++++--------
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 16af21a9d..73945d301 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -122,6 +122,19 @@ let emit_stack_adjust n =
   else
     `	agfi	%r15, {emit_int n}\n`
 
+(* Emit a 'add immediate' *)
+
+let emit_addimm res arg n =
+  if n >= 0 && n < 4096 then
+    `	la	{emit_reg res}, {emit_int n}({emit_reg arg})\n`
+  else if n >= -0x80000 && n < 0x80000 then
+    `	lay	{emit_reg res}, {emit_int n}({emit_reg arg})\n`
+  else begin
+    if arg.loc <> res.loc then
+      `	lgr	{emit_reg res}, {emit_reg arg}\n`;
+    `	agfi	{emit_reg res}, {emit_int n}\n`
+  end
+
 (* After a comparison, extract the result as 0 or 1 *)
 let emit_set_comp cmp res =
     `	lghi	%r1, 1\n`;
@@ -234,15 +247,13 @@ let name_for_float_comparison cmp neg =
 
 let name_for_intop = function
     Iadd  -> "agr"
+  | Isub  -> "sgr"
   | Imul  -> "msgr"
   | Iand  -> "ngr"
   | Ior   -> "ogr"
   | Ixor  -> "xgr"
   | _ -> Misc.fatal_error "Emit.Intop"
 
-let name_for_intop_imm = function
-  | _ -> Misc.fatal_error "Emit.Intop_imm"
-
 let name_for_floatop1 = function
     Inegf -> "lcdbr"
   | Iabsf -> "lpdbr"
@@ -403,10 +414,6 @@ let emit_instr i =
         `	clgr	%r11, %r10\n`;
         `	brcl	4, {emit_label lbl_call_gc}\n`;  (* less than *)
 	`	la	{emit_reg i.res.(0)}, 8(%r11)\n`
-    | Lop(Iintop Isub) ->
-        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
-        `	sgr	%r1, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop Imulh) ->
        let lbl1 = new_label() in
         `	lpgr	%r1, {emit_reg i.arg.(0)}\n`;
@@ -455,10 +462,13 @@ let emit_instr i =
 	`	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
 	`	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
     | Lop(Iintop op) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
         let instr = name_for_intop op in
-        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
+        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`
+    | Lop(Iintop_imm(Iadd, n)) ->
+        emit_addimm i.res.(0) i.arg.(0) n
     | Lop(Iintop_imm(Isub, n)) ->
-        `	agfi	{emit_reg i.res.(0)}, {emit_int(-n)}\n`
+        emit_addimm i.res.(0) i.arg.(0) (-n)
     | Lop(Iintop_imm(Icomp cmp, n)) ->
         begin match cmp with
           Isigned c ->
@@ -483,29 +493,19 @@ let emit_instr i =
     | Lop(Iintop_imm(Iasr, n)) ->
         `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
     | Lop(Iintop_imm(Iand, n)) ->
-          let lbl = new_label() in
-          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	%r1, {emit_label lbl}\n`;
-          `	ngr	{emit_reg i.res.(0)}, %r1\n`
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	nilf	{emit_reg i.res.(0)}, {emit_int (n land 0xFFFF_FFFF)}\n`
     | Lop(Iintop_imm(Ior, n)) ->
-          let lbl = new_label() in
-          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	%r1, {emit_label lbl}\n`;
-          `	ogr	{emit_reg i.res.(0)}, %r1\n`
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	oilf	{emit_reg i.res.(0)}, {emit_int n}\n`
     | Lop(Iintop_imm(Ixor, n)) ->
-          let lbl = new_label() in
-          int_literals := ((Nativeint.of_int n), lbl) :: !int_literals;
-          `	lgrl	%r1, {emit_label lbl}\n`;
-          `	xgr	{emit_reg i.res.(0)}, %r1\n`
+        assert (i.arg.(0).loc = i.res.(0).loc);
+        `	xilf	{emit_reg i.res.(0)}, {emit_int n}\n`
     | Lop(Iintop_imm(Imul, n)) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
           `	msgfi	{emit_reg i.res.(0)}, {emit_int n}\n`
-    | Lop(Iintop_imm(Iadd, n)) ->
-          `	lgr     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
-          `	agfi	{emit_reg i.res.(0)}, {emit_int n}\n`
-    | Lop(Iintop_imm(op, n)) ->
-        let instr = name_for_intop_imm op in
-        `	lgr     {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`;
-        `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_int n}\n`
+    | Lop(Iintop_imm((Imulh | Idiv | Imod), _)) ->
+        assert false
     | Lop(Inegf | Iabsf as op) ->
         let instr = name_for_floatop1 op in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
diff --git a/asmcomp/s390x/selection.ml b/asmcomp/s390x/selection.ml
index 773ce164c..740cf0c88 100644
--- a/asmcomp/s390x/selection.ml
+++ b/asmcomp/s390x/selection.ml
@@ -46,14 +46,14 @@ let rec select_addr = function
 let pseudoregs_for_operation op arg res =
   match op with
   (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
-    Iintop(Iadd|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
+  | Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
       ([|res.(0); arg.(1)|], res)
-    | Ispecific(sop) ->
+  | Ispecific(sop) ->
     ( [| arg.(0); arg.(1); res.(0) |], [| res.(0) |])
-    (* One-address unary operations: arg.(0) and res.(0) must be the same *)
-    |  Iintop_imm((Isub|Imul|Iand|Ior|Ixor), _) -> (res, res)
-    (* Other instructions are regular *)
-    | _ -> raise Use_default
+  (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+  |  Iintop_imm((Imul|Iand|Ior|Ixor), _) -> (res, res)
+  (* Other instructions are regular *)
+  | _ -> raise Use_default
 
 class selector = object (self)
 
@@ -77,9 +77,9 @@ method! select_operation op args =
     (Cmulhi, _) -> (Iintop Imulh, args)
   (* The and, or and xor instructions have a different range of immediate 
      operands than the other instructions *)
-  | (Cand, _) -> self#select_logical Iand args
-  | (Cor, _) -> self#select_logical Ior args
-  | (Cxor, _) -> self#select_logical Ixor args
+  | (Cand, _) -> self#select_logical Iand (-0x1_0000_0000) (-1) args
+  | (Cor, _) -> self#select_logical Ior 0 0xFFFF_FFFF args
+  | (Cxor, _) -> self#select_logical Ixor  0 0xFFFF_FFFF args
   (* Recognize mult-add and mult-sub instructions *)
   | (Caddf, [Cop(Cmulf, [arg1; arg2]); arg3]) ->
       (Ispecific Imultaddf, [arg1; arg2; arg3])
@@ -90,10 +90,10 @@ method! select_operation op args =
   | _ ->
       super#select_operation op args
 
-method select_logical op = function
-    [arg; Cconst_int n] when n >= 0 && n <= 0xFFFFFFFF ->
+method select_logical op lo hi = function
+    [arg; Cconst_int n] when n >= lo && n <= hi ->
       (Iintop_imm(op, n), [arg])
-  | [Cconst_int n; arg] when n >= 0 && n <= 0xFFFFFFFF ->
+  | [Cconst_int n; arg] when n >= lo && n <= hi ->
       (Iintop_imm(op, n), [arg])
   | args ->
       (Iintop op, args)

From cc9c12dff3b13431267f8b0527db15d8ab165f05 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 11:30:39 -0400
Subject: [PATCH 15/27] z port: fix issue with PIC code and dynamic loading

In PIC mode, Itailcall_imm should jumpt to the PLT of the called function.

Also: use %r7 rather than %r1 to pass the function pointer argument to caml_c_call.  It can be that caml_c_call is in a different shared object than the caller.  In this case, %r0 and %r1 can be destroyed by PLT stub code, according to the ELF ABI.
---
 asmcomp/s390x/emit.mlp | 9 ++++++---
 asmrun/s390x.S         | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 73945d301..0ca471b1d 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -346,16 +346,19 @@ let emit_instr i =
           if !contains_calls then
             `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
 	  emit_stack_adjust (-n);
-          `	brcl	15,{emit_symbol s}\n`
+          if !pic_code then
+            `	brcl	15, {emit_symbol s}@PLT\n`
+          else
+            `	brcl	15, {emit_symbol s}\n`
         end
               
      | Lop(Iextcall(s, alloc)) ->
         if alloc then begin
           if !pic_code then begin
-          `	lgrl	%r1, {emit_symbol s}@GOTENT\n`;
+          `	lgrl	%r7, {emit_symbol s}@GOTENT\n`;
           `	brasl	%r14, {emit_symbol "caml_c_call"}@PLT\n`
           end else begin
-          `	larl %r1, {emit_symbol s}\n`;
+          `	larl %r7, {emit_symbol s}\n`;
           `	brasl	%r14, {emit_symbol "caml_c_call"}\n`
           end;
           let lbl = record_frame i.live i.dbg in
diff --git a/asmrun/s390x.S b/asmrun/s390x.S
index 156207fee..4e3f4bc5e 100644
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@@ -110,14 +110,14 @@ caml_c_call:
         Storeglobal(%r15, caml_bottom_of_stack)
     /* Save return address */
         ldgr	%f15, %r14
-    /* Get ready to call C function (address in r1) */
+    /* Get ready to call C function (address in r7) */
     /* Record lowest stack address and return address */
         Storeglobal(%r14, caml_last_return_address)
     /* Make the exception handler and alloc ptr available to the C code */
         Storeglobal(%r11, caml_young_ptr)
         Storeglobal(%r13, caml_exception_pointer)
     /* Call the function */
-        basr %r14, %r1
+        basr %r14, %r7
     /* restore return address */
         lgdr	%r14,%f15
     /* Reload allocation pointer and allocation limit*/

From 9ff553d9edf11534a7ad41a58180b178e1c3a718 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 12:16:58 -0400
Subject: [PATCH 16/27] z port: update caml_ml_bound_error

To reflect the changes to caml_c_call from commit cc9c12dff3b13431267f8b0527db15d8ab165f05
---
 asmrun/s390x.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asmrun/s390x.S b/asmrun/s390x.S
index 4e3f4bc5e..c7072a08a 100644
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@@ -348,7 +348,7 @@ caml_callback3_exn:
 	.type	caml_ml_array_bound_error, @function
 caml_ml_array_bound_error:
 	lay	%r15, -160(%r15)    /* Reserve stack space for C call */
-	larl	%r1, caml_array_bound_error
+	larl	%r7, caml_array_bound_error
 	j	caml_c_call
         .globl  caml_system__code_end
 caml_system__code_end:

From 3f26d0e3752ef0364482adc65c1d19a89090a987 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Fri, 30 Oct 2015 12:17:56 -0400
Subject: [PATCH 17/27] z port: reloading must preserve 2-address instructions

Without the special reloading implemented here, a 2-address instruction such as x := x + y' could be reloaded as 'x1 := x2 + y' with two different temporaries x1, x2 for x.
---
 asmcomp/s390x/emit.mlp  |  2 ++
 asmcomp/s390x/reload.ml | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 0ca471b1d..9565882cb 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -513,6 +513,7 @@ let emit_instr i =
         let instr = name_for_floatop1 op in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
     | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
+        assert (i.arg.(0).loc = i.res.(0).loc);
         let instr = name_for_floatop2 op in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
     | Lop(Ifloatofint) ->
@@ -520,6 +521,7 @@ let emit_instr i =
     | Lop(Iintoffloat) ->
         `	cgdbr	{emit_reg i.res.(0)}, 0, {emit_reg i.arg.(0)}\n`
     | Lop(Ispecific sop) ->
+        assert (i.arg.(2).loc = i.res.(0).loc);
         let instr = name_for_specific sop in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`
     | Lreloadretaddr ->
diff --git a/asmcomp/s390x/reload.ml b/asmcomp/s390x/reload.ml
index 76c32b7c6..60ac59d26 100644
--- a/asmcomp/s390x/reload.ml
+++ b/asmcomp/s390x/reload.ml
@@ -12,5 +12,36 @@
 
 (* Reloading for the Z Processor *)
 
+open Arch
+open Mach
+
+class reload = object (self)
+
+inherit Reloadgen.reload_generic as super
+
+(* For 2-address instructions, reloading must make sure that the
+   temporary result register is the same as the appropriate
+   argument register. *)
+
+method! reload_operation op arg res =
+  match op with
+  (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
+  | Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor)  | Iaddf|Isubf|Imulf|Idivf ->
+      let res = self#makereg res.(0) in
+      ([|res; self#makereg arg.(1)|], [|res|])
+  (* Three-address ternary operations: arg.(2) and res.(0) must be the same *)
+  | Ispecific(Imultaddf|Imultsubf) ->
+      let res = self#makereg res.(0) in
+      ([|self#makereg arg.(0); self#makereg arg.(1); res|], [|res|])
+  (* One-address unary operations: arg.(0) and res.(0) must be the same *)
+  |  Iintop_imm((Imul|Iand|Ior|Ixor), _) ->
+      let res = self#makereg res.(0) in
+      ([|res|], [|res|])
+  (* Other instructions are regular *)
+  | _ ->
+      super#reload_operation op arg res
+
+end
+
 let fundecl f =
-  (new Reloadgen.reload_generic)#fundecl f
+  (new reload)#fundecl f

From 1ef188e2b1db66c142c1f3b3c8cc909e1dc7c694 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sat, 31 Oct 2015 06:39:27 -0400
Subject: [PATCH 18/27] tests/asmcomp: improve the arith test

- Test multiply-high
- Less verbose output
- Use our own PRNG instead of rand()
---
 testsuite/tests/asmcomp/arith.cmm    |  1 +
 testsuite/tests/asmcomp/lexcmm.mll   |  1 +
 testsuite/tests/asmcomp/mainarith.c  | 63 ++++++++++++++++++++--------
 testsuite/tests/asmcomp/parsecmm.mly |  2 +
 4 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/testsuite/tests/asmcomp/arith.cmm b/testsuite/tests/asmcomp/arith.cmm
index e5965bc9d..2d1031862 100644
--- a/testsuite/tests/asmcomp/arith.cmm
+++ b/testsuite/tests/asmcomp/arith.cmm
@@ -214,4 +214,5 @@
       (floataset d 37 (-f f))
       (floataset d 38 (absf f))
 
+      (addraset r 116 (mulh x y))
 )))))))
diff --git a/testsuite/tests/asmcomp/lexcmm.mll b/testsuite/tests/asmcomp/lexcmm.mll
index 473f3d9e0..7586189f0 100644
--- a/testsuite/tests/asmcomp/lexcmm.mll
+++ b/testsuite/tests/asmcomp/lexcmm.mll
@@ -55,6 +55,7 @@ let keyword_table =
     "let", LET;
     "load", LOAD;
     "mod", MODI;
+    "mulh", MULH;
     "or", OR;
     "proj", PROJ;
     "raise", RAISE Lambda.Raise_regular;
diff --git a/testsuite/tests/asmcomp/mainarith.c b/testsuite/tests/asmcomp/mainarith.c
index 94ff371e3..bcf2955fc 100644
--- a/testsuite/tests/asmcomp/mainarith.c
+++ b/testsuite/tests/asmcomp/mainarith.c
@@ -61,6 +61,7 @@ double F, G;
 
 extern void call_gen_code();
 extern void testarith();
+static intnat mulhs(intnat x, intnat y);
 
 void do_test(void)
 {
@@ -252,29 +253,55 @@ void do_test(void)
 
       FLOATTEST(D[37], - F);
       FLOATTEST(D[38], fabs(F));
+
+      INTTEST(R[116], mulhs(X, Y));
 }
 
-#ifdef __i386__
-#ifdef __FreeBSD__
-#include <floatingpoint.h>
-#endif
-#endif
+/* Multiply-high signed.  Hacker's Delight section 8.2 */
 
-void init_ieee_floats(void)
+#define HALFSIZE (4 * sizeof(intnat))
+#define HALFMASK (((intnat)1 << HALFSIZE) - 1)
+
+static intnat mulhs(intnat u, intnat v)
 {
-#ifdef __i386__
-#ifdef __FreeBSD__
-  fpsetmask(0);
-#endif
-#endif
+  uintnat u0, v0, w0;
+  intnat u1, v1, w1, w2, t;
+  u0 = u & HALFMASK; u1 = u >> HALFSIZE;
+  v0 = v & HALFMASK; v1 = v >> HALFSIZE;
+  w0 = u0*v0;
+  t = u1*v0 + (w0 >> HALFSIZE);
+  w1 = t & HALFMASK;
+  w2 = t >> HALFSIZE;
+  w1 = u0*v1 + w1;
+  return u1*v1 + w2 + (w1 >> HALFSIZE);
 }
 
+/* A simple linear congruential PRNG */
+
+#ifdef ARCH_SIXTYFOUR
+#define RAND_A 6364136223846793005ULL
+#define RAND_C 1442695040888963407ULL
+#else
+#define RAND_A 214013U
+#define RAND_C 2531011U 
+#endif
+
+static intnat rnd(void)
+{
+  static uintnat seed = 0;
+  seed = seed * RAND_A + RAND_C;
+  return (intnat) seed;
+}
+
+/* Test harness */
+
+#define NUM_RANDOM_ITERATIONS 1000000
+
 int main(int argc, char **argv)
 {
+  int i;
   double weird[4];
 
-  init_ieee_floats();
-
   if (argc >= 5) {
     X = atoi(argv[1]);
     Y = atoi(argv[2]);
@@ -283,12 +310,14 @@ int main(int argc, char **argv)
     do_test();
     return 0;
   }
+  printf("Testing -2...2\n");
   for(Y = -2; Y <= 2; Y++) {
     for (X = -2; X <= 2; X++) {
       F = X; G = Y; do_test();
     }
   }
   if (!(argc >= 2 && strcmp(argv[1], "noinf"))) {
+    printf("Testing special FP values\n");
     weird[0] = 0.0;
     weird[1] = 1.0 / weird[0];         /* +infty */
     weird[2] = -1.0 / weird[0];        /* -infty */
@@ -299,13 +328,13 @@ int main(int argc, char **argv)
       }
     }
   }
-  while(1) {
-    X = (rand() & 0x1FFFFFFF) - 0x10000000;
-    Y = (rand() & 0x1FFFFFFF) - 0x10000000;
+  printf("Testing %d random values\n", NUM_RANDOM_ITERATIONS);
+  for (i = 0; i < NUM_RANDOM_ITERATIONS; i++) {
+    X = rnd();
+    Y = rnd();
     F = X / 1e3;
     G = Y / 1e3;
     do_test();
-    printf("."); fflush(stdout);
   }
   return 0;
 }
diff --git a/testsuite/tests/asmcomp/parsecmm.mly b/testsuite/tests/asmcomp/parsecmm.mly
index a987ed08e..553ab66bc 100644
--- a/testsuite/tests/asmcomp/parsecmm.mly
+++ b/testsuite/tests/asmcomp/parsecmm.mly
@@ -102,6 +102,7 @@ let access_array base numelt size =
 %token LTI
 %token MODI
 %token MULF
+%token MULH
 %token MULI
 %token NEA
 %token NEF
@@ -289,6 +290,7 @@ binaryop:
   | GTF                         { Ccmpf Cgt }
   | GEF                         { Ccmpf Cge }
   | CHECKBOUND                  { Ccheckbound Debuginfo.none }
+  | MULH                        { Cmulhi }
 ;
 sequence:
     expr sequence               { Csequence($1, $2) }

From 2cdb50009bdefd2916ec715a02a5a9fe3c988631 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sat, 31 Oct 2015 06:44:21 -0400
Subject: [PATCH 19/27] z port: update the tests/asmcomp test

---
 testsuite/tests/asmcomp/s390x.S | 47 +++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/testsuite/tests/asmcomp/s390x.S b/testsuite/tests/asmcomp/s390x.S
index 519cc38a0..eb5bd23fe 100644
--- a/testsuite/tests/asmcomp/s390x.S
+++ b/testsuite/tests/asmcomp/s390x.S
@@ -5,45 +5,52 @@
 #define CAML_NEGF_MASK caml_negf_mask
 #define CAML_ABSF_MASK caml_absf_mask
 
-	.section "text"
+	.section ".text"
 	
 	.globl	CALL_GEN_CODE
 	.type	CALL_GEN_CODE, @function
 	.align	ALIGN
 CALL_GEN_CODE:
 	/* Stack space */
-	lay	%r15, -48(%r15)
+	lay	%r15, -144(%r15)
 	/* Save registers */
-	stg	%r14, 0(%r15)
-	stg	%r7, 8(%r15)
-	stg	%r8, 16(%r15)
-	stg	%r9, 24(%r15)
-	stg	%r10, 32(%r15)
-	stg	%r11, 40(%r15)
+        stmg    %r6,%r14, 0(%r15)
+        std     %f8, 72(%r15)
+        std     %f9, 80(%r15)
+        std     %f10, 88(%r15)
+        std     %f11, 96(%r15)
+        std     %f12, 104(%r15)
+        std     %f13, 112(%r15)
+        std     %f14, 120(%r15)
+        std     %f15, 128(%r15)
 	/* Shuffle args */
 	lgr	%r1, %r2
 	lgr	%r2, %r3
 	lgr	%r3, %r4
 	lgr	%r4, %r5
 	/* Function call */
-	brasl	%r14, %r1
+	basr	%r14, %r1
 	/* Restore registers */
-	lg	%r11, 40(%r15)
-	lg	%r10, 32(%r15)
-	lg	%r9, 24(%r15)
-	lg	%r8, 16(%r15)
-	lg	%r7, 8(%r15)
-
+        lmg    %r6,%r14, 0(%r15)
+        ld     %f8, 72(%r15)
+        ld     %f9, 80(%r15)
+        ld     %f10, 88(%r15)
+        ld     %f11, 96(%r15)
+        ld     %f12, 104(%r15)
+        ld     %f13, 112(%r15)
+        ld     %f14, 120(%r15)
+        ld     %f15, 128(%r15)
+	/* Return */
+	lay	%r15, 144(%r15)
+	br	%r14
 
 	.globl	CAML_C_CALL
 	.type	CAML_C_CALL, @function
 	.align	ALIGN
 CAML_C_CALL:
-	lay	%r15, -160(%r15)
-        brasl %r14, %r1
-	lay	%r15, 160(%r15)
+        br	%r7
 	
-	.section	.rodata
+	.section ".rodata"
 
 	.global	CAML_NEGF_MASK
 	.align	ALIGN
@@ -54,4 +61,4 @@ CAML_NEGF_MASK:
 CAML_ABSF_MASK:
         .quad   0x7FFFFFFFFFFFFFFF, 0
 
-        .comm   young_limit, 8
\ No newline at end of file
+        .comm   young_limit, 8

From 89475be9999965367737e6bcab593e67a0dac74e Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sat, 31 Oct 2015 06:47:50 -0400
Subject: [PATCH 20/27] z port: round toward 0 for float -> int conversions.

---
 asmcomp/s390x/emit.mlp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 9565882cb..05f4c642f 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -519,7 +519,8 @@ let emit_instr i =
     | Lop(Ifloatofint) ->
 	  `	cdgbr	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
     | Lop(Iintoffloat) ->
-        `	cgdbr	{emit_reg i.res.(0)}, 0, {emit_reg i.arg.(0)}\n`
+        (* rounding method #5 = round toward 0 *)
+        `	cgdbr	{emit_reg i.res.(0)}, 5, {emit_reg i.arg.(0)}\n`
     | Lop(Ispecific sop) ->
         assert (i.arg.(2).loc = i.res.(0).loc);
         let instr = name_for_specific sop in

From 414fc47a3c070df86bab13c1611e83361b57daef Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sat, 31 Oct 2015 06:48:29 -0400
Subject: [PATCH 21/27] z port: simpler implementation of multiply-high-signed

Following Hacker's Delight section 8.3.
---
 asmcomp/s390x/emit.mlp | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 05f4c642f..8a2add268 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -417,26 +417,27 @@ let emit_instr i =
         `	clgr	%r11, %r10\n`;
         `	brcl	4, {emit_label lbl_call_gc}\n`;  (* less than *)
 	`	la	{emit_reg i.res.(0)}, 8(%r11)\n`
+
     | Lop(Iintop Imulh) ->
-       let lbl1 = new_label() in
-        `	lpgr	%r1, {emit_reg i.arg.(0)}\n`;
-        `	lpgr	%r0, {emit_reg i.arg.(1)}\n`;
-        `	mlgr	%r0, %r0\n`;
-        `	ldgr	%f15, %r1\n`;
-        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
-        `	xgr	%r1, {emit_reg i.arg.(1)}\n`;
-        `	cgfi	%r1, 0\n`;
-        `	jnl	{emit_label lbl1}\n`;
-        `	lgdr	%r1,%f15\n`;
-        `	xilf	%r0,0xFFFFFFFF\n`;
-        `	xihf	%r0,0xFFFFFFFF\n`;
-        `	xilf	%r1,0xFFFFFFFF\n`;
-        `	xihf	%r1,0xFFFFFFFF\n`;
-        `	algfi	%r1,1\n`;
-        `	lgfi	%r1,0\n`;
-        `	alcgr	%r0,%r1\n`;
-        `	{emit_label lbl1}:\n`;
-        `	lgr	{emit_reg i.res.(0)}, %r0\n`
+       (* Hacker's Delight section 8.3:
+            mul-high-signed(a, b) = mul-high-unsigned(a, b)
+                                    - a  if b < 0
+				    - b  if a < 0
+          or, without branches,
+            mul-high-signed(a, b) = mul-high-unsigned(a, b)
+                                    - (a & (b >>s 63))
+				    - (b & (a >>s 63))
+       *)
+       `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+       `	mlgr	%r0, {emit_reg i.arg.(1)}\n`;
+         (* r0:r1 is 128-bit unsigned product; r0 is the high bits *)
+       `	srag	%r1, {emit_reg i.arg.(0)}, 63\n`;
+       `	ngr	%r1, {emit_reg i.arg.(1)}\n`;
+       `	sgr	%r0, %r1\n`;
+       `	srag	%r1, {emit_reg i.arg.(1)}, 63\n`;
+       `	ngr	%r1, {emit_reg i.arg.(0)}\n`;
+       `	sgr	%r0, %r1\n`;
+       `	lgr	{emit_reg i.res.(0)}, %r0\n`
     | Lop(Iintop Imod) ->
         `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
         `	dsgr    %r0, {emit_reg i.arg.(1)}\n`;

From 99067ad1e7399cacad7b17c2196c1913dc1b433b Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sat, 31 Oct 2015 07:34:27 -0400
Subject: [PATCH 22/27] z port: do not use locgr instruction for Boolean-valued
 comparisons

The locgr instruction is not available in z10, the baseline for this port.
Instead, generate pedestrian code with a conditional branch.
Pass -march=z10 to the assembler to enforce z10 compliance.
---
 asmcomp/s390x/emit.mlp | 76 ++++++++++++++++++++++--------------------
 configure              |  2 +-
 2 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 8a2add268..0e4729e5c 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -136,6 +136,9 @@ let emit_addimm res arg n =
   end
 
 (* After a comparison, extract the result as 0 or 1 *)
+(* The locgr instruction is not available in the z10 architecture,
+   so this code is currently unused. *)
+(*
 let emit_set_comp cmp res =
     `	lghi	%r1, 1\n`;
     `	lghi	{emit_reg res}, 0\n`;
@@ -147,6 +150,7 @@ let emit_set_comp cmp res =
     | Clt -> `	locgrl	{emit_reg res}, %r1\n`
     | Cge -> `	locgrnl	{emit_reg res}, %r1\n`
   end
+*)
 
 (* Record live pointers at call points *)
 
@@ -218,12 +222,12 @@ let emit_call_bound_errors () =
 let float_literals = ref ([] : (int64 * int) list)
 let int_literals = ref ([] : (nativeint * int) list)
 
-(* Names for conditional branches after comparisons *)
+(* Masks for conditional branches after comparisons *)
 
 let branch_for_comparison = function
-    Ceq -> "brcl	8," | Cne -> "brcl	7,"
-  | Cle -> "brcl	12," | Cgt -> "brcl	2,"
-  | Cge -> "brcl	10," | Clt -> "brcl	4,"
+    Ceq -> 8  | Cne -> 7
+  | Cle -> 12 | Cgt -> 2
+  | Cge -> 10 | Clt -> 4
 
 let name_for_int_comparison = function
     Isigned cmp -> ("cgr", branch_for_comparison cmp)
@@ -234,14 +238,14 @@ let name_for_int_comparison_imm = function
   | Iunsigned cmp -> ("clgfi", branch_for_comparison cmp)
 
 (* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = unordered*)
-let name_for_float_comparison cmp neg =
+let branch_for_float_comparison cmp neg =
    match cmp with
-     Ceq -> if neg then "brcl	7," else "brcl	8,"
-   | Cne -> if neg then "brcl	8,"  else "brcl	7,"
-   | Cle -> if neg then "brcl	3,"  else "brcl	12,"
-   | Cgt -> if neg then "brcl	13," else "brcl	2,"
-   | Cge -> if neg then "brcl	5,"  else "brcl	10,"
-   | Clt -> if neg then "brcl	11," else "brcl	4,"
+     Ceq -> if neg then 7  else 8
+   | Cne -> if neg then 8  else 7
+   | Cle -> if neg then 3  else 12
+   | Cgt -> if neg then 13 else 2
+   | Cge -> if neg then 5  else 10
+   | Clt -> if neg then 11 else 4
 
 (* Names for various instructions *)
 
@@ -453,14 +457,13 @@ let emit_instr i =
     | Lop(Iintop Iasr) ->
         `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
     | Lop(Iintop(Icomp cmp)) ->
-        begin match cmp with
-          Isigned c ->
-            `	cgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-            emit_set_comp c i.res.(0)
-        | Iunsigned c ->
-            `	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-            emit_set_comp c i.res.(0)
-        end
+        let lbl = new_label() in
+        let (comp, mask) = name_for_int_comparison cmp in
+        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 1\n`;
+	`	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 0\n`;
+	`{emit_label lbl}:\n`
     | Lop(Iintop Icheckbound) ->
         let lbl = bound_error_label i.dbg in
 	`	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
@@ -474,14 +477,13 @@ let emit_instr i =
     | Lop(Iintop_imm(Isub, n)) ->
         emit_addimm i.res.(0) i.arg.(0) (-n)
     | Lop(Iintop_imm(Icomp cmp, n)) ->
-        begin match cmp with
-          Isigned c ->
-            `	cgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;
-            emit_set_comp c i.res.(0)
-        | Iunsigned c ->
-            `	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;
-            emit_set_comp c i.res.(0)
-        end
+        let lbl = new_label() in
+        let (comp, mask) = name_for_int_comparison_imm cmp in
+        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 1\n`;
+	`	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	lghi	{emit_reg i.res.(0)}, 0\n`;
+	`{emit_label lbl}:\n`
     | Lop(Iintop_imm(Icheckbound, n)) ->
        let lbl = bound_error_label i.dbg in 
        if n >= 0 then begin
@@ -546,37 +548,37 @@ let emit_instr i =
             `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
             `	brcl	8,	{emit_label lbl}\n`
         | Iinttest cmp ->
-            let (comp, branch) = name_for_int_comparison cmp in
+            let (comp, mask) = name_for_int_comparison cmp in
             `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-            `	{emit_string branch}	{emit_label lbl}\n`
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
         | Iinttest_imm(cmp, n) ->
-            let (comp, branch) = name_for_int_comparison_imm cmp in
+            let (comp, mask) = name_for_int_comparison_imm cmp in
             `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
-            `	{emit_string branch}	{emit_label lbl}\n`
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
         | Ifloattest(cmp, neg) ->
             `	cdbr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-            let branch = name_for_float_comparison cmp neg in
-            `	{emit_string branch}	{emit_label lbl}\n`
+            let mask = branch_for_float_comparison cmp neg in
+            `	brcl	{emit_int mask}, {emit_label lbl}\n`
         | Ioddtest ->
             `	tmll	{emit_reg i.arg.(0)}, 1\n`;
             `	brcl	1, {emit_label lbl}\n`
         | Ieventest ->
             `	tmll	{emit_reg i.arg.(0)}, 1\n`;
-            `	brcl	8,{emit_label lbl}\n`
+            `	brcl	8, {emit_label lbl}\n`
         end
     | Lcondbranch3(lbl0, lbl1, lbl2) ->
         `	cgfi	{emit_reg i.arg.(0)}, 1\n`;
         begin match lbl0 with
           None -> ()
-        | Some lbl -> `	brcl	4,{emit_label lbl}\n`
+        | Some lbl -> `	brcl	4, {emit_label lbl}\n`
         end;
         begin match lbl1 with
           None -> ()
-        | Some lbl -> `	brcl	8,	{emit_label lbl}\n`
+        | Some lbl -> `	brcl	8, {emit_label lbl}\n`
         end;
         begin match lbl2 with
           None -> ()
-        | Some lbl -> `	brcl	2,	{emit_label lbl}\n`
+        | Some lbl -> `	brcl	2, {emit_label lbl}\n`
         end
     | Lswitch jumptbl ->
         let lbl = new_label() in
diff --git a/configure b/configure
index 049d01769..cdf18257b 100755
--- a/configure
+++ b/configure
@@ -901,7 +901,7 @@ case "$arch,$system" in
                     as="${TOOLPREF}as -mppc"
                     aspp="${TOOLPREF}gcc -m32 -c"
                   fi;;
-  s390x,elf)      as="${TOOLPREF}as -m 64"
+  s390x,elf)      as="${TOOLPREF}as -m 64 -march=$model"
                   aspp="${TOOLPREF}gcc -c -Wa,-march=$model";;
   sparc,solaris)  as="${TOOLPREF}as"
                   case "$cc" in

From 8e8835101a3aba25c8b3c3949cd9fd991f42a81c Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sun, 1 Nov 2015 05:01:03 -0500
Subject: [PATCH 23/27] z port: simplify caml_start_program (cosmetic)

---
 asmrun/s390x.S | 84 +++++++++++++++++++++-----------------------------
 1 file changed, 35 insertions(+), 49 deletions(-)

diff --git a/asmrun/s390x.S b/asmrun/s390x.S
index c7072a08a..2c0c37292 100644
--- a/asmrun/s390x.S
+++ b/asmrun/s390x.S
@@ -218,30 +218,23 @@ caml_start_program:
 
 /* Code shared between caml_start_program and caml_callback */
 .L102:
-    /* Allocate and link stack frame */
-        stg	%r15, -320(%r15)
-        agfi	%r15, -320
-    /* Save return address */
-        stg     %r14, 8(%r15)
-    /* Save all callee-save registers */
-    /* GPR 14 at sp+16 ... GPR 31 at sp+84
-       FPR 14 at sp+92 ... FPR 31 at sp+228 */
-        lgr	%r1, %r15
-        agfi    %r1, 16-8
-        stmg    %r6,%r13, 8(%r1)
-	stg	%r15, 72(%r1)
-	std     %f0, 80(%r1)
-        std     %f8, 88(%r1)
-        std     %f9, 96(%r1)
-        std     %f10, 104(%r1)
-        std     %f11, 112(%r1)
-        std     %f12, 120(%r1)
-        std     %f13, 128(%r1)
-        std     %f14, 136(%r1)
-        std     %f15, 144(%r1)
+    /* Allocate stack frame */
+        lay	%r15, -144(%r15)
+    /* Save all callee-save registers + return address */
+    /* GPR 6..14 at sp + 0 ... sp + 64
+       FPR 10..15 at sp + 72 ... sp + 128 */
+        stmg    %r6,%r14, 0(%r15)
+        std     %f8, 72(%r15)
+        std     %f9, 80(%r15)
+        std     %f10, 88(%r15)
+        std     %f11, 96(%r15)
+        std     %f12, 104(%r15)
+        std     %f13, 112(%r15)
+        std     %f14, 120(%r15)
+        std     %f15, 128(%r15)
 
     /* Set up a callback link */
-        agfi    %r15, -32
+        lay     %r15, -32(%r15)
         Loadglobal(%r1, caml_bottom_of_stack)
         stg     %r1, 0(%r15)
         Loadglobal(%r1, caml_last_return_address)
@@ -252,56 +245,49 @@ caml_start_program:
         brasl   %r14, .L103
         j       .L104
 .L103:
-        agfi    %r15,  -16
+	lay	%r15, -16(%r15)
         stg     %r14, 0(%r15)
         Loadglobal(%r1, caml_exception_pointer)
-        stg      %r1, 8(%r15)
+        stg     %r1, 8(%r15)
         lgr     %r13, %r15
     /* Reload allocation pointers */
         Loadglobal(%r11, caml_young_ptr)
         Loadglobal(%r10, caml_young_limit)
-    /* Say we are back into OCaml code */
-        lgfi   %r1, 0
-        Storeglobal(%r1, caml_last_return_address)
     /* Call the OCaml code */
         lgr %r1,%r0
-.L105:
         basr %r14, %r1
+.L105:
     /* Pop the trap frame, restoring caml_exception_pointer */
         lg     %r0, 8(%r15)
         Storeglobal(%r0, caml_exception_pointer)
-        agfi    %r15, 16
+        la	%r15, 16(%r15)
     /* Pop the callback link, restoring the global variables */
 .L106:
         lg      %r5, 0(%r15)
         lg      %r6, 8(%r15)
-        nill	%r6, 0xFFFE
         lg      %r1, 16(%r15)
         Storeglobal(%r5, caml_bottom_of_stack)
         Storeglobal(%r6, caml_last_return_address)
         Storeglobal(%r1, caml_gc_regs)
-        agfi    %r15, 32
+        la	%r15, 32(%r15)
+
     /* Update allocation pointer */
         Storeglobal(%r11, caml_young_ptr)
-    /* Restore callee-save registers */
-        lgr	%r1, %r15
-        agfi    %r1, 16-8
-        lmg     %r6,%r13, 8(%r1)
-	ld     %f0, 80(%r1)
-        ld     %f8, 88(%r1)
-        ld     %f9, 96(%r1)
-        ld     %f10, 104(%r1)
-        ld     %f11, 112(%r1)
-        ld     %f12, 120(%r1)
-        ld     %f13, 128(%r1)
-        ld     %f14, 136(%r1)
-        ld     %f15, 144(%r1)
 
-    /* Reload return address */
-        lg     %r1, 8(%r15)
+	/* Restore registers */
+        lmg    %r6,%r14, 0(%r15)
+        ld     %f8, 72(%r15)
+        ld     %f9, 80(%r15)
+        ld     %f10, 88(%r15)
+        ld     %f11, 96(%r15)
+        ld     %f12, 104(%r15)
+        ld     %f13, 112(%r15)
+        ld     %f14, 120(%r15)
+        ld     %f15, 128(%r15)
+
     /* Return */
-        agfi    %r15,  320
-        br	%r1
+	lay	%r15, 144(%r15)
+	br	%r14
 
     /* The trap handler: */
 .L104:
@@ -361,7 +347,7 @@ caml_system__code_end:
         .type   caml_system__frametable, @object
 caml_system__frametable:
         .quad   1               /* one descriptor */
-        .quad   .L105 + 2       /* return address into callback */
+        .quad   .L105           /* return address into callback */
         .short  -1              /* negative size count => use callback link */
         .short  0               /* no roots here */
 	.align	8

From 0196d724dad319ab8d7f021ed499f8c61d57668a Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sun, 1 Nov 2015 05:51:51 -0500
Subject: [PATCH 24/27] z port: adjust instruction latencies for basic-block
 scheduling

The latencies are based on wild guesses for the z10.  Since newer z processors are out-of-order, basic-block scheduling could also be turned off entirely.
---
 asmcomp/s390x/scheduling.ml | 46 ++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/asmcomp/s390x/scheduling.ml b/asmcomp/s390x/scheduling.ml
index 66bb3ea69..46da31477 100644
--- a/asmcomp/s390x/scheduling.ml
+++ b/asmcomp/s390x/scheduling.ml
@@ -12,50 +12,48 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* Instruction scheduling for the Z processor*)
+(* Instruction scheduling for the Z processor *)
 
 open Arch
 open Mach
 
+(* The z10 processor is in-order, dual-issue.  It could benefit from some
+   basic-block scheduling, although precise latency information
+   is not available.
+   The z196 and later are out-of-order processors.  Basic-block
+   scheduling probably makes no difference. *)
+
 class scheduler = object
 
 inherit Schedgen.scheduler_generic
 
-(* Latencies (in cycles). Based roughly on the "common model". *)
+(* Latencies (in cycles). Wild guesses.  We multiply all latencies by 2
+   to favor dual-issue. *)
 
 method oper_latency = function
-    Ireload -> 2
-  | Iload(_, _) -> 2
-  | Iconst_float _ -> 2 (* turned into a load *)
-  | Iconst_symbol _ -> 1
-  | Iintop(Imul) -> 9
-  | Iintop(Imulh) -> 20
-  | Iintop_imm(Imul, _) -> 5
-  | Iintop(Idiv | Imod) -> 36
-  | Iaddf | Isubf -> 4
-  | Imulf -> 5
-  | Idivf -> 33
-  | Ispecific(Imultaddf | Imultsubf) -> 5
-  | _ -> 1
+    Ireload -> 4
+  | Iload(_, _) -> 4
+  | Iconst_float _ -> 4 (* turned into a load *)
+  | Iintop(Imul) -> 10
+  | Iintop_imm(Imul, _) -> 10
+  | Iaddf | Isubf | Imulf -> 8
+  | Idivf -> 40
+  | Ispecific(Imultaddf | Imultsubf) -> 8
+  | _ -> 2
 
-method reload_retaddr_latency = 12
-  (* If we can have that many cycles between the reloadretaddr and the
-     return, we can expect that the blr branch will be completely folded. *)
+method reload_retaddr_latency = 4
 
 (* Issue cycles.  Rough approximations. *)
 
 method oper_issue_cycles = function
-    Iconst_float _ | Iconst_symbol _ -> 2
   | Ialloc _ -> 4
-  | Iintop(Imod) -> 40 (* assuming full stall *)
+  | Iintop(Imulh) -> 15
+  | Iintop(Idiv|Imod) -> 20
   | Iintop(Icomp _) -> 4
   | Iintop_imm(Icomp _, _) -> 4
-  | Ifloatofint -> 9
-  | Iintoffloat -> 4
   | _ -> 1
 
-method reload_retaddr_issue_cycles = 3
-  (* load then stalling mtlr *)
+method reload_retaddr_issue_cycles = 1
 
 end
 

From 770853b3f11ad31deee8e7a15de4d8d09296f2f7 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Sun, 1 Nov 2015 06:30:30 -0500
Subject: [PATCH 25/27] z port: fix up tabs and spaces (cosmetic)

House rules.
---
 asmcomp/s390x/CSE.ml       |   3 +-
 asmcomp/s390x/emit.mlp     | 146 ++++++++++++++++++-------------------
 asmcomp/s390x/proc.ml      |  23 +++---
 asmcomp/s390x/selection.ml |   2 +-
 4 files changed, 86 insertions(+), 88 deletions(-)

diff --git a/asmcomp/s390x/CSE.ml b/asmcomp/s390x/CSE.ml
index 12a65fbf2..735ec098c 100644
--- a/asmcomp/s390x/CSE.ml
+++ b/asmcomp/s390x/CSE.ml
@@ -29,7 +29,8 @@ method! class_of_operation op =
 
 method! is_cheap_operation op =
   match op with
-  | Iconst_int n | Iconst_blockheader n -> n >= -0x8000_0000n && n <= 0x7FFF_FFFFn
+  | Iconst_int n | Iconst_blockheader n ->
+      n >= -0x8000_0000n && n <= 0x7FFF_FFFFn
   | _ -> false
 
 end
diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index 0e4729e5c..fda3885cc 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -53,9 +53,9 @@ let emit_symbol s = Emitaux.emit_symbol '.' s
 
 let emit_call s =
    if !pic_code then
-    `brasl      %r14, {emit_symbol s}@PLT`
+    `brasl	%r14, {emit_symbol s}@PLT`
    else
-    `brasl      %r14, {emit_symbol s}`
+    `brasl	%r14, {emit_symbol s}`
 
 (* Output a label *)
 
@@ -185,7 +185,7 @@ let call_gc_sites = ref ([] : gc_call list)
 
 let emit_call_gc gc =
   `{emit_label gc.gc_lbl}:	{emit_call "caml_call_gc"}\n`;
-  `{emit_label gc.gc_frame_lbl}:	brcl 15, {emit_label gc.gc_return_lbl}\n`
+  `{emit_label gc.gc_frame_lbl}:	brcl	15, {emit_label gc.gc_return_lbl}\n`
 
 (* Record calls to caml_ml_array_bound_error, emitted out of line. *)
 
@@ -290,17 +290,17 @@ let emit_instr i =
         if src.loc <> dst.loc then begin
            match (src, dst) with
               {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
-                `	lgr     {emit_reg dst}, {emit_reg src}\n`
+                `	lgr	{emit_reg dst}, {emit_reg src}\n`
             | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
-                `	ldr     {emit_reg dst}, {emit_reg src}\n`
+                `	ldr	{emit_reg dst}, {emit_reg src}\n`
             | {loc = Reg rs; typ = (Val | Int | Addr)}, {loc = Stack sd} ->
-                `	stg     {emit_reg src}, {emit_stack dst}\n`
+                `	stg	{emit_reg src}, {emit_stack dst}\n`
             | {loc = Reg rs; typ = Float}, {loc = Stack sd} ->
-                `	std     {emit_reg src}, {emit_stack dst}\n`
+                `	std	{emit_reg src}, {emit_stack dst}\n`
             | {loc = Stack ss; typ = (Val | Int | Addr)}, {loc = Reg rd} ->
-                `	lg      {emit_reg dst}, {emit_stack src}\n`
+                `	lg	{emit_reg dst}, {emit_stack src}\n`
             | {loc = Stack ss; typ = Float}, {loc = Reg rd} ->
-                `	ldy      {emit_reg dst}, {emit_stack src}\n`
+                `	ldy	{emit_reg dst}, {emit_stack src}\n`
             | (_, _) ->
                 fatal_error "Emit: Imove"
         end
@@ -317,39 +317,39 @@ let emit_instr i =
     | Lop(Iconst_float f) ->
         let lbl = new_label() in
         float_literals := (Int64.bits_of_float f, lbl) :: !float_literals;
-        `	larl %r1, {emit_label lbl}\n`;
-        `	ld  {emit_reg i.res.(0)}, 0(%r1)\n`
+        `	larl	%r1, {emit_label lbl}\n`;
+        `	ld	{emit_reg i.res.(0)}, 0(%r1)\n`
      | Lop(Iconst_symbol s) ->
         if !pic_code then
         `	lgrl	{emit_reg i.res.(0)}, {emit_symbol s}@GOTENT\n`
         else
-        `	larl {emit_reg i.res.(0)}, {emit_symbol s}\n`;
+        `	larl	{emit_reg i.res.(0)}, {emit_symbol s}\n`;
     | Lop(Icall_ind) ->
         `	basr	%r14, {emit_reg i.arg.(0)}\n`;
         let lbl = record_frame i.live i.dbg in
-	 `{emit_label lbl}:\n`
+         `{emit_label lbl}:\n`
 
     | Lop(Icall_imm s) ->
         if !pic_code then
         `	brasl	%r14, {emit_symbol s}@PLT\n`
         else
-        `	brasl   %r14, {emit_symbol s}\n`;
+        `	brasl	%r14, {emit_symbol s}\n`;
         let lbl = record_frame i.live i.dbg in
-	 `{emit_label lbl}:\n`;
+         `{emit_label lbl}:\n`;
     | Lop(Itailcall_ind) ->
         let n = frame_size() in
         if !contains_calls then
           `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
-	emit_stack_adjust (-n);
+        emit_stack_adjust (-n);
         `	br	{emit_reg i.arg.(0)}\n`
     | Lop(Itailcall_imm s) ->
         if s = !function_name then
-          `	brcl	15,{emit_label !tailrec_entry_point}\n`
+          `	brcl	15, {emit_label !tailrec_entry_point}\n`
         else begin
           let n = frame_size() in
           if !contains_calls then
             `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
-	  emit_stack_adjust (-n);
+          emit_stack_adjust (-n);
           if !pic_code then
             `	brcl	15, {emit_symbol s}@PLT\n`
           else
@@ -362,16 +362,16 @@ let emit_instr i =
           `	lgrl	%r7, {emit_symbol s}@GOTENT\n`;
           `	brasl	%r14, {emit_symbol "caml_c_call"}@PLT\n`
           end else begin
-          `	larl %r7, {emit_symbol s}\n`;
+          `	larl	%r7, {emit_symbol s}\n`;
           `	brasl	%r14, {emit_symbol "caml_c_call"}\n`
           end;
           let lbl = record_frame i.live i.dbg in
-	   `{emit_label lbl}:\n`;
+           `{emit_label lbl}:\n`;
         end else begin
           if !pic_code then
-          `	brasl   %r14, {emit_symbol s}@PLT\n`
+          `	brasl	%r14, {emit_symbol s}@PLT\n`
           else
-          `	brasl   %r14, {emit_symbol s}\n`
+          `	brasl	%r14, {emit_symbol s}\n`
        end
 
      | Lop(Istackoffset n) ->
@@ -387,7 +387,7 @@ let emit_instr i =
           | Sixteen_signed -> "lgh"
           | Thirtytwo_unsigned -> "llgf"
           | Thirtytwo_signed -> "lgf"
-	  | Word_int | Word_val -> "lg"
+          | Word_int | Word_val -> "lg"
           | Single -> "ley"
           | Double | Double_u -> "ldy" in
         emit_load_store loadinstr addr i.arg 0 i.res.(0);
@@ -402,35 +402,35 @@ let emit_instr i =
           match chunk with
             Byte_unsigned | Byte_signed -> "stcy"
           | Sixteen_unsigned | Sixteen_signed -> "sthy"
-	  | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
-	  | Word_int | Word_val -> "stg"
+          | Thirtytwo_unsigned | Thirtytwo_signed -> "sty"
+          | Word_int | Word_val -> "stg"
           | Single -> assert false
           | Double | Double_u -> "stdy" in
         emit_load_store storeinstr addr i.arg 1 i.arg.(0)
-        
+
     | Lop(Ialloc n) ->
-	let lbl_redo = new_label() in
-	let lbl_call_gc = new_label() in
+        let lbl_redo = new_label() in
+        let lbl_call_gc = new_label() in
         let lbl_frame = record_frame i.live i.dbg in
         call_gc_sites :=
           { gc_lbl = lbl_call_gc;
             gc_return_lbl = lbl_redo;
             gc_frame_lbl = lbl_frame } :: !call_gc_sites;
-	`{emit_label lbl_redo}:`;
+        `{emit_label lbl_redo}:`;
         `	lay	%r11, {emit_int(-n)}(%r11)\n`;
         `	clgr	%r11, %r10\n`;
         `	brcl	4, {emit_label lbl_call_gc}\n`;  (* less than *)
-	`	la	{emit_reg i.res.(0)}, 8(%r11)\n`
+        `	la	{emit_reg i.res.(0)}, 8(%r11)\n`
 
     | Lop(Iintop Imulh) ->
        (* Hacker's Delight section 8.3:
             mul-high-signed(a, b) = mul-high-unsigned(a, b)
                                     - a  if b < 0
-				    - b  if a < 0
+                                    - b  if a < 0
           or, without branches,
             mul-high-signed(a, b) = mul-high-unsigned(a, b)
                                     - (a & (b >>s 63))
-				    - (b & (a >>s 63))
+                                    - (b & (a >>s 63))
        *)
        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
        `	mlgr	%r0, {emit_reg i.arg.(1)}\n`;
@@ -443,31 +443,31 @@ let emit_instr i =
        `	sgr	%r0, %r1\n`;
        `	lgr	{emit_reg i.res.(0)}, %r0\n`
     | Lop(Iintop Imod) ->
-        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
-        `	dsgr    %r0, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, %r0\n`
+        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr	%r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr	{emit_reg i.res.(0)}, %r0\n`
     | Lop(Iintop Idiv) ->
-        `	lgr     %r1, {emit_reg i.arg.(0)}\n`;
-        `	dsgr    %r0, {emit_reg i.arg.(1)}\n`;
-        `	lgr     {emit_reg i.res.(0)}, %r1\n`
+        `	lgr	%r1, {emit_reg i.arg.(0)}\n`;
+        `	dsgr	%r0, {emit_reg i.arg.(1)}\n`;
+        `	lgr	{emit_reg i.res.(0)}, %r1\n`
     | Lop(Iintop Ilsl) ->
-        `	sllg    {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
     | Lop(Iintop Ilsr) ->
-        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
     | Lop(Iintop Iasr) ->
-        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`;
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, 0({emit_reg i.arg.(1)})\n`
     | Lop(Iintop(Icomp cmp)) ->
         let lbl = new_label() in
         let (comp, mask) = name_for_int_comparison cmp in
         `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
         `	lghi	{emit_reg i.res.(0)}, 1\n`;
-	`	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	brc	{emit_int mask}, {emit_label lbl}\n`;
         `	lghi	{emit_reg i.res.(0)}, 0\n`;
-	`{emit_label lbl}:\n`
+        `{emit_label lbl}:\n`
     | Lop(Iintop Icheckbound) ->
         let lbl = bound_error_label i.dbg in
-	`	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
-	`	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
+        `	clgr	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+        `	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
     | Lop(Iintop op) ->
         assert (i.arg.(0).loc = i.res.(0).loc);
         let instr = name_for_intop op in
@@ -481,23 +481,23 @@ let emit_instr i =
         let (comp, mask) = name_for_int_comparison_imm cmp in
         `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}\n`;
         `	lghi	{emit_reg i.res.(0)}, 1\n`;
-	`	brc	{emit_int mask}, {emit_label lbl}\n`;
+        `	brc	{emit_int mask}, {emit_label lbl}\n`;
         `	lghi	{emit_reg i.res.(0)}, 0\n`;
-	`{emit_label lbl}:\n`
+        `{emit_label lbl}:\n`
     | Lop(Iintop_imm(Icheckbound, n)) ->
        let lbl = bound_error_label i.dbg in 
        if n >= 0 then begin
-	`	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
-	`	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
+        `	clgfi	{emit_reg i.arg.(0)}, {emit_int n}\n`;  
+        `	brcl	12, {emit_label lbl}\n`  (* branch if unsigned le *)
        end else begin
-	`	brcl	15, {emit_label lbl}\n`  (* branch always *)
+        `	brcl	15, {emit_label lbl}\n`  (* branch always *)
        end
     | Lop(Iintop_imm(Ilsl, n)) ->
-        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+        `	sllg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
     | Lop(Iintop_imm(Ilsr, n)) ->
-        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+        `	srlg	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
     | Lop(Iintop_imm(Iasr, n)) ->
-        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`;
+        `	srag	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)},{emit_int n}(%r0)\n`
     | Lop(Iintop_imm(Iand, n)) ->
         assert (i.arg.(0).loc = i.res.(0).loc);
         `	nilf	{emit_reg i.res.(0)}, {emit_int (n land 0xFFFF_FFFF)}\n`
@@ -520,7 +520,7 @@ let emit_instr i =
         let instr = name_for_floatop2 op in
         `	{emit_string instr}	{emit_reg i.res.(0)}, {emit_reg i.arg.(1)}\n`;
     | Lop(Ifloatofint) ->
-	  `	cdgbr	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
+          `	cdgbr	{emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
     | Lop(Iintoffloat) ->
         (* rounding method #5 = round toward 0 *)
         `	cgdbr	{emit_reg i.res.(0)}, 5, {emit_reg i.arg.(0)}\n`
@@ -533,7 +533,7 @@ let emit_instr i =
         `	lg	%r14, {emit_int(n - size_addr)}(%r15)\n`
     | Lreturn ->
         let n = frame_size() in
-	emit_stack_adjust (-n);
+        emit_stack_adjust (-n);
         `	br	%r14\n`
     | Llabel lbl ->
         `{emit_label lbl}:\n`
@@ -543,10 +543,10 @@ let emit_instr i =
         begin match tst with
           Itruetest ->
             `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
-            `	brcl	7,	{emit_label lbl}\n`
+            `	brcl	7, {emit_label lbl}\n`
         | Ifalsetest ->
             `	cgfi	{emit_reg i.arg.(0)}, 0\n`;
-            `	brcl	8,	{emit_label lbl}\n`
+            `	brcl	8, {emit_label lbl}\n`
         | Iinttest cmp ->
             let (comp, mask) = name_for_int_comparison cmp in
             `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
@@ -596,33 +596,33 @@ let emit_instr i =
         done;
         emit_string code_space
     | Lsetuptrap lbl ->
-        `	brasl   %r14, {emit_label lbl}\n`;
+        `	brasl	%r14, {emit_label lbl}\n`;
     | Lpushtrap ->
         stack_offset := !stack_offset + 16;
-	emit_stack_adjust 16;
-        `	stg     %r14, 0(%r15)\n`;
-        `	stg     %r13, {emit_int size_addr}(%r15)\n`;
-        `	lgr     %r13, %r15\n`
+        emit_stack_adjust 16;
+        `	stg	%r14, 0(%r15)\n`;
+        `	stg	%r13, {emit_int size_addr}(%r15)\n`;
+        `	lgr	%r13, %r15\n`
     | Lpoptrap ->
-        `	lg      %r13, {emit_int size_addr}(%r15)\n`;
-	emit_stack_adjust (-16);
+        `	lg	%r13, {emit_int size_addr}(%r15)\n`;
+        emit_stack_adjust (-16);
         stack_offset := !stack_offset - 16
     | Lraise k ->
         begin match !Clflags.debug, k with
         | true, Lambda.Raise_regular ->
-          `	brasl   %r14, {emit_symbol "caml_raise_exn"}\n`;
+          `	brasl	%r14, {emit_symbol "caml_raise_exn"}\n`;
           let lbl = record_frame Reg.Set.empty i.dbg in
-	   `{emit_label lbl}:\n`
+          `{emit_label lbl}:\n`
         | true, Lambda.Raise_reraise ->
-          `	brasl   %r14, {emit_symbol "caml_reraise_exn"}\n`;
+          `	brasl	%r14, {emit_symbol "caml_reraise_exn"}\n`;
           let lbl = record_frame Reg.Set.empty i.dbg in
-	   `{emit_label lbl}:\n`
+          `{emit_label lbl}:\n`
         | false, _
         | true, Lambda.Raise_notrace ->
-          `	lg      %r1, 0(%r13)\n`;
-          `	lgr     %r15, %r13\n`;
-          `	lg      %r13, {emit_int size_addr}(%r15)\n`;
-	  emit_stack_adjust (-16);
+          `	lg	%r1, 0(%r13)\n`;
+          `	lgr	%r15, %r13\n`;
+          `	lg	%r13, {emit_int size_addr}(%r15)\n`;
+          emit_stack_adjust (-16);
           `	br	%r1\n`
         end
 
@@ -656,7 +656,7 @@ let fundecl fundecl =
   let n = frame_size() in
   emit_stack_adjust n;
   if !contains_calls then
-    `	stg     %r14, {emit_int(n - size_addr)}(%r15)\n`;
+    `	stg	%r14, {emit_int(n - size_addr)}(%r15)\n`;
   `{emit_label !tailrec_entry_point}:\n`;
   emit_all fundecl.fun_body;
   (* Emit the glue code to call the GC *)
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
index 98d8107bc..5f1e44bb2 100644
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@@ -30,19 +30,19 @@ let word_addressed = false
     0                   temporary, null register for some operations (volatile)
     1                   temporary (volatile)
     2 - 5               function arguments and results (volatile)
-    6              	function arguments and results (persevered by C)
-    7 - 9		general purpose, preserved by C
+    6                   function arguments and results (persevered by C)
+    7 - 9               general purpose, preserved by C
     10                  allocation limit (preserved by C)
     11                  allocation pointer (preserved by C)
-    12			general purpose  (preserved by C)
+    12                  general purpose  (preserved by C)
     13                  trap pointer (preserved by C)
-    14			return address (volatile)
-    15			stack pointer (preserved by C)
+    14                  return address (volatile)
+    15                  stack pointer (preserved by C)
   Floating-point register map:
-    0, 2, 4, 6		function arguments and results (volatile)
-    1, 3, 5, 7		general purpose (volatile)
-    8 - 14          	general purpose, preserved by C
-    15			temporary, preserved by C
+    0, 2, 4, 6          function arguments and results (volatile)
+    1, 3, 5, 7          general purpose (volatile)
+    8 - 14              general purpose, preserved by C
+    15                  temporary, preserved by C
 
 Note: integer register r12 is used as GOT pointer by some C compilers.
 The code generated by OCaml does not need a GOT pointer, using PC-relative
@@ -53,7 +53,7 @@ let int_reg_name =
     [| "%r2"; "%r3"; "%r4"; "%r5"; "%r6"; "%r7"; "%r8"; "%r9"; "%r12" |]
 
 let float_reg_name =
-    [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7"; 
+    [| "%f0"; "%f2"; "%f4"; "%f6"; "%f1"; "%f3"; "%f5"; "%f7";
        "%f8"; "%f9"; "%f10"; "%f11"; "%f12"; "%f13"; "%f14"; "%f15" |]
 
 let num_register_classes = 2
@@ -206,6 +206,3 @@ let assemble_file infile outfile =
                  Filename.quote outfile ^ " " ^ Filename.quote infile)
 
 let init () = ()
-
-
-
diff --git a/asmcomp/s390x/selection.ml b/asmcomp/s390x/selection.ml
index 740cf0c88..8417f7d6c 100644
--- a/asmcomp/s390x/selection.ml
+++ b/asmcomp/s390x/selection.ml
@@ -75,7 +75,7 @@ method! select_operation op args =
   match (op, args) with
   (* Z does not support immediate operands for multiply high *)
     (Cmulhi, _) -> (Iintop Imulh, args)
-  (* The and, or and xor instructions have a different range of immediate 
+  (* The and, or and xor instructions have a different range of immediate
      operands than the other instructions *)
   | (Cand, _) -> self#select_logical Iand (-0x1_0000_0000) (-1) args
   | (Cor, _) -> self#select_logical Ior 0 0xFFFF_FFFF args

From 64ab62221b824083337c1bd0a83cb4d5731318f6 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 19 Nov 2015 08:21:38 -0500
Subject: [PATCH 26/27] z port: update emit.mlp to new format for FP literals

---
 asmcomp/s390x/emit.mlp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp
index fda3885cc..bb1fb65fb 100644
--- a/asmcomp/s390x/emit.mlp
+++ b/asmcomp/s390x/emit.mlp
@@ -316,7 +316,7 @@ let emit_instr i =
           end
     | Lop(Iconst_float f) ->
         let lbl = new_label() in
-        float_literals := (Int64.bits_of_float f, lbl) :: !float_literals;
+        float_literals := (f, lbl) :: !float_literals;
         `	larl	%r1, {emit_label lbl}\n`;
         `	ld	{emit_reg i.res.(0)}, 0(%r1)\n`
      | Lop(Iconst_symbol s) ->

From 32def141e5875fbf37ac4f870d5074a1fc6a40d1 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@inria.fr>
Date: Thu, 19 Nov 2015 08:26:59 -0500
Subject: [PATCH 27/27] z port: update Changes

---
 Changes | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Changes b/Changes
index 1af74fe2c..a2b2e485f 100644
--- a/Changes
+++ b/Changes
@@ -101,6 +101,8 @@ Compilers:
   (Mark Shinwell)
 - GPR#270: Make [transl_exception_constructor] generate [Immutable] blocks
   (Mark Shinwell)
+- GPR#275: native-code generator for IBM z System running Linux
+  (Bill O'Farrell, Tristan Amini, Xavier Leroy)
 - GPR#282: relax short-paths safety check in presence of module aliases, take
   penalty into account while building the printing map.
   (Thomas Refis, Leo White)