(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* Emission of Alpha assembly code *)

module LabelSet =
  Set.Make(struct type t = Linearize.label let compare = compare end)

open Misc
open Cmm
open Arch
open Proc
open Reg
open Mach
open Linearize
open Emitaux

(* First pass: insert Iloadgp instructions where needed *)

let instr_copy i next =
  { desc = i.desc; next = next; arg = i.arg; res = i.res; live = i.live }

let insert_load_gp f =

  let labels_needing_gp = ref LabelSet.empty in
  let fixpoint_reached = ref false in

  let label_needs_gp lbl =
    LabelSet.mem lbl !labels_needing_gp in
  let opt_label_needs_gp default = function
      None -> default
    | Some lbl -> label_needs_gp lbl in
  let set_label_needs_gp lbl =
    if not (label_needs_gp lbl) then begin
      fixpoint_reached := false;
      labels_needing_gp := LabelSet.add lbl !labels_needing_gp
    end in

  let tailrec_entry_point = new_label() in

  (* Determine if $gp is needed before an instruction.
     [next] says whether $gp is needed just after (i.e. by the following
     instruction). *)
  let instr_needs_gp next = function
      Lend -> false
    | Lop(Iconst_int n) ->         (* for large n, turned into ldq ($gp) *)
        next || Nativeint.cmp n (-0x80000000) < 0
             || Nativeint.cmp n 0x7FFFFFFF > 0
    | Lop(Iconst_float s) -> true       (* turned into ldq ($gp) *)
    | Lop(Iconst_symbol s) -> true      (* turned into ldq ($gp) *)
    | Lop(Icall_ind) -> false           (* does ldgp if needed afterwards *)
    | Lop(Icall_imm s) -> false         (* does ldgp if needed afterwards *)
    | Lop(Itailcall_ind) -> false
    | Lop(Itailcall_imm s) ->
        if s = f.fun_name then label_needs_gp tailrec_entry_point else false
    | Lop(Iextcall(_, _)) -> true       (* loads $27 from ($gp) *)
    | Lop(Iload(_, Ibased(_, _))) -> true (* loads address from ($gp) *)
    | Lop(Istore(_, Ibased(_, _))) -> true (* loads address from ($gp) *)
    | Lop(Iintop(Idiv | Imod)) -> true  (* divq and remq can be turned into *)
    | Lop(Iintop_imm((Idiv | Imod), _)) -> true (* a function call *)
    | Lop(Iintop_imm(_, n)) ->     (* for large n, turned into ldq ($gp) *)
        next || n < -0x80000000 || n > 0x7FFFFFFF
    | Lop _ -> next
    | Lreloadretaddr -> next
    | Lreturn -> false
    | Llabel lbl -> if next then set_label_needs_gp lbl; next
    | Lbranch lbl -> label_needs_gp lbl
    | Lcondbranch(tst, lbl) -> next || label_needs_gp lbl
    | Lcondbranch3(lbl1, lbl2, lbl3) ->
        opt_label_needs_gp next lbl1 ||
        opt_label_needs_gp next lbl2 ||
        opt_label_needs_gp next lbl3
    | Lswitch lblv ->
        let n = ref false in
        for i = 0 to Array.length lblv - 1 do
          n := !n || label_needs_gp lblv.(i)
        done;
        !n
    | Lsetuptrap lbl -> label_needs_gp lbl
    | Lpushtrap -> next
    | Lpoptrap -> next
    | Lraise -> false in

  let rec needs_gp i =
    if i.desc = Lend
    then false
    else instr_needs_gp (needs_gp i.next) i.desc in

  while not !fixpoint_reached do
    fixpoint_reached := true;
    if needs_gp f.fun_body then set_label_needs_gp tailrec_entry_point
  done;

  (* Insert Ireloadgp instructions after calls where needed *)
  let rec insert_reload_gp i =
    if i.desc = Lend then (i, false) else begin
      let (new_next, needs_next) = insert_reload_gp i.next in
      let new_instr =
        match i.desc with
          (* If the instruction destroys $gp and $gp is needed afterwards,
             insert a ldgp after the instructions. *)
          Lop(Icall_ind | Icall_imm _) when needs_next ->
            instr_copy i
              (instr_cons (Lop(Ispecific(Ireloadgp true))) [||] [||] new_next)
        | Lop(Iextcall(_, false)) | Lsetuptrap _ when needs_next ->
            instr_copy i
              (instr_cons (Lop(Ispecific(Ireloadgp false))) [||] [||] new_next)
        | _ ->
            instr_copy i new_next in
      (new_instr, instr_needs_gp needs_next i.desc)
    end in

  let (new_body, uses_gp) = insert_reload_gp f.fun_body in
  ({fun_body = new_body; fun_name = f.fun_name; fun_fast = f.fun_fast},
   uses_gp)

(* Second pass: code generation proper *)

(* Tradeoff between code size and code speed *)

let fastcode_flag = ref true

(* Output a label *)

let emit_label lbl =
  emit_string "$"; emit_int lbl

(* Output a symbol *)

let emit_symbol s =
  Emitaux.emit_symbol '$' s

(* Output a pseudo-register *)

let emit_reg r =
  match r.loc with
    Reg r -> emit_string (register_name r)
  | _ -> fatal_error "Emit_alpha.emit_reg"

(* Layout of the stack frame *)

let stack_offset = ref 0

let frame_size () =
  let size =
    !stack_offset +
    8 * (num_stack_slots.(0) + num_stack_slots.(1)) +
    (if !contains_calls then 8 else 0) in
  Misc.align size 16

let slot_offset loc cl =
  match loc with
    Incoming n -> frame_size() + n
  | Local n ->
      if cl = 0
      then !stack_offset + n * 8
      else !stack_offset + (num_stack_slots.(0) + n) * 8
  | Outgoing n -> n

(* Output a stack reference *)

let emit_stack r =
  match r.loc with
    Stack s ->
      let ofs = slot_offset s (register_class r) in `{emit_int ofs}($sp)`
  | _ -> fatal_error "Emit_alpha.emit_stack"

(* Output an addressing mode *)

let emit_addressing addr r n =
  match addr with
    Iindexed ofs ->
      `{emit_int ofs}({emit_reg r.(n)})`
  | Ibased(s, ofs) ->
      `{emit_symbol s}`;
      if ofs > 0 then ` + {emit_int ofs}`;
      if ofs < 0 then ` - {emit_int(-ofs)}`

(* Immediate operands *)

let is_immediate n = digital_asm || (n >= 0 && n <= 255)

(* Communicate live registers at call points to the assembler *)

let int_reg_number = [|
  0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12;
  16; 17; 18; 19; 20; 21; 22
|]
  
let float_reg_number = [|
  0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
  16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 29; 30
|]

let liveregs instr extra_msk =
  (* $13, $14, $15 always live *)
  let int_mask = ref(0x00070000 lor extra_msk)
  and float_mask = ref 0 in
  let add_register = function
      {loc = Reg r; typ = (Int | Addr)} ->
        int_mask := !int_mask lor (1 lsl (31 - int_reg_number.(r)))
    | {loc = Reg r; typ = Float} ->
        float_mask := !float_mask lor (1 lsl (31 - float_reg_number.(r - 100)))
    | _ -> () in
  Reg.Set.iter add_register instr.live;
  Array.iter add_register instr.arg;
  emit_printf "	.livereg 0x%08x, 0x%08x\n" !int_mask !float_mask

let live_24 = 1 lsl (31 - 24)
let live_25 = 1 lsl (31 - 25)
let live_26 = 1 lsl (31 - 26)
let live_27 = 1 lsl (31 - 27)

(* Record live pointers at call points *)

type frame_descr =
  { fd_lbl: int;                        (* Return address *)
    fd_frame_size: int;                 (* Size of stack frame *)
    fd_live_offset: int list }          (* Offsets/regs of live addresses *)

let frame_descriptors = ref([] : frame_descr list)

let record_frame_label live =
  let lbl = new_label() in
  let live_offset = ref [] in
  Reg.Set.iter
    (function
        {typ = Addr; loc = Reg r} ->
          live_offset := ((int_reg_number.(r) lsl 1) + 1) :: !live_offset
      | {typ = Addr; loc = Stack s} as reg ->
          live_offset := slot_offset s (register_class reg) :: !live_offset
      | _ -> ())
    live;
  frame_descriptors :=
    { fd_lbl = lbl;
      fd_frame_size = frame_size();
      fd_live_offset = !live_offset } :: !frame_descriptors;
  lbl

let record_frame live =
  let lbl = record_frame_label live in `{emit_label lbl}:`

let emit_frame fd =
  `	.quad	{emit_label fd.fd_lbl} + 4\n`;
  `	.word	{emit_int fd.fd_frame_size}\n`;
  `	.word	{emit_int (List.length fd.fd_live_offset)}\n`;
  List.iter
    (fun n ->
      `	.word	{emit_int n}\n`)
    fd.fd_live_offset;
  `	.align	3\n`

(* Record calls to the GC -- we've moved them out of the way *)

type gc_call =
  { gc_lbl: label;                      (* Entry label *)
    gc_return_lbl: label;               (* Where to branch after GC *)
    gc_frame: label;                    (* Label of frame descriptor *)
    gc_instr: instruction }             (* Record live registers *)

let call_gc_sites = ref ([] : gc_call list)

let emit_call_gc gc =
  `{emit_label gc.gc_lbl}:`;
  liveregs gc.gc_instr 0;
  `{emit_label gc.gc_frame}:	bsr	$26, caml_call_gc\n`;
  (* caml_call_gc preserves $gp *)
  `	br	{emit_label gc.gc_return_lbl}\n`

(* Names of various instructions *)

let name_for_int_operation = function
    Iadd -> "addq"
  | Isub -> "subq"
  | Imul -> "mulq"
  | Idiv -> "divq"
  | Imod -> "remq"
  | Iand -> "and"
  | Ior -> "or"
  | Ixor -> "xor"
  | Ilsl -> "sll"
  | Ilsr -> "srl"
  | Iasr -> "sra"
  | _ -> Misc.fatal_error "Emit.name_for_int_operation"

let name_for_float_operation = function
    Inegf -> "fneg"
  | Iabsf -> "fabs"
  | Iaddf -> "addt"
  | Isubf -> "subt"
  | Imulf -> "mult"
  | Idivf -> "divt"
  | _ -> Misc.fatal_error "Emit.name_for_float_operation"

let name_for_specific_operation = function
    Iadd4 -> "s4addq"
  | Iadd8 -> "s8addq"
  | Isub4 -> "s4subq"
  | Isub8 -> "s8subq"
  | _ -> Misc.fatal_error "Emit.name_for_specific_operation"

let name_for_int_comparison = function
    Isigned Ceq -> "cmpeq", true     | Isigned Cne -> "cmpeq", false
  | Isigned Cle -> "cmple", true     | Isigned Cgt -> "cmple", false
  | Isigned Clt -> "cmplt", true     | Isigned Cge -> "cmplt", false
  | Iunsigned Ceq -> "cmpeq", true   | Iunsigned Cne -> "cmpeq", false
  | Iunsigned Cle -> "cmpule", true  | Iunsigned Cgt -> "cmpule", false
  | Iunsigned Clt -> "cmpult", true  | Iunsigned Cge -> "cmpult", false

(* Used for comparisons against 0 *)
let name_for_int_cond_branch = function
    Isigned Ceq -> "beq"     | Isigned Cne -> "bne"
  | Isigned Cle -> "ble"     | Isigned Cgt -> "bgt"
  | Isigned Clt -> "blt"     | Isigned Cge -> "bge"
  | Iunsigned Ceq -> "beq"   | Iunsigned Cne -> "bne"
  | Iunsigned Cle -> "beq"   | Iunsigned Cgt -> "bne"
  | Iunsigned Clt -> "#"     | Iunsigned Cge -> "br"
    (* Always false *)         (* Always true *)

let name_for_float_comparison cmp neg =
  match cmp with
    Ceq -> ("cmpteq", false, neg)  | Cne -> ("cmpteq", false, not neg)
  | Cle -> ("cmptle", false, neg)  | Cgt -> ("cmptlt", true, neg)
  | Clt -> ("cmptlt", false, neg)  | Cge -> ("cmptle", true, neg)

(* Output the assembly code for an instruction *)

(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
(* Label of trap for out-of-range accesses *)
let range_check_trap = ref 0
(* List of floating-point and big integer literals
   (fon non-Digital assemblers) *)
let float_constants = ref ([] : (label * string) list)
let bigint_constants = ref ([] : (label * Nativeint.t) list)

let emit_instr i =
    match i.desc with
      Lend -> ()
    | Lop(Imove | Ispill | Ireload) ->
        let src = i.arg.(0) and dst = i.res.(0) in
        if src.loc <> dst.loc then begin        
          match (src.loc, dst.loc) with
            (Reg rs, Reg rd) ->
              if src.typ = Float then
                `	fmov	{emit_reg src}, {emit_reg dst}\n`
              else
                `	mov	{emit_reg src}, {emit_reg dst}\n`
          | (Reg rs, Stack sd) ->
              if src.typ = Float then
                `	stt	{emit_reg src}, {emit_stack dst}\n`
              else
                `	stq	{emit_reg src}, {emit_stack dst}\n`
          | (Stack ss, Reg rd) ->
              if src.typ = Float then
                `	ldt	{emit_reg dst}, {emit_stack src}\n`
              else
                `	ldq	{emit_reg dst}, {emit_stack src}\n`
          | _ ->
              fatal_error "Emit_alpha: Imove"
        end
    | Lop(Iconst_int n) ->
        if Nativeint.sign n = 0 then
          `	clr	{emit_reg i.res.(0)}\n`
        else if digital_asm || 
                (Nativeint.cmp n (-0x80000000) >= 0 &&
                 Nativeint.cmp n 0x7FFFFFFF <= 0) then
          `	ldiq	{emit_reg i.res.(0)}, {emit_nativeint n}\n`
        else begin
          (* Work around a bug in gas/gld concerning big integer constants *)
          let lbl = new_label() in
          bigint_constants := (lbl, n) :: !bigint_constants;
          `	lda	$25, {emit_label lbl}\n`;
          `	ldq     {emit_reg i.res.(0)}, 0($25)\n`
        end
    | Lop(Iconst_float s) ->
        if digital_asm then
          `	ldit	{emit_reg i.res.(0)}, {emit_string s}\n`
        else if float_of_string s = 0.0 then
          `	fmov	$f31, {emit_reg i.res.(0)}\n`
        else begin
          let lbl = new_label() in
          float_constants := (lbl, s) :: !float_constants;
          `	lda	$25, {emit_label lbl}\n`;
          `	ldt     {emit_reg i.res.(0)}, 0($25)\n`
        end
    | Lop(Iconst_symbol s) ->
        `	lda	{emit_reg i.res.(0)}, {emit_symbol s}\n`
    | Lop(Icall_ind) ->
        liveregs i 0;
        `{record_frame i.live}	jsr	({emit_reg i.arg.(0)})\n`
    | Lop(Icall_imm s) ->
        liveregs i 0;
        `{record_frame i.live}	bsr	$26, {emit_symbol s}\n`
    | Lop(Itailcall_ind) ->
        let n = frame_size() in
        if !contains_calls then
          `	ldq	$26, {emit_int(n - 8)}($sp)\n`;
        if n > 0 then
          `	lda	$sp, {emit_int n}($sp)\n`;
        liveregs i live_26;
        `	jmp	({emit_reg i.arg.(0)})\n`
    | Lop(Itailcall_imm s) ->
        if s = !function_name then begin
          `	br	{emit_label !tailrec_entry_point}\n`
        end else begin
          let n = frame_size() in
          if !contains_calls then
            `	ldq	$26, {emit_int(n - 8)}($sp)\n`;
          if n > 0 then
            `	lda	$sp, {emit_int n}($sp)\n`;
          liveregs i live_26;
          `	br	{emit_symbol s}\n`
        end
    | Lop(Iextcall(s, alloc)) ->
        if alloc then begin
          `	lda	$27, {emit_symbol s}\n`;
          liveregs i live_27;
          `{record_frame i.live}	bsr	$26, caml_c_call\n`
        end else begin
          `	jsr	{emit_symbol s}\n`
        end
    | Lop(Istackoffset n) ->
        `	lda	$sp, {emit_int (-n)}($sp)\n`;
        stack_offset := !stack_offset + n
    | Lop(Iload(chunk, addr)) ->
        let load_instr =
          match chunk with
            Word -> if i.res.(0).typ = Float then "ldt" else "ldq"
          | Byte_unsigned -> "ldbu"
          | Byte_signed -> "ldb"
          | Sixteen_unsigned -> "ldwu"
          | Sixteen_signed -> "ldw" in
        `	{emit_string load_instr}	{emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
    | Lop(Istore(chunk, addr)) ->
        let store_instr =
          match chunk with
            Word -> if i.arg.(0).typ = Float then "stt" else "stq"
          | Byte_unsigned | Byte_signed -> "stb"
          | Sixteen_unsigned | Sixteen_signed -> "stw" in
        `	{emit_string store_instr}	{emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
    | Lop(Ialloc n) ->
        if !fastcode_flag then begin
          let lbl_redo = new_label() in
          let lbl_call_gc = new_label() in
          let lbl_frame = record_frame_label i.live in
          call_gc_sites :=
            { gc_lbl = lbl_call_gc;
              gc_return_lbl = lbl_redo;
              gc_frame = lbl_frame;
              gc_instr = i } :: !call_gc_sites;
          `{emit_label lbl_redo}:	lda	$13, -{emit_int n}($13)\n`;
          `	cmpult	$13, $14, $25\n`;
          `	bne	$25, {emit_label lbl_call_gc}\n`;
          `	addq	$13, 8, {emit_reg i.res.(0)}\n`
        end else begin
          begin match n with
            16 -> liveregs i 0;
                  `{record_frame i.live}	bsr	$26, caml_alloc1\n`
          | 24 -> liveregs i 0;
                  `{record_frame i.live}	bsr	$26, caml_alloc2\n`
          | 32 -> liveregs i 0;
                  `{record_frame i.live}	bsr	$26, caml_alloc3\n`
          | _  -> `	ldiq	$25, {emit_int n}\n`;
                  liveregs i live_25;
                  `{record_frame i.live}	bsr	$26, caml_alloc\n`
          end;
          (* $gp preserved by caml_alloc* *)
          `	addq	$13, 8, {emit_reg i.res.(0)}\n`
        end
    | Lop(Iintop(Icomp cmp)) ->
        let (comp, test) = name_for_int_comparison cmp in
        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`;
        if not test then
          `	xor	{emit_reg i.res.(0)}, 1, {emit_reg i.res.(0)}\n`
    | Lop(Iintop(Icheckbound)) ->
        if !range_check_trap = 0 then range_check_trap := new_label();
        `	cmpule	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, $25\n`;
        `	bne	$25, {emit_label !range_check_trap}\n`
    | Lop(Iintop op) ->
        let instr = name_for_int_operation op in
        `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Idiv, n)) when n = 1 lsl (Misc.log2 n) ->
        let l = Misc.log2 n in
        if is_immediate n then
          `	addq	{emit_reg i.arg.(0)}, {emit_int(n-1)}, $25\n`
        else begin
          `	ldiq	$25, {emit_int(n-1)}\n`;
          `	addq	{emit_reg i.arg.(0)}, $25, $25\n`
        end;
        `	cmovge	{emit_reg i.arg.(0)}, {emit_reg i.arg.(0)}, $25\n`;
        `	sra	$25, {emit_int l}, {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Imod, n)) when n = 1 lsl (Misc.log2 n) ->
        let l = Misc.log2 n in
        if is_immediate n then
          `	and	{emit_reg i.arg.(0)}, {emit_int(n-1)}, $25\n`
        else begin
          `	ldiq	$25, {emit_int (n-1)}\n`;
          `	and	{emit_reg i.arg.(0)}, $25, $25\n`
        end;
        `	subq	$25, {emit_int n}, $24\n`;
        `	cmovge	{emit_reg i.arg.(0)}, $25, $24\n`;
        `	cmoveq	$25, $25, $24\n`;
        `	mov	$24, {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Ilsl, 1)) ->
        (* Turn x << 1 into x + x, slightly faster according to the docs *)
        `	addq	{emit_reg i.arg.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
    | Lop(Iintop_imm(Icomp cmp, n)) ->
        let (comp, test) = name_for_int_comparison cmp in
        `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n`;
        if not test then
          `	xor	{emit_reg i.res.(0)}, 1, {emit_reg i.res.(0)}\n`

    | Lop(Iintop_imm(Icheckbound, n)) ->
        if !range_check_trap = 0 then range_check_trap := new_label();
        `	cmpule	{emit_reg i.arg.(0)}, {emit_int n}, $25\n`;
        `	bne	$25, {emit_label !range_check_trap}\n`
    | Lop(Iintop_imm(op, n)) ->
        let instr = name_for_int_operation op in
        `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n`
    | Lop(Inegf | Iabsf as op) ->
        let instr = name_for_float_operation op in
        `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
    | Lop(Iaddf | Isubf | Imulf | Idivf as op) ->
        let instr = name_for_float_operation op in
        `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
    | Lop(Ifloatofint) ->
        `	.set	noat\n`;
        `	lda	$sp, -8($sp)\n`;
        `	stq	{emit_reg i.arg.(0)}, 0($sp)\n`;
        `	ldt	$f28, 0($sp)\n`;
        `	cvtqt	$f28, {emit_reg i.res.(0)}\n`;
        `	lda	$sp, 8($sp)\n`;
        `	.set	at\n`
    | Lop(Iintoffloat) ->
        `	.set	noat\n`;
        `	lda	$sp, -8($sp)\n`;
        `	cvttqc	{emit_reg i.arg.(0)}, $f28\n`;
        `	stt	$f28, 0($sp)\n`;
        `	ldq	{emit_reg i.res.(0)}, 0($sp)\n`;
        `	lda	$sp, 8($sp)\n`;
        `	.set	at\n`
    | Lop(Ispecific(Ireloadgp marked_r26)) ->
        if marked_r26 then begin
          `	bic	$26, 1, $26\n`;
          `	ldgp	$gp, 4($26)\n`
        end else begin
          `	ldgp	$gp, 0($26)\n`
        end
    | Lop(Ispecific sop) ->
        let instr = name_for_specific_operation sop in
        `	{emit_string instr}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
    | Lreloadretaddr ->
        let n = frame_size() in
        `	ldq	$26, {emit_int(n - 8)}($sp)\n`
    | Lreturn ->
        let n = frame_size() in
        if n > 0 then
          `	lda	$sp, {emit_int n}($sp)\n`;
        liveregs i live_26;
        `	ret	($26)\n`
    | Llabel lbl ->
        `{emit_label lbl}:\n`
    | Lbranch lbl ->
        `	br	{emit_label lbl}\n`
    | Lcondbranch(tst, lbl) ->
        begin match tst with
          Itruetest ->
            `	bne	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        | Ifalsetest ->
            `	beq	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        | Iinttest cmp ->
            let (comp, test) = name_for_int_comparison cmp in
            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, $25\n`;
            if test then
              `	bne	$25, {emit_label lbl}\n`
            else
              `	beq	$25, {emit_label lbl}\n`
        | Iinttest_imm(cmp, 0) ->
            let branch = name_for_int_cond_branch cmp in
            `	{emit_string branch}	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        | Iinttest_imm(cmp, n) ->
            let (comp, test) = name_for_int_comparison cmp in
            `	{emit_string comp}	{emit_reg i.arg.(0)}, {emit_int n}, $25\n`;
            if test then
              `	bne	$25, {emit_label lbl}\n`
            else
              `	beq	$25, {emit_label lbl}\n`
        | Ifloattest(cmp, neg) ->
            `	.set	noat\n`;
            let (comp, swap, test) = name_for_float_comparison cmp neg in
            `	{emit_string comp}	`;
            if swap
            then `{emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, $f28\n`
            else `{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, $f28\n`;
            if test
            then `	fbeq	$f28, {emit_label lbl}\n`
            else `	fbne	$f28, {emit_label lbl}\n`;
            `	.set	at\n`
        | Ioddtest ->
            `	blbs	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        | Ieventest ->
            `	blbc	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        end
    | Lcondbranch3(lbl0, lbl1, lbl2) ->
        begin match lbl0 with
          None -> ()
        | Some lbl -> `	beq	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        end;
        begin match lbl1 with
          None -> ()
        | Some lbl -> `	blbs	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
        end;
        begin match lbl2 with
          None -> ()
        | Some lbl ->
            if lbl0 <> None then
              `	blbc	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
            else if lbl1 <> None then
              `	bne	{emit_reg i.arg.(0)}, {emit_label lbl}\n`
            else begin
              `	subq	{emit_reg i.arg.(0)}, 2, $25\n`;
              `	beq	$25, {emit_label lbl}\n`
            end
        end
    | Lswitch jumptbl ->
        let lbl_jump = new_label() in
        `	br	$25, {emit_label lbl_jump}\n`;
        for i = 0 to Array.length jumptbl - 1 do
          `	br	{emit_label jumptbl.(i)}\n`
        done;
        `{emit_label lbl_jump}:	s4addq	{emit_reg i.arg.(0)}, $25, $25\n`;
        `	jmp	($25), {emit_label jumptbl.(0)}\n`
    | Lsetuptrap lbl ->
        `	br	$25, {emit_label lbl}\n`
    | Lpushtrap ->
        stack_offset := !stack_offset + 16;
        `	lda	$sp, -16($sp)\n`;
        `	stq	$15, 0($sp)\n`;
        `	stq	$25, 8($sp)\n`;
        `	mov	$sp, $15\n`
    | Lpoptrap ->
        `	ldq	$15, 0($sp)\n`;
        `	lda	$sp, 16($sp)\n`;
        stack_offset := !stack_offset - 16
    | Lraise ->
        `	ldq	$26, 8($15)\n`;
        `	mov	$15, $sp\n`;
        `	ldq	$15, 0($sp)\n`;
        `	lda	$sp, 16($sp)\n`;
        liveregs i live_26;
        `	jmp	$25, ($26)\n`   (* Keep retaddr in $25 for debugging *)

let rec emit_all i =
  match i.desc with Lend -> () | _ -> emit_instr i; emit_all i.next

(* Emission of a function declaration *)

let emit_fundecl (fundecl, needs_gp) =
  function_name := fundecl.fun_name;
  fastcode_flag := fundecl.fun_fast;
  stack_offset := 0;
  call_gc_sites := [];
  range_check_trap := 0;
  float_constants := [];
  bigint_constants := [];
  `	.text\n`;
  `	.align	4\n`;
  `	.globl	{emit_symbol fundecl.fun_name}\n`;
  `	.ent	{emit_symbol fundecl.fun_name}\n`;
  `{emit_symbol fundecl.fun_name}:\n`;
  let n = frame_size() in
  if n > 0 then
    `	lda	$sp, -{emit_int n}($sp)\n`;
  if needs_gp then begin
    let lbl = new_label() in
    `	br	$27, {emit_label lbl}\n`;
    `{emit_label lbl}:	ldgp	$gp, 0($27)\n`
  end;
  if !contains_calls then begin
    `	stq	$26, {emit_int(n - 8)}($sp)\n`;
    `	.mask	0x04000000, -8\n`;
    `	.fmask	0x0, 0\n`
  end;
  `	.frame	$sp, {emit_int n}, $26\n`;
  `	.prologue 0\n`;
  tailrec_entry_point := new_label();
  `{emit_label !tailrec_entry_point}:\n`;
  emit_all fundecl.fun_body;
  List.iter emit_call_gc !call_gc_sites;
  if !range_check_trap > 0 then begin
    `{emit_label !range_check_trap}:\n`;
    `	br	$25, call_array_bound_error\n`
    (* Keep retaddr in $25 for debugging *)
  end;
  `	.end	{emit_symbol fundecl.fun_name}\n`;
  if !bigint_constants <> [] then begin
    `	.section .rodata\n`;
    `	.align	3\n`;
    List.iter
      (fun (lbl, n) -> `{emit_label lbl}:	.quad	{emit_string(Nativeint.to_hexa_string n)}\n`)
      !bigint_constants
  end;
  if !float_constants <> [] then begin
    `	.section .rodata\n`;
    `	.align	3\n`;
    List.iter
      (fun (lbl, s) -> `{emit_label lbl}:	.t_floating {emit_string s}\n`)
      !float_constants
  end

let fundecl f =
  emit_fundecl (insert_load_gp f)

(* Emission of data *)

let emit_item = function
    Cdefine_symbol s ->
      `	.globl	{emit_symbol s}\n`;
      `{emit_symbol s}:\n`
  | Cdefine_label lbl ->
      `{emit_label (100000 + lbl)}:\n`
  | Cint8 n ->
      `	.byte	{emit_int n}\n`
  | Cint16 n ->
      `	.word	{emit_int n}\n`
  | Cint n ->
      if digital_asm then
        `	.quad	{emit_nativeint n}\n`
      else
        (* Work around a bug in gas regarding the parsing of
           long decimal constants *)
        `	.quad	{emit_string(Nativeint.to_hexa_string n)}\n`
  | Cfloat f ->
      `	.double	{emit_string f}\n`
  | Csymbol_address s ->
      `	.quad	{emit_symbol s}\n`
  | Clabel_address lbl ->
      `	.quad	{emit_label (100000 + lbl)}\n`
  | Cstring s ->
      emit_string_directive "	.ascii	" s
  | Cskip n ->
      if n > 0 then `	.space	{emit_int n}\n`
  | Calign n ->
      `	.align	{emit_int(Misc.log2 n)}\n`

let data l =
  `	.data\n`;
  List.iter emit_item l

(* Beginning / end of an assembly file *)

let begin_assembly() =
  (* There are really two groups of registers:
      $sp and $15 always point to stack locations
      $0 - $14, $16-$23 never point to stack locations. *)
  `	.noalias $0,$sp;  .noalias $0,$15;  .noalias $1,$sp;  .noalias $1,$15\n`;
  `	.noalias $2,$sp;  .noalias $2,$15;  .noalias $3,$sp;  .noalias $3,$15\n`;
  `	.noalias $4,$sp;  .noalias $4,$15;  .noalias $5,$sp;  .noalias $5,$15\n`;
  `	.noalias $6,$sp;  .noalias $6,$15;  .noalias $7,$sp;  .noalias $7,$15\n`;
  `	.noalias $8,$sp;  .noalias $8,$15;  .noalias $9,$sp;  .noalias $9,$15\n`;
  `	.noalias $10,$sp; .noalias $10,$15; .noalias $11,$sp; .noalias $11,$15\n`;
  `	.noalias $12,$sp; .noalias $12,$15; .noalias $13,$sp; .noalias $13,$15\n`;
  `	.noalias $14,$sp; .noalias $14,$15; .noalias $16,$sp; .noalias $16,$15\n`;
  `	.noalias $17,$sp; .noalias $17,$15; .noalias $18,$sp; .noalias $18,$15\n`;
  `	.noalias $19,$sp; .noalias $19,$15; .noalias $20,$sp; .noalias $20,$15\n`;
  `	.noalias $21,$sp; .noalias $21,$15; .noalias $22,$sp; .noalias $22,$15\n`;
  `	.noalias $23,$sp; .noalias $23,$15\n\n`;
  (* The following .file directive is intended to prevent the generation
     of line numbers for the debugger, 'cos they make .o files larger
     and slow down linking. *)
  `	.file	1 \"{emit_string !Location.input_name}\"\n\n`;
  let lbl_begin = Compilenv.current_unit_name() ^ "_data_begin" in
  `	.data\n`;
  `	.globl	{emit_symbol lbl_begin}\n`;
  `{emit_symbol lbl_begin}:\n`;
  let lbl_begin = Compilenv.current_unit_name() ^ "_code_begin" in
  `	.text\n`;
  `	.globl	{emit_symbol lbl_begin}\n`;
  `{emit_symbol lbl_begin}:\n`

let end_assembly () =
  let lbl_end = Compilenv.current_unit_name() ^ "_code_end" in
  `	.text\n`;
  `	.globl	{emit_symbol lbl_end}\n`;
  `{emit_symbol lbl_end}:\n`;
  let lbl_end = Compilenv.current_unit_name() ^ "_data_end" in
  `	.data\n`;
  `	.globl	{emit_symbol lbl_end}\n`;
  `{emit_symbol lbl_end}:\n`;
  `	.quad	0\n`;
  let lbl_frame = Compilenv.current_unit_name() ^ "_frametable" in
  begin match Config.system with
    "digital" -> `	.rdata\n`
  | "linux"   -> `	.section .rodata\n`
  | _         -> assert false
  end;
  `	.globl	{emit_symbol lbl_frame}\n`;
  `{emit_symbol lbl_frame}:\n`;
  `	.quad	{emit_int (List.length !frame_descriptors)}\n`;
  List.iter emit_frame !frame_descriptors;
  frame_descriptors := []