(***********************************************************************)
(*                                                                     *)
(*                         Caml Special Light                          *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1995 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* Description of the Intel 386 processor, for Windows NT *)

open Misc
open Arch
open Format
open Cmm
open Reg
open Mach

(* Registers available for register allocation *)

(* Register map:
    eax         0               eax - edi: function arguments and results
    ebx         1               eax: C function results
    ecx         2               ebx, esi, edi, ebp: preserved by C
    edx         3
    esi         4
    edi         5
    ebp         6

    f0 - f3     101-104         function arguments and results
                                f0: C function results
                                not preserved by C

   The other 4 floating-point registers are treated as a stack.
   We use the pseudo-register %tos (100) to represent the top of that stack. *)

let int_reg_name =
  [| "eax"; "ebx"; "ecx"; "edx"; "esi"; "edi"; "ebp" |]

let float_reg_name =
  [| "tos"; "st(0)"; "st(1)"; "st(2)"; "st(3)";
     "st(4)"; "st(5)"; "st(6)"; "st(7)" |]

let num_register_classes = 2

let register_class r =
  match r.typ with
    Int -> 0
  | Addr -> 0
  | Float -> 1

let num_available_registers = [| 7; 4 |]

let first_available_register = [| 0; 101 |]

let register_name r =
  if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)

(* There is little scheduling, and some operations are more efficient when
   eax or st(0) are arguments *)

let rotate_registers = false

(* Representation of hard registers by pseudo-registers *)

let hard_int_reg =
  let v = Array.new 7 Reg.dummy in
  for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
  v

let hard_float_reg =
  let v = Array.new 5 Reg.dummy in
  for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
  v

let all_phys_regs =
  Array.append hard_int_reg hard_float_reg

let phys_reg n =
  if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)

let stack_slot slot ty =
  Reg.at_location ty (Stack slot)

let eax = phys_reg 0
let ecx = phys_reg 2
let edx = phys_reg 3
let tos = phys_reg 100

(* Exceptions raised to signal cases not handled here *)

exception Use_default

(* Instruction selection *)

(* Auxiliary for recognizing addressing modes *)

type addressing_expr =
    Asymbol of string
  | Alinear of expression
  | Aadd of expression * expression
  | Ascale of expression * int
  | Ascaledadd of expression * expression * int

let rec select_addr exp =
  match exp with
    Cconst_symbol s ->
      (Asymbol s, 0)
  | Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
      let (a, n) = select_addr arg in (a, n + m)
  | Cop((Csubi | Csuba), [arg; Cconst_int m]) ->
      let (a, n) = select_addr arg in (a, n - m)
  | Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
      let (a, n) = select_addr arg in (a, n + m)
  | Cop(Clsl, [arg; Cconst_int(1|2|3 as shift)]) ->
      begin match select_addr arg with
        (Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift)
      | _ -> (Alinear exp, 0)
      end
  | Cop(Cmuli, [arg; Cconst_int(2|4|8 as mult)]) ->
      begin match select_addr arg with
        (Alinear e, n) -> (Ascale(e, mult), n * mult)
      | _ -> (Alinear exp, 0)
      end
  | Cop(Cmuli, [Cconst_int(2|4|8 as mult); arg]) ->
      begin match select_addr arg with
        (Alinear e, n) -> (Ascale(e, mult), n * mult)
      | _ -> (Alinear exp, 0)
      end
  | Cop((Caddi | Cadda), [arg1; arg2]) ->
      begin match (select_addr arg1, select_addr arg2) with
          ((Alinear e1, n1), (Alinear e2, n2)) ->
              (Aadd(e1, e2), n1 + n2)
        | ((Alinear e1, n1), (Ascale(e2, scale), n2)) ->
              (Ascaledadd(e1, e2, scale), n1 + n2)
        | ((Ascale(e1, scale), n1), (Alinear e2, n2)) ->
              (Ascaledadd(e2, e1, scale), n1 + n2)
        | (_, (Ascale(e2, scale), n2)) ->
              (Ascaledadd(arg1, e2, scale), n2)
        | ((Ascale(e1, scale), n1), _) ->
              (Ascaledadd(arg2, e1, scale), n1)
        | _ ->
              (Aadd(arg1, arg2), 0)
      end
  | arg ->
      (Alinear arg, 0)
    
let select_addressing exp =
  match select_addr exp with
    (Asymbol s, d) ->
      (Ibased(s, d), Ctuple [])
  | (Alinear e, d) ->
      (Iindexed d, e)
  | (Aadd(e1, e2), d) ->
      (Iindexed2 d, Ctuple[e1; e2])
  | (Ascale(e, scale), d) ->
      (Iscaled(scale, d), e)
  | (Ascaledadd(e1, e2, scale), d) ->
      (Iindexed2scaled(scale, d), Ctuple[e1; e2])

(* Estimate number of float temporaries needed to evaluate expression
   (Ershov's algorithm) *)

let rec float_needs = function
    Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
      let n1 = float_needs arg1 in
      let n2 = float_needs arg2 in
      if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
  | _ ->
      1

(* Recognize float arithmetic with mem *)

let select_floatarith regular_op reversed_op mem_op mem_rev_op args =
  match args with
    [arg1; Cop(Cload _, [loc2])] ->
      let (addr, arg2) = select_addressing loc2 in
      (Ispecific(Ifloatarithmem(mem_op, addr)), [arg1; arg2])
  | [Cop(Cload _, [loc1]); arg2] ->
      let (addr, arg1) = select_addressing loc1 in
      (Ispecific(Ifloatarithmem(mem_rev_op, addr)), [arg2; arg1])
  | [arg1; arg2] ->
      (* Evaluate bigger subexpression first to minimize stack usage.
         Because of right-to-left evaluation, rightmost arg is evaluated
         first *)
      if float_needs arg1 <= float_needs arg2
      then (regular_op, [arg1; arg2])
      else (reversed_op, [arg2; arg1])
  | _ ->
      fatal_error "Proc_i386: select_floatarith"

(* Main instruction selection functions *)

exception Use_default

let select_oper op args =
  match op with
  (* Recognize the LEA instruction *)
    Caddi | Cadda | Csubi | Csuba ->
      begin match select_addressing (Cop(op, args)) with
        (Iindexed d, _) -> raise Use_default
      | (Iindexed2 0, _) -> raise Use_default
      | (addr, arg) -> (Ispecific(Ilea addr), [arg])
      end
  (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
  | Cdivi ->
      begin match args with
        [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
          (Iintop_imm(Idiv, n), [arg1])
      | _ -> (Iintop Idiv, args)
      end
  | Cmodi ->
      begin match args with
        [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
          (Iintop_imm(Imod, n), [arg1])
      | _ -> (Iintop Imod, args)
      end
  (* Recognize float arithmetic with memory.
     In passing, apply Ershov's algorithm to reduce stack usage *)
  | Caddf ->
      select_floatarith Iaddf Iaddf Ifloatadd Ifloatadd args
  | Csubf ->
      select_floatarith Isubf (Ispecific Isubfrev) Ifloatsub Ifloatsubrev args
  | Cmulf ->
      select_floatarith Imulf Imulf Ifloatmul Ifloatmul args
  | Cdivf ->
      select_floatarith Idivf (Ispecific Idivfrev) Ifloatdiv Ifloatdivrev args
  (* Recognize store instructions *)
  | Cstore ->
      begin match args with
        [loc; Cconst_int n] ->
          let (addr, arg) = select_addressing loc in
          (Ispecific(Istore_int(n, addr)), [arg])
      | [loc; Cconst_pointer n] ->
          let (addr, arg) = select_addressing loc in
          (Ispecific(Istore_int(n, addr)), [arg])
      | [loc; Cconst_symbol s] ->
          let (addr, arg) = select_addressing loc in
          (Ispecific(Istore_symbol(s, addr)), [arg])
      | [loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
        when loc = loc' ->
          let (addr, arg) = select_addressing loc in
          (Ispecific(Ioffset_loc(n, addr)), [arg])
      | _ ->
          raise Use_default
      end
  | _ -> raise Use_default

let select_store addr exp =
  match exp with
    Cconst_int n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
  | Cconst_pointer n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
  | Cconst_symbol s -> (Ispecific(Istore_symbol(s, addr)), Ctuple [])
  | _ -> raise Use_default

let pseudoregs_for_operation op arg res =
  match op with
  (* Two-address binary operations *)
    Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) ->
      ([|res.(0); arg.(1)|], res, false)
  (* Two-address unary operations *)
  | Iintop_imm((Iadd|Isub|Imul|Idiv|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) ->
      (res, res, false)
  (* For shifts with variable shift count, second arg must be in ecx *)
  | Iintop(Ilsl|Ilsr|Iasr) ->
      ([|res.(0); ecx|], res, false)
  (* For div and mod, first arg must be in eax, edx is clobbered,
     and result is in eax or edx respectively.
     Keep it simple, just force second argument in ecx. *)
  | Iintop(Idiv) ->
      ([| eax; ecx |], [| eax |], true)
  | Iintop(Imod) ->
      ([| eax; ecx |], [| edx |], true)
  (* For mod with immediate operand, arg must not be in eax.
     Keep it simple, force it in edx. *)
  | Iintop_imm(Imod, _) ->
      ([| edx |], [| edx |], true)
  (* For floating-point operations, the result is always left at the
     top of the floating-point stack *)
  | Iconst_float _ | Iaddf | Isubf | Imulf | Idivf | Ifloatofint |
    Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _)) ->
      (arg, [| tos |], false)           (* don't move it immediately *)
  (* Same for a floating-point load *)
  | Iload(Word, addr) when res.(0).typ = Float ->
      (arg, [| tos |], false)
  (* For storing a byte, the argument must be in eax...edx.
     For storing a halfword, any reg is ok.
     Keep it simple, just force it to be in edx in both cases. *)
  | Istore(Word, addr) -> raise Use_default
  | Istore(chunk, addr) ->
      let newarg = Array.copy arg in
      newarg.(0) <- edx;
      (newarg, res, false)
  (* Other instructions are more or less regular *)
  | _ -> raise Use_default

let is_immediate (n: int) = true

let word_addressed = false

(* Calling conventions *)

let calling_conventions first_int last_int first_float last_float make_stack
                        arg =
  let loc = Array.new (Array.length arg) Reg.dummy in
  let int = ref first_int in
  let float = ref first_float in
  let ofs = ref 0 in
  for i = 0 to Array.length arg - 1 do
    match arg.(i).typ with
      Int | Addr as ty ->
        if !int <= last_int then begin
          loc.(i) <- phys_reg !int;
          incr int
        end else begin
          loc.(i) <- stack_slot (make_stack !ofs) ty;
          ofs := !ofs + size_int
        end
    | Float ->
        if !float <= last_float then begin
          loc.(i) <- phys_reg !float;
          incr float
        end else begin
          loc.(i) <- stack_slot (make_stack !ofs) Float;
          ofs := !ofs + size_float
        end
  done;
  (loc, !ofs)

let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"

let loc_arguments arg =
  calling_conventions 0 5 101 104 outgoing arg
let loc_parameters arg =
  let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
let loc_results res =
  let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
let loc_external_arguments arg =
  calling_conventions 0 (-1) 101 100 outgoing arg
let loc_external_results res =
  let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc

let loc_exn_bucket = eax

(* Registers destroyed by operations *)

let destroyed_at_c_call =               (* ebx, esi, edi, ebp preserved *)
  Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])

let destroyed_at_oper = function
    Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
  | Iop(Iextcall(_, false)) -> destroyed_at_c_call
  | Iop(Iintop(Idiv | Imod)) -> [| eax; edx |]
  | Iop(Iintop_imm(Imod, _)) -> [| eax |]
  | Iop(Ialloc _) -> [| eax |]
  | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |]
  | Iop(Iintoffloat) -> [| eax |]
  | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |]
  | _ -> [||]

let destroyed_at_raise = all_phys_regs

(* Maximal register pressure *)

let safe_register_pressure op = 4

let max_register_pressure = function
    Iextcall(_, _) -> [| 4; 0 |]
  | Iintop(Idiv | Imod) -> [| 5; 4 |]
  | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
    Iintoffloat -> [| 6; 4 |]
  | _ -> [|7; 4|]

(* Reloading of instruction arguments, storing of instruction results *)

let stackp r =
  match r.loc with
    Stack _ -> true
  | _ -> false

let reload_test makereg tst arg =
  match tst with
    Iinttest cmp ->
      if stackp arg.(0) & stackp arg.(1)
      then [| makereg arg.(0); arg.(1) |]
      else arg
  | _ -> arg

let reload_operation makereg op arg res =
  match op with
    Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
      (* One of the two arguments can reside in the stack *)
      if stackp arg.(0) & stackp arg.(1)
      then ([|arg.(0); makereg arg.(1)|], res)
      else (arg, res)
  | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
    Iaddf | Isubf | Imulf | Idivf ->
      (* The argument(s) can be either in register or on stack *)
      (arg, res)
  | Ispecific(Ifloatarithmem(_, _)) ->
      (* First arg can be either in register or on stack, but remaining
         arguments must be in registers *)
      let newarg = Array.new (Array.length arg) arg.(0) in
      for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
      (newarg, res)
  | _ -> (* Other operations: all args and results in registers *)
      raise Use_default

(* Scheduling is turned off because our model does not fit the 486
   nor Pentium very well. In particular, it messes up with the
   float reg stack. *)

let need_scheduling = false

let oper_latency _ = 0

(* Layout of the stack frame *)

let num_stack_slots = [| 0; 0 |]
let contains_calls = ref false

(* Calling the assembler *)

let assemble_file infile outfile =
  Sys.command ("ml /nologo /coff /Cp /c /Fo" ^ outfile ^ " " ^ infile ^ ">NUL")
  (* /Cp preserve case of all used identifiers
     /c  assemble only
     /Fo output file name *)
     

(* Calling the archiver *)

let create_archive archive file_list =
  Misc.remove_file archive;
  Sys.command ("lib /nologo /debugtype:cv /out:" ^ archive ^ " " ^
               String.concat " " file_list)