ocaml/asmcomp/proc_i386.ml

439 lines
14 KiB
OCaml

(***********************************************************************)
(* *)
(* Caml Special Light *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1995 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Description of the Intel 386 processor *)
open Misc
open Arch
open Format
open Cmm
open Reg
open Mach
(* Registers available for register allocation *)
(* Register map:
eax 0 eax - edi: function arguments and results
ebx 1 eax: C function results
ecx 2 ebx, esi, edi, ebp: preserved by C
edx 3
esi 4
edi 5
ebp 6
f0 - f3 101-104 function arguments and results
f0: C function results
not preserved by C
The other 4 floating-point registers are treated as a stack.
We use the pseudo-register %tos (100) to represent the top of that stack. *)
let int_reg_name =
[| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |]
let float_reg_name =
[| "%tos"; "%st(0)"; "%st(1)"; "%st(2)"; "%st(3)";
"%st(4)"; "%st(5)"; "%st(6)"; "%st(7)" |]
let num_register_classes = 2
let register_class r =
match r.typ with
Int -> 0
| Addr -> 0
| Float -> 1
let num_available_registers = [| 7; 4 |]
let first_available_register = [| 0; 101 |]
let register_name r =
if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
(* There is little scheduling, and some operations are more efficient when
%eax or %st(0) are arguments *)
let rotate_registers = false
(* Representation of hard registers by pseudo-registers *)
let hard_int_reg =
let v = Array.new 7 Reg.dummy in
for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done;
v
let hard_float_reg =
let v = Array.new 5 Reg.dummy in
for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done;
v
let all_phys_regs =
Array.append hard_int_reg hard_float_reg
let phys_reg n =
if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
let stack_slot slot ty =
Reg.at_location ty (Stack slot)
let eax = phys_reg 0
let ecx = phys_reg 2
let edx = phys_reg 3
let tos = phys_reg 100
(* Exceptions raised to signal cases not handled here *)
exception Use_default
(* Instruction selection *)
(* Auxiliary for recognizing addressing modes *)
type addressing_expr =
Asymbol of string
| Alinear of expression
| Aadd of expression * expression
| Ascale of expression * int
| Ascaledadd of expression * expression * int
let rec select_addr exp =
match exp with
Cconst_symbol s ->
(Asymbol s, 0)
| Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
let (a, n) = select_addr arg in (a, n + m)
| Cop((Csubi | Csuba), [arg; Cconst_int m]) ->
let (a, n) = select_addr arg in (a, n - m)
| Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
let (a, n) = select_addr arg in (a, n + m)
| Cop(Clsl, [arg; Cconst_int(1|2|3 as shift)]) ->
begin match select_addr arg with
(Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift)
| _ -> (Alinear exp, 0)
end
| Cop(Cmuli, [arg; Cconst_int(2|4|8 as mult)]) ->
begin match select_addr arg with
(Alinear e, n) -> (Ascale(e, mult), n * mult)
| _ -> (Alinear exp, 0)
end
| Cop(Cmuli, [Cconst_int(2|4|8 as mult); arg]) ->
begin match select_addr arg with
(Alinear e, n) -> (Ascale(e, mult), n * mult)
| _ -> (Alinear exp, 0)
end
| Cop((Caddi | Cadda), [arg1; arg2]) ->
begin match (select_addr arg1, select_addr arg2) with
((Alinear e1, n1), (Alinear e2, n2)) ->
(Aadd(e1, e2), n1 + n2)
| ((Alinear e1, n1), (Ascale(e2, scale), n2)) ->
(Ascaledadd(e1, e2, scale), n1 + n2)
| ((Ascale(e1, scale), n1), (Alinear e2, n2)) ->
(Ascaledadd(e2, e1, scale), n1 + n2)
| (_, (Ascale(e2, scale), n2)) ->
(Ascaledadd(arg1, e2, scale), n2)
| ((Ascale(e1, scale), n1), _) ->
(Ascaledadd(arg2, e1, scale), n1)
| _ ->
(Aadd(arg1, arg2), 0)
end
| arg ->
(Alinear arg, 0)
let select_addressing exp =
match select_addr exp with
(Asymbol s, d) ->
(Ibased(s, d), Ctuple [])
| (Alinear e, d) ->
(Iindexed d, e)
| (Aadd(e1, e2), d) ->
(Iindexed2 d, Ctuple[e1; e2])
| (Ascale(e, scale), d) ->
(Iscaled(scale, d), e)
| (Ascaledadd(e1, e2, scale), d) ->
(Iindexed2scaled(scale, d), Ctuple[e1; e2])
(* Estimate number of float temporaries needed to evaluate expression
(Ershov's algorithm) *)
let rec float_needs = function
Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) ->
let n1 = float_needs arg1 in
let n2 = float_needs arg2 in
if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
| _ ->
1
(* Recognize float arithmetic with mem *)
let select_floatarith regular_op reversed_op mem_op mem_rev_op args =
match args with
[arg1; Cop(Cload _, [loc2])] ->
let (addr, arg2) = select_addressing loc2 in
(Ispecific(Ifloatarithmem(mem_op, addr)), [arg1; arg2])
| [Cop(Cload _, [loc1]); arg2] ->
let (addr, arg1) = select_addressing loc1 in
(Ispecific(Ifloatarithmem(mem_rev_op, addr)), [arg2; arg1])
| [arg1; arg2] ->
(* Evaluate bigger subexpression first to minimize stack usage.
Because of right-to-left evaluation, rightmost arg is evaluated
first *)
if float_needs arg1 <= float_needs arg2
then (regular_op, [arg1; arg2])
else (reversed_op, [arg2; arg1])
| _ ->
fatal_error "Proc_i386: select_floatarith"
(* Main instruction selection functions *)
exception Use_default
let select_oper op args =
match op with
(* Recognize the LEA instruction *)
Caddi | Cadda | Csubi | Csuba ->
begin match select_addressing (Cop(op, args)) with
(Iindexed d, _) -> raise Use_default
| (Iindexed2 0, _) -> raise Use_default
| (addr, arg) -> (Ispecific(Ilea addr), [arg])
end
(* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
| Cdivi ->
begin match args with
[arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
(Iintop_imm(Idiv, n), [arg1])
| _ -> (Iintop Idiv, args)
end
| Cmodi ->
begin match args with
[arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) ->
(Iintop_imm(Imod, n), [arg1])
| _ -> (Iintop Imod, args)
end
(* Recognize float arithmetic with memory.
In passing, apply Ershov's algorithm to reduce stack usage *)
| Caddf ->
select_floatarith Iaddf Iaddf Ifloatadd Ifloatadd args
| Csubf ->
select_floatarith Isubf (Ispecific Isubfrev) Ifloatsub Ifloatsubrev args
| Cmulf ->
select_floatarith Imulf Imulf Ifloatmul Ifloatmul args
| Cdivf ->
select_floatarith Idivf (Ispecific Idivfrev) Ifloatdiv Ifloatdivrev args
(* Recognize store instructions *)
| Cstore ->
begin match args with
[loc; Cconst_int n] ->
let (addr, arg) = select_addressing loc in
(Ispecific(Istore_int(n, addr)), [arg])
| [loc; Cconst_pointer n] ->
let (addr, arg) = select_addressing loc in
(Ispecific(Istore_int(n, addr)), [arg])
| [loc; Cconst_symbol s] ->
let (addr, arg) = select_addressing loc in
(Ispecific(Istore_symbol(s, addr)), [arg])
| [loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
when loc = loc' ->
let (addr, arg) = select_addressing loc in
(Ispecific(Ioffset_loc(n, addr)), [arg])
| _ ->
raise Use_default
end
| _ -> raise Use_default
let select_store addr exp =
match exp with
Cconst_int n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
| Cconst_pointer n -> (Ispecific(Istore_int(n, addr)), Ctuple [])
| Cconst_symbol s -> (Ispecific(Istore_symbol(s, addr)), Ctuple [])
| _ -> raise Use_default
let pseudoregs_for_operation op arg res =
match op with
(* Two-address binary operations *)
Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) ->
([|res.(0); arg.(1)|], res, false)
(* Two-address unary operations *)
| Iintop_imm((Iadd|Isub|Imul|Idiv|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) ->
(res, res, false)
(* For shifts with variable shift count, second arg must be in ecx *)
| Iintop(Ilsl|Ilsr|Iasr) ->
([|res.(0); ecx|], res, false)
(* For div and mod, first arg must be in eax, edx is clobbered,
and result is in eax or edx respectively.
Keep it simple, just force second argument in ecx. *)
| Iintop(Idiv) ->
([| eax; ecx |], [| eax |], true)
| Iintop(Imod) ->
([| eax; ecx |], [| edx |], true)
(* For mod with immediate operand, arg must not be in eax.
Keep it simple, force it in edx. *)
| Iintop_imm(Imod, _) ->
([| edx |], [| edx |], true)
(* For floating-point operations, the result is always left at the
top of the floating-point stack *)
| Iconst_float _ | Iaddf | Isubf | Imulf | Idivf | Ifloatofint |
Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _)) ->
(arg, [| tos |], false) (* don't move it immediately *)
(* Same for a floating-point load *)
| Iload(Word, addr) when res.(0).typ = Float ->
(arg, [| tos |], false)
(* For storing a byte, the argument must be in eax...edx.
For storing a halfword, any reg is ok.
Keep it simple, just force it to be in edx in both cases. *)
| Istore(Word, addr) -> raise Use_default
| Istore(chunk, addr) ->
let newarg = Array.copy arg in
newarg.(0) <- edx;
(newarg, res, false)
(* Other instructions are more or less regular *)
| _ -> raise Use_default
let is_immediate (n: int) = true
let word_addressed = false
(* Calling conventions *)
let calling_conventions first_int last_int first_float last_float make_stack
arg =
let loc = Array.new (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
let ofs = ref 0 in
for i = 0 to Array.length arg - 1 do
match arg.(i).typ with
Int | Addr as ty ->
if !int <= last_int then begin
loc.(i) <- phys_reg !int;
incr int
end else begin
loc.(i) <- stack_slot (make_stack !ofs) ty;
ofs := !ofs + size_int
end
| Float ->
if !float <= last_float then begin
loc.(i) <- phys_reg !float;
incr float
end else begin
loc.(i) <- stack_slot (make_stack !ofs) Float;
ofs := !ofs + size_float
end
done;
(loc, !ofs)
let incoming ofs = Incoming ofs
let outgoing ofs = Outgoing ofs
let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
let loc_arguments arg =
calling_conventions 0 5 101 104 outgoing arg
let loc_parameters arg =
let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc
let loc_results res =
let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc
let loc_external_arguments arg =
calling_conventions 0 (-1) 101 100 outgoing arg
let loc_external_results res =
let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc
let loc_exn_bucket = eax
(* Registers destroyed by operations *)
let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *)
Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104])
let destroyed_at_oper = function
Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
| Iop(Iextcall(_, false)) -> destroyed_at_c_call
| Iop(Iintop(Idiv | Imod)) -> [| eax; edx |]
| Iop(Iintop_imm(Imod, _)) -> [| eax |]
| Iop(Ialloc _) -> [| eax |]
| Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |]
| Iop(Iintoffloat) -> [| eax |]
| Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |]
| _ -> [||]
let destroyed_at_raise = all_phys_regs
(* Maximal register pressure *)
let safe_register_pressure op = 4
let max_register_pressure = function
Iextcall(_, _) -> [| 4; 0 |]
| Iintop(Idiv | Imod) -> [| 5; 4 |]
| Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) |
Iintoffloat -> [| 6; 4 |]
| _ -> [|7; 4|]
(* Reloading of instruction arguments, storing of instruction results *)
let stackp r =
match r.loc with
Stack _ -> true
| _ -> false
let reload_test makereg tst arg =
match tst with
Iinttest cmp ->
if stackp arg.(0) & stackp arg.(1)
then [| makereg arg.(0); arg.(1) |]
else arg
| _ -> arg
let reload_operation makereg op arg res =
match op with
Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
(* One of the two arguments can reside in the stack *)
if stackp arg.(0) & stackp arg.(1)
then ([|arg.(0); makereg arg.(1)|], res)
else (arg, res)
| Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat |
Iaddf | Isubf | Imulf | Idivf ->
(* The argument(s) can be either in register or on stack *)
(arg, res)
| Ispecific(Ifloatarithmem(_, _)) ->
(* First arg can be either in register or on stack, but remaining
arguments must be in registers *)
let newarg = Array.new (Array.length arg) arg.(0) in
for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done;
(newarg, res)
| _ -> (* Other operations: all args and results in registers *)
raise Use_default
(* Scheduling is turned off because our model does not fit the 486
nor Pentium very well. In particular, it messes up with the
float reg stack. *)
let need_scheduling = false
let oper_latency _ = 0
(* Layout of the stack frame *)
let num_stack_slots = [| 0; 0 |]
let contains_calls = ref false
(* Calling the assembler *)
let assemble_file infile outfile =
Sys.command ("as -o " ^ outfile ^ " " ^ infile)
(* Calling the archiver *)
let create_archive archive file_list =
Misc.remove_file archive;
Sys.command ("ar rc " ^ archive ^ " " ^ String.concat " " file_list ^
" && ranlib " ^ archive)