(***********************************************************************) (* *) (* Caml Special Light *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1995 Institut National de Recherche en Informatique et *) (* Automatique. Distributed only by permission. *) (* *) (***********************************************************************) (* $Id$ *) (* Description of the Intel 386 processor, for Windows NT *) open Misc open Arch open Format open Cmm open Reg open Mach (* Registers available for register allocation *) (* Register map: eax 0 eax - edi: function arguments and results ebx 1 eax: C function results ecx 2 ebx, esi, edi, ebp: preserved by C edx 3 esi 4 edi 5 ebp 6 f0 - f3 101-104 function arguments and results f0: C function results not preserved by C The other 4 floating-point registers are treated as a stack. We use the pseudo-register %tos (100) to represent the top of that stack. *) let int_reg_name = [| "eax"; "ebx"; "ecx"; "edx"; "esi"; "edi"; "ebp" |] let float_reg_name = [| "tos"; "st(0)"; "st(1)"; "st(2)"; "st(3)"; "st(4)"; "st(5)"; "st(6)"; "st(7)" |] let num_register_classes = 2 let register_class r = match r.typ with Int -> 0 | Addr -> 0 | Float -> 1 let num_available_registers = [| 7; 4 |] let first_available_register = [| 0; 101 |] let register_name r = if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) (* There is little scheduling, and some operations are more efficient when eax or st(0) are arguments *) let rotate_registers = false (* Representation of hard registers by pseudo-registers *) let hard_int_reg = let v = Array.new 7 Reg.dummy in for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done; v let hard_float_reg = let v = Array.new 5 Reg.dummy in for i = 0 to 4 do v.(i) <- Reg.at_location Float (Reg(i + 100)) done; v let all_phys_regs = Array.append hard_int_reg hard_float_reg let phys_reg n = if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) let stack_slot slot ty = Reg.at_location ty (Stack slot) let eax = phys_reg 0 let ecx = phys_reg 2 let edx = phys_reg 3 let tos = phys_reg 100 (* Exceptions raised to signal cases not handled here *) exception Use_default (* Instruction selection *) (* Auxiliary for recognizing addressing modes *) type addressing_expr = Asymbol of string | Alinear of expression | Aadd of expression * expression | Ascale of expression * int | Ascaledadd of expression * expression * int let rec select_addr exp = match exp with Cconst_symbol s -> (Asymbol s, 0) | Cop((Caddi | Cadda), [arg; Cconst_int m]) -> let (a, n) = select_addr arg in (a, n + m) | Cop((Csubi | Csuba), [arg; Cconst_int m]) -> let (a, n) = select_addr arg in (a, n - m) | Cop((Caddi | Cadda), [Cconst_int m; arg]) -> let (a, n) = select_addr arg in (a, n + m) | Cop(Clsl, [arg; Cconst_int(1|2|3 as shift)]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift) | _ -> (Alinear exp, 0) end | Cop(Cmuli, [arg; Cconst_int(2|4|8 as mult)]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, mult), n * mult) | _ -> (Alinear exp, 0) end | Cop(Cmuli, [Cconst_int(2|4|8 as mult); arg]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, mult), n * mult) | _ -> (Alinear exp, 0) end | Cop((Caddi | Cadda), [arg1; arg2]) -> begin match (select_addr arg1, select_addr arg2) with ((Alinear e1, n1), (Alinear e2, n2)) -> (Aadd(e1, e2), n1 + n2) | ((Alinear e1, n1), (Ascale(e2, scale), n2)) -> (Ascaledadd(e1, e2, scale), n1 + n2) | ((Ascale(e1, scale), n1), (Alinear e2, n2)) -> (Ascaledadd(e2, e1, scale), n1 + n2) | (_, (Ascale(e2, scale), n2)) -> (Ascaledadd(arg1, e2, scale), n2) | ((Ascale(e1, scale), n1), _) -> (Ascaledadd(arg2, e1, scale), n1) | _ -> (Aadd(arg1, arg2), 0) end | arg -> (Alinear arg, 0) let select_addressing exp = match select_addr exp with (Asymbol s, d) -> (Ibased(s, d), Ctuple []) | (Alinear e, d) -> (Iindexed d, e) | (Aadd(e1, e2), d) -> (Iindexed2 d, Ctuple[e1; e2]) | (Ascale(e, scale), d) -> (Iscaled(scale, d), e) | (Ascaledadd(e1, e2, scale), d) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) (* Estimate number of float temporaries needed to evaluate expression (Ershov's algorithm) *) let rec float_needs = function Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) -> let n1 = float_needs arg1 in let n2 = float_needs arg2 in if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2 | _ -> 1 (* Recognize float arithmetic with mem *) let select_floatarith regular_op reversed_op mem_op mem_rev_op args = match args with [arg1; Cop(Cload _, [loc2])] -> let (addr, arg2) = select_addressing loc2 in (Ispecific(Ifloatarithmem(mem_op, addr)), [arg1; arg2]) | [Cop(Cload _, [loc1]); arg2] -> let (addr, arg1) = select_addressing loc1 in (Ispecific(Ifloatarithmem(mem_rev_op, addr)), [arg2; arg1]) | [arg1; arg2] -> (* Evaluate bigger subexpression first to minimize stack usage. Because of right-to-left evaluation, rightmost arg is evaluated first *) if float_needs arg1 <= float_needs arg2 then (regular_op, [arg1; arg2]) else (reversed_op, [arg2; arg1]) | _ -> fatal_error "Proc_i386: select_floatarith" (* Main instruction selection functions *) exception Use_default let select_oper op args = match op with (* Recognize the LEA instruction *) Caddi | Cadda | Csubi | Csuba -> begin match select_addressing (Cop(op, args)) with (Iindexed d, _) -> raise Use_default | (Iindexed2 0, _) -> raise Use_default | (addr, arg) -> (Ispecific(Ilea addr), [arg]) end (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *) | Cdivi -> begin match args with [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) -> (Iintop_imm(Idiv, n), [arg1]) | _ -> (Iintop Idiv, args) end | Cmodi -> begin match args with [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) -> (Iintop_imm(Imod, n), [arg1]) | _ -> (Iintop Imod, args) end (* Recognize float arithmetic with memory. In passing, apply Ershov's algorithm to reduce stack usage *) | Caddf -> select_floatarith Iaddf Iaddf Ifloatadd Ifloatadd args | Csubf -> select_floatarith Isubf (Ispecific Isubfrev) Ifloatsub Ifloatsubrev args | Cmulf -> select_floatarith Imulf Imulf Ifloatmul Ifloatmul args | Cdivf -> select_floatarith Idivf (Ispecific Idivfrev) Ifloatdiv Ifloatdivrev args (* Recognize store instructions *) | Cstore -> begin match args with [loc; Cconst_int n] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_int(n, addr)), [arg]) | [loc; Cconst_pointer n] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_int(n, addr)), [arg]) | [loc; Cconst_symbol s] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_symbol(s, addr)), [arg]) | [loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])] when loc = loc' -> let (addr, arg) = select_addressing loc in (Ispecific(Ioffset_loc(n, addr)), [arg]) | _ -> raise Use_default end | _ -> raise Use_default let select_store addr exp = match exp with Cconst_int n -> (Ispecific(Istore_int(n, addr)), Ctuple []) | Cconst_pointer n -> (Ispecific(Istore_int(n, addr)), Ctuple []) | Cconst_symbol s -> (Ispecific(Istore_symbol(s, addr)), Ctuple []) | _ -> raise Use_default let pseudoregs_for_operation op arg res = match op with (* Two-address binary operations *) Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) -> ([|res.(0); arg.(1)|], res, false) (* Two-address unary operations *) | Iintop_imm((Iadd|Isub|Imul|Idiv|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) -> (res, res, false) (* For shifts with variable shift count, second arg must be in ecx *) | Iintop(Ilsl|Ilsr|Iasr) -> ([|res.(0); ecx|], res, false) (* For div and mod, first arg must be in eax, edx is clobbered, and result is in eax or edx respectively. Keep it simple, just force second argument in ecx. *) | Iintop(Idiv) -> ([| eax; ecx |], [| eax |], true) | Iintop(Imod) -> ([| eax; ecx |], [| edx |], true) (* For mod with immediate operand, arg must not be in eax. Keep it simple, force it in edx. *) | Iintop_imm(Imod, _) -> ([| edx |], [| edx |], true) (* For floating-point operations, the result is always left at the top of the floating-point stack *) | Iconst_float _ | Iaddf | Isubf | Imulf | Idivf | Ifloatofint | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _)) -> (arg, [| tos |], false) (* don't move it immediately *) (* Same for a floating-point load *) | Iload(Word, addr) when res.(0).typ = Float -> (arg, [| tos |], false) (* For storing a byte, the argument must be in eax...edx. For storing a halfword, any reg is ok. Keep it simple, just force it to be in edx in both cases. *) | Istore(Word, addr) -> raise Use_default | Istore(chunk, addr) -> let newarg = Array.copy arg in newarg.(0) <- edx; (newarg, res, false) (* Other instructions are more or less regular *) | _ -> raise Use_default let is_immediate (n: int) = true let word_addressed = false (* Calling conventions *) let calling_conventions first_int last_int first_float last_float make_stack arg = let loc = Array.new (Array.length arg) Reg.dummy in let int = ref first_int in let float = ref first_float in let ofs = ref 0 in for i = 0 to Array.length arg - 1 do match arg.(i).typ with Int | Addr as ty -> if !int <= last_int then begin loc.(i) <- phys_reg !int; incr int end else begin loc.(i) <- stack_slot (make_stack !ofs) ty; ofs := !ofs + size_int end | Float -> if !float <= last_float then begin loc.(i) <- phys_reg !float; incr float end else begin loc.(i) <- stack_slot (make_stack !ofs) Float; ofs := !ofs + size_float end done; (loc, !ofs) let incoming ofs = Incoming ofs let outgoing ofs = Outgoing ofs let not_supported ofs = fatal_error "Proc.loc_results: cannot call" let loc_arguments arg = calling_conventions 0 5 101 104 outgoing arg let loc_parameters arg = let (loc, ofs) = calling_conventions 0 5 101 104 incoming arg in loc let loc_results res = let (loc, ofs) = calling_conventions 0 5 101 104 not_supported res in loc let loc_external_arguments arg = calling_conventions 0 (-1) 101 100 outgoing arg let loc_external_results res = let (loc, ofs) = calling_conventions 0 0 101 101 not_supported res in loc let loc_exn_bucket = eax (* Registers destroyed by operations *) let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *) Array.of_list(List.map phys_reg [0;2;3;100;101;102;103;104]) let destroyed_at_oper = function Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs | Iop(Iextcall(_, false)) -> destroyed_at_c_call | Iop(Iintop(Idiv | Imod)) -> [| eax; edx |] | Iop(Iintop_imm(Imod, _)) -> [| eax |] | Iop(Ialloc _) -> [| eax |] | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |] | Iop(Iintoffloat) -> [| eax |] | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |] | _ -> [||] let destroyed_at_raise = all_phys_regs (* Maximal register pressure *) let safe_register_pressure op = 4 let max_register_pressure = function Iextcall(_, _) -> [| 4; 0 |] | Iintop(Idiv | Imod) -> [| 5; 4 |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) | Iintoffloat -> [| 6; 4 |] | _ -> [|7; 4|] (* Reloading of instruction arguments, storing of instruction results *) let stackp r = match r.loc with Stack _ -> true | _ -> false let reload_test makereg tst arg = match tst with Iinttest cmp -> if stackp arg.(0) & stackp arg.(1) then [| makereg arg.(0); arg.(1) |] else arg | _ -> arg let reload_operation makereg op arg res = match op with Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) -> (* One of the two arguments can reside in the stack *) if stackp arg.(0) & stackp arg.(1) then ([|arg.(0); makereg arg.(1)|], res) else (arg, res) | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf -> (* The argument(s) can be either in register or on stack *) (arg, res) | Ispecific(Ifloatarithmem(_, _)) -> (* First arg can be either in register or on stack, but remaining arguments must be in registers *) let newarg = Array.new (Array.length arg) arg.(0) in for i = 1 to Array.length arg - 1 do newarg.(i) <- makereg arg.(i) done; (newarg, res) | _ -> (* Other operations: all args and results in registers *) raise Use_default (* Scheduling is turned off because our model does not fit the 486 nor Pentium very well. In particular, it messes up with the float reg stack. *) let need_scheduling = false let oper_latency _ = 0 (* Layout of the stack frame *) let num_stack_slots = [| 0; 0 |] let contains_calls = ref false (* Calling the assembler *) let assemble_file infile outfile = Sys.command ("ml /nologo /coff /Cp /c /Fo" ^ outfile ^ " " ^ infile ^ ">NUL") (* /Cp preserve case of all used identifiers /c assemble only /Fo output file name *) (* Calling the archiver *) let create_archive archive file_list = Misc.remove_file archive; Sys.command ("lib /nologo /debugtype:cv /out:" ^ archive ^ " " ^ String.concat " " file_list)