(***********************************************************************) (* *) (* Objective Caml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* Automatique. Distributed only by permission. *) (* *) (***********************************************************************) (* $Id$ *) (* Description of the Intel 386 processor *) open Misc open Arch open Format open Cmm open Reg open Mach (* Registers available for register allocation *) (* Register map: eax 0 eax - edi: function arguments and results ebx 1 eax: C function results ecx 2 ebx, esi, edi, ebp: preserved by C edx 3 esi 4 edi 5 ebp 6 tos 100 top of floating-point stack. *) let int_reg_name = [| "%eax"; "%ebx"; "%ecx"; "%edx"; "%esi"; "%edi"; "%ebp" |] let float_reg_name = [| "%tos" |] let num_register_classes = 2 let register_class r = match r.typ with Int -> 0 | Addr -> 0 | Float -> 1 let num_available_registers = [| 7; 0 |] let first_available_register = [| 0; 100 |] let register_name r = if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) (* There is little scheduling, and some operations are more compact when their argument is %eax. *) let rotate_registers = false (* Representation of hard registers by pseudo-registers *) let hard_int_reg = let v = Array.create 7 Reg.dummy in for i = 0 to 6 do v.(i) <- Reg.at_location Int (Reg i) done; v let hard_float_reg = [| Reg.at_location Float (Reg 100) |] let all_phys_regs = Array.append hard_int_reg hard_float_reg let phys_reg n = if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) let stack_slot slot ty = Reg.at_location ty (Stack slot) let eax = phys_reg 0 let ecx = phys_reg 2 let edx = phys_reg 3 let tos = phys_reg 100 (* Exceptions raised to signal cases not handled here *) exception Use_default (* Instruction selection *) (* Auxiliary for recognizing addressing modes *) type addressing_expr = Asymbol of string | Alinear of expression | Aadd of expression * expression | Ascale of expression * int | Ascaledadd of expression * expression * int let rec select_addr exp = match exp with Cconst_symbol s -> (Asymbol s, 0) | Cop((Caddi | Cadda), [arg; Cconst_int m]) -> let (a, n) = select_addr arg in (a, n + m) | Cop((Csubi | Csuba), [arg; Cconst_int m]) -> let (a, n) = select_addr arg in (a, n - m) | Cop((Caddi | Cadda), [Cconst_int m; arg]) -> let (a, n) = select_addr arg in (a, n + m) | Cop(Clsl, [arg; Cconst_int(1|2|3 as shift)]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift) | _ -> (Alinear exp, 0) end | Cop(Cmuli, [arg; Cconst_int(2|4|8 as mult)]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, mult), n * mult) | _ -> (Alinear exp, 0) end | Cop(Cmuli, [Cconst_int(2|4|8 as mult); arg]) -> begin match select_addr arg with (Alinear e, n) -> (Ascale(e, mult), n * mult) | _ -> (Alinear exp, 0) end | Cop((Caddi | Cadda), [arg1; arg2]) -> begin match (select_addr arg1, select_addr arg2) with ((Alinear e1, n1), (Alinear e2, n2)) -> (Aadd(e1, e2), n1 + n2) | ((Alinear e1, n1), (Ascale(e2, scale), n2)) -> (Ascaledadd(e1, e2, scale), n1 + n2) | ((Ascale(e1, scale), n1), (Alinear e2, n2)) -> (Ascaledadd(e2, e1, scale), n1 + n2) | (_, (Ascale(e2, scale), n2)) -> (Ascaledadd(arg1, e2, scale), n2) | ((Ascale(e1, scale), n1), _) -> (Ascaledadd(arg2, e1, scale), n1) | _ -> (Aadd(arg1, arg2), 0) end | arg -> (Alinear arg, 0) let select_addressing exp = match select_addr exp with (Asymbol s, d) -> (Ibased(s, d), Ctuple []) | (Alinear e, d) -> (Iindexed d, e) | (Aadd(e1, e2), d) -> (Iindexed2 d, Ctuple[e1; e2]) | (Ascale(e, scale), d) -> (Iscaled(scale, d), e) | (Ascaledadd(e1, e2, scale), d) -> (Iindexed2scaled(scale, d), Ctuple[e1; e2]) (* Estimate number of float temporaries needed to evaluate expression (Ershov's algorithm) *) let rec float_needs = function Cop((Caddf | Csubf | Cmulf | Cdivf), [arg1; arg2]) -> let n1 = float_needs arg1 in let n2 = float_needs arg2 in if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2 | _ -> 1 (* Recognize float arithmetic with mem *) let select_floatarith regular_op reversed_op mem_op mem_rev_op args = match args with [arg1; Cop(Cload _, [loc2])] -> let (addr, arg2) = select_addressing loc2 in (Ispecific(Ifloatarithmem(mem_op, addr)), [arg1; arg2]) | [Cop(Cload _, [loc1]); arg2] -> let (addr, arg1) = select_addressing loc1 in (Ispecific(Ifloatarithmem(mem_rev_op, addr)), [arg2; arg1]) | [arg1; arg2] -> (* Evaluate bigger subexpression first to minimize stack usage. Because of right-to-left evaluation, rightmost arg is evaluated first *) if float_needs arg1 <= float_needs arg2 then (regular_op, [arg1; arg2]) else (reversed_op, [arg2; arg1]) | _ -> fatal_error "Proc_i386: select_floatarith" (* Main instruction selection functions *) let select_oper op args = match op with (* Recognize the LEA instruction *) Caddi | Cadda | Csubi | Csuba -> begin match select_addressing (Cop(op, args)) with (Iindexed d, _) -> raise Use_default | (Iindexed2 0, _) -> raise Use_default | (addr, arg) -> (Ispecific(Ilea addr), [arg]) end (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *) | Cdivi -> begin match args with [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) -> (Iintop_imm(Idiv, n), [arg1]) | _ -> (Iintop Idiv, args) end | Cmodi -> begin match args with [arg1; Cconst_int n] when n = 1 lsl (Misc.log2 n) -> (Iintop_imm(Imod, n), [arg1]) | _ -> (Iintop Imod, args) end (* Recognize float arithmetic with memory. In passing, apply Ershov's algorithm to reduce stack usage *) | Caddf -> select_floatarith Iaddf Iaddf Ifloatadd Ifloatadd args | Csubf -> select_floatarith Isubf (Ispecific Isubfrev) Ifloatsub Ifloatsubrev args | Cmulf -> select_floatarith Imulf Imulf Ifloatmul Ifloatmul args | Cdivf -> select_floatarith Idivf (Ispecific Idivfrev) Ifloatdiv Ifloatdivrev args (* Recognize store instructions *) | Cstore -> begin match args with [loc; Cconst_int n] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_int(n, addr)), [arg]) | [loc; Cconst_pointer n] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_int(n, addr)), [arg]) | [loc; Cconst_symbol s] -> let (addr, arg) = select_addressing loc in (Ispecific(Istore_symbol(s, addr)), [arg]) | [loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])] when loc = loc' -> let (addr, arg) = select_addressing loc in (Ispecific(Ioffset_loc(n, addr)), [arg]) | _ -> raise Use_default end | _ -> raise Use_default let select_store addr exp = match exp with Cconst_int n -> (Ispecific(Istore_int(n, addr)), Ctuple []) | Cconst_pointer n -> (Ispecific(Istore_int(n, addr)), Ctuple []) | Cconst_symbol s -> (Ispecific(Istore_symbol(s, addr)), Ctuple []) | _ -> raise Use_default let select_push exp = match exp with Cconst_int n -> (Ispecific(Ipush_int n), Ctuple []) | Cconst_pointer n -> (Ispecific(Ipush_int n), Ctuple []) | Cconst_symbol s -> (Ispecific(Ipush_symbol s), Ctuple []) | Cop(Cload ty, [loc]) when ty = typ_float -> let (addr, arg) = select_addressing loc in (Ispecific(Ipush_load_float addr), arg) | Cop(Cload ty, [loc]) when ty = typ_addr or ty = typ_int -> let (addr, arg) = select_addressing loc in (Ispecific(Ipush_load addr), arg) | _ -> (Ispecific(Ipush), exp) let pseudoregs_for_operation op arg res = match op with (* Two-address binary operations *) Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) -> ([|res.(0); arg.(1)|], res, false) (* Two-address unary operations *) | Iintop_imm((Iadd|Isub|Imul|Idiv|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _) -> (res, res, false) (* For shifts with variable shift count, second arg must be in ecx *) | Iintop(Ilsl|Ilsr|Iasr) -> ([|res.(0); ecx|], res, false) (* For div and mod, first arg must be in eax, edx is clobbered, and result is in eax or edx respectively. Keep it simple, just force second argument in ecx. *) | Iintop(Idiv) -> ([| eax; ecx |], [| eax |], true) | Iintop(Imod) -> ([| eax; ecx |], [| edx |], true) (* For mod with immediate operand, arg must not be in eax. Keep it simple, force it in edx. *) | Iintop_imm(Imod, _) -> ([| edx |], [| edx |], true) (* For floating-point operations, the result is always left at the top of the floating-point stack *) | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Ifloatofint |Ispecific(Isubfrev | Idivfrev | Ifloatarithmem(_, _)) -> (arg, [| tos |], false) (* don't move it immediately *) (* Same for a floating-point load *) | Iload(Word, addr) when res.(0).typ = Float -> (arg, [| tos |], false) (* For storing a byte, the argument must be in eax...edx. For storing a halfword, any reg is ok. Keep it simple, just force it to be in edx in both cases. *) | Istore(Word, addr) -> raise Use_default | Istore(chunk, addr) -> let newarg = Array.copy arg in newarg.(0) <- edx; (newarg, res, false) (* Other instructions are more or less regular *) | _ -> raise Use_default let is_immediate (n: int) = true let word_addressed = false (* Calling conventions *) let calling_conventions first_int last_int first_float last_float make_stack arg = let loc = Array.create (Array.length arg) Reg.dummy in let int = ref first_int in let float = ref first_float in let ofs = ref 0 in for i = 0 to Array.length arg - 1 do match arg.(i).typ with Int | Addr as ty -> if !int <= last_int then begin loc.(i) <- phys_reg !int; incr int end else begin loc.(i) <- stack_slot (make_stack !ofs) ty; ofs := !ofs + size_int end | Float -> if !float <= last_float then begin loc.(i) <- phys_reg !float; incr float end else begin loc.(i) <- stack_slot (make_stack !ofs) Float; ofs := !ofs + size_float end done; (loc, !ofs) let incoming ofs = Incoming ofs let outgoing ofs = Outgoing ofs let not_supported ofs = fatal_error "Proc.loc_results: cannot call" let loc_arguments arg = calling_conventions 0 5 100 99 outgoing arg let loc_parameters arg = let (loc, ofs) = calling_conventions 0 5 100 99 incoming arg in loc let loc_results res = let (loc, ofs) = calling_conventions 0 5 100 100 not_supported res in loc let extcall_use_push = true let loc_external_arguments arg = fatal_error "Proc.loc_external_arguments" let loc_external_results res = let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc let loc_exn_bucket = eax (* Registers destroyed by operations *) let destroyed_at_c_call = (* ebx, esi, edi, ebp preserved *) Array.of_list(List.map phys_reg [0;2;3]) let destroyed_at_oper = function Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs | Iop(Iextcall(_, false)) -> destroyed_at_c_call | Iop(Iintop(Idiv | Imod)) -> [| eax; edx |] | Iop(Iintop_imm(Imod, _)) -> [| eax |] | Iop(Ialloc _) -> [| eax |] | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |] | Iop(Iintoffloat) -> [| eax |] | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |] | _ -> [||] let destroyed_at_raise = all_phys_regs (* Maximal register pressure *) let safe_register_pressure op = 4 let max_register_pressure = function Iextcall(_, _) -> [| 4; max_int |] | Iintop(Idiv | Imod) -> [| 5; max_int |] | Ialloc _ | Iintop(Icomp _) | Iintop_imm(Icomp _, _) | Iintoffloat -> [| 6; max_int |] | _ -> [|7; max_int |] (* Reloading of instruction arguments, storing of instruction results. *) let stackp r = match r.loc with Stack _ -> true | _ -> false let reload_test makereg round tst arg = match tst with Iinttest cmp -> if stackp arg.(0) & stackp arg.(1) then [| makereg arg.(0); arg.(1) |] else arg | _ -> arg (* Since #floatregs = 0, pseudoregs of type float will never be reloaded. Hence there is no need to make special cases for floating-point operations. *) let reload_operation makereg round op arg res = match op with Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor|Icomp _|Icheckbound) -> (* One of the two arguments can reside in the stack *) if stackp arg.(0) & stackp arg.(1) then ([|arg.(0); makereg arg.(1)|], res) else (arg, res) | Iintop(Ilsl|Ilsr|Iasr) | Iintop_imm(_, _) | Ifloatofint | Iintoffloat | Ispecific(Ipush) -> (* The argument(s) can be either in register or on stack *) (arg, res) | _ -> (* Other operations: all args and results in registers *) raise Use_default (* Scheduling is turned off because our model does not fit the 486 nor Pentium very well. In particular, it messes up with the float reg stack. The Pentium Pro schedules at run-time much better than what we could do. *) let need_scheduling = false let oper_latency _ = 0 (* Layout of the stack frame *) let num_stack_slots = [| 0; 0 |] let contains_calls = ref false (* Calling the assembler *) let assemble_file infile outfile = Sys.command ("as -o " ^ outfile ^ " " ^ infile) (* Calling the archiver *) let create_archive archive file_list = Misc.remove_file archive; Sys.command ("ar rc " ^ archive ^ " " ^ String.concat " " file_list ^ " && ranlib " ^ archive)