132 lines
4.5 KiB
OCaml
132 lines
4.5 KiB
OCaml
|
(***********************************************************************)
|
||
|
(* *)
|
||
|
(* Objective Caml *)
|
||
|
(* *)
|
||
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||
|
(* *)
|
||
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
||
|
(* en Automatique. All rights reserved. This file is distributed *)
|
||
|
(* under the terms of the Q Public License version 1.0. *)
|
||
|
(* *)
|
||
|
(***********************************************************************)
|
||
|
|
||
|
(* $Id$ *)
|
||
|
|
||
|
open Cmm
|
||
|
open Reg
|
||
|
open Arch
|
||
|
open Mach
|
||
|
open Proc
|
||
|
open Linearize
|
||
|
|
||
|
(* (Feeble attempt at) Instruction scheduling for the IA64 *)
|
||
|
|
||
|
(* Prior to scheduling, we rewrite the code to split reg -> stack
|
||
|
and stack -> reg moves into two instructions: a "stackaddr" instruction
|
||
|
that takes the address of the stack variable, and a "spill" or "reload"
|
||
|
instruction that performs the stack access proper. This way,
|
||
|
the "stackaddr" instructions can be scheduled earlier. *)
|
||
|
|
||
|
let temp_counter = ref 0
|
||
|
let temporaries = [| phys_reg 80 (*r14*); phys_reg 81 (*r15*) |]
|
||
|
let new_temp () =
|
||
|
let r = temporaries.(!temp_counter) in
|
||
|
incr temp_counter;
|
||
|
if !temp_counter >= Array.length temporaries then temp_counter := 0;
|
||
|
r
|
||
|
|
||
|
let rec fixup_stack_accesses i =
|
||
|
match i.desc with
|
||
|
Lend -> i
|
||
|
| Lop(Imove | Ireload | Ispill) ->
|
||
|
let src = i.arg.(0) and dst = i.res.(0) in
|
||
|
begin match (src.loc, dst.loc) with
|
||
|
(Reg _, Reg _) ->
|
||
|
{ i with next = fixup_stack_accesses i.next }
|
||
|
| (Stack _, Reg _) ->
|
||
|
let tmp = new_temp() in
|
||
|
instr_cons (Lop(Ispecific Istackaddr)) [|src|] [|tmp|]
|
||
|
(instr_cons (Lop Ireload) [|tmp|] [|dst|]
|
||
|
(fixup_stack_accesses i.next))
|
||
|
| (Reg _, Stack _) ->
|
||
|
let tmp = new_temp() in
|
||
|
instr_cons (Lop(Ispecific Istackaddr)) [|dst|] [|tmp|]
|
||
|
(instr_cons (Lop Ispill) [|src; tmp|] [||]
|
||
|
(fixup_stack_accesses i.next))
|
||
|
| (_, _) ->
|
||
|
assert false
|
||
|
end
|
||
|
| _ ->
|
||
|
{ i with next = fixup_stack_accesses i.next }
|
||
|
|
||
|
(* The basic-block scheduler proper *)
|
||
|
|
||
|
class scheduler = object (self)
|
||
|
|
||
|
inherit Schedgen.scheduler_generic as super
|
||
|
|
||
|
(* Latencies (in cycles). Based on the Itanium, with considerable poetic
|
||
|
licence. All latencies are tripled in an attempt to favor dual- or
|
||
|
triple-issue. *)
|
||
|
|
||
|
(* Most integer operations: 1 cycle --> 3
|
||
|
Shifts with variable count: 2 cycles --> 6
|
||
|
Float add, sub, mult, multadd: 5 cycles --> 15
|
||
|
FP integer multiply: 7 cycles --> 21
|
||
|
Int loads: 2 cycles --> 6
|
||
|
Float loads: 9 cycles --> 27
|
||
|
GP to FP register move: 7 cycles --> 21
|
||
|
FP to GP register move: 2 cycles --> 6
|
||
|
*)
|
||
|
|
||
|
method oper_latency = function
|
||
|
Ireload -> 6
|
||
|
| Iload(kind, _) ->
|
||
|
begin match kind with Single | Double | Double_u -> 27 | _ -> 6 end
|
||
|
| Iconst_symbol _ -> 6 (* turned into a load *)
|
||
|
| Iconst_float _ -> 21 (* ends up in a GP to FP register move *)
|
||
|
| Iintop(Imul) -> 6 (* ends up in a FP to GP register move *)
|
||
|
| Iintop(Ilsl | Ilsr | Iasr) -> 6
|
||
|
| Iaddf -> 15
|
||
|
| Isubf -> 15
|
||
|
| Imulf -> 15
|
||
|
| Idivf -> 15
|
||
|
| Ispecific(Imultaddf | Imultsubf | Isubmultf) -> 15
|
||
|
| _ -> 3
|
||
|
|
||
|
(* Issue cycles. Rough approximations. E.g. an operation that expands
|
||
|
into 2 dependent one-cycle operations is considered to waste 3 issue slots.
|
||
|
(Depending on the grouping with surrounding instructions, this could
|
||
|
be as low as 2 or as high as 6.) We adjust upward if the first operation
|
||
|
has longer latency. *)
|
||
|
|
||
|
method oper_issue_cycles = function
|
||
|
Iconst_float _ -> 3
|
||
|
| Iconst_symbol _ -> 3
|
||
|
| Iload((Byte_signed | Sixteen_signed | Thirtytwo_signed), _) -> 6
|
||
|
| Ialloc _ -> 4
|
||
|
| Iintop(Imul) -> 25
|
||
|
| Iintop(Icomp _) -> 5
|
||
|
| Iintop(Icheckbound) -> 3
|
||
|
| Iintop_imm(Imul, _) -> 12
|
||
|
| Iintop_imm(Idiv, _) -> 12
|
||
|
| Iintop_imm(Imod, _) -> 12
|
||
|
| Iintop_imm(Icheckbound, _) -> 3
|
||
|
| Idivf -> 24
|
||
|
| Ifloatofint -> 45
|
||
|
| Iintoffloat -> 45
|
||
|
| _ -> 1
|
||
|
|
||
|
(* Say that Istoreincr terminates a basic block *)
|
||
|
|
||
|
method oper_in_basic_block = function
|
||
|
Ispecific(Istoreincr _) -> false
|
||
|
| op -> super#oper_in_basic_block op
|
||
|
|
||
|
end
|
||
|
|
||
|
let schedule_fundecl = (new scheduler)#schedule_fundecl
|
||
|
|
||
|
let fundecl f =
|
||
|
schedule_fundecl {f with fun_body = fixup_stack_accesses f.fun_body}
|