ocaml/asmcomp/ia64/scheduling.ml

132 lines
4.5 KiB
OCaml

(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* $Id$ *)
open Cmm
open Reg
open Arch
open Mach
open Proc
open Linearize
(* (Feeble attempt at) Instruction scheduling for the IA64 *)
(* Prior to scheduling, we rewrite the code to split reg -> stack
and stack -> reg moves into two instructions: a "stackaddr" instruction
that takes the address of the stack variable, and a "spill" or "reload"
instruction that performs the stack access proper. This way,
the "stackaddr" instructions can be scheduled earlier. *)
let temp_counter = ref 0
let temporaries = [| phys_reg 80 (*r14*); phys_reg 81 (*r15*) |]
let new_temp () =
let r = temporaries.(!temp_counter) in
incr temp_counter;
if !temp_counter >= Array.length temporaries then temp_counter := 0;
r
let rec fixup_stack_accesses i =
match i.desc with
Lend -> i
| Lop(Imove | Ireload | Ispill) ->
let src = i.arg.(0) and dst = i.res.(0) in
begin match (src.loc, dst.loc) with
(Reg _, Reg _) ->
{ i with next = fixup_stack_accesses i.next }
| (Stack _, Reg _) ->
let tmp = new_temp() in
instr_cons (Lop(Ispecific Istackaddr)) [|src|] [|tmp|]
(instr_cons (Lop Ireload) [|tmp|] [|dst|]
(fixup_stack_accesses i.next))
| (Reg _, Stack _) ->
let tmp = new_temp() in
instr_cons (Lop(Ispecific Istackaddr)) [|dst|] [|tmp|]
(instr_cons (Lop Ispill) [|src; tmp|] [||]
(fixup_stack_accesses i.next))
| (_, _) ->
assert false
end
| _ ->
{ i with next = fixup_stack_accesses i.next }
(* The basic-block scheduler proper *)
class scheduler = object (self)
inherit Schedgen.scheduler_generic as super
(* Latencies (in cycles). Based on the Itanium, with considerable poetic
licence. All latencies are tripled in an attempt to favor dual- or
triple-issue. *)
(* Most integer operations: 1 cycle --> 3
Shifts with variable count: 2 cycles --> 6
Float add, sub, mult, multadd: 5 cycles --> 15
FP integer multiply: 7 cycles --> 21
Int loads: 2 cycles --> 6
Float loads: 9 cycles --> 27
GP to FP register move: 7 cycles --> 21
FP to GP register move: 2 cycles --> 6
*)
method oper_latency = function
Ireload -> 6
| Iload(kind, _) ->
begin match kind with Single | Double | Double_u -> 27 | _ -> 6 end
| Iconst_symbol _ -> 6 (* turned into a load *)
| Iconst_float _ -> 21 (* ends up in a GP to FP register move *)
| Iintop(Imul) -> 6 (* ends up in a FP to GP register move *)
| Iintop(Ilsl | Ilsr | Iasr) -> 6
| Iaddf -> 15
| Isubf -> 15
| Imulf -> 15
| Idivf -> 15
| Ispecific(Imultaddf | Imultsubf | Isubmultf) -> 15
| _ -> 3
(* Issue cycles. Rough approximations. E.g. an operation that expands
into 2 dependent one-cycle operations is considered to waste 3 issue slots.
(Depending on the grouping with surrounding instructions, this could
be as low as 2 or as high as 6.) We adjust upward if the first operation
has longer latency. *)
method oper_issue_cycles = function
Iconst_float _ -> 3
| Iconst_symbol _ -> 3
| Iload((Byte_signed | Sixteen_signed | Thirtytwo_signed), _) -> 6
| Ialloc _ -> 4
| Iintop(Imul) -> 25
| Iintop(Icomp _) -> 5
| Iintop(Icheckbound) -> 3
| Iintop_imm(Imul, _) -> 12
| Iintop_imm(Idiv, _) -> 12
| Iintop_imm(Imod, _) -> 12
| Iintop_imm(Icheckbound, _) -> 3
| Idivf -> 24
| Ifloatofint -> 45
| Iintoffloat -> 45
| _ -> 1
(* Say that Istoreincr terminates a basic block *)
method oper_in_basic_block = function
Ispecific(Istoreincr _) -> false
| op -> super#oper_in_basic_block op
end
let schedule_fundecl = (new scheduler)#schedule_fundecl
let fundecl f =
schedule_fundecl {f with fun_body = fixup_stack_accesses f.fun_body}