ocaml/asmcomp/ia64/scheduling.ml

(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

open Cmm
open Reg
open Arch
open Mach
open Proc
open Linearize

(* (Feeble attempt at) Instruction scheduling for the IA64 *)

(* Prior to scheduling, we rewrite the code to split reg -> stack
   and stack -> reg moves into two instructions: a "stackaddr" instruction
   that takes the address of the stack variable, and a "spill" or "reload"
   instruction that performs the stack access proper.  This way,
   the "stackaddr" instructions can be scheduled earlier. *)

let temp_counter = ref 0
let temporaries = [| phys_reg 80 (*r14*); phys_reg 81 (*r15*) |]
let new_temp () =
  let r = temporaries.(!temp_counter) in
  incr temp_counter;
  if !temp_counter >= Array.length temporaries then temp_counter := 0;
  r

let rec fixup_stack_accesses i =
  match i.desc with
    Lend -> i
  | Lop(Imove | Ireload | Ispill) ->
      let src = i.arg.(0) and dst = i.res.(0) in
      begin match (src.loc, dst.loc) with
        (Reg _, Reg _) ->
          { i with next = fixup_stack_accesses i.next }
      | (Stack _, Reg _) ->
          let tmp = new_temp() in
          instr_cons (Lop(Ispecific Istackaddr)) [|src|] [|tmp|]
              (instr_cons (Lop Ireload) [|tmp|] [|dst|]
                  (fixup_stack_accesses i.next))
      | (Reg _, Stack _) ->
          let tmp = new_temp() in
          instr_cons (Lop(Ispecific Istackaddr)) [|dst|] [|tmp|]
              (instr_cons (Lop Ispill) [|src; tmp|] [||]
                  (fixup_stack_accesses i.next))
      | (_, _) ->
          assert false
      end
  | _ ->
      { i with next = fixup_stack_accesses i.next }

(* The basic-block scheduler proper *)

class scheduler = object (self)

inherit Schedgen.scheduler_generic as super

(* Latencies (in cycles). Based on the Itanium, with considerable poetic
   licence.  All latencies are tripled in an attempt to favor dual- or
   triple-issue. *)

(* Most integer operations: 1 cycle --> 3
   Shifts with variable count: 2 cycles --> 6
   Float add, sub, mult, multadd: 5 cycles --> 15
   FP integer multiply: 7 cycles --> 21
   Int loads: 2 cycles --> 6
   Float loads: 9 cycles --> 27
   GP to FP register move: 7 cycles --> 21
   FP to GP register move: 2 cycles --> 6
*)

method oper_latency = function
    Ireload -> 6
  | Iload(kind, _) ->
      begin match kind with Single | Double | Double_u -> 27 | _ -> 6 end
  | Iconst_symbol _ -> 6 (* turned into a load *)
  | Iconst_float _ -> 21 (* ends up in a GP to FP register move *)
  | Iintop(Imul) -> 6 (* ends up in a FP to GP register move *)
  | Iintop(Ilsl | Ilsr | Iasr) -> 6
  | Iaddf -> 15
  | Isubf -> 15
  | Imulf -> 15
  | Idivf -> 15
  | Ispecific(Imultaddf | Imultsubf | Isubmultf) -> 15
  | _ -> 3

(* Issue cycles.  Rough approximations.  E.g. an operation that expands
   into 2 dependent one-cycle operations is considered to waste 3 issue slots.
   (Depending on the grouping with surrounding instructions, this could
   be as low as 2 or as high as 6.)  We adjust upward if the first operation
   has longer latency. *)

method oper_issue_cycles = function
    Iconst_float _ -> 3
  | Iconst_symbol _ -> 3
  | Iload((Byte_signed | Sixteen_signed | Thirtytwo_signed), _) -> 6
  | Ialloc _ -> 4
  | Iintop(Imul) -> 25
  | Iintop(Icomp _) -> 5
  | Iintop(Icheckbound) -> 3
  | Iintop_imm(Imul, _) -> 12
  | Iintop_imm(Idiv, _) -> 12
  | Iintop_imm(Imod, _) -> 12
  | Iintop_imm(Icheckbound, _) -> 3
  | Idivf -> 24
  | Ifloatofint -> 45
  | Iintoffloat -> 45
  | _ -> 1

(* Say that Istoreincr terminates a basic block *)

method oper_in_basic_block = function
    Ispecific(Istoreincr _) -> false
  | op -> super#oper_in_basic_block op

end

let schedule_fundecl = (new scheduler)#schedule_fundecl

let fundecl f =
  schedule_fundecl {f with fun_body = fixup_stack_accesses f.fun_body}