365 lines
14 KiB
OCaml
365 lines
14 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. *)
|
|
(* *)
|
|
(* All rights reserved. This file is distributed under the terms of *)
|
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
|
(* special exception on linking described in the file LICENSE. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
(* Common subexpression elimination by value numbering over extended
|
|
basic blocks. *)
|
|
|
|
open Mach
|
|
|
|
type valnum = int
|
|
|
|
(* Classification of operations *)
|
|
|
|
type op_class =
|
|
| Op_pure (* pure arithmetic, produce one or several result *)
|
|
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
|
|
| Op_load (* memory load *)
|
|
| Op_store of bool (* memory store, false = init, true = assign *)
|
|
| Op_other (* anything else that does not allocate nor store in memory *)
|
|
|
|
(* We maintain sets of equations of the form
|
|
valnums = operation(valnums)
|
|
plus a mapping from registers to valnums (value numbers). *)
|
|
|
|
type rhs = operation * valnum array
|
|
|
|
module Equations = struct
|
|
module Rhs_map =
|
|
Map.Make(struct type t = rhs let compare = Pervasives.compare end)
|
|
|
|
type 'a t =
|
|
{ load_equations : 'a Rhs_map.t;
|
|
other_equations : 'a Rhs_map.t }
|
|
|
|
let empty =
|
|
{ load_equations = Rhs_map.empty;
|
|
other_equations = Rhs_map.empty }
|
|
|
|
let add op_class op v m =
|
|
match op_class with
|
|
| Op_load ->
|
|
{ m with load_equations = Rhs_map.add op v m.load_equations }
|
|
| _ ->
|
|
{ m with other_equations = Rhs_map.add op v m.other_equations }
|
|
|
|
let find op_class op m =
|
|
match op_class with
|
|
| Op_load ->
|
|
Rhs_map.find op m.load_equations
|
|
| _ ->
|
|
Rhs_map.find op m.other_equations
|
|
|
|
let remove_loads m =
|
|
{ load_equations = Rhs_map.empty;
|
|
other_equations = m.other_equations }
|
|
end
|
|
|
|
type numbering =
|
|
{ num_next: int; (* next fresh value number *)
|
|
num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *)
|
|
num_reg: valnum Reg.Map.t } (* mapping register -> valnum *)
|
|
|
|
let empty_numbering =
|
|
{ num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
|
|
|
|
(** Generate a fresh value number [v] and associate it to register [r].
|
|
Returns a pair [(n',v)] with the updated value numbering [n']. *)
|
|
|
|
let fresh_valnum_reg n r =
|
|
let v = n.num_next in
|
|
({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
|
|
|
|
(* Same, for a set of registers [rs]. *)
|
|
|
|
let array_fold_transf (f: numbering -> 'a -> numbering * 'b) n (a: 'a array)
|
|
: numbering * 'b array =
|
|
match Array.length a with
|
|
| 0 -> (n, [||])
|
|
| 1 -> let (n', b) = f n a.(0) in (n', [|b|])
|
|
| l -> let b = Array.make l 0 and n = ref n in
|
|
for i = 0 to l - 1 do
|
|
let (n', x) = f !n a.(i) in
|
|
b.(i) <- x; n := n'
|
|
done;
|
|
(!n, b)
|
|
|
|
let fresh_valnum_regs n rs =
|
|
array_fold_transf fresh_valnum_reg n rs
|
|
|
|
(** [valnum_reg n r] returns the value number for the contents of
|
|
register [r]. If none exists, a fresh value number is returned
|
|
and associated with register [r]. The possibly updated numbering
|
|
is also returned. [valnum_regs] is similar, but for an array of
|
|
registers. *)
|
|
|
|
let valnum_reg n r =
|
|
try
|
|
(n, Reg.Map.find r n.num_reg)
|
|
with Not_found ->
|
|
fresh_valnum_reg n r
|
|
|
|
let valnum_regs n rs =
|
|
array_fold_transf valnum_reg n rs
|
|
|
|
(* Look up the set of equations for an equation with the given rhs.
|
|
Return [Some res] if there is one, where [res] is the lhs. *)
|
|
|
|
let find_equation op_class n rhs =
|
|
try
|
|
Some(Equations.find op_class rhs n.num_eqs)
|
|
with Not_found ->
|
|
None
|
|
|
|
(* Find a register containing the given value number. *)
|
|
|
|
let find_reg_containing n v =
|
|
Reg.Map.fold (fun r v' res -> if v' = v then Some r else res)
|
|
n.num_reg None
|
|
|
|
(* Find a set of registers containing the given value numbers. *)
|
|
|
|
let find_regs_containing n vs =
|
|
match Array.length vs with
|
|
| 0 -> Some [||]
|
|
| 1 -> begin match find_reg_containing n vs.(0) with
|
|
| None -> None
|
|
| Some r -> Some [|r|]
|
|
end
|
|
| l -> let rs = Array.make l Reg.dummy in
|
|
begin try
|
|
for i = 0 to l - 1 do
|
|
match find_reg_containing n vs.(i) with
|
|
| None -> raise Exit
|
|
| Some r -> rs.(i) <- r
|
|
done;
|
|
Some rs
|
|
with Exit ->
|
|
None
|
|
end
|
|
|
|
(* Associate the given value number to the given result register,
|
|
without adding new equations. *)
|
|
|
|
let set_known_reg n r v =
|
|
{ n with num_reg = Reg.Map.add r v n.num_reg }
|
|
|
|
(* Associate the given value numbers to the given result registers,
|
|
without adding new equations. *)
|
|
|
|
let array_fold2 f n a1 a2 =
|
|
let l = Array.length a1 in
|
|
assert (l = Array.length a2);
|
|
let n = ref n in
|
|
for i = 0 to l - 1 do n := f !n a1.(i) a2.(i) done;
|
|
!n
|
|
|
|
let set_known_regs n rs vs =
|
|
array_fold2 set_known_reg n rs vs
|
|
|
|
(* Record the effect of a move: no new equations, but the result reg
|
|
maps to the same value number as the argument reg. *)
|
|
|
|
let set_move n src dst =
|
|
let (n1, v) = valnum_reg n src in
|
|
{ n1 with num_reg = Reg.Map.add dst v n1.num_reg }
|
|
|
|
(* Record the equation [fresh valnums = rhs] and associate the given
|
|
result registers [rs] to [fresh valnums]. *)
|
|
|
|
let set_fresh_regs n rs rhs op_class =
|
|
let (n1, vs) = fresh_valnum_regs n rs in
|
|
{ n1 with num_eqs = Equations.add op_class rhs vs n.num_eqs }
|
|
|
|
(* Forget everything we know about the given result registers,
|
|
which are receiving unpredictable values at run-time. *)
|
|
|
|
let set_unknown_regs n rs =
|
|
{ n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
|
|
|
|
(* Keep only the equations satisfying the given predicate. *)
|
|
|
|
let remove_load_numbering n =
|
|
{ n with num_eqs = Equations.remove_loads n.num_eqs }
|
|
|
|
(* Forget everything we know about registers of type [Addr]. *)
|
|
|
|
let kill_addr_regs n =
|
|
{ n with num_reg =
|
|
Reg.Map.filter (fun r _n -> r.Reg.typ <> Cmm.Addr) n.num_reg }
|
|
|
|
(* Prepend a set of moves before [i] to assign [srcs] to [dsts]. *)
|
|
|
|
let insert_single_move i src dst = instr_cons (Iop Imove) [|src|] [|dst|] i
|
|
|
|
let insert_move srcs dsts i =
|
|
match Array.length srcs with
|
|
| 0 -> i
|
|
| 1 -> instr_cons (Iop Imove) srcs dsts i
|
|
| _ -> (* Parallel move: first copy srcs into tmps one by one,
|
|
then copy tmps into dsts one by one *)
|
|
let tmps = Reg.createv_like srcs in
|
|
let i1 = array_fold2 insert_single_move i tmps dsts in
|
|
array_fold2 insert_single_move i1 srcs tmps
|
|
|
|
class cse_generic = object (self)
|
|
|
|
(* Default classification of operations. Can be overriden in
|
|
processor-specific files to classify specific operations better. *)
|
|
|
|
method class_of_operation op =
|
|
match op with
|
|
| Imove | Ispill | Ireload -> assert false (* treated specially *)
|
|
| Iconst_int _ | Iconst_float _ | Iconst_symbol _ -> Op_pure
|
|
| Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _
|
|
| Iextcall _ -> assert false (* treated specially *)
|
|
| Istackoffset _ -> Op_other
|
|
| Iload(_,_) -> Op_load
|
|
| Istore(_,_,asg) -> Op_store asg
|
|
| Ialloc _ -> assert false (* treated specially *)
|
|
| Iintop(Icheckbound _) -> Op_checkbound
|
|
| Iintop _ -> Op_pure
|
|
| Iintop_imm(Icheckbound _, _) -> Op_checkbound
|
|
| Iintop_imm(_, _) -> Op_pure
|
|
| Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
|
|
| Ifloatofint | Iintoffloat -> Op_pure
|
|
| Ispecific _ -> Op_other
|
|
|
|
(* Operations that are so cheap that it isn't worth factoring them. *)
|
|
|
|
method is_cheap_operation op =
|
|
match op with
|
|
| Iconst_int _ -> true
|
|
| _ -> false
|
|
|
|
(* Forget all equations involving memory loads. Performed after a
|
|
non-initializing store *)
|
|
|
|
method private kill_loads n =
|
|
remove_load_numbering n
|
|
|
|
(* Perform CSE on the given instruction [i] and its successors.
|
|
[n] is the value numbering current at the beginning of [i]. *)
|
|
|
|
method private cse n i =
|
|
match i.desc with
|
|
| Iend | Ireturn | Iop(Itailcall_ind _) | Iop(Itailcall_imm _)
|
|
| Iexit _ | Iraise _ ->
|
|
i
|
|
| Iop (Imove | Ispill | Ireload) ->
|
|
(* For moves, we associate the same value number to the result reg
|
|
as to the argument reg. *)
|
|
let n1 = set_move n i.arg.(0) i.res.(0) in
|
|
{i with next = self#cse n1 i.next}
|
|
| Iop (Icall_ind _ | Icall_imm _ | Iextcall _) ->
|
|
(* For function calls, we should at least forget:
|
|
- equations involving memory loads, since the callee can
|
|
perform arbitrary memory stores;
|
|
- equations involving arithmetic operations that can
|
|
produce [Addr]-typed derived pointers into the heap
|
|
(see below for Ialloc);
|
|
- mappings from hardware registers to value numbers,
|
|
since the callee does not preserve these registers.
|
|
That doesn't leave much usable information: checkbounds
|
|
could be kept, but won't be usable for CSE as one of their
|
|
arguments is always a memory load. For simplicity, we
|
|
just forget everything. *)
|
|
{i with next = self#cse empty_numbering i.next}
|
|
| Iop (Ialloc _) ->
|
|
(* For allocations, we must avoid extending the live range of a
|
|
pseudoregister across the allocation if this pseudoreg
|
|
is a derived heap pointer (a pointer into the heap that does
|
|
not point to the beginning of a Caml block). PR#6484 is an
|
|
example of this situation. Such pseudoregs have type [Addr].
|
|
Pseudoregs with types other than [Addr] can be kept.
|
|
Moreover, allocation can trigger the asynchronous execution
|
|
of arbitrary Caml code (finalizer, signal handler, context
|
|
switch), which can contain non-initializing stores.
|
|
Hence, all equations over loads must be removed. *)
|
|
let n1 = kill_addr_regs (self#kill_loads n) in
|
|
let n2 = set_unknown_regs n1 i.res in
|
|
{i with next = self#cse n2 i.next}
|
|
| Iop op ->
|
|
begin match self#class_of_operation op with
|
|
| (Op_pure | Op_checkbound | Op_load) as op_class ->
|
|
let (n1, varg) = valnum_regs n i.arg in
|
|
let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in
|
|
begin match find_equation op_class n1 (op, varg) with
|
|
| Some vres ->
|
|
(* This operation was computed earlier. *)
|
|
(* Are there registers that hold the results computed earlier? *)
|
|
begin match find_regs_containing n1 vres with
|
|
| Some res when (not (self#is_cheap_operation op))
|
|
&& (not (Proc.regs_are_volatile res)) ->
|
|
(* We can replace res <- op args with r <- move res,
|
|
provided res are stable (non-volatile) registers.
|
|
If the operation is very cheap to compute, e.g.
|
|
an integer constant, don't bother. *)
|
|
let n3 = set_known_regs n1 i.res vres in
|
|
(* This is n1 above and not n2 because the move
|
|
does not destroy any regs *)
|
|
insert_move res i.res (self#cse n3 i.next)
|
|
| _ ->
|
|
(* We already computed the operation but lost its
|
|
results. Associate the result registers to
|
|
the result valnums of the previous operation. *)
|
|
let n3 = set_known_regs n2 i.res vres in
|
|
{i with next = self#cse n3 i.next}
|
|
end
|
|
| None ->
|
|
(* This operation produces a result we haven't seen earlier. *)
|
|
let n3 = set_fresh_regs n2 i.res (op, varg) op_class in
|
|
{i with next = self#cse n3 i.next}
|
|
end
|
|
| Op_store false | Op_other ->
|
|
(* An initializing store or an "other" operation do not invalidate
|
|
any equations, but we do not know anything about the results. *)
|
|
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
|
|
let n2 = set_unknown_regs n1 i.res in
|
|
{i with next = self#cse n2 i.next}
|
|
| Op_store true ->
|
|
(* A non-initializing store can invalidate
|
|
anything we know about prior loads. *)
|
|
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
|
|
let n2 = set_unknown_regs n1 i.res in
|
|
let n3 = self#kill_loads n2 in
|
|
{i with next = self#cse n3 i.next}
|
|
end
|
|
(* For control structures, we set the numbering to empty at every
|
|
join point, but propagate the current numbering across fork points. *)
|
|
| Iifthenelse(test, ifso, ifnot) ->
|
|
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
|
|
{i with desc = Iifthenelse(test, self#cse n1 ifso, self#cse n1 ifnot);
|
|
next = self#cse empty_numbering i.next}
|
|
| Iswitch(index, cases) ->
|
|
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
|
|
{i with desc = Iswitch(index, Array.map (self#cse n1) cases);
|
|
next = self#cse empty_numbering i.next}
|
|
| Iloop(body) ->
|
|
{i with desc = Iloop(self#cse empty_numbering body);
|
|
next = self#cse empty_numbering i.next}
|
|
| Icatch(nfail, body, handler) ->
|
|
{i with desc = Icatch(nfail, self#cse n body,
|
|
self#cse empty_numbering handler);
|
|
next = self#cse empty_numbering i.next}
|
|
| Itrywith(body, handler) ->
|
|
{i with desc = Itrywith(self#cse n body,
|
|
self#cse empty_numbering handler);
|
|
next = self#cse empty_numbering i.next}
|
|
|
|
method fundecl f =
|
|
{f with fun_body = self#cse empty_numbering f.fun_body}
|
|
|
|
end
|