New back-end optimization pass: common subexpression elimination (CSE).
(Reuses results of previous computations instead of recomputing them.) (Cherry-picked from branch backend-optim.) Tested on amd64/linux and i386/linux. Other back-ends compile (after assorted updates) but are untested. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14688 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02master
parent
95d98cd978
commit
558f40e344
9
.depend
9
.depend
|
@ -576,6 +576,7 @@ bytecomp/typeopt.cmo : typing/types.cmi typing/typedtree.cmi \
|
|||
bytecomp/typeopt.cmx : typing/types.cmx typing/typedtree.cmx \
|
||||
typing/predef.cmx typing/path.cmx bytecomp/lambda.cmx typing/ident.cmx \
|
||||
typing/env.cmx typing/ctype.cmx bytecomp/typeopt.cmi
|
||||
asmcomp/CSEgen.cmi : asmcomp/mach.cmi
|
||||
asmcomp/asmgen.cmi : bytecomp/lambda.cmi asmcomp/cmm.cmi
|
||||
asmcomp/asmlibrarian.cmi :
|
||||
asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi
|
||||
|
@ -618,6 +619,10 @@ asmcomp/selection.cmi : asmcomp/mach.cmi asmcomp/cmm.cmi
|
|||
asmcomp/spill.cmi : asmcomp/mach.cmi
|
||||
asmcomp/split.cmi : asmcomp/mach.cmi
|
||||
asmcomp/strmatch.cmi : asmcomp/cmm.cmi
|
||||
asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo
|
||||
asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx
|
||||
asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/mach.cmi asmcomp/CSEgen.cmi
|
||||
asmcomp/CSEgen.cmx : asmcomp/reg.cmx asmcomp/mach.cmx asmcomp/CSEgen.cmi
|
||||
asmcomp/arch.cmo :
|
||||
asmcomp/arch.cmx :
|
||||
asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
|
||||
|
@ -629,7 +634,7 @@ asmcomp/asmgen.cmo : bytecomp/translmod.cmi asmcomp/split.cmi \
|
|||
asmcomp/emitaux.cmi asmcomp/emit.cmi asmcomp/deadcode.cmi \
|
||||
utils/config.cmi asmcomp/compilenv.cmi asmcomp/comballoc.cmi \
|
||||
asmcomp/coloring.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \
|
||||
asmcomp/closure.cmi utils/clflags.cmi asmcomp/asmgen.cmi
|
||||
asmcomp/closure.cmi utils/clflags.cmi asmcomp/CSE.cmo asmcomp/asmgen.cmi
|
||||
asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
|
||||
asmcomp/spill.cmx asmcomp/selection.cmx asmcomp/scheduling.cmx \
|
||||
asmcomp/reload.cmx asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/printmach.cmx \
|
||||
|
@ -639,7 +644,7 @@ asmcomp/asmgen.cmx : bytecomp/translmod.cmx asmcomp/split.cmx \
|
|||
asmcomp/emitaux.cmx asmcomp/emit.cmx asmcomp/deadcode.cmx \
|
||||
utils/config.cmx asmcomp/compilenv.cmx asmcomp/comballoc.cmx \
|
||||
asmcomp/coloring.cmx asmcomp/cmmgen.cmx asmcomp/cmm.cmx \
|
||||
asmcomp/closure.cmx utils/clflags.cmx asmcomp/asmgen.cmi
|
||||
asmcomp/closure.cmx utils/clflags.cmx asmcomp/CSE.cmx asmcomp/asmgen.cmi
|
||||
asmcomp/asmlibrarian.cmo : utils/misc.cmi parsing/location.cmi \
|
||||
utils/config.cmi asmcomp/compilenv.cmi asmcomp/cmx_format.cmi \
|
||||
utils/clflags.cmi asmcomp/clambda.cmi utils/ccomp.cmi asmcomp/asmlink.cmi \
|
||||
|
|
2
Changes
2
Changes
|
@ -39,6 +39,8 @@ Compilers:
|
|||
int32/int64/nativeint arithmetic. Constant propagation for floats
|
||||
can be turned off with option -no-float-const-prop, for codes that
|
||||
change FP rounding modes at run-time.
|
||||
- New back-end optimization pass: common subexpression elimination (CSE).
|
||||
(Reuses results of previous computations instead of recomputing them.)
|
||||
- New back-end optimization pass: dead code elimination.
|
||||
(Removes arithmetic and load instructions whose results are unused.)
|
||||
- PR#6269 Optimization of string matching (patch by Benoit Vaugon
|
||||
|
|
12
Makefile
12
Makefile
|
@ -84,7 +84,9 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
|
|||
asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
|
||||
asmcomp/closure.cmo asmcomp/strmatch.cmo asmcomp/cmmgen.cmo \
|
||||
asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
|
||||
asmcomp/comballoc.cmo asmcomp/liveness.cmo \
|
||||
asmcomp/comballoc.cmo \
|
||||
asmcomp/CSEgen.cmo asmcomp/CSE.cmo \
|
||||
asmcomp/liveness.cmo \
|
||||
asmcomp/spill.cmo asmcomp/split.cmo \
|
||||
asmcomp/interf.cmo asmcomp/coloring.cmo \
|
||||
asmcomp/reloadgen.cmo asmcomp/reload.cmo \
|
||||
|
@ -589,6 +591,14 @@ partialclean::
|
|||
|
||||
beforedepend:: asmcomp/selection.ml
|
||||
|
||||
asmcomp/CSE.ml: asmcomp/$(ARCH)/CSE.ml
|
||||
ln -s $(ARCH)/CSE.ml asmcomp/CSE.ml
|
||||
|
||||
partialclean::
|
||||
rm -f asmcomp/CSE.ml
|
||||
|
||||
beforedepend:: asmcomp/CSE.ml
|
||||
|
||||
asmcomp/reload.ml: asmcomp/$(ARCH)/reload.ml
|
||||
ln -s $(ARCH)/reload.ml asmcomp/reload.ml
|
||||
|
||||
|
|
|
@ -0,0 +1,258 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* Common subexpression elimination by value numbering over extended
|
||||
basic blocks. *)
|
||||
|
||||
open Mach
|
||||
|
||||
type valnum = int
|
||||
|
||||
(* We maintain sets of equations of the form
|
||||
valnums = operation(valnums)
|
||||
plus a mapping from registers to value numbers. *)
|
||||
|
||||
type rhs = operation * valnum array
|
||||
|
||||
module Equations =
|
||||
Map.Make(struct type t = rhs let compare = Pervasives.compare end)
|
||||
|
||||
type numbering =
|
||||
{ num_next: int; (* next fresh value number *)
|
||||
num_eqs: valnum array Equations.t; (* mapping rhs -> valnums *)
|
||||
num_reg: valnum Reg.Map.t } (* mapping register -> valnum *)
|
||||
|
||||
let empty_numbering =
|
||||
{ num_next = 0; num_eqs = Equations.empty; num_reg = Reg.Map.empty }
|
||||
|
||||
(** [valnum_reg n r] returns the value number for the contents of
|
||||
register [r]. If none exists, a fresh value number is returned
|
||||
and associated with register [r]. The possibly updated numbering
|
||||
is also returned. [valnum_regs] is similar, but for an array of
|
||||
registers. *)
|
||||
|
||||
let valnum_reg n r =
|
||||
try
|
||||
(n, Reg.Map.find r n.num_reg)
|
||||
with Not_found ->
|
||||
let v = n.num_next in
|
||||
({n with num_next = v + 1; num_reg = Reg.Map.add r v n.num_reg}, v)
|
||||
|
||||
let valnum_regs n rs =
|
||||
let l = Array.length rs in
|
||||
let vs = Array.make l 0 in
|
||||
let n = ref n in
|
||||
for i = 0 to l-1 do
|
||||
let (ni, vi) = valnum_reg !n rs.(i) in
|
||||
vs.(i) <- vi;
|
||||
n := ni
|
||||
done;
|
||||
(!n, vs)
|
||||
|
||||
(* Look up the set of equations for an equation with the given rhs.
|
||||
Return [Some res] if there is one, where [res] is the lhs. *)
|
||||
|
||||
let find_equation n rhs =
|
||||
try
|
||||
Some(Equations.find rhs n.num_eqs)
|
||||
with Not_found ->
|
||||
None
|
||||
|
||||
(* Find a set of registers containing the given value numbers. *)
|
||||
|
||||
let find_regs_containing n vs =
|
||||
match Array.length vs with
|
||||
| 0 -> Some [||]
|
||||
| 1 -> let v = vs.(0) in
|
||||
Reg.Map.fold (fun r v' res -> if v' = v then Some [|r|] else res)
|
||||
n.num_reg None
|
||||
| _ -> assert false
|
||||
|
||||
(* Associate the given value numbers to the given result registers,
|
||||
without adding new equations. *)
|
||||
|
||||
let set_known_regs n rs vs =
|
||||
match Array.length rs with
|
||||
| 0 -> n
|
||||
| 1 -> { n with num_reg = Reg.Map.add rs.(0) vs.(0) n.num_reg }
|
||||
| _ -> assert false
|
||||
|
||||
(* Record the effect of a move: no new equations, but the result reg
|
||||
maps to the same value number as the argument reg. *)
|
||||
|
||||
let set_move n src dst =
|
||||
let (n1, v) = valnum_reg n src in
|
||||
{ n1 with num_reg = Reg.Map.add dst v n1.num_reg }
|
||||
|
||||
(* Record the equation [fresh valnums = rhs] and associate the given
|
||||
result registers [rs] to [fresh valnums]. *)
|
||||
|
||||
let set_fresh_regs n rs rhs =
|
||||
match Array.length rs with
|
||||
| 0 -> { n with num_eqs = Equations.add rhs [||] n.num_eqs }
|
||||
| 1 -> let v = n.num_next in
|
||||
{ num_next = v + 1;
|
||||
num_eqs = Equations.add rhs [|v|] n.num_eqs;
|
||||
num_reg = Reg.Map.add rs.(0) v n.num_reg }
|
||||
| _ -> assert false
|
||||
|
||||
(* Forget everything we know about the given result registers,
|
||||
which are receiving unpredictable values at run-time. *)
|
||||
|
||||
let set_unknown_regs n rs =
|
||||
{ n with num_reg = Array.fold_right Reg.Map.remove rs n.num_reg }
|
||||
|
||||
(* Keep only the equations satisfying the given predicate. *)
|
||||
|
||||
let filter_equations pred n =
|
||||
{ n with num_eqs = Equations.filter (fun (op,_) res -> pred op) n.num_eqs }
|
||||
|
||||
(* Prepend a reg-reg move *)
|
||||
|
||||
let insert_move srcs dsts i =
|
||||
match Array.length srcs with
|
||||
| 0 -> i
|
||||
| 1 -> instr_cons (Iop Imove) srcs dsts i
|
||||
| _ -> assert false
|
||||
|
||||
(* Classification of operations *)
|
||||
|
||||
type op_class =
|
||||
| Op_pure (* pure, produce one result *)
|
||||
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
|
||||
| Op_load (* memory load *)
|
||||
| Op_store of bool (* memory store, false = init, true = assign *)
|
||||
| Op_other (* anything else that does not store in memory *)
|
||||
|
||||
class cse_generic = object (self)
|
||||
|
||||
(* Default classification of operations. Can be overriden in
|
||||
processor-specific files to classify specific operations better. *)
|
||||
|
||||
method class_of_operation op =
|
||||
match op with
|
||||
| Imove | Ispill | Ireload -> assert false (* treated specially *)
|
||||
| Iconst_int _ | Iconst_float _ | Iconst_symbol _
|
||||
| Iconst_blockheader _ -> Op_pure
|
||||
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
|
||||
| Iextcall _ -> assert false (* treated specially *)
|
||||
| Istackoffset _ -> Op_other
|
||||
| Iload(_,_) -> Op_load
|
||||
| Istore(_,_,asg) -> Op_store asg
|
||||
| Ialloc _ -> Op_other
|
||||
| Iintop(Icheckbound) -> Op_checkbound
|
||||
| Iintop _ -> Op_pure
|
||||
| Iintop_imm(Icheckbound, _) -> Op_checkbound
|
||||
| Iintop_imm(_, _) -> Op_pure
|
||||
| Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
|
||||
| Ifloatofint | Iintoffloat -> Op_pure
|
||||
| Ispecific _ -> Op_other
|
||||
|
||||
(* Operations that are so cheap that it isn't worth factoring them. *)
|
||||
|
||||
method is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int _ | Iconst_blockheader _ -> true
|
||||
| _ -> false
|
||||
|
||||
(* Forget all equations involving memory loads. Performed after a
|
||||
non-initializing store *)
|
||||
|
||||
method private kill_loads n =
|
||||
filter_equations (fun o -> self#class_of_operation o <> Op_load) n
|
||||
|
||||
(* Keep only equations involving checkbounds, and forget register values.
|
||||
Performed across a call. *)
|
||||
|
||||
method private keep_checkbounds n =
|
||||
filter_equations (fun o -> self#class_of_operation o = Op_checkbound)
|
||||
{n with num_reg = Reg.Map.empty }
|
||||
|
||||
(* Perform CSE on the given instruction [i] and its successors.
|
||||
[n] is the value numbering current at the beginning of [i]. *)
|
||||
|
||||
method private cse n i =
|
||||
match i.desc with
|
||||
| Iend | Ireturn | Iop(Itailcall_ind) | Iop(Itailcall_imm _)
|
||||
| Iexit _ | Iraise _ ->
|
||||
i
|
||||
| Iop (Imove | Ispill | Ireload) ->
|
||||
(* For moves, we associate the same value number to the result reg
|
||||
as to the argument reg. *)
|
||||
let n1 = set_move n i.arg.(0) i.res.(0) in
|
||||
{i with next = self#cse n1 i.next}
|
||||
| Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
|
||||
(* We don't perform CSE across function calls, as it increases
|
||||
register pressure too much. We do remember the checkbound
|
||||
instructions already performed, though, since their reuse
|
||||
cannot increase register pressure. *)
|
||||
let n1 = self#keep_checkbounds n in
|
||||
{i with next = self#cse n1 i.next}
|
||||
| Iop op ->
|
||||
begin match self#class_of_operation op with
|
||||
| Op_pure | Op_checkbound | Op_load ->
|
||||
assert (Array.length i.res <= 1);
|
||||
let (n1, varg) = valnum_regs n i.arg in
|
||||
begin match find_equation n1 (op, varg) with
|
||||
| Some vres ->
|
||||
(* This operation was computed earlier. *)
|
||||
let n2 = set_known_regs n1 i.res vres in
|
||||
begin match find_regs_containing n1 vres with
|
||||
| Some res when not (self#is_cheap_operation op) ->
|
||||
(* We can replace res <- op args with r <- move res.
|
||||
If the operation is very cheap to compute, e.g.
|
||||
an integer constant, don't bother. *)
|
||||
insert_move res i.res (self#cse n2 i.next)
|
||||
| _ ->
|
||||
{i with next = self#cse n2 i.next}
|
||||
end
|
||||
| None ->
|
||||
(* This operation produces a result we haven't seen earlier. *)
|
||||
let n2 = set_fresh_regs n1 i.res (op, varg) in
|
||||
{i with next = self#cse n2 i.next}
|
||||
end
|
||||
| Op_store false | Op_other ->
|
||||
(* An initializing store or an "other" operation do not invalidate
|
||||
any equations, but we do not know anything about the results. *)
|
||||
let n1 = set_unknown_regs n i.res in
|
||||
{i with next = self#cse n1 i.next}
|
||||
| Op_store true ->
|
||||
(* A non-initializing store: it can invalidate
|
||||
anything we know about prior loads. *)
|
||||
let n1 = set_unknown_regs (self#kill_loads n) i.res in
|
||||
{i with next = self#cse n1 i.next}
|
||||
end
|
||||
(* For control structures, we set the numbering to empty at every
|
||||
join point, but propagate the current numbering across fork points. *)
|
||||
| Iifthenelse(test, ifso, ifnot) ->
|
||||
{i with desc = Iifthenelse(test, self#cse n ifso, self#cse n ifnot);
|
||||
next = self#cse empty_numbering i.next}
|
||||
| Iswitch(index, cases) ->
|
||||
{i with desc = Iswitch(index, Array.map (self#cse n) cases);
|
||||
next = self#cse empty_numbering i.next}
|
||||
| Iloop(body) ->
|
||||
{i with desc = Iloop(self#cse empty_numbering body);
|
||||
next = self#cse empty_numbering i.next}
|
||||
| Icatch(nfail, body, handler) ->
|
||||
{i with desc = Icatch(nfail, self#cse n body, self#cse empty_numbering handler);
|
||||
next = self#cse empty_numbering i.next}
|
||||
| Itrywith(body, handler) ->
|
||||
{i with desc = Itrywith(self#cse n body, self#cse empty_numbering handler);
|
||||
next = self#cse empty_numbering i.next}
|
||||
|
||||
method fundecl f =
|
||||
{f with fun_body = self#cse empty_numbering f.fun_body}
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* Common subexpression elimination by value numbering over extended
|
||||
basic blocks. *)
|
||||
|
||||
type op_class =
|
||||
| Op_pure (* pure, produce one result *)
|
||||
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
|
||||
| Op_load (* memory load *)
|
||||
| Op_store of bool (* memory store, false = init, true = assign *)
|
||||
| Op_other (* anything else that does not store in memory *)
|
||||
|
||||
class cse_generic : object
|
||||
(* The following methods can be overriden to handle processor-specific
|
||||
operations. *)
|
||||
|
||||
method class_of_operation: Mach.operation -> op_class
|
||||
|
||||
method is_cheap_operation: Mach.operation -> bool
|
||||
(* Operations that are so cheap that it isn't worth factoring them. *)
|
||||
|
||||
(* The following method is the entry point and should not be overridden *)
|
||||
method fundecl: Mach.fundecl -> Mach.fundecl
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for the AMD64 *)
|
||||
|
||||
open Arch
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic as super
|
||||
|
||||
method! class_of_operation op =
|
||||
match op with
|
||||
| Ispecific(Ilea _) -> Op_pure
|
||||
| Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
|
||||
| Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
|
||||
| Ispecific(Ioffset_loc(_, _)) -> Op_store true
|
||||
| Ispecific(Ifloatarithmem _) -> Op_load
|
||||
| _ -> super#class_of_operation op
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -33,8 +33,8 @@ type addressing_mode =
|
|||
|
||||
type specific_operation =
|
||||
Ilea of addressing_mode (* "lea" gives scaled adds *)
|
||||
| Istore_int of nativeint * addressing_mode (* Store an integer constant *)
|
||||
| Istore_symbol of string * addressing_mode (* Store a symbol *)
|
||||
| Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
|
||||
| Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
|
||||
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
|
||||
| Ifloatarithmem of float_operation * addressing_mode
|
||||
(* Float arith operation with memory *)
|
||||
|
@ -101,10 +101,14 @@ let print_addressing printreg addr ppf arg =
|
|||
let print_specific_operation printreg op ppf arg =
|
||||
match op with
|
||||
| Ilea addr -> print_addressing printreg addr ppf arg
|
||||
| Istore_int(n, addr) ->
|
||||
fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n
|
||||
| Istore_symbol(lbl, addr) ->
|
||||
fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
|
||||
| Istore_int(n, addr, is_assign) ->
|
||||
fprintf ppf "[%a] := %nd %s"
|
||||
(print_addressing printreg addr) arg n
|
||||
(if is_assign then "(assign)" else "(init)")
|
||||
| Istore_symbol(lbl, addr, is_assign) ->
|
||||
fprintf ppf "[%a] := \"%s\" %s"
|
||||
(print_addressing printreg addr) arg lbl
|
||||
(if is_assign then "(assign)" else "(init)")
|
||||
| Ioffset_loc(n, addr) ->
|
||||
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
|
||||
| Isqrtf ->
|
||||
|
|
|
@ -449,7 +449,7 @@ let emit_instr fallthrough i =
|
|||
| Double | Double_u ->
|
||||
` movsd {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
begin match chunk with
|
||||
| Word ->
|
||||
` movq {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
|
||||
|
@ -542,9 +542,9 @@ let emit_instr fallthrough i =
|
|||
` cvttsd2siq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Ispecific(Ilea addr)) ->
|
||||
` leaq {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Ispecific(Istore_int(n, addr))) ->
|
||||
| Lop(Ispecific(Istore_int(n, addr, _))) ->
|
||||
` movq ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Istore_symbol(s, addr))) ->
|
||||
| Lop(Ispecific(Istore_symbol(s, addr, _))) ->
|
||||
assert (not !pic_code && not !Clflags.dlcode);
|
||||
` movq ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
|
||||
|
|
|
@ -443,7 +443,7 @@ let emit_instr fallthrough i =
|
|||
| Double | Double_u ->
|
||||
` movsd {emit_reg dest}, REAL8 PTR {emit_addressing addr i.arg 0}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
begin match chunk with
|
||||
| Word ->
|
||||
` mov QWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
|
||||
|
@ -532,9 +532,9 @@ let emit_instr fallthrough i =
|
|||
` cvttsd2si {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
|
||||
| Lop(Ispecific(Ilea addr)) ->
|
||||
` lea {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Istore_int(n, addr))) ->
|
||||
| Lop(Ispecific(Istore_int(n, addr, _))) ->
|
||||
` mov QWORD PTR {emit_addressing addr i.arg 0}, {emit_nativeint n}\n`
|
||||
| Lop(Ispecific(Istore_symbol(s, addr))) ->
|
||||
| Lop(Ispecific(Istore_symbol(s, addr, _))) ->
|
||||
assert (not !pic_code);
|
||||
add_used_symbol s;
|
||||
` mov QWORD PTR {emit_addressing addr i.arg 0}, OFFSET {emit_symbol s}\n`
|
||||
|
|
|
@ -259,7 +259,7 @@ let destroyed_at_oper = function
|
|||
| Iop(Iextcall(_, false)) -> destroyed_at_c_call
|
||||
| Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
|
||||
-> [| rax; rdx |]
|
||||
| Iop(Istore(Single, _)) -> [| rxmm15 |]
|
||||
| Iop(Istore(Single, _, _)) -> [| rxmm15 |]
|
||||
| Iop(Ialloc _ | Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _))
|
||||
-> [| rax |]
|
||||
| Iswitch(_, _) -> [| rax; rdx |]
|
||||
|
@ -290,7 +290,7 @@ let max_register_pressure = function
|
|||
if fp then [| 10; 16 |] else [| 11; 16 |]
|
||||
| Ialloc _ | Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
|
||||
if fp then [| 11; 16 |] else [| 12; 16 |]
|
||||
| Istore(Single, _) ->
|
||||
| Istore(Single, _, _) ->
|
||||
if fp then [| 12; 15 |] else [| 13; 15 |]
|
||||
| _ -> if fp then [| 12; 16 |] else [| 13; 16 |]
|
||||
|
||||
|
|
|
@ -152,20 +152,20 @@ method select_addressing chunk exp =
|
|||
| Ascaledadd(e1, e2, scale) ->
|
||||
(Iindexed2scaled(scale, d), Ctuple[e1; e2])
|
||||
|
||||
method! select_store addr exp =
|
||||
method! select_store is_assign addr exp =
|
||||
match exp with
|
||||
Cconst_int n when self#is_immediate n ->
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
|
||||
| (Cconst_natint n | Cconst_blockheader n) when self#is_immediate_natint n ->
|
||||
(Ispecific(Istore_int(n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_pointer n when self#is_immediate n ->
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_natpointer n when self#is_immediate_natint n ->
|
||||
(Ispecific(Istore_int(n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_symbol s when not (!pic_code || !Clflags.dlcode) ->
|
||||
(Ispecific(Istore_symbol(s, addr)), Ctuple [])
|
||||
(Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
|
||||
| _ ->
|
||||
super#select_store addr exp
|
||||
super#select_store is_assign addr exp
|
||||
|
||||
method! select_operation op args =
|
||||
match op with
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for ARM *)
|
||||
|
||||
open Arch
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic as super
|
||||
|
||||
method! class_of_operation op =
|
||||
match op with
|
||||
| Ispecific(Ishiftcheckbound _) -> Op_checkbound
|
||||
| Ispecific _ -> Op_pure
|
||||
| _ -> super#class_of_operation op
|
||||
|
||||
method! is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int n | Iconst_blockheader n -> n <= 255n && n >= 0n
|
||||
| _ -> false
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -283,7 +283,7 @@ let num_literals = ref 0
|
|||
|
||||
(* Label a floating-point literal *)
|
||||
let float_literal f =
|
||||
let repr = Int64.bits_of_float cst in
|
||||
let repr = Int64.bits_of_float f in
|
||||
try
|
||||
List.assoc repr !float_literals
|
||||
with Not_found ->
|
||||
|
@ -391,8 +391,7 @@ let emit_instr i =
|
|||
| Lop(Iconst_int n | Iconst_blockheader n) ->
|
||||
emit_intconst i.res.(0) (Nativeint.to_int32 n)
|
||||
| Lop(Iconst_float f) when !fpu = Soft ->
|
||||
` @ {emit_string f}\n`;
|
||||
let bits = Int64.bits_of_float (float_of_string f) in
|
||||
let bits = Int64.bits_of_float f in
|
||||
let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
|
||||
and low_bits = Int64.to_int32 bits in
|
||||
if is_immediate low_bits || is_immediate high_bits then begin
|
||||
|
@ -407,7 +406,7 @@ let emit_instr i =
|
|||
end
|
||||
| Lop(Iconst_float f) when !fpu = VFPv2 ->
|
||||
let lbl = float_literal f in
|
||||
` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`;
|
||||
` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`;
|
||||
1
|
||||
| Lop(Iconst_float f) ->
|
||||
let encode imm =
|
||||
|
@ -426,12 +425,12 @@ let emit_instr i =
|
|||
let ex = ((ex + 3) land 0x07) lxor 0x04 in
|
||||
Some((sg lsl 7) lor (ex lsl 4) lor mn)
|
||||
end in
|
||||
begin match encode (Int64.bits_of_float (float_of_string f)) with
|
||||
begin match encode (Int64.bits_of_float f) with
|
||||
None ->
|
||||
let lbl = float_literal f in
|
||||
` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
|
||||
` fldd {emit_reg i.res.(0)}, {emit_label lbl}\n`
|
||||
| Some imm8 ->
|
||||
` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
|
||||
` fconstd {emit_reg i.res.(0)}, #{emit_int imm8}\n`
|
||||
end; 1
|
||||
| Lop(Iconst_symbol s) ->
|
||||
emit_load_symbol_addr i.res.(0) s
|
||||
|
@ -509,10 +508,10 @@ let emit_instr i =
|
|||
| Double_u -> "fldd"
|
||||
| _ (* 32-bit quantities *) -> "ldr" in
|
||||
` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
|
||||
| Lop(Istore(Single, addr)) when !fpu >= VFPv2 ->
|
||||
| Lop(Istore(Single, addr, _)) when !fpu >= VFPv2 ->
|
||||
` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
|
||||
` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
|
||||
| Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
|
||||
| Lop(Istore((Double | Double_u), addr, _)) when !fpu = Soft ->
|
||||
(* Use STM or STRD if possible *)
|
||||
begin match i.arg.(0), i.arg.(1), addr with
|
||||
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
|
||||
|
@ -526,7 +525,7 @@ let emit_instr i =
|
|||
` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
|
||||
` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
|
||||
end
|
||||
| Lop(Istore(size, addr)) ->
|
||||
| Lop(Istore(size, addr, _)) ->
|
||||
let r = i.arg.(0) in
|
||||
let instr =
|
||||
match size with
|
||||
|
|
|
@ -203,7 +203,7 @@ let destroyed_at_oper = function
|
|||
[| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *)
|
||||
| Iop(Iintop Imulh) when !arch < ARMv6 ->
|
||||
[| phys_reg 8 |] (* r12 destroyed *)
|
||||
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
|
||||
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
|
||||
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
|
||||
| _ -> [||]
|
||||
|
||||
|
@ -222,7 +222,7 @@ let max_register_pressure = function
|
|||
| Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
|
||||
| Iconst_symbol _ when !pic_code -> [| 7; 16; 32 |]
|
||||
| Iintoffloat | Ifloatofint
|
||||
| Iload(Single, _) | Istore(Single, _) -> [| 9; 15; 31 |]
|
||||
| Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
|
||||
| _ -> [| 9; 16; 32 |]
|
||||
|
||||
(* Pure operations (without any side effect besides updating their result
|
||||
|
@ -231,7 +231,8 @@ let max_register_pressure = function
|
|||
let op_is_pure = function
|
||||
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
|
||||
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
|
||||
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false
|
||||
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
|
||||
| Ispecific(Ishiftcheckbound _) -> false
|
||||
| _ -> true
|
||||
|
||||
(* Layout of the stack *)
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for ARM64 *)
|
||||
|
||||
open Arch
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic as super
|
||||
|
||||
method! class_of_operation op =
|
||||
match op with
|
||||
| Ispecific(Ishiftcheckbound _) -> Op_checkbound
|
||||
| Ispecific _ -> Op_pure
|
||||
| _ -> super#class_of_operation op
|
||||
|
||||
method! is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int n | Iconst_blockheader n -> n <= 65535n && n >= 0n
|
||||
| _ -> false
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -328,9 +328,9 @@ let emit_instr i =
|
|||
| Lop(Iconst_float f) ->
|
||||
let b = Int64.bits_of_float f in
|
||||
if b = 0L then
|
||||
` fmov {emit_reg i.res.(0)}, xzr /* {emit_string f} */\n`
|
||||
` fmov {emit_reg i.res.(0)}, xzr\n`
|
||||
else if is_immediate_float b then
|
||||
` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b} /* {emit_string f} */\n`
|
||||
` fmov {emit_reg i.res.(0)}, #{emit_printf "0x%Lx" b}\n`
|
||||
else begin
|
||||
let lbl = float_literal b in
|
||||
` adrp {emit_reg reg_tmp1}, {emit_label lbl}\n`;
|
||||
|
@ -388,7 +388,7 @@ let emit_instr i =
|
|||
| Word | Double | Double_u ->
|
||||
` ldr {emit_reg dst}, {emit_addressing addr base}\n`
|
||||
end
|
||||
| Lop(Istore(size, addr)) ->
|
||||
| Lop(Istore(size, addr, _)) ->
|
||||
let src = i.arg.(0) in
|
||||
let base =
|
||||
match addr with
|
||||
|
|
|
@ -177,7 +177,7 @@ let destroyed_at_oper = function
|
|||
destroyed_at_c_call
|
||||
| Iop(Ialloc _) ->
|
||||
[| reg_x15 |]
|
||||
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
|
||||
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
|
||||
[| reg_d7 |] (* d7 / s7 destroyed *)
|
||||
| _ -> [||]
|
||||
|
||||
|
@ -194,9 +194,19 @@ let max_register_pressure = function
|
|||
| Iextcall(_, _) -> [| 10; 8 |]
|
||||
| Ialloc _ -> [| 25; 32 |]
|
||||
| Iintoffloat | Ifloatofint
|
||||
| Iload(Single, _) | Istore(Single, _) -> [| 26; 31 |]
|
||||
| Iload(Single, _) | Istore(Single, _, _) -> [| 26; 31 |]
|
||||
| _ -> [| 26; 32 |]
|
||||
|
||||
(* Pure operations (without any side effect besides updating their result
|
||||
registers). *)
|
||||
|
||||
let op_is_pure = function
|
||||
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
|
||||
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
|
||||
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _)
|
||||
| Ispecific(Ishiftcheckbound _) -> false
|
||||
| _ -> true
|
||||
|
||||
(* Layout of the stack *)
|
||||
|
||||
let num_stack_slots = [| 0; 0 |]
|
||||
|
|
|
@ -64,6 +64,8 @@ let compile_fundecl (ppf : formatter) fd_cmm =
|
|||
++ pass_dump_if ppf dump_selection "After instruction selection"
|
||||
++ Comballoc.fundecl
|
||||
++ pass_dump_if ppf dump_combine "After allocation combining"
|
||||
++ CSE.fundecl
|
||||
++ pass_dump_if ppf dump_cse "After CSE"
|
||||
++ liveness ppf
|
||||
++ Deadcode.fundecl
|
||||
++ pass_dump_if ppf dump_live "Liveness analysis"
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for the i386 *)
|
||||
|
||||
open Cmm
|
||||
open Arch
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic as super
|
||||
|
||||
method! class_of_operation op =
|
||||
match op with
|
||||
(* Operations that affect the floating-point stack cannot be factored *)
|
||||
| Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf
|
||||
| Iintoffloat | Ifloatofint
|
||||
| Iload((Single | Double | Double_u), _) -> Op_other
|
||||
(* Specific ops *)
|
||||
| Ispecific(Ilea _) -> Op_pure
|
||||
| Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg
|
||||
| Ispecific(Istore_symbol(_, _, is_asg)) -> Op_store is_asg
|
||||
| Ispecific(Ioffset_loc(_, _)) -> Op_store true
|
||||
| Ispecific _ -> Op_other
|
||||
| _ -> super#class_of_operation op
|
||||
|
||||
method! is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int _ | Iconst_blockheader _ -> true
|
||||
| Iconst_symbol _ -> true
|
||||
| _ -> false
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -31,8 +31,8 @@ type addressing_mode =
|
|||
|
||||
type specific_operation =
|
||||
Ilea of addressing_mode (* Lea gives scaled adds *)
|
||||
| Istore_int of nativeint * addressing_mode (* Store an integer constant *)
|
||||
| Istore_symbol of string * addressing_mode (* Store a symbol *)
|
||||
| Istore_int of nativeint * addressing_mode * bool (* Store an integer constant *)
|
||||
| Istore_symbol of string * addressing_mode * bool (* Store a symbol *)
|
||||
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
|
||||
| Ipush (* Push regs on stack *)
|
||||
| Ipush_int of nativeint (* Push an integer constant *)
|
||||
|
@ -105,11 +105,14 @@ let print_addressing printreg addr ppf arg =
|
|||
let print_specific_operation printreg op ppf arg =
|
||||
match op with
|
||||
| Ilea addr -> print_addressing printreg addr ppf arg
|
||||
| Istore_int(n, addr) ->
|
||||
fprintf ppf "[%a] := %s" (print_addressing printreg addr) arg
|
||||
(Nativeint.to_string n)
|
||||
| Istore_symbol(lbl, addr) ->
|
||||
fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
|
||||
| Istore_int(n, addr, is_assign) ->
|
||||
fprintf ppf "[%a] := %nd %s"
|
||||
(print_addressing printreg addr) arg n
|
||||
(if is_assign then "(assign)" else "(init)")
|
||||
| Istore_symbol(lbl, addr, is_assign) ->
|
||||
fprintf ppf "[%a] := \"%s\" %s"
|
||||
(print_addressing printreg addr) arg lbl
|
||||
(if is_assign then "(assign)" else "(init)")
|
||||
| Ioffset_loc(n, addr) ->
|
||||
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
|
||||
| Ipush ->
|
||||
|
|
|
@ -544,7 +544,7 @@ let emit_instr fallthrough i =
|
|||
| Double | Double_u ->
|
||||
` fldl {emit_addressing addr i.arg 0}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
begin match chunk with
|
||||
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
|
||||
` movl {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
|
||||
|
@ -684,9 +684,9 @@ let emit_instr fallthrough i =
|
|||
stack_offset := !stack_offset + 8
|
||||
| Lop(Ispecific(Ilea addr)) ->
|
||||
` lea {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Ispecific(Istore_int(n, addr))) ->
|
||||
| Lop(Ispecific(Istore_int(n, addr, _))) ->
|
||||
` movl ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Istore_symbol(s, addr))) ->
|
||||
| Lop(Ispecific(Istore_symbol(s, addr, _))) ->
|
||||
` movl ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
|
||||
` addl ${emit_int n}, {emit_addressing addr i.arg 0}\n`
|
||||
|
|
|
@ -480,7 +480,7 @@ let emit_instr i =
|
|||
| Double | Double_u ->
|
||||
` fld REAL8 PTR {emit_addressing addr i.arg 0}\n`
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
begin match chunk with
|
||||
| Word | Thirtytwo_signed | Thirtytwo_unsigned ->
|
||||
` mov DWORD PTR {emit_addressing addr i.arg 1}, {emit_reg i.arg.(0)}\n`
|
||||
|
@ -618,9 +618,9 @@ let emit_instr i =
|
|||
stack_offset := !stack_offset + 8
|
||||
| Lop(Ispecific(Ilea addr)) ->
|
||||
` lea {emit_reg i.res.(0)}, DWORD PTR {emit_addressing addr i.arg 0}\n`
|
||||
| Lop(Ispecific(Istore_int(n, addr))) ->
|
||||
| Lop(Ispecific(Istore_int(n, addr, _))) ->
|
||||
` mov DWORD PTR {emit_addressing addr i.arg 0},{emit_nativeint n}\n`
|
||||
| Lop(Ispecific(Istore_symbol(s, addr))) ->
|
||||
| Lop(Ispecific(Istore_symbol(s, addr, _))) ->
|
||||
add_used_symbol s ;
|
||||
` mov DWORD PTR {emit_addressing addr i.arg 0},OFFSET {emit_symbol s}\n`
|
||||
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
|
||||
|
|
|
@ -135,7 +135,7 @@ let pseudoregs_for_operation op arg res =
|
|||
(* For storing a byte, the argument must be in eax...edx.
|
||||
(But for a short, any reg will do!)
|
||||
Keep it simple, just force the argument to be in edx. *)
|
||||
| Istore((Byte_unsigned | Byte_signed), addr) ->
|
||||
| Istore((Byte_unsigned | Byte_signed), addr, _) ->
|
||||
let newarg = Array.copy arg in
|
||||
newarg.(0) <- edx;
|
||||
(newarg, res, false)
|
||||
|
@ -178,20 +178,20 @@ method select_addressing chunk exp =
|
|||
| (Ascaledadd(e1, e2, scale), d) ->
|
||||
(Iindexed2scaled(scale, d), Ctuple[e1; e2])
|
||||
|
||||
method! select_store addr exp =
|
||||
method! select_store is_assign addr exp =
|
||||
match exp with
|
||||
Cconst_int n ->
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
|
||||
| (Cconst_natint n | Cconst_blockheader n) ->
|
||||
(Ispecific(Istore_int(n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_pointer n ->
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_natpointer n ->
|
||||
(Ispecific(Istore_int(n, addr)), Ctuple [])
|
||||
(Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
|
||||
| Cconst_symbol s ->
|
||||
(Ispecific(Istore_symbol(s, addr)), Ctuple [])
|
||||
(Ispecific(Istore_symbol(s, addr, is_assign)), Ctuple [])
|
||||
| _ ->
|
||||
super#select_store addr exp
|
||||
super#select_store is_assign addr exp
|
||||
|
||||
method! select_operation op args =
|
||||
match op with
|
||||
|
|
|
@ -46,7 +46,7 @@ type operation =
|
|||
| Iextcall of string * bool
|
||||
| Istackoffset of int
|
||||
| Iload of Cmm.memory_chunk * Arch.addressing_mode
|
||||
| Istore of Cmm.memory_chunk * Arch.addressing_mode
|
||||
| Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
|
||||
| Ialloc of int
|
||||
| Iintop of integer_operation
|
||||
| Iintop_imm of integer_operation * int
|
||||
|
|
|
@ -43,10 +43,11 @@ type operation =
|
|||
| Icall_imm of string
|
||||
| Itailcall_ind
|
||||
| Itailcall_imm of string
|
||||
| Iextcall of string * bool
|
||||
| Iextcall of string * bool (* false = noalloc, true = alloc *)
|
||||
| Istackoffset of int
|
||||
| Iload of Cmm.memory_chunk * Arch.addressing_mode
|
||||
| Istore of Cmm.memory_chunk * Arch.addressing_mode
|
||||
| Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
|
||||
(* false = initialization, true = assignment *)
|
||||
| Ialloc of int
|
||||
| Iintop of integer_operation
|
||||
| Iintop_imm of integer_operation * int
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for the PowerPC *)
|
||||
|
||||
open Arch
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic as super
|
||||
|
||||
method! class_of_operation op =
|
||||
match op with
|
||||
| Ispecific(Imultaddf | Imultsubf) -> Op_pure
|
||||
| Ispecific(Ialloc_far _) -> Op_other
|
||||
| _ -> super#class_of_operation op
|
||||
|
||||
method! is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int n | Iconst_blockheader n -> n <= 32767n && n >= -32768n
|
||||
| _ -> false
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -333,7 +333,7 @@ let instr_size = function
|
|||
if chunk = Byte_signed
|
||||
then load_store_size addr + 1
|
||||
else load_store_size addr
|
||||
| Lop(Istore(chunk, addr)) -> load_store_size addr
|
||||
| Lop(Istore(chunk, addr, _)) -> load_store_size addr
|
||||
| Lop(Ialloc n) -> 4
|
||||
| Lop(Ispecific(Ialloc_far n)) -> 5
|
||||
| Lop(Iintop Imod) -> 3
|
||||
|
@ -548,7 +548,7 @@ let rec emit_instr i dslot =
|
|||
emit_load_store loadinstr addr i.arg 0 i.res.(0);
|
||||
if chunk = Byte_signed then
|
||||
` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
let storeinstr =
|
||||
match chunk with
|
||||
Byte_unsigned | Byte_signed -> "stb"
|
||||
|
|
|
@ -44,7 +44,7 @@ method reload_retaddr_latency = 12
|
|||
method oper_issue_cycles = function
|
||||
Iconst_float _ | Iconst_symbol _ -> 2
|
||||
| Iload(_, Ibased(_, _)) -> 2
|
||||
| Istore(_, Ibased(_, _)) -> 2
|
||||
| Istore(_, Ibased(_, _), _) -> 2
|
||||
| Ialloc _ -> 4
|
||||
| Iintop(Imod) -> 40 (* assuming full stall *)
|
||||
| Iintop(Icomp _) -> 4
|
||||
|
|
|
@ -119,12 +119,13 @@ let operation op arg ppf res =
|
|||
| Iload(chunk, addr) ->
|
||||
fprintf ppf "%s[%a]"
|
||||
(Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg
|
||||
| Istore(chunk, addr) ->
|
||||
fprintf ppf "%s[%a] := %a"
|
||||
| Istore(chunk, addr, is_assign) ->
|
||||
fprintf ppf "%s[%a] := %a %s"
|
||||
(Printcmm.chunk chunk)
|
||||
(Arch.print_addressing reg addr)
|
||||
(Array.sub arg 1 (Array.length arg - 1))
|
||||
reg arg.(0)
|
||||
(if is_assign then "(assign)" else "(init)")
|
||||
| Ialloc n -> fprintf ppf "alloc %i" n
|
||||
| Iintop(op) -> fprintf ppf "%a%s%a" reg arg.(0) (intop op) reg arg.(1)
|
||||
| Iintop_imm(op, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intop op) n
|
||||
|
|
|
@ -165,7 +165,7 @@ method private instr_in_basic_block instr =
|
|||
load or store instructions (e.g. on the I386). *)
|
||||
|
||||
method is_store = function
|
||||
Istore(_, _) -> true
|
||||
Istore(_, _, _) -> true
|
||||
| _ -> false
|
||||
|
||||
method is_load = function
|
||||
|
|
|
@ -209,8 +209,8 @@ method virtual select_addressing :
|
|||
|
||||
(* Default instruction selection for stores (of words) *)
|
||||
|
||||
method select_store addr arg =
|
||||
(Istore(Word, addr), arg)
|
||||
method select_store is_assign addr arg =
|
||||
(Istore(Word, addr, is_assign), arg)
|
||||
|
||||
(* call marking methods, documented in selectgen.mli *)
|
||||
|
||||
|
@ -256,10 +256,10 @@ method select_operation op args =
|
|||
| (Cstore chunk, [arg1; arg2]) ->
|
||||
let (addr, eloc) = self#select_addressing chunk arg1 in
|
||||
if chunk = Word then begin
|
||||
let (op, newarg2) = self#select_store addr arg2 in
|
||||
let (op, newarg2) = self#select_store true addr arg2 in
|
||||
(op, [newarg2; eloc])
|
||||
end else begin
|
||||
(Istore(chunk, addr), [arg2; eloc])
|
||||
(Istore(chunk, addr, true), [arg2; eloc])
|
||||
(* Inversion addr/datum in Istore *)
|
||||
end
|
||||
| (Calloc, _) -> (Ialloc 0, args)
|
||||
|
@ -677,16 +677,16 @@ method emit_stores env data regs_addr =
|
|||
ref (Arch.offset_addressing Arch.identity_addressing (-Arch.size_int)) in
|
||||
List.iter
|
||||
(fun e ->
|
||||
let (op, arg) = self#select_store !a e in
|
||||
let (op, arg) = self#select_store false !a e in
|
||||
match self#emit_expr env arg with
|
||||
None -> assert false
|
||||
| Some regs ->
|
||||
match op with
|
||||
Istore(_, _) ->
|
||||
Istore(_, _, _) ->
|
||||
for i = 0 to Array.length regs - 1 do
|
||||
let r = regs.(i) in
|
||||
let kind = if r.typ = Float then Double_u else Word in
|
||||
self#insert (Iop(Istore(kind, !a)))
|
||||
self#insert (Iop(Istore(kind, !a, false)))
|
||||
(Array.append [|r|] regs_addr) [||];
|
||||
a := Arch.offset_addressing !a (size_component r.typ)
|
||||
done
|
||||
|
|
|
@ -35,7 +35,8 @@ class virtual selector_generic : object
|
|||
method select_condition : Cmm.expression -> Mach.test * Cmm.expression
|
||||
(* Can be overridden to deal with special test instructions *)
|
||||
method select_store :
|
||||
Arch.addressing_mode -> Cmm.expression -> Mach.operation * Cmm.expression
|
||||
bool -> Arch.addressing_mode -> Cmm.expression ->
|
||||
Mach.operation * Cmm.expression
|
||||
(* Can be overridden to deal with special store constant instructions *)
|
||||
method regs_for : Cmm.machtype -> Reg.t array
|
||||
(* Return an array of fresh registers of the given type.
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* OCaml *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 2014 Institut National de Recherche en Informatique et *)
|
||||
(* en Automatique. All rights reserved. This file is distributed *)
|
||||
(* under the terms of the Q Public License version 1.0. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* CSE for Sparc *)
|
||||
|
||||
open Mach
|
||||
open CSEgen
|
||||
|
||||
class cse = object (self)
|
||||
|
||||
inherit cse_generic (* as super *)
|
||||
|
||||
method! is_cheap_operation op =
|
||||
match op with
|
||||
| Iconst_int n | Iconst_blockheader n -> n <= 4095n && n >= -4096n
|
||||
| _ -> false
|
||||
|
||||
end
|
||||
|
||||
let fundecl f =
|
||||
(new cse)#fundecl f
|
||||
|
|
@ -375,7 +375,7 @@ let rec emit_instr i dslot =
|
|||
| _ -> "ld" in
|
||||
emit_load loadinstr addr i.arg dest
|
||||
end
|
||||
| Lop(Istore(chunk, addr)) ->
|
||||
| Lop(Istore(chunk, addr, _)) ->
|
||||
let src = i.arg.(0) in
|
||||
begin match chunk with
|
||||
Double_u ->
|
||||
|
@ -612,7 +612,7 @@ let is_one_instr i =
|
|||
| Iconst_int n | Iconst_blockheader n -> is_native_immediate n
|
||||
| Istackoffset _ -> true
|
||||
| Iload(_, Iindexed n) -> i.res.(0).typ <> Float && is_immediate n
|
||||
| Istore(_, Iindexed n) -> i.arg.(0).typ <> Float && is_immediate n
|
||||
| Istore(_, Iindexed n, _) -> i.arg.(0).typ <> Float && is_immediate n
|
||||
| Iintop(op) -> is_one_instr_op op
|
||||
| Iintop_imm(op, _) -> is_one_instr_op op
|
||||
| Iaddf | Isubf | Imulf | Idivf -> true
|
||||
|
|
|
@ -389,6 +389,10 @@ let mk_dcombine f =
|
|||
"-dcombine", Arg.Unit f, " (undocumented)"
|
||||
;;
|
||||
|
||||
let mk_dcse f =
|
||||
"-dcse", Arg.Unit f, " (undocumented)"
|
||||
;;
|
||||
|
||||
let mk_dlive f =
|
||||
"-dlive", Arg.Unit f, " (undocumented)"
|
||||
;;
|
||||
|
@ -599,6 +603,7 @@ module type Optcomp_options = sig
|
|||
val _dcmm : unit -> unit
|
||||
val _dsel : unit -> unit
|
||||
val _dcombine : unit -> unit
|
||||
val _dcse : unit -> unit
|
||||
val _dlive : unit -> unit
|
||||
val _dspill : unit -> unit
|
||||
val _dsplit : unit -> unit
|
||||
|
@ -651,6 +656,7 @@ module type Opttop_options = sig
|
|||
val _dcmm : unit -> unit
|
||||
val _dsel : unit -> unit
|
||||
val _dcombine : unit -> unit
|
||||
val _dcse : unit -> unit
|
||||
val _dlive : unit -> unit
|
||||
val _dspill : unit -> unit
|
||||
val _dsplit : unit -> unit
|
||||
|
@ -848,6 +854,7 @@ struct
|
|||
mk_dcmm F._dcmm;
|
||||
mk_dsel F._dsel;
|
||||
mk_dcombine F._dcombine;
|
||||
mk_dcse F._dcse;
|
||||
mk_dlive F._dlive;
|
||||
mk_dspill F._dspill;
|
||||
mk_dsplit F._dsplit;
|
||||
|
@ -900,6 +907,7 @@ module Make_opttop_options (F : Opttop_options) = struct
|
|||
mk_dcmm F._dcmm;
|
||||
mk_dsel F._dsel;
|
||||
mk_dcombine F._dcombine;
|
||||
mk_dcse F._dcse;
|
||||
mk_dlive F._dlive;
|
||||
mk_dspill F._dspill;
|
||||
mk_dsplit F._dsplit;
|
||||
|
|
|
@ -177,6 +177,7 @@ module type Optcomp_options = sig
|
|||
val _dcmm : unit -> unit
|
||||
val _dsel : unit -> unit
|
||||
val _dcombine : unit -> unit
|
||||
val _dcse : unit -> unit
|
||||
val _dlive : unit -> unit
|
||||
val _dspill : unit -> unit
|
||||
val _dsplit : unit -> unit
|
||||
|
@ -229,6 +230,7 @@ module type Opttop_options = sig
|
|||
val _dcmm : unit -> unit
|
||||
val _dsel : unit -> unit
|
||||
val _dcombine : unit -> unit
|
||||
val _dcse : unit -> unit
|
||||
val _dlive : unit -> unit
|
||||
val _dspill : unit -> unit
|
||||
val _dsplit : unit -> unit
|
||||
|
|
|
@ -135,6 +135,7 @@ module Options = Main_args.Make_optcomp_options (struct
|
|||
let _dcmm = set dump_cmm
|
||||
let _dsel = set dump_selection
|
||||
let _dcombine = set dump_combine
|
||||
let _dcse = set dump_cse
|
||||
let _dlive () = dump_live := true; Printmach.print_live := true
|
||||
let _dspill = set dump_spill
|
||||
let _dsplit = set dump_split
|
||||
|
|
|
@ -106,6 +106,7 @@ module Options = Main_args.Make_optcomp_options (struct
|
|||
let _dcmm = option "-dcmm"
|
||||
let _dsel = option "-dsel"
|
||||
let _dcombine = option "-dcombine"
|
||||
let _dcse = option "-dcse"
|
||||
let _dlive = option "-dlive"
|
||||
let _dspill = option "-dspill"
|
||||
let _dsplit = option "-dsplit"
|
||||
|
|
|
@ -73,6 +73,7 @@ let optimize_for_speed = ref true (* -compact *)
|
|||
|
||||
and dump_cmm = ref false (* -dcmm *)
|
||||
let dump_selection = ref false (* -dsel *)
|
||||
let dump_cse = ref false (* -dcse *)
|
||||
let dump_live = ref false (* -dlive *)
|
||||
let dump_spill = ref false (* -dspill *)
|
||||
let dump_split = ref false (* -dsplit *)
|
||||
|
|
|
@ -68,6 +68,7 @@ val keep_asm_file : bool ref
|
|||
val optimize_for_speed : bool ref
|
||||
val dump_cmm : bool ref
|
||||
val dump_selection : bool ref
|
||||
val dump_cse : bool ref
|
||||
val dump_live : bool ref
|
||||
val dump_spill : bool ref
|
||||
val dump_split : bool ref
|
||||
|
|
Loading…
Reference in New Issue