#4800: better compilation of tuple assignment (joint work Gabriel Scherer / Alain Frisch).

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@16501 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Alain Frisch 2015-10-15 12:07:08 +00:00
parent 1c6229235b
commit aaeda7763b
4 changed files with 164 additions and 2 deletions

View File

@ -34,6 +34,8 @@ Language features:
(Jérémie Dimino)
Compilers:
- PR#4800: better compilation of tuple assignment (Gabriel Scherer and
Alain Frisch)
- PR#6501: harden the native-code generator against certain uses of "%identity"
(Xavier Leroy, report by Antoine Miné).
- PR#6636: add --version option

View File

@ -3013,9 +3013,132 @@ let for_trywith param pat_act_list =
(fun () -> Lprim(Praise Raise_reraise, [param]))
param pat_act_list Partial
let for_let loc param pat body =
let simple_for_let loc param pat body =
compile_matching loc None (partial_function loc) param [pat, body] Partial
(* Optimize binding of immediate tuples
The goal of the implementation of 'for_let' below, which replaces
'simple_for_let', is to avoid tuple allocation in cases such as
this one:
let (x,y) =
let foo = ... in
if foo then (1, 2) else (3,4)
in bar
The compiler easily optimizes the simple `let (x,y) = (1,2) in ...`
case (call to Matching.for_multiple_match from Translcore), but
didn't optimize situations where the rhs tuples are hidden under
a more complex context.
The idea comes from Alain Frisch which suggested and implemented
the following compilation method, based on Lassign:
let x = dummy in let y = dummy in
begin
let foo = ... in
if foo then
(let x1 = 1 in let y1 = 2 in x <- x1; y <- y1)
else
(let x2 = 3 in let y2 = 4 in x <- x2; y <- y2)
end;
bar
The current implementation from Gabriel Scherer uses Lstaticcatch /
Lstaticraise instead:
catch
let foo = ... in
if foo then
(let x1 = 1 in let y1 = 2 in exit x1 y1)
else
(let x2 = 3 in let y2 = 4 in exit x2 y2)
with x y ->
bar
The catch/exit is used to avoid duplication of the let body ('bar'
in the example), on 'if' branches for example; it is useless for
linear contexts such as 'let', but we don't need to be careful to
generate nice code because Simplif will remove such useless
catch/exit.
*)
let rec map_return f = function
| Llet (k, id, l1, l2) -> Llet (k, id, l1, map_return f l2)
| Lletrec (l1, l2) -> Lletrec (l1, map_return f l2)
| Lifthenelse (lcond, lthen, lelse) ->
Lifthenelse (lcond, map_return f lthen, map_return f lelse)
| Lsequence (l1, l2) -> Lsequence (l1, map_return f l2)
| Levent (l, ev) -> Levent (map_return f l, ev)
| Ltrywith (l1, id, l2) -> Ltrywith (map_return f l1, id, map_return f l2)
| Lstaticcatch (l1, b, l2) -> Lstaticcatch (map_return f l1, b, map_return f l2)
| Lstaticraise _ | Lprim(Praise _, _) as l -> l
| l -> f l
(* The 'opt' reference indicates if the optimization is worthy.
It is shared by the different calls to 'assign_pat' performed from
'map_return'. For example with the code
let (x, y) = if foo then z else (1,2)
the else-branch will activate the optimization for both branches.
That means that the optimization is activated if *there exists* an
interesting tuple in one hole of the let-rhs context. We could
choose to activate it only if *all* holes are interesting. We made
that choice because being optimistic is extremely cheap (one static
exit/catch overhead in the "wrong cases"), while being pessimistic
can be costly (one unnecessary tuple allocation).
*)
let assign_pat opt nraise catch_ids loc pat lam =
let rec collect acc pat lam = match pat.pat_desc, lam with
| Tpat_tuple patl, Lprim(Pmakeblock _, lams) ->
opt := true;
List.fold_left2 collect acc patl lams
| Tpat_tuple patl, Lconst(Const_block(_, scl)) ->
opt := true;
let collect_const acc pat sc = collect acc pat (Lconst sc) in
List.fold_left2 collect_const acc patl scl
| _ ->
(* pattern idents will be bound in staticcatch (let body), so we
refresh them here to guarantee binders uniqueness *)
let pat_ids = pat_bound_idents pat in
let fresh_ids = List.map (fun (id, _) -> id, Ident.rename id) pat_ids in
(fresh_ids, alpha_pat fresh_ids pat, lam) :: acc
in
(* sublets were accumulated by 'collect' with the leftmost tuple
pattern at the bottom of the list; to respect right-to-left
evaluation order for tuples, we must evaluate sublets
top-to-bottom. To preserve tail-rec, we will fold_left the
reversed list. *)
let rev_sublets = List.rev (collect [] pat lam) in
let exit =
(* build an Ident.tbl to avoid quadratic refreshing costs *)
let add t (id, fresh_id) = Ident.add id fresh_id t in
let add_ids acc (ids, _pat, _lam) = List.fold_left add acc ids in
let tbl = List.fold_left add_ids Ident.empty rev_sublets in
let fresh_var id = Lvar (Ident.find_same id tbl) in
Lstaticraise(nraise, List.map fresh_var catch_ids)
in
let push_sublet code (_ids, pat, lam) = simple_for_let loc lam pat code in
List.fold_left push_sublet exit rev_sublets
let for_let loc param pat body =
match pat.pat_desc with
| Tpat_any | Tpat_var _ ->
(* fast path *)
simple_for_let loc param pat body
| _ ->
let opt = ref false in
let nraise = next_raise_count () in
let catch_ids = List.map fst (pat_bound_idents pat) in
let bind = map_return (assign_pat opt nraise catch_ids loc pat) param in
if !opt then Lstaticcatch(bind, (nraise, catch_ids), body)
else simple_for_let loc param pat body
(* Handling of tupled functions and matchings *)
(* Easy case since variables are available *)

View File

@ -47,7 +47,7 @@ parsecmm.mli parsecmm.ml: parsecmm.mly
lexcmm.ml: lexcmm.mll
@$(OCAMLLEX) -q lexcmm.mll
MLCASES=optargs staticalloc
MLCASES=optargs staticalloc bind_tuples
CASES=fib tak quicksort quicksort2 soli \
arith checkbound tagged-fib tagged-integr tagged-quicksort tagged-tak

View File

@ -0,0 +1,37 @@
(***********************************************************************)
(* *)
(* OCaml *)
(* *)
(* Alain Frisch, LexiFi *)
(* *)
(* Copyright 2014 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the Q Public License version 1.0. *)
(* *)
(***********************************************************************)
(* Check the effectiveness of optimized compilation of tuple binding
Ref: http://caml.inria.fr/mantis/view.php?id=4800
*)
let () =
let x0 = Gc.allocated_bytes () in
let x1 = Gc.allocated_bytes () in
let r = ref 0 in
for i = 1 to 20 do
let (x, y) =
try
if i mod 2 = 0 then (1, i * 2)
else if i mod 5 = 0 then raise Exit
else (-1, i * 3)
with Exit ->
(1, -1)
in
r := !r * x + y
done;
let x2 = Gc.allocated_bytes () in
print_int !r;
assert (!r = 82);
assert(x1 -. x0 = x2 -. x1) (* check no allocation between x1 and x2 *)