481 lines
18 KiB
OCaml
481 lines
18 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Mark Shinwell and Leo White, Jane Street Europe *)
|
|
(* *)
|
|
(* Copyright 2015--2018 Jane Street Group LLC *)
|
|
(* *)
|
|
(* All rights reserved. This file is distributed under the terms of *)
|
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
|
(* special exception on linking described in the file LICENSE. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
[@@@ocaml.warning "+a-4-30-40-41-42"]
|
|
|
|
module V = Backend_var
|
|
module VP = Backend_var.With_provenance
|
|
|
|
let node_num_header_words = 2 (* [Node_num_header_words] in the runtime. *)
|
|
let index_within_node = ref node_num_header_words
|
|
(* The [lazy]s are to ensure that we don't create [V.t]s at toplevel
|
|
when not using Spacetime profiling. (This could cause stamps to differ
|
|
between bytecode and native .cmis when no .mli is present, e.g.
|
|
arch.ml.) *)
|
|
let spacetime_node = ref (lazy (Cmm.Cvar (V.create_local "dummy")))
|
|
let spacetime_node_ident = ref (lazy (V.create_local "dummy"))
|
|
let current_function_label = ref None
|
|
let direct_tail_call_point_indexes = ref []
|
|
|
|
let reverse_shape = ref ([] : Mach.spacetime_shape)
|
|
|
|
(* CR-someday mshinwell: This code could be updated to use [placeholder_dbg] as
|
|
in [Cmmgen]. *)
|
|
let cconst_int i = Cmm.Cconst_int (i, Debuginfo.none)
|
|
let cconst_natint i = Cmm_helpers.natint_const_untagged Debuginfo.none i
|
|
let cconst_symbol s = Cmm.Cconst_symbol (s, Debuginfo.none)
|
|
|
|
let something_was_instrumented () =
|
|
!index_within_node > node_num_header_words
|
|
|
|
let next_index_within_node ~part_of_shape ~label =
|
|
let index = !index_within_node in
|
|
begin match part_of_shape with
|
|
| Mach.Direct_call_point _ ->
|
|
incr index_within_node;
|
|
if Config.enable_call_counts then begin
|
|
incr index_within_node
|
|
end
|
|
| Mach.Indirect_call_point ->
|
|
incr index_within_node
|
|
| Mach.Allocation_point ->
|
|
incr index_within_node;
|
|
incr index_within_node;
|
|
incr index_within_node
|
|
end;
|
|
reverse_shape := (part_of_shape, label) :: !reverse_shape;
|
|
index
|
|
|
|
let reset ~spacetime_node_ident:ident ~function_label =
|
|
index_within_node := node_num_header_words;
|
|
spacetime_node := lazy (Cmm.Cvar ident);
|
|
spacetime_node_ident := lazy ident;
|
|
direct_tail_call_point_indexes := [];
|
|
current_function_label := Some function_label;
|
|
reverse_shape := []
|
|
|
|
let code_for_function_prologue ~function_name ~fun_dbg:dbg ~node_hole =
|
|
let node = V.create_local "node" in
|
|
let new_node = V.create_local "new_node" in
|
|
let must_allocate_node = V.create_local "must_allocate_node" in
|
|
let is_new_node = V.create_local "is_new_node" in
|
|
let no_tail_calls = List.length !direct_tail_call_point_indexes < 1 in
|
|
let open Cmm in
|
|
let initialize_direct_tail_call_points_and_return_node =
|
|
let new_node_encoded = V.create_local "new_node_encoded" in
|
|
(* The callee node pointers within direct tail call points must initially
|
|
point back at the start of the current node and be marked as per
|
|
[Encode_tail_caller_node] in the runtime. *)
|
|
let indexes = !direct_tail_call_point_indexes in
|
|
let body =
|
|
List.fold_left (fun init_code index ->
|
|
(* Cf. [Direct_callee_node] in the runtime. *)
|
|
let offset_in_bytes = index * Arch.size_addr in
|
|
Csequence (
|
|
Cop (Cstore (Word_int, Lambda.Assignment),
|
|
[Cop (Caddi, [Cvar new_node; cconst_int offset_in_bytes], dbg);
|
|
Cvar new_node_encoded], dbg),
|
|
init_code))
|
|
(Cvar new_node)
|
|
indexes
|
|
in
|
|
match indexes with
|
|
| [] -> body
|
|
| _ ->
|
|
Clet (VP.create new_node_encoded,
|
|
(* Cf. [Encode_tail_caller_node] in the runtime. *)
|
|
Cop (Cor, [Cvar new_node; cconst_int 1], dbg),
|
|
body)
|
|
in
|
|
let pc = V.create_local "pc" in
|
|
Clet (VP.create node,
|
|
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
|
|
Clet (VP.create must_allocate_node,
|
|
Cop (Cand, [Cvar node; cconst_int 1], dbg),
|
|
Cifthenelse (
|
|
Cop (Ccmpi Cne, [Cvar must_allocate_node; cconst_int 1], dbg),
|
|
dbg,
|
|
Cvar node,
|
|
dbg,
|
|
Clet (VP.create is_new_node,
|
|
Clet (VP.create pc, cconst_symbol function_name,
|
|
Cop (Cextcall ("caml_spacetime_allocate_node",
|
|
typ_int, [], false, None),
|
|
[cconst_int (1 (* header *) + !index_within_node);
|
|
Cvar pc;
|
|
Cvar node_hole;
|
|
],
|
|
dbg)),
|
|
Clet (VP.create new_node,
|
|
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
|
|
if no_tail_calls then Cvar new_node
|
|
else
|
|
Cifthenelse (
|
|
Cop (Ccmpi Ceq, [Cvar is_new_node; cconst_int 0], dbg),
|
|
dbg,
|
|
Cvar new_node,
|
|
dbg,
|
|
initialize_direct_tail_call_points_and_return_node,
|
|
dbg))),
|
|
dbg)))
|
|
|
|
let code_for_blockheader ~value's_header ~node ~dbg =
|
|
let num_words = Nativeint.shift_right_logical value's_header 10 in
|
|
let existing_profinfo = V.create_local "existing_profinfo" in
|
|
let existing_count = V.create_local "existing_count" in
|
|
let profinfo = V.create_local "profinfo" in
|
|
let address_of_profinfo = V.create_local "address_of_profinfo" in
|
|
let label = Cmm.new_label () in
|
|
let index_within_node =
|
|
next_index_within_node ~part_of_shape:Mach.Allocation_point ~label
|
|
in
|
|
let offset_into_node = Arch.size_addr * index_within_node in
|
|
let open Cmm in
|
|
let generate_new_profinfo =
|
|
(* This will generate a static branch to a function that should usually
|
|
be in the cache, which hopefully gives a good code size/performance
|
|
balance.
|
|
The "Some label" is important: it provides the link between the shape
|
|
table, the allocation point, and the frame descriptor table---enabling
|
|
the latter table to be used for resolving a program counter at such
|
|
a point to a location.
|
|
*)
|
|
Cop (Cextcall ("caml_spacetime_generate_profinfo", typ_int, [],
|
|
false, Some label),
|
|
[Cvar address_of_profinfo;
|
|
cconst_int (index_within_node + 1)],
|
|
dbg)
|
|
in
|
|
(* Check if we have already allocated a profinfo value for this allocation
|
|
point with the current backtrace. If so, use that value; if not,
|
|
allocate a new one. *)
|
|
Clet (VP.create address_of_profinfo,
|
|
Cop (Caddi, [
|
|
Cvar node;
|
|
cconst_int offset_into_node;
|
|
], dbg),
|
|
Clet (VP.create existing_profinfo,
|
|
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar address_of_profinfo],
|
|
dbg),
|
|
Clet (VP.create profinfo,
|
|
Cifthenelse (
|
|
Cop (Ccmpi Cne, [Cvar existing_profinfo; cconst_int 1 (* () *)], dbg),
|
|
dbg,
|
|
Cvar existing_profinfo,
|
|
dbg,
|
|
generate_new_profinfo,
|
|
dbg),
|
|
Clet (VP.create existing_count,
|
|
Cop (Cload (Word_int, Asttypes.Mutable), [
|
|
Cop (Caddi,
|
|
[Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg)
|
|
], dbg),
|
|
Csequence (
|
|
Cop (Cstore (Word_int, Lambda.Assignment),
|
|
[Cop (Caddi,
|
|
[Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg);
|
|
Cop (Caddi, [
|
|
Cvar existing_count;
|
|
(* N.B. "*2" since the count is an OCaml integer.
|
|
The "1 +" is to count the value's header. *)
|
|
cconst_int (2 * (1 + Nativeint.to_int num_words));
|
|
], dbg);
|
|
], dbg),
|
|
(* [profinfo] looks like a black [Infix_tag] header. Instead of
|
|
having to mask [profinfo] before ORing it with the desired
|
|
header, we can use an XOR trick, to keep code size down. *)
|
|
let value's_header =
|
|
Nativeint.logxor value's_header
|
|
(Nativeint.logor
|
|
((Nativeint.logor (Nativeint.of_int Obj.infix_tag)
|
|
(Nativeint.shift_left 3n (* <- Caml_black *) 8)))
|
|
(Nativeint.shift_left
|
|
(* The following is the [Infix_offset_val], in words. *)
|
|
(Nativeint.of_int (index_within_node + 1)) 10))
|
|
in
|
|
Cop (Cxor, [Cvar profinfo; cconst_natint value's_header], dbg))))))
|
|
|
|
type callee =
|
|
| Direct of string
|
|
| Indirect of Cmm.expression
|
|
|
|
let code_for_call ~node ~callee ~is_tail ~label dbg =
|
|
(* We treat self recursive calls as tail calls to avoid blow-ups in the
|
|
graph. *)
|
|
let is_self_recursive_call =
|
|
match callee with
|
|
| Direct callee ->
|
|
begin match !current_function_label with
|
|
| None -> Misc.fatal_error "[current_function_label] not set"
|
|
| Some label -> String.equal callee label
|
|
end
|
|
| Indirect _ -> false
|
|
in
|
|
let is_tail = is_tail || is_self_recursive_call in
|
|
let index_within_node =
|
|
match callee with
|
|
| Direct callee ->
|
|
next_index_within_node
|
|
~part_of_shape:(Mach.Direct_call_point { callee; })
|
|
~label
|
|
| Indirect _ ->
|
|
next_index_within_node ~part_of_shape:Mach.Indirect_call_point ~label
|
|
in
|
|
begin match callee with
|
|
(* If this is a direct tail call point, we need to note down its index,
|
|
so the correct initialization code can be emitted in the prologue. *)
|
|
| Direct _ when is_tail ->
|
|
direct_tail_call_point_indexes :=
|
|
index_within_node::!direct_tail_call_point_indexes
|
|
| Direct _ | Indirect _ -> ()
|
|
end;
|
|
let place_within_node = V.create_local "place_within_node" in
|
|
let open Cmm in
|
|
Clet (VP.create place_within_node,
|
|
Cop (Caddi, [node; cconst_int (index_within_node * Arch.size_addr)], dbg),
|
|
(* The following code returns the address that is to be moved into the
|
|
(hard) node hole pointer register immediately before the call.
|
|
(That move is inserted in [Selectgen].) *)
|
|
match callee with
|
|
| Direct _callee ->
|
|
if Config.enable_call_counts then begin
|
|
let count_addr = V.create_local "call_count_addr" in
|
|
let count = V.create_local "call_count" in
|
|
Clet (VP.create count_addr,
|
|
Cop (Caddi, [Cvar place_within_node; cconst_int Arch.size_addr], dbg),
|
|
Clet (VP.create count,
|
|
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar count_addr], dbg),
|
|
Csequence (
|
|
Cop (Cstore (Word_int, Lambda.Assignment),
|
|
(* Adding 2 really means adding 1; the count is encoded
|
|
as an OCaml integer. *)
|
|
[Cvar count_addr; Cop (Caddi, [Cvar count; cconst_int 2], dbg)],
|
|
dbg),
|
|
Cvar place_within_node)))
|
|
end else begin
|
|
Cvar place_within_node
|
|
end
|
|
| Indirect callee ->
|
|
let caller_node =
|
|
if is_tail then node
|
|
else cconst_int 1 (* [Val_unit] *)
|
|
in
|
|
Cop (Cextcall ("caml_spacetime_indirect_node_hole_ptr",
|
|
typ_int, [], false, None),
|
|
[callee; Cvar place_within_node; caller_node],
|
|
dbg))
|
|
|
|
class virtual instruction_selection = object (self)
|
|
inherit Selectgen.selector_generic as super
|
|
|
|
(* [disable_instrumentation] ensures that we don't try to instrument the
|
|
instrumentation... *)
|
|
val mutable disable_instrumentation = false
|
|
|
|
method private instrument_direct_call ~env ~func ~is_tail ~label_after dbg =
|
|
let instrumentation =
|
|
code_for_call
|
|
~node:(Lazy.force !spacetime_node)
|
|
~callee:(Direct func)
|
|
~is_tail
|
|
~label:label_after
|
|
dbg
|
|
in
|
|
match self#emit_expr env instrumentation with
|
|
| None -> assert false
|
|
| Some reg -> Some reg
|
|
|
|
method private instrument_indirect_call ~env ~callee ~is_tail
|
|
~label_after dbg =
|
|
(* [callee] is a pseudoregister, so we have to bind it in the environment
|
|
and reference the variable to which it is bound. *)
|
|
let callee_ident = V.create_local "callee" in
|
|
let env = Selectgen.env_add (VP.create callee_ident) [| callee |] env in
|
|
let instrumentation =
|
|
code_for_call
|
|
~node:(Lazy.force !spacetime_node)
|
|
~callee:(Indirect (Cmm.Cvar callee_ident))
|
|
~is_tail
|
|
~label:label_after
|
|
dbg
|
|
in
|
|
match self#emit_expr env instrumentation with
|
|
| None -> assert false
|
|
| Some reg -> Some reg
|
|
|
|
method private can_instrument () =
|
|
Config.spacetime && not disable_instrumentation
|
|
|
|
method! about_to_emit_call env desc arg dbg =
|
|
if not (self#can_instrument ()) then None
|
|
else
|
|
let module M = Mach in
|
|
match desc with
|
|
| M.Iop (M.Icall_imm { func; label_after; }) ->
|
|
assert (Array.length arg = 0);
|
|
self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
|
|
| M.Iop (M.Icall_ind { label_after; }) ->
|
|
assert (Array.length arg = 1);
|
|
self#instrument_indirect_call ~env ~callee:arg.(0)
|
|
~is_tail:false ~label_after dbg
|
|
| M.Iop (M.Itailcall_imm { func; label_after; }) ->
|
|
assert (Array.length arg = 0);
|
|
self#instrument_direct_call ~env ~func ~is_tail:true ~label_after dbg
|
|
| M.Iop (M.Itailcall_ind { label_after; }) ->
|
|
assert (Array.length arg = 1);
|
|
self#instrument_indirect_call ~env ~callee:arg.(0)
|
|
~is_tail:true ~label_after dbg
|
|
| M.Iop (M.Iextcall { func; alloc = true; label_after; _}) ->
|
|
(* N.B. No need to instrument "noalloc" external calls. *)
|
|
assert (Array.length arg = 0);
|
|
self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
|
|
| _ -> None
|
|
|
|
method private instrument_blockheader ~env ~value's_header ~dbg =
|
|
let instrumentation =
|
|
code_for_blockheader
|
|
~node:(Lazy.force !spacetime_node_ident)
|
|
~value's_header ~dbg
|
|
in
|
|
self#emit_expr env instrumentation
|
|
|
|
method private emit_prologue f ~node_hole ~env =
|
|
(* We don't need the prologue unless we inserted some instrumentation.
|
|
This corresponds to adding the prologue if the function contains one
|
|
or more call or allocation points. *)
|
|
if something_was_instrumented () then begin
|
|
let prologue_cmm =
|
|
code_for_function_prologue ~function_name:f.Cmm.fun_name ~node_hole
|
|
~fun_dbg:f.Cmm.fun_dbg
|
|
in
|
|
disable_instrumentation <- true;
|
|
let node_temp_reg =
|
|
match self#emit_expr env prologue_cmm with
|
|
| None ->
|
|
Misc.fatal_error "Spacetime prologue instruction \
|
|
selection did not yield a destination register"
|
|
| Some node_temp_reg -> node_temp_reg
|
|
in
|
|
disable_instrumentation <- false;
|
|
let node = Lazy.force !spacetime_node_ident in
|
|
let node_reg = Selectgen.env_find node env in
|
|
self#insert_moves env node_temp_reg node_reg
|
|
end
|
|
|
|
method! emit_blockheader env n dbg =
|
|
if self#can_instrument () then begin
|
|
disable_instrumentation <- true;
|
|
let result = self#instrument_blockheader ~env ~value's_header:n ~dbg in
|
|
disable_instrumentation <- false;
|
|
result
|
|
end else begin
|
|
super#emit_blockheader env n dbg
|
|
end
|
|
|
|
method! select_allocation bytes =
|
|
if self#can_instrument () then begin
|
|
(* Leave space for a direct call point. We cannot easily insert any
|
|
instrumentation code, so the fields are filled in instead by
|
|
[caml_spacetime_caml_garbage_collection]. *)
|
|
let label = Cmm.new_label () in
|
|
let index =
|
|
next_index_within_node
|
|
~part_of_shape:(Mach.Direct_call_point { callee = "caml_call_gc"; })
|
|
~label
|
|
in
|
|
Mach.Ialloc {
|
|
bytes;
|
|
dbginfo = [];
|
|
label_after_call_gc = Some label;
|
|
spacetime_index = index;
|
|
}
|
|
end else begin
|
|
super#select_allocation bytes
|
|
end
|
|
|
|
method! select_allocation_args env =
|
|
if self#can_instrument () then begin
|
|
let regs = Selectgen.env_find (Lazy.force !spacetime_node_ident) env in
|
|
match regs with
|
|
| [| reg |] -> [| reg |]
|
|
| _ -> failwith "Expected one register only for spacetime_node_ident"
|
|
end else begin
|
|
super#select_allocation_args env
|
|
end
|
|
|
|
method! select_checkbound () =
|
|
(* This follows [select_allocation], above. *)
|
|
if self#can_instrument () then begin
|
|
let label = Cmm.new_label () in
|
|
let index =
|
|
next_index_within_node
|
|
~part_of_shape:(
|
|
Mach.Direct_call_point { callee = "caml_ml_array_bound_error"; })
|
|
~label
|
|
in
|
|
Mach.Icheckbound {
|
|
label_after_error = Some label;
|
|
spacetime_index = index;
|
|
}
|
|
end else begin
|
|
super#select_checkbound ()
|
|
end
|
|
|
|
method! select_checkbound_extra_args () =
|
|
if self#can_instrument () then begin
|
|
(* This follows [select_allocation_args], above. *)
|
|
[Cmm.Cvar (Lazy.force !spacetime_node_ident)]
|
|
end else begin
|
|
super#select_checkbound_extra_args ()
|
|
end
|
|
|
|
method! initial_env () =
|
|
let env = super#initial_env () in
|
|
if Config.spacetime then
|
|
Selectgen.env_add (VP.create (Lazy.force !spacetime_node_ident))
|
|
(self#regs_for Cmm.typ_int) env
|
|
else
|
|
env
|
|
|
|
method! emit_fundecl f =
|
|
if Config.spacetime then begin
|
|
disable_instrumentation <- false;
|
|
let node = V.create_local "spacetime_node" in
|
|
reset ~spacetime_node_ident:node ~function_label:f.Cmm.fun_name
|
|
end;
|
|
super#emit_fundecl f
|
|
|
|
method! insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env =
|
|
let fun_spacetime_shape =
|
|
super#insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env
|
|
in
|
|
(* CR-soon mshinwell: add check to make sure the node size doesn't exceed
|
|
the chunk size of the allocator *)
|
|
if not Config.spacetime then fun_spacetime_shape
|
|
else begin
|
|
let node_hole, node_hole_reg =
|
|
match spacetime_node_hole with
|
|
| None -> assert false
|
|
| Some (node_hole, reg) -> node_hole, reg
|
|
in
|
|
self#insert_moves env [| Proc.loc_spacetime_node_hole |] node_hole_reg;
|
|
self#emit_prologue f ~node_hole ~env;
|
|
match !reverse_shape with
|
|
| [] -> None
|
|
(* N.B. We do not reverse the shape list, since the function that
|
|
reconstructs it (caml_spacetime_shape_table) reverses it again. *)
|
|
| reverse_shape -> Some reverse_shape
|
|
end
|
|
end
|