(**************************************************************************) (* *) (* OCaml *) (* *) (* Mark Shinwell and Leo White, Jane Street Europe *) (* *) (* Copyright 2015--2018 Jane Street Group LLC *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) (* special exception on linking described in the file LICENSE. *) (* *) (**************************************************************************) [@@@ocaml.warning "+a-4-30-40-41-42"] module V = Backend_var module VP = Backend_var.With_provenance let node_num_header_words = 2 (* [Node_num_header_words] in the runtime. *) let index_within_node = ref node_num_header_words (* The [lazy]s are to ensure that we don't create [V.t]s at toplevel when not using Spacetime profiling. (This could cause stamps to differ between bytecode and native .cmis when no .mli is present, e.g. arch.ml.) *) let spacetime_node = ref (lazy (Cmm.Cvar (V.create_local "dummy"))) let spacetime_node_ident = ref (lazy (V.create_local "dummy")) let current_function_label = ref None let direct_tail_call_point_indexes = ref [] let reverse_shape = ref ([] : Mach.spacetime_shape) (* CR-someday mshinwell: This code could be updated to use [placeholder_dbg] as in [Cmmgen]. *) let cconst_int i = Cmm.Cconst_int (i, Debuginfo.none) let cconst_natint i = Cmm_helpers.natint_const_untagged Debuginfo.none i let cconst_symbol s = Cmm.Cconst_symbol (s, Debuginfo.none) let something_was_instrumented () = !index_within_node > node_num_header_words let next_index_within_node ~part_of_shape ~label = let index = !index_within_node in begin match part_of_shape with | Mach.Direct_call_point _ -> incr index_within_node; if Config.enable_call_counts then begin incr index_within_node end | Mach.Indirect_call_point -> incr index_within_node | Mach.Allocation_point -> incr index_within_node; incr index_within_node; incr index_within_node end; reverse_shape := (part_of_shape, label) :: !reverse_shape; index let reset ~spacetime_node_ident:ident ~function_label = index_within_node := node_num_header_words; spacetime_node := lazy (Cmm.Cvar ident); spacetime_node_ident := lazy ident; direct_tail_call_point_indexes := []; current_function_label := Some function_label; reverse_shape := [] let code_for_function_prologue ~function_name ~fun_dbg:dbg ~node_hole = let node = V.create_local "node" in let new_node = V.create_local "new_node" in let must_allocate_node = V.create_local "must_allocate_node" in let is_new_node = V.create_local "is_new_node" in let no_tail_calls = List.length !direct_tail_call_point_indexes < 1 in let open Cmm in let initialize_direct_tail_call_points_and_return_node = let new_node_encoded = V.create_local "new_node_encoded" in (* The callee node pointers within direct tail call points must initially point back at the start of the current node and be marked as per [Encode_tail_caller_node] in the runtime. *) let indexes = !direct_tail_call_point_indexes in let body = List.fold_left (fun init_code index -> (* Cf. [Direct_callee_node] in the runtime. *) let offset_in_bytes = index * Arch.size_addr in Csequence ( Cop (Cstore (Word_int, Lambda.Assignment), [Cop (Caddi, [Cvar new_node; cconst_int offset_in_bytes], dbg); Cvar new_node_encoded], dbg), init_code)) (Cvar new_node) indexes in match indexes with | [] -> body | _ -> Clet (VP.create new_node_encoded, (* Cf. [Encode_tail_caller_node] in the runtime. *) Cop (Cor, [Cvar new_node; cconst_int 1], dbg), body) in let pc = V.create_local "pc" in Clet (VP.create node, Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg), Clet (VP.create must_allocate_node, Cop (Cand, [Cvar node; cconst_int 1], dbg), Cifthenelse ( Cop (Ccmpi Cne, [Cvar must_allocate_node; cconst_int 1], dbg), dbg, Cvar node, dbg, Clet (VP.create is_new_node, Clet (VP.create pc, cconst_symbol function_name, Cop (Cextcall ("caml_spacetime_allocate_node", typ_int, [], false, None), [cconst_int (1 (* header *) + !index_within_node); Cvar pc; Cvar node_hole; ], dbg)), Clet (VP.create new_node, Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg), if no_tail_calls then Cvar new_node else Cifthenelse ( Cop (Ccmpi Ceq, [Cvar is_new_node; cconst_int 0], dbg), dbg, Cvar new_node, dbg, initialize_direct_tail_call_points_and_return_node, dbg))), dbg))) let code_for_blockheader ~value's_header ~node ~dbg = let num_words = Nativeint.shift_right_logical value's_header 10 in let existing_profinfo = V.create_local "existing_profinfo" in let existing_count = V.create_local "existing_count" in let profinfo = V.create_local "profinfo" in let address_of_profinfo = V.create_local "address_of_profinfo" in let label = Cmm.new_label () in let index_within_node = next_index_within_node ~part_of_shape:Mach.Allocation_point ~label in let offset_into_node = Arch.size_addr * index_within_node in let open Cmm in let generate_new_profinfo = (* This will generate a static branch to a function that should usually be in the cache, which hopefully gives a good code size/performance balance. The "Some label" is important: it provides the link between the shape table, the allocation point, and the frame descriptor table---enabling the latter table to be used for resolving a program counter at such a point to a location. *) Cop (Cextcall ("caml_spacetime_generate_profinfo", typ_int, [], false, Some label), [Cvar address_of_profinfo; cconst_int (index_within_node + 1)], dbg) in (* Check if we have already allocated a profinfo value for this allocation point with the current backtrace. If so, use that value; if not, allocate a new one. *) Clet (VP.create address_of_profinfo, Cop (Caddi, [ Cvar node; cconst_int offset_into_node; ], dbg), Clet (VP.create existing_profinfo, Cop (Cload (Word_int, Asttypes.Mutable), [Cvar address_of_profinfo], dbg), Clet (VP.create profinfo, Cifthenelse ( Cop (Ccmpi Cne, [Cvar existing_profinfo; cconst_int 1 (* () *)], dbg), dbg, Cvar existing_profinfo, dbg, generate_new_profinfo, dbg), Clet (VP.create existing_count, Cop (Cload (Word_int, Asttypes.Mutable), [ Cop (Caddi, [Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg) ], dbg), Csequence ( Cop (Cstore (Word_int, Lambda.Assignment), [Cop (Caddi, [Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg); Cop (Caddi, [ Cvar existing_count; (* N.B. "*2" since the count is an OCaml integer. The "1 +" is to count the value's header. *) cconst_int (2 * (1 + Nativeint.to_int num_words)); ], dbg); ], dbg), (* [profinfo] looks like a black [Infix_tag] header. Instead of having to mask [profinfo] before ORing it with the desired header, we can use an XOR trick, to keep code size down. *) let value's_header = Nativeint.logxor value's_header (Nativeint.logor ((Nativeint.logor (Nativeint.of_int Obj.infix_tag) (Nativeint.shift_left 3n (* <- Caml_black *) 8))) (Nativeint.shift_left (* The following is the [Infix_offset_val], in words. *) (Nativeint.of_int (index_within_node + 1)) 10)) in Cop (Cxor, [Cvar profinfo; cconst_natint value's_header], dbg)))))) type callee = | Direct of string | Indirect of Cmm.expression let code_for_call ~node ~callee ~is_tail ~label dbg = (* We treat self recursive calls as tail calls to avoid blow-ups in the graph. *) let is_self_recursive_call = match callee with | Direct callee -> begin match !current_function_label with | None -> Misc.fatal_error "[current_function_label] not set" | Some label -> String.equal callee label end | Indirect _ -> false in let is_tail = is_tail || is_self_recursive_call in let index_within_node = match callee with | Direct callee -> next_index_within_node ~part_of_shape:(Mach.Direct_call_point { callee; }) ~label | Indirect _ -> next_index_within_node ~part_of_shape:Mach.Indirect_call_point ~label in begin match callee with (* If this is a direct tail call point, we need to note down its index, so the correct initialization code can be emitted in the prologue. *) | Direct _ when is_tail -> direct_tail_call_point_indexes := index_within_node::!direct_tail_call_point_indexes | Direct _ | Indirect _ -> () end; let place_within_node = V.create_local "place_within_node" in let open Cmm in Clet (VP.create place_within_node, Cop (Caddi, [node; cconst_int (index_within_node * Arch.size_addr)], dbg), (* The following code returns the address that is to be moved into the (hard) node hole pointer register immediately before the call. (That move is inserted in [Selectgen].) *) match callee with | Direct _callee -> if Config.enable_call_counts then begin let count_addr = V.create_local "call_count_addr" in let count = V.create_local "call_count" in Clet (VP.create count_addr, Cop (Caddi, [Cvar place_within_node; cconst_int Arch.size_addr], dbg), Clet (VP.create count, Cop (Cload (Word_int, Asttypes.Mutable), [Cvar count_addr], dbg), Csequence ( Cop (Cstore (Word_int, Lambda.Assignment), (* Adding 2 really means adding 1; the count is encoded as an OCaml integer. *) [Cvar count_addr; Cop (Caddi, [Cvar count; cconst_int 2], dbg)], dbg), Cvar place_within_node))) end else begin Cvar place_within_node end | Indirect callee -> let caller_node = if is_tail then node else cconst_int 1 (* [Val_unit] *) in Cop (Cextcall ("caml_spacetime_indirect_node_hole_ptr", typ_int, [], false, None), [callee; Cvar place_within_node; caller_node], dbg)) class virtual instruction_selection = object (self) inherit Selectgen.selector_generic as super (* [disable_instrumentation] ensures that we don't try to instrument the instrumentation... *) val mutable disable_instrumentation = false method private instrument_direct_call ~env ~func ~is_tail ~label_after dbg = let instrumentation = code_for_call ~node:(Lazy.force !spacetime_node) ~callee:(Direct func) ~is_tail ~label:label_after dbg in match self#emit_expr env instrumentation with | None -> assert false | Some reg -> Some reg method private instrument_indirect_call ~env ~callee ~is_tail ~label_after dbg = (* [callee] is a pseudoregister, so we have to bind it in the environment and reference the variable to which it is bound. *) let callee_ident = V.create_local "callee" in let env = Selectgen.env_add (VP.create callee_ident) [| callee |] env in let instrumentation = code_for_call ~node:(Lazy.force !spacetime_node) ~callee:(Indirect (Cmm.Cvar callee_ident)) ~is_tail ~label:label_after dbg in match self#emit_expr env instrumentation with | None -> assert false | Some reg -> Some reg method private can_instrument () = Config.spacetime && not disable_instrumentation method! about_to_emit_call env desc arg dbg = if not (self#can_instrument ()) then None else let module M = Mach in match desc with | M.Iop (M.Icall_imm { func; label_after; }) -> assert (Array.length arg = 0); self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg | M.Iop (M.Icall_ind { label_after; }) -> assert (Array.length arg = 1); self#instrument_indirect_call ~env ~callee:arg.(0) ~is_tail:false ~label_after dbg | M.Iop (M.Itailcall_imm { func; label_after; }) -> assert (Array.length arg = 0); self#instrument_direct_call ~env ~func ~is_tail:true ~label_after dbg | M.Iop (M.Itailcall_ind { label_after; }) -> assert (Array.length arg = 1); self#instrument_indirect_call ~env ~callee:arg.(0) ~is_tail:true ~label_after dbg | M.Iop (M.Iextcall { func; alloc = true; label_after; _}) -> (* N.B. No need to instrument "noalloc" external calls. *) assert (Array.length arg = 0); self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg | _ -> None method private instrument_blockheader ~env ~value's_header ~dbg = let instrumentation = code_for_blockheader ~node:(Lazy.force !spacetime_node_ident) ~value's_header ~dbg in self#emit_expr env instrumentation method private emit_prologue f ~node_hole ~env = (* We don't need the prologue unless we inserted some instrumentation. This corresponds to adding the prologue if the function contains one or more call or allocation points. *) if something_was_instrumented () then begin let prologue_cmm = code_for_function_prologue ~function_name:f.Cmm.fun_name ~node_hole ~fun_dbg:f.Cmm.fun_dbg in disable_instrumentation <- true; let node_temp_reg = match self#emit_expr env prologue_cmm with | None -> Misc.fatal_error "Spacetime prologue instruction \ selection did not yield a destination register" | Some node_temp_reg -> node_temp_reg in disable_instrumentation <- false; let node = Lazy.force !spacetime_node_ident in let node_reg = Selectgen.env_find node env in self#insert_moves env node_temp_reg node_reg end method! emit_blockheader env n dbg = if self#can_instrument () then begin disable_instrumentation <- true; let result = self#instrument_blockheader ~env ~value's_header:n ~dbg in disable_instrumentation <- false; result end else begin super#emit_blockheader env n dbg end method! select_allocation bytes = if self#can_instrument () then begin (* Leave space for a direct call point. We cannot easily insert any instrumentation code, so the fields are filled in instead by [caml_spacetime_caml_garbage_collection]. *) let label = Cmm.new_label () in let index = next_index_within_node ~part_of_shape:(Mach.Direct_call_point { callee = "caml_call_gc"; }) ~label in Mach.Ialloc { bytes; dbginfo = []; label_after_call_gc = Some label; spacetime_index = index; } end else begin super#select_allocation bytes end method! select_allocation_args env = if self#can_instrument () then begin let regs = Selectgen.env_find (Lazy.force !spacetime_node_ident) env in match regs with | [| reg |] -> [| reg |] | _ -> failwith "Expected one register only for spacetime_node_ident" end else begin super#select_allocation_args env end method! select_checkbound () = (* This follows [select_allocation], above. *) if self#can_instrument () then begin let label = Cmm.new_label () in let index = next_index_within_node ~part_of_shape:( Mach.Direct_call_point { callee = "caml_ml_array_bound_error"; }) ~label in Mach.Icheckbound { label_after_error = Some label; spacetime_index = index; } end else begin super#select_checkbound () end method! select_checkbound_extra_args () = if self#can_instrument () then begin (* This follows [select_allocation_args], above. *) [Cmm.Cvar (Lazy.force !spacetime_node_ident)] end else begin super#select_checkbound_extra_args () end method! initial_env () = let env = super#initial_env () in if Config.spacetime then Selectgen.env_add (VP.create (Lazy.force !spacetime_node_ident)) (self#regs_for Cmm.typ_int) env else env method! emit_fundecl f = if Config.spacetime then begin disable_instrumentation <- false; let node = V.create_local "spacetime_node" in reset ~spacetime_node_ident:node ~function_label:f.Cmm.fun_name end; super#emit_fundecl f method! insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env = let fun_spacetime_shape = super#insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env in (* CR-soon mshinwell: add check to make sure the node size doesn't exceed the chunk size of the allocator *) if not Config.spacetime then fun_spacetime_shape else begin let node_hole, node_hole_reg = match spacetime_node_hole with | None -> assert false | Some (node_hole, reg) -> node_hole, reg in self#insert_moves env [| Proc.loc_spacetime_node_hole |] node_hole_reg; self#emit_prologue f ~node_hole ~env; match !reverse_shape with | [] -> None (* N.B. We do not reverse the shape list, since the function that reconstructs it (caml_spacetime_shape_table) reverses it again. *) | reverse_shape -> Some reverse_shape end end