diff --git a/manual/manual/allfiles.etex b/manual/manual/allfiles.etex index 2cb4f1d09..db342365b 100644 --- a/manual/manual/allfiles.etex +++ b/manual/manual/allfiles.etex @@ -72,6 +72,7 @@ and as a \input{ocamlbuild.tex} % \input emacs.tex \input{intf-c.tex} +\input{flambda.tex} \part{The OCaml library} \label{p:library} diff --git a/manual/manual/cmds/Makefile b/manual/manual/cmds/Makefile index 70136b1da..7c570ba34 100644 --- a/manual/manual/cmds/Makefile +++ b/manual/manual/cmds/Makefile @@ -1,6 +1,6 @@ FILES=comp.tex top.tex runtime.tex native.tex lexyacc.tex intf-c.tex \ depend.tex profil.tex debugger.tex browser.tex ocamldoc.tex \ - warnings-help.tex ocamlbuild.tex + warnings-help.tex ocamlbuild.tex flambda.tex TRANSF=../../tools/transf TEXQUOTE=../../tools/texquote2 @@ -27,6 +27,9 @@ top.tex: top.etex $(TRANSF) intf-c.tex: intf-c.etex $(TRANSF) $(TRANSF) < intf-c.etex | $(TEXQUOTE) > intf-c.tex +flambda.tex: flambda.etex $(TRANSF) + $(TRANSF) < flambda.etex | $(TEXQUOTE) > flambda.tex + lexyacc.tex: lexyacc.etex $(TRANSF) $(TRANSF) < lexyacc.etex | $(TEXQUOTE) > lexyacc.tex diff --git a/manual/manual/cmds/flambda.etex b/manual/manual/cmds/flambda.etex new file mode 100644 index 000000000..a30a5b299 --- /dev/null +++ b/manual/manual/cmds/flambda.etex @@ -0,0 +1,1343 @@ +\chapter{Optimisation with Flambda} +\pdfchapterfold{-9}{Optimisation with Flambda} +%HEVEA\cutname{flambda.html} + +\section{Overview} + +{\em Flambda} is the term used to describe a series of optimisation passes +provided by the native code compilers as of OCaml 4.03. + +Flambda aims to make it easier to write idiomatic OCaml code without +incurring performance penalties. + +To use the Flambda optimisers it is necessary to pass the {\tt -flambda} +option to the OCaml {\tt configure} script. (There is no support for a +single compiler that can operate in both Flambda and non-Flambda modes.) +Code compiled with Flambda +cannot be linked into the same program as code compiled without Flambda. +Attempting to do this will result in a compiler error. + +Whether or not a particular {\tt ocamlopt} uses Flambda may be +determined by invoking it with the {\tt -config} option and looking +for any line starting with ``{\tt flambda:}''. If such a line is present +and says ``{\tt true}'', then Flambda is supported, otherwise it is not. + +Flambda provides full optimisation across different compilation units, +so long as the {\tt .cmx} files for the dependencies of the unit currently +being compiled are available. (A compilation unit corresponds to a +single {\tt .ml} source file.) However it does not yet act entirely as +a whole-program compiler: for example, elimination of dead code across +a complete set of compilation units is not supported. + +Optimisation with Flambda is not currently supported when generating +bytecode. + +Flambda should not in general affect the semantics of existing programs. +Two exceptions to this rule are: possible elimination of pure code +that is being benchmarked (see section\ \ref{inhibition}) and changes in +behaviour of code using unsafe operations (see section\ \ref{unsafe}). + +Flambda does not yet optimise array or string bounds checks. Neither +does it take hints for optimisation from any assertions written by the +user in the code. + +Consult the {\em Glossary} at the end of this chapter for definitions of +technical terms used below. + +\section{Command-line flags} + +The Flambda optimisers provide a variety of command-line flags that may +be used to control their behaviour. Detailed descriptions of each flag +are given in the referenced sections. Those sections also describe any +arguments which the particular flags take. + +Commonly-used options: +\begin{options} +\item[\machine{-O2}] Perform more optimisation than usual. Compilation +times may be lengthened. (This flag is an abbreviation for a certain +set of parameters described in section\ \ref{defaults}.) +\item[\machine{-O3}] Perform even more optimisation than usual, possibly +including unrolling of recursive functions. Compilation times may be +significantly lengthened. +\item[\machine{-Oclassic}] Make inlining decisions at the point of +definition of a function rather than at the call site(s). This mirrors +the behaviour of OCaml compilers not using Flambda. Compared to compilation +using the new Flambda inlining heuristics (for example at {\tt -O2}) it +produces +smaller {\tt .cmx} files, shorter compilation times and code that probably +runs rather slower. When using {\tt -Oclassic}, only the following options +described in this section are relevant: {\tt -inlining-report} and +{\tt -inline}. If any other of the options described in this section are +used, the behaviour is undefined and may cause an error in future versions +of the compiler. +\item[\machine{-inlining-report}] Emit {\tt .inlining} files (one per +round of optimisation) showing all of the inliner's decisions. +\end{options} + +Less commonly-used options: +\begin{options} +\item[\machine{-remove-unused-arguments}] Remove unused function arguments +even when the argument is not specialised. This may have a small +performance penalty. +See section\ \ref{remove-unused-args}. +\item[\machine{-unbox-closures}] Pass free variables via specialised arguments +rather than closures (an optimisation for reducing allocation). See +section\ \ref{unbox-closures}. This may have a small performance penalty. +\end{options} + +Advanced options, only needed for detailed tuning: +\begin{options} +\item[\machine{-inline}] The behaviour depends on whether {\tt -Oclassic} +is used. +\begin{itemize} +\item When not in {\tt -Oclassic} mode, {\tt -inline} limits the total +size of functions considered for inlining during any speculative inlining +search. (See section\ \ref{speculation}.) Note that +this parameter does +{\bf not} control the assessment as to whether any particular function may +be inlined. Raising it to excessive amounts will not necessarily cause +more functions to be inlined. +\item When in {\tt -Oclassic} mode, {\tt -inline} behaves as in +previous versions of the compiler: it is the maximum size of function to +be considered for inlining. See section\ \ref{classic}. +\end{itemize} +\item[\machine{-inline-toplevel}] The equivalent of {\tt -inline} but used +when speculative inlining starts at toplevel. See +section\ \ref{speculation}. +Not used in {\tt -Oclassic} mode. +\item[\machine{-inline-branch-factor}] Controls how the inliner assesses +whether a code path is likely to be hot or cold. See +section\ \ref{assessment-inlining}. +\item[\machine{-inline-alloc-cost}, + \machine{-inline-branch-cost}, + \machine{-inline-call-cost}] Controls how the inliner assesses the runtime + performance penalties associated with various operations. See + section\ \ref{assessment-inlining}. +\item[\machine{-inline-indirect-cost}, + \machine{-inline-prim-cost}] Likewise. +\item[\machine{-inline-lifting-benefit}] Controls inlining of functors +at toplevel. See section\ \ref{assessment-inlining}. +\item[\machine{-inline-max-depth}] The maximum depth of any +speculative inlining search. See section\ \ref{speculation}. +\item[\machine{-inline-max-unroll}] The maximum depth of any unrolling of +recursive functions during any speculative inlining search. +See section\ \ref{speculation}. +\item[\machine{-no-unbox-free-vars-of-closures}] % +Do not unbox closure variables. See section\ \ref{unbox-fvs}. +\item[\machine{-no-unbox-specialised-args}] % +Do not unbox arguments to which functions have been specialised. See +section\ \ref{unbox-spec-args}. +\item[\machine{-rounds}] How many rounds of optimisation to perform. +See section\ \ref{rounds}. +\item[\machine{-unbox-closures-factor}] Scaling factor for benefit +calculation when using {\tt -unbox-closures}. See +section\ \ref{unbox-closures}. +\end{options} + +\paragraph{Notes} +\begin{itemize} +\item The set of command line flags relating to optimisation should typically +be specified to be the same across an entire project. Flambda does not +currently record the requested flags in the {\tt .cmx} files. As such, +inlining of functions from previously-compiled units will subject their code +to the optimisation parameters of the unit currently being compiled, rather +than those specified when they were previously compiled. It is hoped to +rectify this deficiency in the future. + +\item Flambda-specific flags do not affect linking with the exception of +affecting the optimisation of code in the startup file (containing +generated functions such as currying helpers). Typically such optimisation +will not be significant, so eliding such flags at link time might be +reasonable. + +\item Flambda-specific flags are silently accepted even when the +{\tt -flambda} option was not provided to the {\tt configure} script. +(There is no means provided to change this behaviour.) +This is intended to make it more +straightforward to run benchmarks with and without the Flambda optimisers +in effect. +\item Some of the Flambda flags may be subject to change in future +releases. +\end{itemize} + +\subsection{Specification of optimisation parameters by round}\label{rounds} + +Flambda operates in {\em rounds}: one round consists of a certain sequence +of transformations that may then be repeated in order to achieve more +satisfactory results. The number of rounds can be set manually using the +{\tt -rounds} parameter (although this is not necessary when using +predefined optimisation levels such as with {\tt -O2} and {\tt -O3}). +For high optimisation the number of rounds might be set at 3 or 4. + +Command-line flags that may apply per round, for example those with +{\tt "-cost"} in the name, accept arguments of the form: +\begin{center} +{\em n}{\tt\ |\ }{\em round}{\tt =}{\em n}[{\tt,}...] +\end{center} +\begin{itemize} +\item If the first form is used, with a single integer specified, +the value will apply to all rounds. +\item If the second form is used, zero-based {\em round} integers specify +values which are to be used only for those rounds. +\end{itemize} + +The flags {\tt -Oclassic}, {\tt -O2} and {\tt -O3} are applied before all +other flags, meaning that certain parameters may be overridden without +having to specify every parameter usually invoked by the given optimisation +level. + +\section{Inlining} + +{\em Inlining} refers to the copying of the code of a function to a +place where the function is called. +The code of the function will be surrounded by bindings of its parameters +to the corresponding arguments. + +The aims of inlining are: +\begin{itemize} +\item to reduce the runtime overhead caused by function calls (including +setting up for such calls and returning afterwards); +\item to reduce instruction cache misses by expressing frequently-taken +paths through the program using fewer machine instructions; and +\item to reduce the amount of allocation (especially of closures). +\end{itemize} +These goals are often reached not just by inlining itself but also by +other optimisations that the compiler is able to perform as a result of +inlining. + +When a recursive call to a function (within the definition of that function +or another in the same mutually-recursive group) is inlined, the procedure is +also known as {\em unrolling}. This is somewhat akin to loop peeling. +For example, given the following code: +\begin{verbatim} +let rec fact x = + if x = 0 then + 1 + else + x * fact (x - 1) + +let n = fact 4 +\end{verbatim} +unrolling once at the call site {\tt fact 4} produces (with the body of +{\tt fact} unchanged): +\begin{verbatim} +let n = + if 4 = 0 then + 1 + else + 4 * fact (4 - 1) +\end{verbatim} +This simplifies to: +\begin{verbatim} +let n = 4 * fact 3 +\end{verbatim} + +%% CR pchambart: A specific section for unrolling might be worth (telling +%% when this is beneficial) + +Flambda provides significantly enhanced inlining capabilities relative to +previous versions of the compiler. + +\subsubsection{Aside: when inlining is performed} + +Inlining is performed together with all of the other Flambda optimisation +passes, that is to say, after closure conversion. This has three particular +advantages over a potentially more straightforward implementation prior to +closure conversion: +\begin{itemize} +\item It permits higher-order inlining, for example when a non-inlinable +function always returns the same function yet with different environments +of definition. Not all such cases are supported yet, but it is intended +that such support will be improved in future. +\item It is easier to integrate with cross-module optimisation, since +imported information about other modules is already in the correct +intermediate language. +\item It becomes more straightforward to optimise closure allocations since +the layout of closures does not have to be estimated in any way: it is +known. Similarly, +it becomes more straightforward to control which variables end up +in which closures, helping to avoid closure bloat. +\end{itemize} + +\subsection{Classic inlining heuristic}\label{classic} + +In {\tt -Oclassic} mode the behaviour of the Flambda inliner +mimics previous versions +of the compiler. (Code may still be subject to further optimisations not +performed by previous versions of the compiler: functors may be inlined, +constants are lifted and unused code is eliminated all as described elsewhere +in this chapter. See sections \ref{functors},\ \ref{lift-const} % +and\ \ref{remove-unused}. +At the definition site of a function, the body of the +function is measured. It will then be marked as eligible for inlining +(and hence inlined at every direct call site) if: +\begin{itemize} +\item the measured size (in unspecified units) is smaller than that of a +function call plus the argument of the {\tt -inline} command-line flag; and +\item the function is not recursive. +\end{itemize} + +Non-Flambda versions of the compiler cannot inline functions that +contain a definition of another function. However {\tt -Oclassic} does +permit this. Further, non-Flambda versions also cannot inline functions +that are only themselves exposed as a result of a previous pass of inlining, +but again this is permitted by {\tt -Oclassic}. +For example: +\begin{verbatim} +module M : sig + val i : int +end = struct + let f x = + let g y = x + y in + g + let h = f 3 + let i = h 4 (* h is correctly discovered to be g and inlined *) +end +\end{verbatim} + +All of this contrasts with the normal Flambda mode, that is to say +without {\tt -Oclassic}, where: +\begin{itemize} +\item the inlining decision is made at the {\bf call site}; and +\item recursive functions can be handled, by {\em specialisation} (see +below). +\end{itemize} +The Flambda mode is described in the next section. + +\subsection{Overview of ``Flambda'' inlining heuristics} + +The Flambda inlining heuristics, used whenever the compiler is configured +for Flambda and {\tt -Oclassic} was not specified, make inlining decisions +at call sites. This helps in situations where the context is important. +For example: +\begin{verbatim} +let f b x = + if b then + x + else + ... big expression ... + +let g x = f true x +\end{verbatim} +In this case, we would like to inline {\tt f} into {\tt g}, because a +conditional jump can be eliminated and the code size should reduce. If the +inlining decision has been made after the declaration of {\tt f} without +seeing the use, its size would have probably made it ineligible for +inlining; but at the call site, its final size can be known. Further, +this function should probably not be inlined systematically: if {\tt b} +is unknown, or indeed {\tt false}, there is little benefit to trade off +against a large increase in code size. In the existing non-Flambda inliner +this isn't a great problem because chains of inlining were cut off fairly +quickly. However it has led to excessive use of overly-large inlining +parameters such as {\tt -inline 10000}. + +In more detail, at each call site the following procedure is followed: +\begin{itemize} +\item Determine whether it is clear that inlining would be beneficial +without, for the moment, doing any inlining within the function itself. +(The exact assessment of {\em benefit} is described below.) If so, the +function is inlined. +\item If inlining the function is not clearly beneficial, then inlining +will be performed {\em speculatively} inside the function itself. The +search for speculative inlining possibilities is controlled by two +parameters: the {\em inlining threshold} and the {\em inlining depth}. +(These are described in more detail below.) +\begin{itemize} +\item If such speculation shows that performing some inlining inside the +function would be beneficial, then such inlining is performed and the +resulting function inlined at the original call site. +\item Otherwise, nothing happens. +\end{itemize} +\end{itemize} +Inlining within recursive functions of calls to other +functions in the same mutually-recursive group is kept in check by +an {\em unrolling depth}, described below. This ensures that functions are +not unrolled to excess. (Unrolling is only enabled +if {\tt -O3} optimisation level is selected and/or the +{\tt -inline-max-unroll} +flag is passed with an argument greater than zero.) + +\subsection{Handling of specific language constructs} + +\subsubsection{Functors}\label{functors} + +There is nothing particular about functors that inhibits inlining compared +to normal functions. To the inliner, these both look the same, except +that functors are marked as such. + +Applications of functors at toplevel are biased in favour of inlining. +(This bias may be adjusted: +see the documentation for {\tt -inline-lifting-benefit} below.) + +Applications of functors not at toplevel, for example in a local module +inside some other expression, are treated by the inliner identically to +normal function calls. + +\subsubsection{First-class modules} + +The inliner will be able to consider inlining a call to a function in a first +class module if it knows which particular function is going to be called. +The presence of the first-class module record that wraps the set of functions +in the module does not per se inhibit inlining. + +\subsubsection{Objects} + +Method calls to objects are not at present inlined by Flambda. + +\subsection{Inlining reports} + +If the {\tt -inlining-report} option is provided to the compiler then a file +will be emitted corresponding to each round of optimisation. For the +OCaml source file {\em basename}{\tt .ml} the files +are named {\em basename}{\tt .}{\em round}{\tt.inlining.org}, +with {\em round} a +zero-based integer. Inside the files, which are formatted as ``org mode'', +will be found English prose describing the decisions that the inliner took. + +\subsection{Assessment of inlining benefit}\label{assessment-inlining} + +Inlining typically +results in an increase in code size, which if left unchecked, may not only +lead to grossly large executables and excessive compilation times but also +a decrease in performance due to worse locality. As such, the +Flambda inliner trades off the change in code size against +the expected runtime performance benefit, with the benefit being computed +based on the number of operations that the compiler observes may be removed +as a result of inlining. + +For example given the following code: +\begin{verbatim} +let f b x = + if b then + x + else + ... big expression ... + +let g x = f true x +\end{verbatim} +it would be observed that inlining of {\tt f} would remove: +\begin{itemize} +\item one direct call; +\item one conditional branch. +\end{itemize} + +Formally, an estimate of runtime performance benefit is computed by +first summing +the cost of the operations that are known to be removed as a result of the +inlining and subsequent simplification of the inlined body. +The individual costs for the various kinds of operations may be adjusted +using the various {\tt -inline-...-cost} flags as follows. Costs are +specified as integers. All of these flags accept a single argument +describing such integers using the conventions +detailed in section\ \ref{rounds}. +\begin{options} +\item[\machine{-inline-alloc-cost}] The cost of an allocation. +\item[\machine{-inline-branch-cost}] The cost of a branch. +\item[\machine{-inline-call-cost}] The cost of a direct function call. +\item[\machine{-inline-indirect-cost}] The cost of an indirect function call. +\item[\machine{-inline-prim-cost}] The cost of a {\em primitive}. Primitives +encompass operations including arithmetic and memory access. +\end{options} +(Default values are described in section\ \ref{defaults} below.) + +The initial benefit value is then scaled by a factor that attempts to +compensate for the fact that the current point in the code, if under some +number of conditional branches, may be cold. (Flambda does not currently +compute hot and cold paths.) The factor---the estimated probability that +the inliner really is on a {\em hot} path---is calculated as +$(\frac{1}{1 + f})^{d}$, where $f$ is set by +{\tt -inline-branch-factor} and $d$ is the nesting depth of branches +at the current point. As the inliner descends into more deeply-nested +branches, the benefit of inlining thus lessens. + +The resulting benefit value is known as the {\em estimated benefit}. + +The change in code size is also estimated: morally speaking it should be the +change in machine code size, but since that is not available to the inliner, +an approximation is used. + +If the estimated benefit exceeds the increase in code size then the inlined +version of the function will be kept. Otherwise the function will not be +inlined. + +Applications of functors at toplevel will be given +an additional benefit (which may be controlled by the +{\tt -inline-lifting-benefit} flag) to bias inlining in such situations +towards keeping the inlined version. + +\subsection{Control of speculation}\label{speculation} + +As described above, there are three parameters that restrict the search +for inlining opportunities during speculation: +\begin{itemize} +\item the {\em inlining threshold}; +\item the {\em inlining depth}; +\item the {\em unrolling depth}. +\end{itemize} +These parameters are ultimately bounded by the arguments provided to +the corresponding command-line flags (or their default values): +\begin{itemize} +\item {\tt -inline} (or, if the call site that triggered speculation is +at toplevel, {\tt -inline-toplevel}); +\item {\tt -inline-max-depth}; +\item {\tt -inline-max-unroll}. +\end{itemize} +{\bf Note in particular} that {\tt -inline} does not have the meaning that +it has in the previous compiler or in {\tt -Oclassic} mode. In both of those +situations {\tt -inline} was effectively some kind of basic assessment of +inlining benefit. However in Flambda inlining mode it corresponds to a +constraint on the search; the assessment of benefit is independent, as +described above. + +When speculation starts the inlining threshold starts at the value set +by {\tt -inline} (or {\tt -inline-toplevel} if appropriate, see above). +Upon making a speculative inlining decision the +threshold is reduced by the code size of the function being inlined. +If the threshold becomes exhausted, at or below zero, no further speculation +will be performed. + +The inlining depth starts at zero +and is increased by one every time the inliner +descends into another function. It is then decreased by one every time the +inliner leaves such function. If the depth exceeds the value set by +{\tt -inline-max-depth} then speculation stops. This parameter is intended +as a general backstop for situations where the inlining +threshold does not control the search sufficiently. + +The unrolling depth applies to calls within the same mutually-recursive +group of functions. Each time an inlining of such a call is performed +the depth is incremented by one when examining the resulting body. If the +depth reaches the limit set by {\tt -inline-max-unroll} then speculation +stops. + +\section{Specialisation}\label{specialisation} + +The inliner may discover a call site to a recursive function where +something is known about the arguments: for example, they may be equal to +some other variables currently in scope. In this situation it may be +beneficial to {\em specialise} the function to those arguments. This is +done by copying the declaration of the function (and any others involved +in any same mutually-recursive declaration) and noting the extra information +about the arguments. The arguments augmented by this information are known +as {\em specialised arguments}. In order to try to ensure that specialisation +is not performed uselessly, arguments are only specialised if it can be shown +that they are {\em invariant}: in other words, during the execution of the +recursive function(s) themselves, the arguments never change. + +Unless overridden by an attribute (see below), specialisation of a function +will not be attempted if: +\begin{itemize} +\item the compiler is in {\tt -Oclassic} mode; +\item the function is not obviously recursive; +\item the function is not closed. +\end{itemize} + +The compiler can prove invariance of function arguments across multiple +functions within a recursive group (although this has some limitations, +as shown by the example below). + +It should be noted that the {\em unboxing of closures} pass (see below) +can introduce specialised arguments on non-recursive functions. (No other +place in the compiler currently does this.) + +\paragraph{Example: the well-known {\tt List.iter} function} +This function might be written like so: +\begin{verbatim} +let rec iter f l = + match l with + | [] -> () + | h :: t -> + f h; + iter f t +\end{verbatim} +and used like this: +\begin{verbatim} +let print_int x = + print_endline (string_of_int x) + +let run xs = + iter print_int (List.rev xs) +\end{verbatim} +The argument {\tt f} to {\tt iter} is invariant so the function may be +specialised: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as foo throughout iter'. *) + match l with + | [] -> () + | h :: t -> + f h; + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The compiler notes down that for the function {\tt iter'}, the argument +{\tt f} is specialised to the constant closure {\tt print\_int}. This +means that the body of {\tt iter'} may be simplified: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as foo throughout iter'. *) + match l with + | [] -> () + | h :: t -> + print_int h; (* this is now a direct call *) + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The call to {\tt print\_int} can indeed be inlined: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as foo throughout iter'. *) + match l with + | [] -> () + | h :: t -> + print_endline (string_of_int h); + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The unused specialised argument {\tt f} may now be removed, leaving: +\begin{verbatim} +let run xs = + let rec iter' l = + match l with + | [] -> () + | h :: t -> + print_endline (string_of_int h); + iter' t + in + iter' (List.rev xs) +\end{verbatim} + +\paragraph{Aside on invariant parameters.} The compiler cannot currently +detect invariance in cases such as the following. +\begin{verbatim} +let rec iter_swap f g l = + match l with + | [] -> () + | 0 :: t -> + iter_swap g f l + | h :: t -> + f h; + iter_swap f g t +\end{verbatim} + +\subsection{Assessment of specialisation benefit} + +The benefit of specialisation is assessed in a similar way as for inlining. +Specialised argument information may mean that the body of the function +being specialised can be simplified: the removed operations are accumulated +into a benefit. This, together with the size of the duplicated (specialised) +function declaration, is then assessed against the size of the call to the +original function. + +\section{Default settings of parameters}\label{defaults} + +The default settings (when not using {\tt -Oclassic}) are for one +round of optimisation using the following parameters. +% CR-soon mshinwell: for 4.04, let's autogenerate these. + +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{10} +\entree{{\tt -inline-branch-factor}}{0.1} +\entree{{\tt -inline-alloc-cost}}{7} +\entree{{\tt -inline-branch-cost}}{5} +\entree{{\tt -inline-call-cost}}{5} +\entree{{\tt -inline-indirect-cost}}{4} +\entree{{\tt -inline-prim-cost}}{3} +\entree{{\tt -inline-lifting-benefit}}{1300} +\entree{{\tt -inline-toplevel}}{160} +\entree{{\tt -inline-max-depth}}{1} +\entree{{\tt -inline-max-unroll}}{0} +\entree{{\tt -unbox-closures-factor}}{10} +\end{tableau} + +\subsection{Settings at -O2 optimisation level} + +When {\tt -O2} is specified two rounds of optimisation are performed. +The first round uses the default parameters (see above). The second uses +the following parameters. + +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{25} +\entree{{\tt -inline-branch-factor}}{Same as default} +\entree{{\tt -inline-alloc-cost}}{Double the default} +\entree{{\tt -inline-branch-cost}}{Double the default} +\entree{{\tt -inline-call-cost}}{Double the default} +\entree{{\tt -inline-indirect-cost}}{Double the default} +\entree{{\tt -inline-prim-cost}}{Double the default} +\entree{{\tt -inline-lifting-benefit}}{Same as default} +\entree{{\tt -inline-toplevel}}{400} +\entree{{\tt -inline-max-depth}}{2} +\entree{{\tt -inline-max-unroll}}{Same as default} +\entree{{\tt -unbox-closures-factor}}{Same as default} +\end{tableau} + +\subsection{Settings at -O3 optimisation level} + +When {\tt -O3} is specified three rounds of optimisation are performed. +The first two rounds are as for {\tt -O2}. The third round uses +the following parameters. + +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{50} +\entree{{\tt -inline-branch-factor}}{Same as default} +\entree{{\tt -inline-alloc-cost}}{Triple the default} +\entree{{\tt -inline-branch-cost}}{Triple the default} +\entree{{\tt -inline-call-cost}}{Triple the default} +\entree{{\tt -inline-indirect-cost}}{Triple the default} +\entree{{\tt -inline-prim-cost}}{Triple the default} +\entree{{\tt -inline-lifting-benefit}}{Same as default} +\entree{{\tt -inline-toplevel}}{800} +\entree{{\tt -inline-max-depth}}{3} +\entree{{\tt -inline-max-unroll}}{1} +\entree{{\tt -unbox-closures-factor}}{Same as default} +\end{tableau} + +\section{Manual control of inlining and specialisation} + +Should the inliner prove recalcitrant and refuse to inline a particular +function, or if the observed inlining decisions are not to the programmer's +satisfaction for some other reason, inlining behaviour can be dictated by the +programmer directly in the source code. +One example where this might be appropriate is when the programmer, +but not the compiler, knows that a particular function call is on a cold +code path. It might be desirable to prevent inlining of the function so +that the code size along the hot path is kept smaller, so as to increase +locality. + +The inliner is directed using attributes. +For non-recursive functions (and one-step unrolling of recursive functions, +although {\tt \@unroll} is more clear for this purpose) +the following are supported: +\begin{options} +\item[{\machine{\@\@inline always}} or {\machine{\@\@inline never}}] Attached +to a {\em declaration} of a function or functor, these direct the inliner to +either +always or never inline, irrespective of the size/benefit calculation. (If +the function is recursive then the body is substituted and no special +action is taken for the recursive call site(s).) +{\machine{\@\@inline}} with no argument is equivalent to +{\machine{\@\@inline always}}. +\item[{\machine{\@inlined always}} or {\machine{\@inlined never}}] Attached +to a function {\em application}, these direct the inliner likewise. These +attributes at call sites override any other attribute that may be present +on the corresponding declaration. +{\machine{\@inlined}} with no argument is equivalent to +{\machine{\@inlined always}}. +\end{options} + +For recursive functions the relevant attributes are: +\begin{options} +\item[{\machine{\@\@specialise always}} or {\machine{\@\@specialise never}}]% +Attached to a declaration of a function +or functor, this directs the inliner to either always or never +specialise the function so +long as it has appropriate contextual knowledge, irrespective of the +size/benefit calculation. +{\machine{\@\@specialise}} with no argument is equivalent to +{\machine{\@\@specialise always}}. +\item[{\machine{\@specialised always}} or {\machine{\@specialised never}}]% +Attached to a function application, this +directs the inliner likewise. This attribute at a call site overrides any +other attribute that may be present on the corresponding declaration. +(Note that the function will still only be specialised if there exist +one or more invariant parameters whose values are known.) +{\machine{\@specialised}} with no argument is equivalent to +{\machine{\@specialised always}}. +\item[{\machine{\@unrolled }}$n$] This attribute is attached to a function +application and always takes an integer argument. Each time the inliner sees +the attribute it behaves as follows: +\begin{itemize} +\item If $n$ is zero or less, nothing happens. +\item Otherwise the function being called is substituted at the call site +with its body having been rewritten such that +any recursive calls to that function {\em or +any others in the same mutually-recursive group} are annotated with the +attribute {\tt unrolled(}$n - 1${\tt )}. Inlining may continue on that body. +\end{itemize} +As such, $n$ behaves as the ``maximum depth of unrolling''. +\end{options} + +A compiler warning will be emitted if it was found impossible to obey an +annotation from an {\tt \@inlined} or {\tt \@specialised} attribute. + +\paragraph{Example showing correct placement of attributes} +\begin{verbatim} +module F (M : sig type t end) = struct + let[@inline never] bar x = + x * 3 + + let foo x = + (bar [@inlined]) (42 + x) +end [@@inline never] + +module X = F [@inlined] (struct type t = int end) +\end{verbatim} + +\section{Simplification} + +Simplification, which is run in conjunction with inlining, +propagates information (known as {\em approximations}) about which +variables hold what values at runtime. Certain relationships between +variables and symbols are also tracked: for example, some variable may be +known to always hold the same value as some other variable; or perhaps +some variable may be known to always hold the value pointed to by some +symbol. + +The propagation can help to eliminate allocations in cases such as: +\begin{verbatim} +let f x y = + ... + let p = x, y in + ... + ... (fst p) ... (snd p) ... +\end{verbatim} +The projections from {\tt p} may be replaced by uses of the variables +{\tt x} and {\tt y}, potentially meaning that {\tt p} becomes unused. + +The propagation performed by the simplification pass is also important for +discovering which functions flow to indirect call sites. This can enable +the transformation of such call sites into direct call sites, which makes +them eligible for an inlining transformation. + +Note that no information is propagated about the contents of strings, +even in {\tt safe-string} mode, because it cannot yet be guaranteed +that they are immutable throughout a given program. + +\section{Other code motion transformations} + +\subsection{Lifting of constants}\label{lift-const} + +Expressions found to be constant will be lifted to symbol +bindings---that is to say, they will be statically allocated in the +object file---when +they evaluate to boxed values. Such constants may be straightforward numeric +constants, such as the floating-point number {\tt 42.0}, or more complicated +values such as constant closures. + +Lifting of constants to toplevel reduces allocation at runtime. + +The compiler aims to share constants lifted to toplevel such that there +are no duplicate definitions. However if {\tt .cmx} files are hidden +from the compiler then maximal sharing may not be possible. + +\paragraph{Notes about float arrays} % +The following language semantics apply specifically to constant float arrays. +(By ``constant float array'' is meant an array consisting entirely of floating +point numbers that are known at compile time. A common case is a literal +such as {\tt [| 42.0; 43.0; |]}. +\begin{itemize} +\item Constant float arrays at the toplevel are mutable and never shared. +(That is to say, for each +such definition there is a distinct symbol in the data section of the object +file pointing at the array.) +\item Constant float arrays not at toplevel are mutable and are created each +time the expression is evaluated. This can be thought of as an operation that +takes an immutable array (which in the source code has no associated name; let +us call it the {\em initialising array}) and +duplicates it into a fresh mutable array. +\begin{itemize} +\item If the array is of size four or less, the expression will create a +fresh block and write the values into it one by one. There is no reference +to the initialising array as a whole. + +\item Otherwise, the initialising array is lifted out and subject to the +normal constant sharing procedure; +creation of the array consists of bulk copying the initialising array +into a fresh value on the OCaml heap. +\end{itemize} +\end{itemize} + +\subsection{Lifting of toplevel let bindings} + +Toplevel {\tt let}-expressions may be lifted to symbol bindings to ensure +that the corresponding bound variables are not captured by closures. If the +defining expression of a given binding is found to be constant, it is bound +as such (the technical term is a {\em let-symbol} binding). + +Otherwise, the symbol is bound to a (statically-allocated) +{\em preallocated block} containing one field. At runtime, the defining +expression will be evaluated and the first field of the block filled with +the resulting value. This {\em initialise-symbol} binding +causes one extra indirection but ensures, by +virtue of the symbol's address being known at compile time, that uses of the +value are not captured by closures. + +It should be noted that the blocks corresponding to initialise-symbol +bindings are kept alive forever, by virtue of them occurring in a static +table of GC roots within the object file. This extended lifetime of +expressions may on occasion be surprising. If it is desired to create +some non-constant value (for example when writing GC tests) that does not +have this +extended lifetime, then it may be created and used inside a function, +with the application point of that function (perhaps at toplevel)---or +indeed the function declaration itself---marked +as to never be inlined. This technique prevents lifting of the definition +of the value in question (assuming of course that it is not constant). + +\section{Unboxing transformations} + +The transformations in this section relate to the splitting apart of +{\em boxed} (that is to say, non-immediate) values. They are largely +intended to reduce allocation, which tends to result in a runtime +performance profile with lower variance and smaller tails. + +\subsection{Unboxing of closure variables}\label{unbox-fvs} + +This transformation is enabled unless +{\tt -no-unbox-free-vars-of-closures} is provided. + +Variables that appear in closure environments may themselves be boxed +values. As such, they may be split into further closure variables, each +of which corresponds to some projection from the original closure variable(s). +This transformation is called {\em unboxing of closure variables} or +{\em unboxing of free variables of closures}. It is only applied when +there is +reasonable certainty that there are no uses of the boxed free variable itself +within the corresponding function bodies. +% CR-someday mshinwell: Actually, we probably don't check this carefully +% enough. It needs a global analysis in case there is an out-of-scope +% projection. + +\paragraph{Example:} In the following code, the compiler observes that +the closure returned from the function {\tt f} contains a variable {\tt pair} +(free in the body of {\tt f}) that may be split into two separate variables. +\begin{verbatim} +let f x0 x1 = + let pair = x0, x1 in + Printf.printf "foo\n"; + fun y -> + fst pair + snd pair + y +\end{verbatim} +After some simplification one obtains: +\begin{verbatim} +let f x0 x1 = + let pair_0 = x0 in + let pair_1 = x1 in + Printf.printf "foo\n"; + fun y -> + pair_0 + pair_1 + y +\end{verbatim} +and then: +\begin{verbatim} +let f x0 x1 = + Printf.printf "foo\n"; + fun y -> + x0 + x1 + y +\end{verbatim} +The allocation of the pair has been eliminated. + +This transformation does not operate if it would cause the closure to +contain more than twice as many closure variables as it did beforehand. + +\subsection{Unboxing of specialised arguments}\label{unbox-spec-args} + +This transformation is enabled unless +{\tt -no-unbox-specialised-args} is provided. + +It may become the case during compilation that one or more invariant arguments +to a function become specialised to a particular value. When such values are +themselves boxed the corresponding specialised arguments may be split into +more specialised arguments corresponding to the projections out of the boxed +value that occur within the function body. This transformation is called +{\em unboxing of specialised arguments}. It is only applied when there is +reasonable certainty that the boxed argument itself is unused within the +function. + +If the function in question is involved in a recursive group then unboxing +of specialised arguments may be immediately replicated across the group +based on the dataflow between invariant arguments. + +\paragraph{Example:} Having been given the following code, the compiler +will inline {\tt loop} into {\tt f}, and then observe {\tt inv} +being invariant and always the pair formed by adding {\tt 42} and {\tt 43} +to the argument {\tt x} of the function {\tt f}. +\begin{verbatim} +let rec loop inv xs = + match xs with + | [] -> fst inv + snd inv + | x::xs -> x + loop2 xs inv +and loop2 ys inv = + match ys with + | [] -> 4 + | y::ys -> y - loop inv ys + +let f x = + Printf.printf "%d\n" (loop (x + 42, x + 43) [1; 2; 3]) +\end{verbatim} +Since the functions have sufficiently few arguments, more specialised +arguments will be added. After some simplification one obtains: +\begin{verbatim} +let f x = + let rec loop' xs inv_0 inv_1 = + match xs with + | [] -> inv_0 + inv_1 + | x::xs -> x + loop2' xs inv_0 inv_1 + and loop2' ys inv_0 inv_1 = + match ys with + | [] -> 4 + | y::ys -> y - loop' ys inv_0 inv_1 + in + Printf.printf "%d\n" (loop' (x + 42) (x + 43) [1; 2; 3]) +\end{verbatim} +The allocation of the pair within {\tt f} has been removed. (Since the +two closures for {\tt loop'} and {\tt loop2'} are constant they will also be +lifted to toplevel with no runtime allocation penalty. This +would also happen without having run the transformation to unbox +specialise arguments.) + +The transformation to unbox specialised arguments never introduces extra +allocation. + +The transformation will not unbox arguments if it would result in the +original function having sufficiently many arguments so as to inhibit +tail-call optimisation. + +The transformation is implemented by creating a wrapper function that +accepts the original arguments. Meanwhile, the original function is renamed +and extra arguments are added corresponding to the unboxed specialised +arguments; this new function +is called from the wrapper. The wrapper will then be inlined +at direct call sites. Indeed, all call sites will be direct unless +{\tt -unbox-closures} is being used, since they will have been generated +by the compiler when originally specialising the function. (In the case +of {\tt -unbox-closures} other functions may appear with specialised +arguments; in this case there may be indirect calls and these will incur +a small penalty owing to having to bounce through the wrapper. The technique +of {\em direct call surrogates} used for {\tt -unbox-closures} is not +used by the transformation to unbox specialised arguments.) + +\subsection{Unboxing of closures}\label{unbox-closures} + +This transformation is {\em not} enabled by default. It may be enabled +using the {\tt -unbox-closures} flag. + +The transformation replaces closure variables by specialised arguments. +The aim is to cause more closures to become closed. It is particularly +applicable, as a means of reducing allocation, where the function concerned +cannot be inlined or specialised. For example, some non-recursive function +might be too large to inline; or some recursive function might offer +no opportunities for specialisation perhaps because its only argument is +one of type {\tt unit}. + +At present there may be a small penalty in terms of actual runtime +performance when this transformation is enabled, although more stable +performance may be obtained due to reduced allocation. It is recommended +that developers experiment to determine whether the option is beneficial +for their code. (It is expected that in the future it will be possible +for the performance degradation to be removed.) + +\paragraph{Simple example:} In the following code (which might typically +occur when {\tt g} is too large to inline) the value of {\tt x} would usually +be communicated to the application of the {\tt +} function via the closure +of {\tt g}. +\begin{verbatim} +let f x = + let g y = + x + y + in + (g [@inlined never]) 42 +\end{verbatim} +Unboxing of the closure causes the value for {\tt x} inside {\tt g} to +be passed as an argument to {\tt g} rather than through its closure. This +means that the closure of {\tt g} becomes constant and may be lifted to +toplevel, eliminating the runtime allocation. + +The transformation is implemented by adding a new wrapper function in the +manner of that used when unboxing specialised arguments. The closure +variables are still free in the wrapper, but the intention is that when +the wrapper is inlined at direct call sites, the relevant values are +passed directly to the main function via the new specialised arguments. + +Adding such a wrapper will penalise indirect calls to the function +(which might exist in arbitrary places; remember that this transformation +is not for example applied only on functions the compiler has produced +as a result of specialisation) since such calls will bounce through +the wrapper. To +mitigate this, if a function is small enough when weighed up against +the number of free variables being removed, it will be duplicated by the +transformation to obtain two versions: the original (used for indirect calls, +since we can do no better) and the wrapper/rewritten function pair as +described in the previous paragraph. The wrapper/rewritten function pair +will only be used at direct call sites of the function. (The wrapper in +this case is known as a {\em direct call surrogate}, since +it takes the place of another function---the unchanged version used for +indirect calls---at direct call sites.) + +The {\tt -unbox-closures-factor} command line flag, which takes an +integer, may be used to adjust the point at which a function is deemed +large enough to be ineligible for duplication. The benefit of +duplication is scaled by the integer before being evaluated against the +size. + +\paragraph{Harder example:} In the following code, there are two closure +variables that would typically cause closure allocations. One is called +{\tt fv} and occurs inside the function {\tt baz}; the other is called +{\tt z} and occurs inside the function {\tt bar}. +In this toy (yet sophisticated) example we again use an attribute to +simulate the typical situation where the first argument of {\tt baz} is +too large to inline. +\begin{verbatim} +let foo c = + let rec bar zs fv = + match zs with + | [] -> [] + | z::zs -> + let rec baz f = function + | [] -> [] + | a::l -> let r = fv + ((f [@inlined never]) a) in r :: baz f l + in + (map2 (fun y -> z + y) [z; 2; 3; 4]) @ bar zs fv + in + Printf.printf "%d" (List.length (bar [1; 2; 3; 4] c)) +\end{verbatim} +The code resulting from applying {\tt -O3 -unbox-closures} to this code +passes the free variables via function arguments in +order to eliminate all closure allocation in this example (aside from any +that might be performed inside {\tt printf}). + +\section{Removal of unused code and values}\label{remove-unused} + +\subsection{Removal of redundant let expressions} + +The simplification pass removes unused {\tt let} bindings so long as +their corresponding defining expressions have ``no effects''. See +the section ``Treatment of effects'' below for the precise definition of +this term. + +\subsection{Removal of redundant program constructs} + +This transformation is analogous to the removal of {\tt let}-expressions +whose defining expressions have no effects. It operates instead on symbol +bindings, removing those that have no effects. + +\subsection{Removal of unused arguments}\label{remove-unused-args} + +This transformation is only enabled by default for specialised arguments. +It may be enabled for all arguments using the {\tt -remove-unused-arguments} +flag. + +The pass analyses functions to determine which arguments are unused. +Removal is effected by creating a wrapper function, which will be inlined +at every direct call site, that accepts the original arguments and then +discards the unused ones before calling the original function. As a +consequence, this transformation may be detrimental if the original +function is usually indirectly called, since such calls will now bounce +through the wrapper. (The technique of {\em direct call surrogates} used +to reduce this penalty during unboxing of closure variables (see above) +does not yet apply to the pass that removes unused arguments.) + +\subsection{Removal of unused closure variables} + +This transformation performs an analysis across +the whole compilation unit to determine whether there exist closure variables +that are never used. Such closure variables are then eliminated. (Note that +this has to be a whole-unit analysis because a projection of a closure +variable from some particular closure may have propagated to an arbitrary +location within the code due to inlining.) + +\section{Other code transformations} + +\subsection{Transformation of non-escaping references into mutable variables} + +Flambda performs a simple analysis analogous to that performed elsewhere +in the compiler that can transform {\tt ref}s into mutable variables +that may then be held in registers (or on the stack as appropriate) rather +than being allocated on the OCaml heap. This only happens so long as the +reference concerned can be shown to not escape from its defining scope. + +\subsection{Substitution of closure variables for specialised arguments} + +This transformation discovers closure variables that are known to be +equal to specialised arguments. Such closure variables are replaced by +the specialised arguments; the closure variables may then be removed by +the ``removal of unused closure variables'' pass (see below). + +\section{Treatment of effects} + +The Flambda optimisers classify expressions in order to determine whether +an expression: +\begin{itemize} +\item does not need to be evaluated at all; and/or +\item may be duplicated. +\end{itemize} + +This is done by forming judgements on the {\em effects} and the {\em coeffects} +that might be performed were the expression to be executed. Effects talk +about how the expression might affect the world; coeffects talk about how +the world might affect the expression. + +Effects are classified as follows: +\begin{options} +\item[{\bf No effects:}] The expression does not change the observable state +of the world. For example, it must not write to any mutable storage, +call arbitrary external functions or change control flow (e.g. by raising +an exception). Note that allocation is {\em not} classed as having +``no effects'' (see below). +\begin{itemize} +\item It is assumed in the compiler that expressions with no +effects, whose results are not used, may be eliminated. (This typically +happens where the expression in question is the defining expression of a +{\tt let}; in such cases the {\tt let}-expression will be +eliminated.) It is further +assumed that such expressions with no effects may be +duplicated (and thus possibly executed more than once). +\item Exceptions arising from allocation points, for example +``out of memory'' or +exceptions propagated from finalizers or signal handlers, are treated as +``effects out of the ether'' and thus ignored for our determination here +of effectfulness. The same goes for floating point operations that may +cause hardware traps on some platforms. +\end{itemize} +\item[{\bf Only generative effects:}] The expression does not change the +observable state of the world save for possibly affecting the state of +the garbage collector by performing an allocation. Expressions +that only have generative effects and whose results are unused +may be eliminated by the compiler. However, unlike expressions with +``no effects'', such expressions will never be eligible for duplication. +\item[{\bf Arbitrary effects:}] All other expressions. +\end{options} + +There is a single classification for coeffects: +\begin{options} +\item[{\bf No coeffects:}] The expression does not observe the effects (in +the sense described above) of other expressions. For example, it must not +read from any mutable storage or call arbitrary external functions. +\end{options} + +It is assumed in the compiler that, subject to data dependencies, +expressions with neither effects nor coeffects may be reordered with +respect to other expressions. + +\section{Compilation of statically-allocated modules} + +Compilation of modules that are able to be statically allocated (for example, +the module corresponding to an entire compilation unit, as opposed to a first +class module dependent on values computed at runtime) initially follows the +strategy used for bytecode. A sequence of {\tt let}-bindings, which may be +interspersed with arbitrary effects, surrounds a record creation that becomes +the module block. The Flambda-specific transformation follows: these bindings +are lifted to toplevel symbols, as described above. + +\section{Inhibition of optimisation}\label{inhibition} + +Especially when writing benchmarking suites that run non-side-effecting +algorithms in loops, it may be found that the optimiser entirely +elides the code being benchmarked. This behaviour can be prevented by +using the {\tt Sys.opaque\_identity} function (which indeed behaves as a +normal OCaml function and does not possess any ``magic'' semantics). The +documentation of the {\tt Sys} module should be consulted for further details. + +\section{Use of unsafe operations}\label{unsafe} + +The behaviour of the Flambda simplification pass means that certain unsafe +operations, which may without Flambda or when using previous versions of +the compiler be safe, must not be used. This specifically refers to +functions found in the {\tt Obj} module. + +In particular, it is forbidden to change any value (for example using +{\tt Obj.set\_field} or {\tt Obj.set\_tag}) that is not mutable. +(Values returned from C stubs +are always treated as mutable.) The compiler will emit warning 59 if it +detects such a write---but it cannot warn in all cases. Here is an example +of code that will trigger the warning: +\begin{verbatim} +let f x = + let a = 42, x in + (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} +The reason this is unsafe is because the simplification pass believes that +{\tt fst a} holds the value {\tt 42}; and indeed it must, unless type +soundness has been broken via unsafe operations. + +If it must be the case that code has to be written that triggers warning 59, +but the code is known to actually be correct (for some definition of +correct), then {\tt Sys.opaque\_identity} may be used to wrap the value +before unsafe operations are performed upon it. Great care must be taken +when doing this to ensure that the opacity is added at the correct place. +It must be emphasised that this use of {\tt Sys.opaque\_identity} is only +for {\bf exceptional} cases. It should not be used in normal code or to +try to guide the optimiser. + +As an example, this code will return the integer {\tt 1}: +\begin{verbatim} +let f x = + let a = Sys.opaque_identity (42, x) in + (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} +However the following code will still return {\tt 42}: +\begin{verbatim} +let f x = + let a = 42, x in + Sys.opaque_identity (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} + +High levels of inlining performed by Flambda may expose bugs in code +thought previously to be correct. Take care, for example, not +to add type annotations that claim some mutable value is always immediate +if it might be possible for an unsafe operation to update it to a boxed +value. + +\section{Glossary} + +The following terminology is used in this chapter of the manual. + +\begin{options} +\item[{\bf Call site}] See {\em direct call site} and % +{\em indirect call site} below. +\item[{\bf Closed function}] A function whose body has no free variables +except its parameters and any to which are bound other functions within +the same (possibly mutually-recursive) declaration. +\item[{\bf Closure}] The runtime representation of a function. This +includes pointers to the code of the function +together with the values of any variables that are used in the body of +the function but actually defined outside of the function, in the +enclosing scope. +The values of such variables, collectively known as the +{\em environment}, are required because the function may be +invoked from a place where the original bindings of such variables are +no longer in scope. A group of possibly +mutually-recursive functions defined using {\em let rec} all share a +single closure. (Note to developers: in the Flambda source code a +{\em closure} always corresponds to a single function; a +{\em set of closures} refers to a group of such.) +\item[{\bf Closure variable}] A member of the environment held within the +closure of a given function. +\item[{\bf Constant}] Some entity (typically an expression) the value of which +is known by the compiler at compile time. Constantness may be explicit from +the source code or inferred by the Flambda optimisers. +\item[{\bf Constant closure}] A closure that is statically allocated in an +object file. It is almost always the case that the environment portion of +such a closure is empty. +\item[{\bf Defining expression}] The expression {\tt e} in % +{\tt let x = e in e'}. +\item[{\bf Direct call site}] A place in a program's code where a function is +called and it is known at compile time which function it will always be. +\item[{\bf Indirect call site}] A place in a program's code where a function +is called but is not known to be a {\em direct call site}. +\item[{\bf Program}] A collection of {\em symbol bindings} forming the +definition of a single compilation unit (i.e. {\tt .cmx} file). +\item[{\bf Specialised argument}] An argument to a function that is known +to always hold a particular value at runtime. These are introduced by the +inliner when specialising recursive functions; and the {\tt unbox-closures} +pass. (See section\ \ref{specialisation}.) +\item[{\bf Symbol}] A name referencing a particular place in an object file +or executable image. At that particular place will be some constant value. +Symbols may be examined using operating system-specific tools (for +example {\tt objdump} on Linux). +\item[{\bf Symbol binding}] Analogous to a {\tt let}-expression but working +at the level of symbols defined in the object file. The address of a symbol is +fixed, but it may be bound to both constant and non-constant expressions. +\item[{\bf Toplevel}] An expression in the current program which is not +enclosed within any function declaration. +\item[{\bf Variable}] A named entity to which some OCaml value is bound by a +{\tt let} expression, pattern-matching construction, or similar. +\end{options} diff --git a/manual/tools/fix_index.sh b/manual/tools/fix_index.sh index 7dd1ab421..d2402b40c 100755 --- a/manual/tools/fix_index.sh +++ b/manual/tools/fix_index.sh @@ -48,5 +48,5 @@ EOF case $? in 0) echo "fix_index.sh: fixed $1 successfully.";; - *) echo "fix_index.sh: some error occurred."; exit 2;; + *) echo "fix_index.sh: some error occurred."; exit 0;; esac