Additional documentation for format strings.

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@12407 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Pierre Weis 2012-04-27 09:56:05 +00:00
parent bfc2d7ec34
commit 2b94ebecf8
5 changed files with 163 additions and 71 deletions

View File

@ -15,7 +15,7 @@
include Makefile.common
CFLAGS=-DCAML_NAME_SPACE -O $(BYTECCCOMPOPTS) $(IFLEXDIR)
CFLAGS=-DCAML_NAME_SPACE -O $(BYTECCCOMPOPTS) -fPIC $(IFLEXDIR)
DFLAGS=-DCAML_NAME_SPACE -g -DDEBUG $(BYTECCCOMPOPTS) $(IFLEXDIR)
OBJS=$(COMMONOBJS) unix.o main.o

View File

@ -43,7 +43,7 @@ type pp_token =
| Pp_newline (* to force a newline inside a block *)
| Pp_if_newline (* to do something only if this very
line has been broken *)
| Pp_open_tag of string (* opening a tag name *)
| Pp_open_tag of tag (* opening a tag name *)
| Pp_close_tag (* closing the most recently opened tag *)
and tag = string
@ -147,13 +147,13 @@ type formatter = {
(* Ellipsis string. *)
mutable pp_ellipsis : string;
(* Output function. *)
mutable pp_output_function : string -> int -> int -> unit;
mutable pp_out_string : string -> int -> int -> unit;
(* Flushing function. *)
mutable pp_flush_function : unit -> unit;
mutable pp_out_flush : unit -> unit;
(* Output of new lines. *)
mutable pp_output_newline : unit -> unit;
mutable pp_out_newline : unit -> unit;
(* Output of indentation spaces. *)
mutable pp_output_spaces : int -> unit;
mutable pp_out_spaces : int -> unit;
(* Are tags printed ? *)
mutable pp_print_tags : bool;
(* Are tags marked ? *)
@ -240,9 +240,9 @@ let pp_clear_queue state =
let pp_infinity = 1000000010;;
(* Output functions for the formatter. *)
let pp_output_string state s = state.pp_output_function s 0 (String.length s)
and pp_output_newline state = state.pp_output_newline ()
and pp_display_blanks state n = state.pp_output_spaces n
let pp_output_string state s = state.pp_out_string s 0 (String.length s)
and pp_output_newline state = state.pp_out_newline ()
and pp_output_spaces state n = state.pp_out_spaces n
;;
(* To format a break, indenting a new line. *)
@ -254,7 +254,7 @@ let break_new_line state offset width =
let real_indent = min state.pp_max_indent indent in
state.pp_current_indent <- real_indent;
state.pp_space_left <- state.pp_margin - state.pp_current_indent;
pp_display_blanks state state.pp_current_indent
pp_output_spaces state state.pp_current_indent
;;
(* To force a line break inside a block: no offset is added. *)
@ -263,7 +263,7 @@ let break_line state width = break_new_line state 0 width;;
(* To format a break that fits on the current line. *)
let break_same_line state width =
state.pp_space_left <- state.pp_space_left - width;
pp_display_blanks state width
pp_output_spaces state width
;;
(* To indent no more than pp_max_indent, if one tries to open a block
@ -675,9 +675,9 @@ and pp_open_box state indent = pp_open_box_gen state indent Pp_box;;
(* Print a new line after printing all queued text
(same for print_flush but without a newline). *)
let pp_print_newline state () =
pp_flush_queue state true; state.pp_flush_function ()
pp_flush_queue state true; state.pp_out_flush ()
and pp_print_flush state () =
pp_flush_queue state false; state.pp_flush_function ();;
pp_flush_queue state false; state.pp_out_flush ();;
(* To get a newline when one does not want to close the current block. *)
let pp_force_newline state () =
@ -808,42 +808,70 @@ let pp_set_margin state n =
let pp_get_margin state () = state.pp_margin;;
(* Record gathering the four low-level output functions of a formatter.
   Each field corresponds to the formatter state field of the same name
   prefixed with [pp_] (see [pp_set_formatter_out_functions]). *)
type formatter_out_functions = {
(* Output function; the formatter calls it with a string, a start offset
   and a length, e.g. [out_string s 0 (String.length s)]. *)
out_string : string -> int -> int -> unit;
(* Flushing function. *)
out_flush : unit -> unit;
(* Output of new lines. *)
out_newline : unit -> unit;
(* Output of indentation spaces; the argument is the number of spaces. *)
out_spaces : int -> unit;
}
;;
(* Install the four low-level output functions of formatter [state] at once,
   reading them from the [formatter_out_functions] record [fns]. *)
let pp_set_formatter_out_functions state fns =
  state.pp_out_string <- fns.out_string;
  state.pp_out_flush <- fns.out_flush;
  state.pp_out_newline <- fns.out_newline;
  state.pp_out_spaces <- fns.out_spaces
;;
(* Return a snapshot of the four low-level output functions currently
   installed in formatter [state], packed as a [formatter_out_functions]
   record. *)
let pp_get_formatter_out_functions state () =
  let out_string = state.pp_out_string
  and out_flush = state.pp_out_flush
  and out_newline = state.pp_out_newline
  and out_spaces = state.pp_out_spaces in
  { out_string; out_flush; out_newline; out_spaces }
;;
let pp_set_formatter_output_functions state f g =
state.pp_output_function <- f; state.pp_flush_function <- g;;
state.pp_out_string <- f; state.pp_out_flush <- g;;
let pp_get_formatter_output_functions state () =
(state.pp_output_function, state.pp_flush_function)
(state.pp_out_string, state.pp_out_flush)
;;
let pp_set_all_formatter_output_functions state
~out:f ~flush:g ~newline:h ~spaces:i =
pp_set_formatter_output_functions state f g;
state.pp_output_newline <- h;
state.pp_output_spaces <- i;
state.pp_out_newline <- h;
state.pp_out_spaces <- i;
;;
let pp_get_all_formatter_output_functions state () =
(state.pp_output_function, state.pp_flush_function,
state.pp_output_newline, state.pp_output_spaces)
(state.pp_out_string, state.pp_out_flush,
state.pp_out_newline, state.pp_out_spaces)
;;
(* Default function to output new lines. *)
let display_newline state () = state.pp_output_function "\n" 0 1;;
let display_newline state () = state.pp_out_string "\n" 0 1;;
(* Default function to output spaces. *)
let blank_line = String.make 80 ' ';;
let rec display_blanks state n =
if n > 0 then
if n <= 80 then state.pp_output_function blank_line 0 n else
if n <= 80 then state.pp_out_string blank_line 0 n else
begin
state.pp_output_function blank_line 0 80;
state.pp_out_string blank_line 0 80;
display_blanks state (n - 80)
end
;;
let pp_set_formatter_out_channel state os =
state.pp_output_function <- output os;
state.pp_flush_function <- (fun () -> flush os);
state.pp_output_newline <- display_newline state;
state.pp_output_spaces <- display_blanks state;
state.pp_out_string <- output os;
state.pp_out_flush <- (fun () -> flush os);
state.pp_out_newline <- display_newline state;
state.pp_out_spaces <- display_blanks state;
;;
(**************************************************************
@ -855,8 +883,8 @@ let pp_set_formatter_out_channel state os =
let default_pp_mark_open_tag s = "<" ^ s ^ ">";;
let default_pp_mark_close_tag s = "</" ^ s ^ ">";;
let default_pp_print_open_tag _ = ();;
let default_pp_print_close_tag = default_pp_print_open_tag;;
let default_pp_print_open_tag = ignore;;
let default_pp_print_close_tag = ignore;;
let pp_make_formatter f g h i =
(* The initial state of the formatter contains a dummy box. *)
@ -883,10 +911,10 @@ let pp_make_formatter f g h i =
pp_curr_depth = 1;
pp_max_boxes = max_int;
pp_ellipsis = ".";
pp_output_function = f;
pp_flush_function = g;
pp_output_newline = h;
pp_output_spaces = i;
pp_out_string = f;
pp_out_flush = g;
pp_out_newline = h;
pp_out_spaces = i;
pp_print_tags = false;
pp_mark_tags = false;
pp_mark_open_tag = default_pp_mark_open_tag;
@ -900,8 +928,8 @@ let pp_make_formatter f g h i =
(* Make a formatter with default functions to output spaces and new lines. *)
let make_formatter output flush =
let ppf = pp_make_formatter output flush ignore ignore in
ppf.pp_output_newline <- display_newline ppf;
ppf.pp_output_spaces <- display_blanks ppf;
ppf.pp_out_newline <- display_newline ppf;
ppf.pp_out_spaces <- display_blanks ppf;
ppf
;;
@ -979,6 +1007,11 @@ and get_ellipsis_text = pp_get_ellipsis_text std_formatter
and set_formatter_out_channel =
pp_set_formatter_out_channel std_formatter
and set_formatter_out_functions =
pp_set_formatter_out_functions std_formatter
and get_formatter_out_functions =
pp_get_formatter_out_functions std_formatter
and set_formatter_output_functions =
pp_set_formatter_output_functions std_formatter
and get_formatter_output_functions =
@ -1347,5 +1380,6 @@ let bprintf b =
(* Deprecated alias for ksprintf. *)
let kprintf = ksprintf;;
(* Output everything left in the pretty printer queue at end of execution. *)
at_exit print_flush
;;

View File

@ -282,7 +282,7 @@ type tag = string;;
those strings is considered as zero for line breaking).
Thus, tag handling is in some sense transparent to pretty-printing
and does not interfere with usual pretty-printing. Hence, a single
and does not interfere with usual indentation. Hence, a single
pretty printing routine can output both simple ``verbatim''
material or richer decorated output depending on the treatment of
tags. By default, tags are not active, hence the output is not
@ -367,6 +367,17 @@ val get_formatter_output_functions :
(** {6:meaning Changing the meaning of standard formatter pretty printing} *)
(** The set of low-level output functions of a formatter, gathered in a
    single record. *)
type formatter_out_functions = {
out_string : string -> int -> int -> unit;
(** Output function, applied to a string, a start offset and a length. *)
out_flush : unit -> unit;
(** Flushing function. *)
out_newline : unit -> unit;
(** Function called to output a new line. *)
out_spaces : int -> unit;
(** Function called to output the given number of indentation spaces. *)
}
;;
(** [set_formatter_out_functions fns] installs the output, flush, newline
    and spaces functions found in record [fns] into the standard
    formatter. *)
val set_formatter_out_functions: formatter_out_functions -> unit;;
(** [get_formatter_out_functions ()] returns the output, flush, newline and
    spaces functions currently installed in the standard formatter, packed
    in a record. *)
val get_formatter_out_functions: unit -> formatter_out_functions;;
(** The [Format] module is versatile enough to let you completely redefine
the meaning of pretty printing: you may provide your own functions to define
how to handle indentation, line breaking, and even printing of all the

View File

@ -866,9 +866,33 @@ external decr : int ref -> unit = "%decr"
(** {6 Operations on format strings} *)
(** Format strings are used to read and print data using formatted input
functions in module {!Scanf} and formatted output in modules {!Printf} and
{!Format}. *)
(** Format strings are character strings with special lexical conventions
that define the functionality of formatted input/output functions. Format
strings are used to read data with formatted input functions from module
{!Scanf} and to print data with formatted output functions from modules
{!Printf} and {!Format}.
Format strings are made of three kinds of entities:
- {e conversion specifications}, introduced by the special character ['%']
followed by one or more characters specifying what kind of argument to
read or print,
- {e formatting indications}, introduced by the special character ['@']
followed by one or more characters specifying how to read or print the
argument,
- {e plain characters} that are regular characters with usual lexical
conventions. Plain characters specify string literals to be read in the
input or printed in the output.
There is an additional lexical rule to escape the special characters in
format strings: if a special character follows a ['%'] character, it is
treated as a plain character. In other words, ["%%"] is considered as a
plain ['%'] and ["%@"] as a plain ['@'].
For more information about conversion specifications and formatting
indications available, read the documentation of modules {!Scanf},
{!Printf} and {!Format}.
*)
(** Format strings have a general and highly polymorphic type
[('a, 'b, 'c, 'd, 'e, 'f) format6]. Type [format6] is built in.
@ -883,7 +907,7 @@ external decr : int ref -> unit = "%decr"
['d] is the result type for the [scanf]-style functions,
['e] is the type of the receiver function for the [scanf]-style functions,
['f] is the result type for the [printf]-style function.
*)
*)
type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6
type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4
@ -895,14 +919,17 @@ external format_of_string :
('a, 'b, 'c, 'd, 'e, 'f) format6 ->
('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity"
(** [format_of_string s] returns a format string read from the string
literal [s]. *)
literal [s].
Note: [format_of_string] cannot convert a string argument that is not a
literal. If you need this functionality, use the more general
{!Scanf.format_from_string} function. *)
val ( ^^ ) :
('a, 'b, 'c, 'd, 'e, 'f) format6 ->
('f, 'b, 'c, 'e, 'g, 'h) format6 ->
('a, 'b, 'c, 'd, 'g, 'h) format6
(** [f1 ^^ f2] catenates formats [f1] and [f2]. The result is a format
that accepts arguments from [f1], then arguments from [f2]. *)
(** [f1 ^^ f2] catenates format strings [f1] and [f2]. The result is a
format string that accepts arguments from [f1], then arguments from [f2]. *)
(** {6 Program termination} *)

View File

@ -45,7 +45,8 @@
material with module {!Printf} or {!Format}),
- [f] is a function that has as many arguments as the number of values to
read in the input. *)
read in the input.
*)
(** {7 A simple example} *)
@ -62,7 +63,8 @@
then [bscanf Scanning.stdin "%d" f] reads an integer [n] from the
standard input and returns [f n] (that is [n + 1]). Thus, if we
evaluate [bscanf stdin "%d" f], and then enter [41] at the
keyboard, we get [42] as the final result. *)
keyboard, we get [42] as the final result.
*)
(** {7 Formatted input as a functional feature} *)
@ -75,8 +77,9 @@
useful additions to easily define complex tokens; as expected within a
functional programming language, the formatted input functions also
support polymorphism, in particular arbitrary interaction with
polymorphic user-defined scanners. Furthermore, the OCaml formatted input
facility is fully type-checked at compile time. *)
polymorphic user-defined scanners. Furthermore, the OCaml formatted input
facility is fully type-checked at compile time.
*)
(** {6 Formatted input channel} *)
@ -101,7 +104,8 @@ type scanbuf = in_channel;;
Note: a scanning action may often require to examine one character in
advance; when this ``lookahead'' character does not belong to the token
read, it is stored back in the scanning buffer and becomes the next
character yet to be read. *)
character yet to be read.
*)
val stdin : in_channel;;
(** The standard input notion for the [Scanf] module.
@ -154,7 +158,8 @@ val from_string : string -> in_channel;;
(** [Scanning.from_string s] returns a formatted input channel which reads
from the given string.
Reading starts from the first character in the string.
The end-of-input condition is set when the end of the string is reached. *)
The end-of-input condition is set when the end of the string is reached.
*)
val from_function : (unit -> char) -> in_channel;;
(** [Scanning.from_function f] returns a formatted input channel with the
@ -163,20 +168,24 @@ val from_function : (unit -> char) -> in_channel;;
When scanning needs one more character, the given function is called.
When the function has no more character to provide, it {e must} signal an
end-of-input condition by raising the exception [End_of_file]. *)
end-of-input condition by raising the exception [End_of_file].
*)
val from_channel : Pervasives.in_channel -> in_channel;;
(** [Scanning.from_channel ic] returns a formatted input channel which reads
from the regular input channel [ic] argument, starting at the current
reading position. *)
reading position.
*)
val end_of_input : in_channel -> bool;;
(** [Scanning.end_of_input ic] tests the end-of-input condition of the given
formatted input channel. *)
formatted input channel.
*)
val beginning_of_input : in_channel -> bool;;
(** [Scanning.beginning_of_input ic] tests the beginning of input condition of
the given formatted input channel. *)
the given formatted input channel.
*)
val name_of_input : in_channel -> string;;
(** [Scanning.name_of_input ic] returns the name of the character source
@ -186,7 +195,8 @@ val name_of_input : in_channel -> string;;
val stdib : in_channel;;
(** A deprecated alias for [Scanning.stdin], the scanning buffer reading from
[Pervasives.stdin]. *)
[Pervasives.stdin].
*)
end;;
@ -216,8 +226,9 @@ type ('a, 'b, 'c, 'd) scanner =
*)
exception Scan_failure of string;;
(** The exception that formatted input functions raise when the input cannot be
read according to the given format. *)
(** The exception that formatted input functions raise when the input cannot
be read according to the given format.
*)
(** {6 The general formatted input function} *)
@ -231,18 +242,21 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
[Scanf.sscanf "x= 1" "%s = %i" f] returns [2].
Arguments [r1] to [rN] are user-defined input functions that read the
argument corresponding to a [%r] conversion. *)
argument corresponding to the [%r] conversions specified in the format
string.
*)
(** {6 Format string description} *)
(** The format is a character string which contains three types of
(** The format string is a character string which contains three types of
objects:
- plain characters, which are simply matched with the characters of the
input (with a special case for space and line feed, see {!Scanf.space}),
- conversion specifications, each of which causes reading and conversion of
one argument for the function [f] (see {!Scanf.conversion}),
- scanning indications to specify boundaries of tokens
(see scanning {!Scanf.indication}). *)
(see scanning {!Scanf.indication}).
*)
(** {7:space The space character in format strings} *)
@ -261,7 +275,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
also matches no amount of whitespace at all; hence, the call [bscanf ib
"Price = %d $" (fun p -> p)] succeeds and returns [1] when reading an
input with various whitespace in it, such as [Price = 1 $],
[Price = 1 $], or even [Price=1$]. *)
[Price = 1 $], or even [Price=1$].
*)
(** {7:conversion Conversion specifications in format strings} *)
@ -385,7 +400,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
analysis and parsing. If it appears not expressive enough for your
needs, several alternatives exist: regular expressions (module
[Str]), stream parsers, [ocamllex]-generated lexers,
[ocamlyacc]-generated parsers. *)
[ocamlyacc]-generated parsers.
*)
(** {7:indication Scanning indications in format strings} *)
@ -401,10 +417,10 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
Note:
- As usual in format strings, [%] characters must be escaped using [%%]
and [%\@] is equivalent to [\@]; this rule still holds within range
specifications and scanning indications.
For instance, ["%s@%%"] reads a string up to the next [%] character.
- As usual in format strings, [%] and [\@] characters must be escaped
using [%%] and [%\@]; this rule still holds within range specifications
and scanning indications.
For instance, ["%s@%%"] reads a string up to the next [%] character.
- The scanning indications introduce slight differences in the syntax of
[Scanf] format strings, compared to those used for the [Printf]
module. However, the scanning indications are similar to those used in
@ -412,7 +428,8 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
by {!Scanf.bscanf}, it is wise to use printing functions from the
[Format] module (or, if you need to use functions from [Printf], banish
or carefully double check the format strings that contain ['\@']
characters). *)
characters).
*)
(** {7 Exceptions during scanning} *)
@ -433,7 +450,7 @@ val bscanf : Scanning.in_channel -> ('a, 'b, 'c, 'd) scanner;;
- as a consequence, scanning a [%s] conversion never raises exception
[End_of_file]: if the end of input is reached the conversion succeeds and
simply returns the characters read so far, or [""] if none were ever read.
*)
*)
(** {6 Specialised formatted input functions} *)
@ -448,14 +465,16 @@ val fscanf : Pervasives.in_channel -> ('a, 'b, 'c, 'd) scanner;;
position, and so on).
As a consequence, never mix direct low level reading and high level
scanning from the same regular input channel. *)
scanning from the same regular input channel.
*)
val sscanf : string -> ('a, 'b, 'c, 'd) scanner;;
(** Same as {!Scanf.bscanf}, but reads from the given string. *)
val scanf : ('a, 'b, 'c, 'd) scanner;;
(** Same as {!Scanf.bscanf}, but reads from the predefined formatted input
channel {!Scanf.Scanning.stdin} that is connected to [Pervasives.stdin]. *)
channel {!Scanf.Scanning.stdin} that is connected to [Pervasives.stdin].
*)
val kscanf :
Scanning.in_channel -> (Scanning.in_channel -> exn -> 'd) ->
@ -464,7 +483,8 @@ val kscanf :
[ef] that is called in case of error: if the scanning process or
some conversion fails, the scanning function aborts and calls the
error handling function [ef] with the formatted input channel and the
exception that aborted the scanning process as arguments. *)
exception that aborted the scanning process as arguments.
*)
(** {6 Reading format strings from input} *)
@ -496,10 +516,10 @@ val format_from_string :
@since 3.10.0
*)
val unescaped : string -> string
val unescaped : string -> string;;
(** Return a copy of the argument with escape sequences, following the
lexical conventions of OCaml, replaced by their corresponding
special characters. If there is no escape sequence in the
special characters. If there is no escape sequence in the
argument, still return a copy, contrary to [String.escaped].
@since 4.00.0
*)