2002-05-07 00:41:12 -07:00
|
|
|
(***********************************************************************)
|
|
|
|
(* *)
|
|
|
|
(* Objective Caml *)
|
|
|
|
(* *)
|
|
|
|
(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)
|
|
|
|
(* *)
|
|
|
|
(* Copyright 2002 Institut National de Recherche en Informatique et *)
|
|
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
|
|
(* under the terms of the GNU Library General Public License, with *)
|
|
|
|
(* the special exception on linking described in file ../LICENSE. *)
|
|
|
|
(* *)
|
|
|
|
(***********************************************************************)
|
|
|
|
|
2002-05-08 06:51:09 -07:00
|
|
|
(* $Id$ *)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
|
|
|
(** Formatted input functions. *)
|
|
|
|
|
2002-07-25 07:06:19 -07:00
|
|
|
(** Scanning buffers. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
module Scanning : sig
|
|
|
|
|
|
|
|
type scanbuf;;
|
|
|
|
(** The type of scanning buffers. A scanning buffer is the argument passed
|
|
|
|
to the scanning functions used by the [scanf] family of functions.
|
|
|
|
The scanning buffer holds the current state of the scan, plus
|
|
|
|
a function to get the next char from the input, and a token buffer
|
2006-04-05 08:41:42 -07:00
|
|
|
to store the string matched so far.
|
|
|
|
|
|
|
|
Note: a scan may often need to examine one character in advance;
|
|
|
|
when this ``lookahead'' character does not belong to the token read,
|
|
|
|
it is stored back in the scanning buffer and becomes the next
|
|
|
|
character read. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2002-12-08 07:16:09 -08:00
|
|
|
val stdib : scanbuf;;
|
|
|
|
(** The scanning buffer reading from [stdin].
|
2006-04-05 08:41:42 -07:00
|
|
|
[stdib] is equivalent to [Scanning.from_channel stdin].
|
|
|
|
|
|
|
|
Note: when input is read interactively from [stdin], the carriage return
|
|
|
|
that triggers the evaluation is incorporated in the input; thus, scanning
|
|
|
|
specifications must properly skip this character (simply add a space
|
|
|
|
as the last character of the format string). *)
|
2002-12-08 07:16:09 -08:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val from_string : string -> scanbuf;;
|
|
|
|
(** [Scanning.from_string s] returns a scanning buffer which reads
|
|
|
|
from the given string.
|
|
|
|
Reading starts from the first character in the string.
|
|
|
|
The end-of-input condition is set when the end of the string is reached. *)
|
|
|
|
|
2003-04-25 03:21:21 -07:00
|
|
|
val from_file : string -> scanbuf;;
|
|
|
|
(** Bufferized file reading in text mode. The efficient and usual
|
|
|
|
way to scan text mode files (in effect, [from_file] returns a
|
|
|
|
buffer that reads characters in large chunks, rather than one
|
|
|
|
character at a time as buffers returned by [from_channel] do).
|
|
|
|
[Scanning.from_file fname] returns a scanning buffer which reads
|
|
|
|
from the given file [fname] in text mode. *)
|
|
|
|
|
|
|
|
val from_file_bin : string -> scanbuf;;
|
|
|
|
(** Bufferized file reading in binary mode. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
|
|
|
val from_function : (unit -> char) -> scanbuf;;
|
|
|
|
(** [Scanning.from_function f] returns a scanning buffer with
|
|
|
|
the given function as its reading method.
|
|
|
|
When scanning needs one more character, the given function is called.
|
2003-04-25 03:21:21 -07:00
|
|
|
When the function has no more characters to provide, it must signal
|
2002-07-28 14:29:42 -07:00
|
|
|
an end-of-input condition by raising the exception [End_of_file]. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2003-04-25 03:21:21 -07:00
|
|
|
val from_channel : in_channel -> scanbuf;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(** [Scanning.from_channel ic] returns a scanning buffer which reads
|
|
|
|
one character at a time from the input channel [ic], starting at the
|
2003-04-25 03:21:21 -07:00
|
|
|
current reading position. *)
|
|
|
|
|
2002-10-30 15:46:21 -08:00
|
|
|
val end_of_input : scanbuf -> bool;;
|
2004-11-16 02:27:28 -08:00
|
|
|
(** [Scanning.end_of_input ib] tests the end-of-input condition
|
2002-10-30 15:46:21 -08:00
|
|
|
of the given buffer. *)
|
2003-05-13 23:30:04 -07:00
|
|
|
val beginning_of_input : scanbuf -> bool;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(** [Scanning.beginning_of_input ib] tests the beginning-of-input
|
2003-05-13 23:30:04 -07:00
|
|
|
condition of the given buffer. *)
|
2004-11-16 02:27:28 -08:00
|
|
|
|
2004-10-04 23:54:45 -07:00
|
|
|
val name_of_input : scanbuf -> string;;
|
|
|
|
(** [Scanning.name_of_input ib] returns the name of the character
|
2004-11-16 02:27:28 -08:00
|
|
|
source for the input buffer [ib]. *)
|
2002-10-30 15:46:21 -08:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
end;;
|
|
|
|
|
2002-06-27 02:18:11 -07:00
|
|
|
exception Scan_failure of string;;
|
2002-06-12 01:31:21 -07:00
|
|
|
(** The exception that formatted input functions raise when the input
|
|
|
|
cannot be read according to the given format.
See {!Scanf.bscanf} for the cases that raise [Failure] or [End_of_file]
instead. *)
|
|
|
|
|
|
|
|
val bscanf :
|
2003-07-05 04:13:24 -07:00
|
|
|
Scanning.scanbuf -> ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;;
|
2005-09-20 14:42:44 -07:00
|
|
|
(** [bscanf ib fmt f] reads tokens from the scanning buffer [ib] according
|
|
|
|
to the format string [fmt], converts these tokens to values, and
|
2002-05-28 10:51:49 -07:00
|
|
|
applies the function [f] to these values.
|
2002-05-07 00:41:12 -07:00
|
|
|
The result of this application of [f] is the result of the whole construct.
|
2002-05-15 13:28:57 -07:00
|
|
|
|
2002-12-09 02:28:05 -08:00
|
|
|
For instance, if [p] is the function [fun s i -> i + 1], then
|
|
|
|
[Scanf.sscanf "x = 1" "%s = %i" p] returns [2].
|
|
|
|
|
2002-05-27 00:05:36 -07:00
|
|
|
The format is a character string which contains three types of
|
|
|
|
objects:
|
|
|
|
- plain characters, which are simply matched with the
|
2002-07-11 15:39:26 -07:00
|
|
|
characters of the input,
|
2002-05-27 00:05:36 -07:00
|
|
|
- conversion specifications, each of which causes reading and
|
|
|
|
conversion of one argument for [f],
|
2002-07-28 14:29:42 -07:00
|
|
|
- scanning indications to specify boundaries of tokens.
|
2002-05-27 00:05:36 -07:00
|
|
|
|
2002-07-25 05:11:29 -07:00
|
|
|
Among plain characters the space character (ASCII code 32) has a
|
|
|
|
special meaning: it matches ``whitespace'', that is any number of tab,
|
|
|
|
space, newline and carriage return characters. Hence, a space in the format
|
|
|
|
matches any amount of whitespace in the input.
|
2002-05-15 13:28:57 -07:00
|
|
|
|
2002-12-09 02:28:05 -08:00
|
|
|
Conversion specifications consist of the [%] character, followed by
|
|
|
|
an optional flag, an optional field width, and followed by one or
|
|
|
|
two conversion characters. The conversion characters and their
|
|
|
|
meanings are:
|
|
|
|
|
2002-05-15 13:28:57 -07:00
|
|
|
- [d]: reads an optionally signed decimal integer.
|
2002-05-07 00:41:12 -07:00
|
|
|
- [i]: reads an optionally signed integer
|
2002-07-25 08:24:58 -07:00
|
|
|
(usual input formats for hexadecimal ([0x[d]+] and [0X[d]+]),
|
|
|
|
octal ([0o[d]+]), and binary ([0b[d]+]) notations are understood).
|
2002-05-17 01:17:52 -07:00
|
|
|
- [u]: reads an unsigned decimal integer.
|
2002-07-25 08:24:58 -07:00
|
|
|
- [x] or [X]: reads an unsigned hexadecimal integer.
|
2002-05-07 00:41:12 -07:00
|
|
|
- [o]: reads an unsigned octal integer.
|
2004-11-16 02:27:28 -08:00
|
|
|
- [s]: reads a string argument that spreads as much as possible,
|
|
|
|
until the next white space, the next scanning indication, or the
|
|
|
|
end-of-input is reached. Hence, this conversion always succeeds:
|
|
|
|
it returns an empty string if the bounding condition holds
|
|
|
|
when the scan begins.
|
2002-06-07 03:05:52 -07:00
|
|
|
- [S]: reads a delimited string argument (delimiters and special
|
2002-07-28 14:29:42 -07:00
|
|
|
escaped characters follow the lexical conventions of Caml).
|
2003-05-13 23:39:50 -07:00
|
|
|
- [c]: reads a single character. To test the current input character
|
|
|
|
without reading it, specify a null field width, i.e. use
|
2003-07-07 04:13:21 -07:00
|
|
|
specification [%0c]. Raise [Invalid_argument] if the field width
|
|
|
|
specification is greater than 1.
|
2002-06-07 03:05:52 -07:00
|
|
|
- [C]: reads a single delimited character (delimiters and special
|
2002-07-28 14:29:42 -07:00
|
|
|
escaped characters follow the lexical conventions of Caml).
|
2004-02-04 02:16:25 -08:00
|
|
|
- [f], [e], [E], [g], [G]: reads an optionally signed
|
2002-09-05 03:38:11 -07:00
|
|
|
floating-point number in decimal notation, in the style [dddd.ddd
|
|
|
|
e/E+-dd].
|
2003-07-14 03:04:25 -07:00
|
|
|
- [F]: reads a floating point number according to the lexical
|
2003-07-15 00:25:09 -07:00
|
|
|
conventions of Caml (hence the decimal point is mandatory if the
|
|
|
|
exponent part is not mentioned).
|
2002-10-07 05:16:03 -07:00
|
|
|
- [B]: reads a boolean argument ([true] or [false]).
|
2003-07-01 09:30:12 -07:00
|
|
|
- [b]: reads a boolean argument (for backward compatibility; do not use
|
|
|
|
in new programs).
|
|
|
|
- [ld], [li], [lu], [lx], [lX], [lo]: reads an [int32] argument to
|
2002-05-08 06:51:09 -07:00
|
|
|
the format specified by the second letter (decimal, hexadecimal, etc).
|
2003-07-01 09:30:12 -07:00
|
|
|
- [nd], [ni], [nu], [nx], [nX], [no]: reads a [nativeint] argument to
|
2002-05-08 06:51:09 -07:00
|
|
|
the format specified by the second letter.
|
2003-07-01 09:30:12 -07:00
|
|
|
- [Ld], [Li], [Lu], [Lx], [LX], [Lo]: reads an [int64] argument to
|
2002-05-08 06:51:09 -07:00
|
|
|
the format specified by the second letter.
|
2002-06-07 03:05:52 -07:00
|
|
|
- [\[ range \]]: reads characters that match one of the characters
|
|
|
|
mentioned in the range of characters [range] (or not mentioned in
|
2005-09-13 04:26:06 -07:00
|
|
|
it, if the range starts with [^]). Reads a [string] that can be
|
|
|
|
empty, if no character in the input matches the range. The set of
|
|
|
|
characters from [c1] to [c2] (inclusively) is denoted by [c1-c2].
|
|
|
|
Hence, [%\[0-9\]] returns a string representing a decimal number
|
|
|
|
or an empty string if no decimal digit is found; similarly,
|
|
|
|
[%\[\\048-\\057\\065-\\070\]] returns a string of hexadecimal digits.
|
2002-10-07 05:16:03 -07:00
|
|
|
If a closing bracket appears in a range, it must occur as the
|
|
|
|
first character of the range (or just after the [^] in case of
|
|
|
|
range negation); hence [\[\]\]] matches a [\]] character and
|
|
|
|
[\[^\]\]] matches any character that is not [\]].
|
2005-09-13 08:44:02 -07:00
|
|
|
- [\{ fmt %\}]: reads a format string argument to the format
|
|
|
|
specified by the internal format [fmt]. The format string to be
|
|
|
|
read must have the same type as the internal format [fmt].
|
2004-09-22 02:17:21 -07:00
|
|
|
For instance, "%\{%i%\}" reads any format string that can read a value of
|
|
|
|
type [int]; hence [Scanf.sscanf "fmt:\\\"number is %u\\\"" "fmt:%\{%i%\}"]
|
|
|
|
succeeds and returns the format string ["number is %u"].
|
2005-09-13 04:26:06 -07:00
|
|
|
- [\( fmt %\)]: scanning format substitution.
|
2005-09-13 08:44:02 -07:00
|
|
|
Reads a format string to replace [fmt]. The format string read
|
|
|
|
must have the same type as [fmt].
|
2003-11-30 14:13:03 -08:00
|
|
|
- [l]: applies [f] to the number of lines read so far.
|
2002-12-08 05:52:02 -08:00
|
|
|
- [n]: applies [f] to the number of characters read so far.
|
2004-09-09 00:54:50 -07:00
|
|
|
- [N] or [L]: applies [f] to the number of tokens read so far.
|
2003-07-07 04:13:21 -07:00
|
|
|
- [!]: matches the end of input condition.
|
2002-05-07 00:41:12 -07:00
|
|
|
- [%]: matches one [%] character in the input.
|
|
|
|
|
2003-03-02 15:03:15 -08:00
|
|
|
Following the [%] character introducing a conversion, there may be
|
2002-12-09 02:28:05 -08:00
|
|
|
the special flag [_]: the conversion that follows occurs as usual,
|
|
|
|
but the resulting value is discarded.
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
The field widths are composed of an optional integer literal
|
2002-05-27 00:05:36 -07:00
|
|
|
indicating the maximal width of the token to read.
|
2002-05-07 00:41:12 -07:00
|
|
|
For instance, [%6d] reads an integer, having at most 6 decimal digits;
|
2005-09-13 04:26:06 -07:00
|
|
|
[%4f] reads a float with at most 4 characters; and [%8\[\\000-\\255\]]
|
|
|
|
returns the next 8 characters (or all the characters still available,
|
|
|
|
if less than 8 characters are available in the input).
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2005-07-11 07:49:57 -07:00
|
|
|
Scanning indications appear just after the string conversions [s]
|
|
|
|
and [\[ range \]] to delimit the end of the token. A scanning
|
2002-07-28 14:29:42 -07:00
|
|
|
indication is introduced by a [@] character, followed by some
|
|
|
|
constant character [c]. It means that the string token should end
|
2002-10-07 23:46:15 -07:00
|
|
|
just before the next matching [c] (which is skipped). If no [c]
|
|
|
|
character is encountered, the string token spreads as much as
|
2003-10-16 09:25:25 -07:00
|
|
|
possible. For instance, ["%s@\t"] reads a string up to the next
|
2005-07-11 07:49:57 -07:00
|
|
|
tabulation character or to the end of input. If a scanning
|
|
|
|
indication [\@c] does not follow a string conversion, it is treated
|
|
|
|
as a plain [c] character.
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-11-16 02:27:28 -08:00
|
|
|
Raise [Scanf.Scan_failure] if the given input does not match the format.
|
|
|
|
|
|
|
|
Raise [Failure] if a conversion to a number is not possible.
|
|
|
|
|
|
|
|
Raise [End_of_file] if the end of input is encountered while some
|
|
|
|
more characters are needed to read the current conversion
|
|
|
|
specification (this means in particular that scanning a [%s]
|
|
|
|
conversion never raises exception [End_of_file]: if the end of
|
|
|
|
input is reached the conversion succeeds and simply returns [""]).
|
|
|
|
|
2003-11-30 14:13:03 -08:00
|
|
|
Notes:
|
2003-07-15 00:25:09 -07:00
|
|
|
|
2003-10-16 09:25:25 -07:00
|
|
|
- the scanning indications introduce slight differences in the
|
|
|
|
syntax of [Scanf] format strings compared to those used by the
|
|
|
|
[Printf] module. However, scanning indications are similar to those
|
|
|
|
of the [Format] module; hence, when producing formatted text to be
|
2003-11-30 14:13:03 -08:00
|
|
|
scanned by {!Scanf.bscanf}, it is wise to use printing functions
|
2003-10-16 09:25:25 -07:00
|
|
|
from [Format] (or, if you need to use functions from [Printf],
|
|
|
|
banish or carefully double check the format strings that contain
|
2004-11-03 00:54:15 -08:00
|
|
|
['\@'] characters).
|
2003-10-16 09:25:25 -07:00
|
|
|
|
2003-07-15 00:25:09 -07:00
|
|
|
- in addition to relevant digits, ['_'] characters may appear
|
|
|
|
inside numbers (this is reminiscent of the usual Caml
|
|
|
|
conventions). If stricter scanning is desired, use the range
|
|
|
|
conversion facility instead of the number conversions.
|
|
|
|
|
|
|
|
- the [scanf] facility is not intended for heavy duty lexical
|
|
|
|
analysis and parsing. If it appears not expressive enough for your
|
|
|
|
needs, several alternatives exist: regular expressions (module
|
|
|
|
[Str]), stream parsers, [ocamllex]-generated lexers,
|
2004-11-03 00:54:15 -08:00
|
|
|
[ocamlyacc]-generated parsers.
|
2003-10-15 00:28:56 -07:00
|
|
|
*)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2003-07-05 04:13:24 -07:00
|
|
|
val fscanf : in_channel -> ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;;
|
2003-04-25 03:21:21 -07:00
|
|
|
(** Same as {!Scanf.bscanf}, but reads its input from the given channel.
|
2003-07-15 00:25:09 -07:00
|
|
|
|
|
|
|
Warning: since all scanning functions operate from a scanning
|
|
|
|
buffer, be aware that each [fscanf] invocation must allocate a
|
|
|
|
fresh scanning buffer (unless partial evaluation is carefully used in
|
2003-11-30 14:13:03 -08:00
|
|
|
the program). Hence, there are chances that some characters seem
|
2003-07-15 00:25:09 -07:00
|
|
|
to be skipped (in fact they are pending in the previously used
|
|
|
|
buffer). This happens in particular when calling [fscanf] again
|
|
|
|
after a scan involving a format that necessitates some look ahead
|
|
|
|
(such as a format that ends by skipping whitespace in the input).
|
|
|
|
|
2004-06-24 04:19:05 -07:00
|
|
|
To avoid confusion, consider using [bscanf] with an explicitly
|
2003-07-15 00:25:09 -07:00
|
|
|
created scanning buffer. Use for instance [Scanning.from_file f]
|
|
|
|
to allocate the scanning buffer reading from file [f].
|
|
|
|
|
|
|
|
This method is not only clearer, it is also faster, since scanning
|
|
|
|
buffers to files are optimized for fast bufferized reading. *)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2003-07-05 04:13:24 -07:00
|
|
|
val sscanf : string -> ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;;
|
2002-07-25 05:11:29 -07:00
|
|
|
(** Same as {!Scanf.bscanf}, but reads its input from the given string. *)
|
2002-06-12 01:31:21 -07:00
|
|
|
|
2003-07-05 04:13:24 -07:00
|
|
|
val scanf : ('a, Scanning.scanbuf, 'b) format -> 'a -> 'b;;
|
2003-07-15 00:25:09 -07:00
|
|
|
(** Same as {!Scanf.bscanf}, but reads from the predefined scanning
|
2004-03-22 07:59:22 -08:00
|
|
|
buffer {!Scanf.Scanning.stdib} that is connected to [stdin].
See {!Scanf.Scanning.stdib} for a note on skipping the carriage return
that triggers evaluation when input is read interactively. *)
|
2002-06-26 02:31:02 -07:00
|
|
|
|
2002-06-27 02:18:11 -07:00
|
|
|
val kscanf :
|
2002-07-28 14:29:42 -07:00
|
|
|
Scanning.scanbuf -> (Scanning.scanbuf -> exn -> 'a) ->
|
2003-07-05 04:13:24 -07:00
|
|
|
('b, Scanning.scanbuf, 'a) format -> 'b -> 'a;;
|
2002-06-27 02:18:11 -07:00
|
|
|
(** [kscanf ib ef fmt f]: same as {!Scanf.bscanf}, but takes an additional
|
|
|
|
function argument [ef] that is called in case of error: if the scanning
|
2002-07-28 14:51:51 -07:00
|
|
|
process or some conversion fails, the scanning function aborts and applies
|
2002-07-25 05:11:29 -07:00
|
|
|
the error handling function [ef] to the scanning buffer and the exception
|
2003-07-15 00:25:09 -07:00
|
|
|
that aborted the scanning process. As the signature shows, [ef] must return
a value of the same type as a successful scan. *)
|
2005-09-20 14:42:44 -07:00
|
|
|
|
|
|
|
val bscanf_format :
|
|
|
|
Scanning.scanbuf -> ('a, 'b, 'c, 'd) format4 ->
|
|
|
|
(('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;;
|
2006-01-03 10:25:21 -08:00
|
|
|
(** [bscanf_format ib fmt f] reads a format string token in buffer [ib],
|
|
|
|
according to the format string [fmt], and applies the function [f] to the
|
|
|
|
resulting format string value.
|
|
|
|
Raises [Scan_failure] if the format string value read does not have the same
|
|
|
|
type as [fmt]. *)
|
2005-09-20 14:42:44 -07:00
|
|
|
|
|
|
|
val sscanf_format :
|
2006-01-12 02:18:18 -08:00
|
|
|
string -> ('a, 'b, 'c, 'd) format4 ->
|
|
|
|
(('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;;
|
|
|
|
(** Same as {!Scanf.bscanf_format}, but reads its input from the given string. *)
|
|
|
|
|
2006-04-05 04:49:07 -07:00
|
|
|
val format_from_string :
|
2005-09-20 14:42:44 -07:00
|
|
|
string -> ('a, 'b, 'c, 'd) format4 -> ('a, 'b, 'c, 'd) format4;;
|
2006-01-12 02:18:18 -08:00
|
|
|
(** [format_from_string s fmt] converts the string [s] to a format string of
|
2006-01-03 10:25:21 -08:00
|
|
|
the same type as [fmt]. Same as {!Scanf.sscanf_format}, except that, as the
signature shows, the format string is returned directly rather than passed
to a function. *)
|