2002-05-07 00:41:12 -07:00
|
|
|
(***********************************************************************)
|
|
|
|
(* *)
|
|
|
|
(* Objective Caml *)
|
|
|
|
(* *)
|
|
|
|
(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)
|
|
|
|
(* *)
|
|
|
|
(* Copyright 2002 Institut National de Recherche en Informatique et *)
|
|
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
|
|
(* under the terms of the GNU Library General Public License, with *)
|
|
|
|
(* the special exception on linking described in file ../LICENSE. *)
|
|
|
|
(* *)
|
|
|
|
(***********************************************************************)
|
|
|
|
|
2002-05-08 06:51:09 -07:00
|
|
|
(* $Id$ *)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
(* The run-time library for scanners. *)
|
|
|
|
|
2002-07-28 14:29:42 -07:00
|
|
|
(* Scanning buffers. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
module type SCANNING = sig
|
|
|
|
|
|
|
|
type scanbuf;;
|
|
|
|
|
2002-12-08 07:16:09 -08:00
|
|
|
val stdib : scanbuf;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* The scanning buffer reading from [stdin].
|
2002-12-08 07:16:09 -08:00
|
|
|
[stdib] is equivalent to [Scanning.from_channel stdin]. *)
|
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
val next_char : scanbuf -> char;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.next_char ib] advance the scanning buffer for
|
2002-10-07 05:20:07 -07:00
|
|
|
one character.
|
2003-11-30 14:13:03 -08:00
|
|
|
If no more character can be read, sets a end of file condition and
|
|
|
|
returns '\000'. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
val invalidate_current_char : scanbuf -> unit;;
|
|
|
|
(* [Scanning.invalidate_current_char ib] mark the current_char as already
|
|
|
|
scanned. *)
|
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val peek_char : scanbuf -> char;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.peek_char ib] returns the current char available in
|
2005-07-01 01:15:02 -07:00
|
|
|
the buffer or read one if necessary (when the current character is
|
|
|
|
already scanned).
|
|
|
|
If no character can be read, sets an end of file condition and
|
2003-11-30 14:13:03 -08:00
|
|
|
returns '\000'. *)
|
2002-10-07 05:20:07 -07:00
|
|
|
|
|
|
|
val checked_peek_char : scanbuf -> char;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* Same as above but always returns a valid char or fails:
|
2003-11-30 14:13:03 -08:00
|
|
|
instead of returning a null char when the reading method of the
|
|
|
|
input buffer has reached an end of file, the function raises exception
|
|
|
|
[End_of_file]. *)
|
2002-10-07 05:20:07 -07:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val store_char : scanbuf -> char -> int -> int;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.store_char ib c lim] adds [c] to the token buffer
|
2002-05-27 15:00:09 -07:00
|
|
|
of the scanning buffer. It also advances the scanning buffer for one
|
2003-11-30 14:13:03 -08:00
|
|
|
character and returns [lim - 1], indicating the new limit
|
|
|
|
for the length of the current token. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
val skip_char : scanbuf -> int -> int;;
|
|
|
|
(* [Scanning.skip_char ib lim] ignores the current character. *)
|
|
|
|
|
|
|
|
val ignore_char : scanbuf -> int -> int;;
|
|
|
|
(* [Scanning.ignore_char ib lim] ignores the current character and
|
|
|
|
decrements the limit. *)
|
2003-07-15 00:25:09 -07:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val token : scanbuf -> string;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.token ib] returns the string stored into the token
|
2002-05-27 15:00:09 -07:00
|
|
|
buffer of the scanning buffer: it returns the token matched by the
|
|
|
|
format. *)
|
|
|
|
|
|
|
|
val reset_token : scanbuf -> unit;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.reset_token ib] resets the token buffer of
|
2002-05-27 15:00:09 -07:00
|
|
|
the given scanning buffer. *)
|
|
|
|
|
2003-11-30 14:13:03 -08:00
|
|
|
val char_count : scanbuf -> int;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.char_count ib] returns the number of characters
|
2003-11-30 14:13:03 -08:00
|
|
|
read so far from the given buffer. *)
|
|
|
|
|
|
|
|
val line_count : scanbuf -> int;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.line_count ib] returns the number of new line
|
2003-11-30 14:13:03 -08:00
|
|
|
characters read so far from the given buffer. *)
|
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val token_count : scanbuf -> int;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.token_count ib] returns the number of tokens read
|
|
|
|
so far from [ib]. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2003-07-07 04:13:21 -07:00
|
|
|
val eof : scanbuf -> bool;;
|
2005-07-01 01:15:02 -07:00
|
|
|
(* [Scanning.eof ib] returns the end of input condition
|
|
|
|
of the given buffer. *)
|
2003-07-07 04:13:21 -07:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val end_of_input : scanbuf -> bool;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.end_of_input ib] tests the end of input condition
|
2005-07-01 01:15:02 -07:00
|
|
|
of the given buffer (if no char has ever been read, an attempt to
|
|
|
|
read one is performed). *)
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2003-05-13 23:30:04 -07:00
|
|
|
val beginning_of_input : scanbuf -> bool;;
|
2004-10-04 23:54:45 -07:00
|
|
|
(* [Scanning.beginning_of_input ib] tests the beginning of input
|
2003-05-13 23:30:04 -07:00
|
|
|
condition of the given buffer. *)
|
2002-12-08 05:52:02 -08:00
|
|
|
|
2004-10-04 23:54:45 -07:00
|
|
|
val name_of_input : scanbuf -> string;;
|
|
|
|
(* [Scanning.name_of_input ib] returns the name of the character
|
|
|
|
source for input buffer [ib]. *)
|
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
val from_string : string -> scanbuf;;
|
|
|
|
val from_channel : in_channel -> scanbuf;;
|
2003-04-25 03:21:21 -07:00
|
|
|
val from_file : string -> scanbuf;;
|
|
|
|
val from_file_bin : string -> scanbuf;;
|
2002-05-27 15:00:09 -07:00
|
|
|
val from_function : (unit -> char) -> scanbuf;;
|
|
|
|
|
|
|
|
end;;
|
|
|
|
|
|
|
|
module Scanning : SCANNING = struct
|
|
|
|
|
|
|
|
(* The run-time library for scanf. *)
|
2003-11-30 14:13:03 -08:00
|
|
|
type file_name = string;;
|
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
type scanbuf = {
|
|
|
|
mutable eof : bool;
|
2005-07-01 01:15:02 -07:00
|
|
|
mutable current_char : char;
|
2005-09-20 14:42:44 -07:00
|
|
|
mutable current_char_is_valid : bool;
|
2002-05-27 15:00:09 -07:00
|
|
|
mutable char_count : int;
|
2003-11-30 14:13:03 -08:00
|
|
|
mutable line_count : int;
|
2002-05-27 15:00:09 -07:00
|
|
|
mutable token_count : int;
|
|
|
|
mutable get_next_char : unit -> char;
|
|
|
|
tokbuf : Buffer.t;
|
2003-11-30 14:13:03 -08:00
|
|
|
file_name : file_name;
|
2002-05-27 15:00:09 -07:00
|
|
|
};;
|
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
let null_char = '\000';;
|
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
(* Reads a new character from input buffer. Next_char never fails,
|
|
|
|
even in case of end of input: it then simply sets the end of file
|
|
|
|
condition. *)
|
2002-05-27 15:00:09 -07:00
|
|
|
let next_char ib =
|
|
|
|
try
|
2005-07-01 01:15:02 -07:00
|
|
|
let c = ib.get_next_char () in
|
|
|
|
ib.current_char <- c;
|
2005-09-20 14:42:44 -07:00
|
|
|
ib.current_char_is_valid <- true;
|
2006-05-04 05:52:22 -07:00
|
|
|
ib.char_count <- succ ib.char_count;
|
|
|
|
if c == '\n' then ib.line_count <- succ ib.line_count;
|
2005-07-01 01:15:02 -07:00
|
|
|
c with
|
|
|
|
| End_of_file ->
|
|
|
|
let c = null_char in
|
|
|
|
ib.current_char <- c;
|
2005-09-20 14:42:44 -07:00
|
|
|
ib.current_char_is_valid <- false;
|
2005-07-01 01:15:02 -07:00
|
|
|
ib.eof <- true;
|
|
|
|
c;;
|
|
|
|
|
|
|
|
let peek_char ib =
|
2005-09-20 14:42:44 -07:00
|
|
|
if ib.current_char_is_valid then ib.current_char else next_char ib;;
|
2002-10-07 05:20:07 -07:00
|
|
|
|
2005-03-11 05:41:25 -08:00
|
|
|
(* Returns a valid current char for the input buffer. In particular
|
2003-05-13 23:30:04 -07:00
|
|
|
no irrelevant null character (as set by [next_char] in case of end
|
|
|
|
of input) is returned, since [End_of_file] is raised when
|
|
|
|
[next_char] sets the end of file condition while trying to read a
|
|
|
|
new character. *)
|
|
|
|
let checked_peek_char ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
let c = peek_char ib in
|
2003-05-13 23:30:04 -07:00
|
|
|
if ib.eof then raise End_of_file;
|
|
|
|
c;;
|
|
|
|
|
2003-07-07 04:13:21 -07:00
|
|
|
let end_of_input ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
ignore (peek_char ib);
|
2003-07-07 04:13:21 -07:00
|
|
|
ib.eof;;
|
2005-07-01 01:15:02 -07:00
|
|
|
|
|
|
|
let eof ib = ib.eof;;
|
|
|
|
|
|
|
|
let beginning_of_input ib = ib.char_count = 0;;
|
2004-10-04 23:54:45 -07:00
|
|
|
let name_of_input ib = ib.file_name;;
|
2002-05-27 15:00:09 -07:00
|
|
|
let char_count ib = ib.char_count;;
|
2003-11-30 14:13:03 -08:00
|
|
|
let line_count ib = ib.line_count;;
|
2002-05-27 15:00:09 -07:00
|
|
|
let reset_token ib = Buffer.reset ib.tokbuf;;
|
2005-09-20 14:42:44 -07:00
|
|
|
let invalidate_current_char ib = ib.current_char_is_valid <- false;;
|
2002-05-27 15:00:09 -07:00
|
|
|
|
|
|
|
let token ib =
|
|
|
|
let tokbuf = ib.tokbuf in
|
|
|
|
let tok = Buffer.contents tokbuf in
|
|
|
|
Buffer.clear tokbuf;
|
2006-05-04 05:52:22 -07:00
|
|
|
ib.token_count <- succ ib.token_count;
|
2002-05-27 15:00:09 -07:00
|
|
|
tok;;
|
|
|
|
|
|
|
|
let token_count ib = ib.token_count;;
|
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
let skip_char ib max =
|
|
|
|
invalidate_current_char ib;
|
|
|
|
max;;
|
|
|
|
|
|
|
|
let ignore_char ib max = skip_char ib (max - 1);;
|
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
let store_char ib c max =
|
|
|
|
Buffer.add_char ib.tokbuf c;
|
2005-07-01 01:15:02 -07:00
|
|
|
ignore_char ib max;;
|
2003-07-15 00:25:09 -07:00
|
|
|
|
2003-04-25 03:21:21 -07:00
|
|
|
let default_token_buffer_size = 1024;;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
let create fname next = {
|
|
|
|
eof = false;
|
2005-07-01 01:15:02 -07:00
|
|
|
current_char = '\000';
|
2005-09-20 14:42:44 -07:00
|
|
|
current_char_is_valid = false;
|
2004-02-04 02:16:25 -08:00
|
|
|
char_count = 0;
|
|
|
|
line_count = 0;
|
|
|
|
token_count = 0;
|
|
|
|
get_next_char = next;
|
|
|
|
tokbuf = Buffer.create default_token_buffer_size;
|
|
|
|
file_name = fname;
|
|
|
|
};;
|
2002-05-27 15:00:09 -07:00
|
|
|
|
|
|
|
let from_string s =
|
|
|
|
let i = ref 0 in
|
|
|
|
let len = String.length s in
|
|
|
|
let next () =
|
|
|
|
if !i >= len then raise End_of_file else
|
|
|
|
let c = s.[!i] in
|
|
|
|
incr i;
|
|
|
|
c in
|
2004-10-04 23:54:45 -07:00
|
|
|
create "string input" next;;
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2004-10-04 23:54:45 -07:00
|
|
|
let from_function = create "function input";;
|
2002-12-08 13:28:23 -08:00
|
|
|
|
2006-10-04 03:02:01 -07:00
|
|
|
(* Scan from an input channel. *)
|
|
|
|
|
|
|
|
(* The input channel [ic] may not be allocated in this library, hence it may be
|
|
|
|
shared (two functions of the user's program may successively read from
|
|
|
|
it). Furthermore, the user may define more than one scanning buffer reading
|
|
|
|
from the same [ic] channel.
|
|
|
|
|
|
|
|
However, we cannot prevent the scanning mechanism to use one lookahead
|
|
|
|
character, if needed by the semantics of format string specifications
|
|
|
|
(e.g. a trailing ``skip space'' specification in the format string); in this
|
|
|
|
case, the mandatory lookahead character is read from the channel and stored
|
|
|
|
into the scanning buffer for further reading. This implies that multiple
|
|
|
|
functions alternatively scanning the same [ic] channel will miss characters
|
|
|
|
from time to time, due to unnoticed look ahead characters, silently read
|
|
|
|
from [ic] (hence no more available for reading) and retained inside the
|
|
|
|
scanning buffer to ensure the correct incremental scanning of the same
|
|
|
|
scanning buffer. This phenomenon is even worse if one defines more than one
|
|
|
|
scanning buffer reading from the same input channel [ic]. We have no simple
|
|
|
|
way to circumvent this problem (unless the scanning buffer allocation is a
|
|
|
|
memo function that never allocates two different scanning buffers for the
|
|
|
|
same input channel, orelse the input channel API offers a ``consider this
|
|
|
|
char as unread'' procedure to keep back the lookahead character as available
|
|
|
|
in the input channel for further reading).
|
|
|
|
|
|
|
|
Hence, we do bufferize characters to create a scanning buffer from an input
|
|
|
|
channel in order to preserve the same semantics as other from_* functions
|
|
|
|
above: two successive calls to the scanner will work appropriately, since
|
|
|
|
the bufferized character (if any) will be retained inside the scanning
|
|
|
|
buffer from a call to the next one.
|
|
|
|
|
|
|
|
Otherwise, if we do not bufferize characters, we will loose the clearly
|
|
|
|
correct scanning behaviour even for the simple regular case, when we scan
|
|
|
|
the (possibly shared) channel [ic] using a unique function, while not
|
|
|
|
gaining anything for multiple functions reading from [ic] or multiple
|
|
|
|
allocation of scanning buffers reading from the same [ic].
|
|
|
|
|
|
|
|
As mentioned above, a more ambitious fix could be to change the input
|
|
|
|
channel API or to have a memo scanning buffer allocation for reading from
|
|
|
|
input channel not allocated from within Scanf's input buffer creation
|
|
|
|
functions. *)
|
|
|
|
|
2002-12-08 13:28:23 -08:00
|
|
|
(* Perform bufferized input to improve efficiency. *)
|
|
|
|
let file_buffer_size = ref 1024;;
|
|
|
|
|
2006-04-05 04:49:07 -07:00
|
|
|
(* To close a channel at end of input. *)
|
|
|
|
let scan_close_at_end ic = close_in ic; raise End_of_file;;
|
|
|
|
|
|
|
|
let from_ic scan_close_ic fname ic =
|
2002-12-08 13:28:23 -08:00
|
|
|
let len = !file_buffer_size in
|
|
|
|
let buf = String.create len in
|
|
|
|
let i = ref 0 in
|
|
|
|
let lim = ref 0 in
|
2006-10-04 03:02:01 -07:00
|
|
|
let eof = ref false in
|
2002-12-08 13:28:23 -08:00
|
|
|
let next () =
|
2006-10-04 03:02:01 -07:00
|
|
|
if !i < !lim then begin let c = buf.[!i] in incr i; c end else
|
|
|
|
if !eof then raise End_of_file else begin
|
2002-12-08 13:28:23 -08:00
|
|
|
lim := input ic buf 0 len;
|
2006-10-04 03:02:01 -07:00
|
|
|
if !lim = 0 then begin eof := true; scan_close_ic ic end else begin
|
2002-12-08 13:28:23 -08:00
|
|
|
i := 1;
|
|
|
|
buf.[0]
|
|
|
|
end
|
|
|
|
end in
|
2003-11-30 14:13:03 -08:00
|
|
|
create fname next;;
|
2002-05-27 15:00:09 -07:00
|
|
|
|
2006-04-05 04:49:07 -07:00
|
|
|
let from_ic_close_at_end = from_ic scan_close_at_end;;
|
2003-04-25 03:21:21 -07:00
|
|
|
|
2006-04-05 04:49:07 -07:00
|
|
|
let from_file fname = from_ic_close_at_end fname (open_in fname);;
|
|
|
|
let from_file_bin fname = from_ic_close_at_end fname (open_in_bin fname);;
|
|
|
|
|
2006-04-05 08:40:03 -07:00
|
|
|
let scan_raise_at_end ic = raise End_of_file;;
|
2006-04-05 04:49:07 -07:00
|
|
|
|
2006-04-05 08:40:03 -07:00
|
|
|
let from_channel = from_ic scan_raise_at_end "input channel";;
|
2006-04-05 04:49:07 -07:00
|
|
|
|
|
|
|
(* The scanning buffer reading from [stdin].
|
2006-04-05 08:40:03 -07:00
|
|
|
One could try to define stdib as a scanning buffer reading a character at a
|
|
|
|
time (no bufferization at all), but unfortunately the toplevel
|
|
|
|
interaction would be wrong.
|
2006-04-05 04:49:07 -07:00
|
|
|
This is due to some kind of ``race condition'' when reading from stdin,
|
|
|
|
since the interactive compiler and scanf will simultaneously read the
|
|
|
|
material they need from stdin; then, confusion will result from what should
|
|
|
|
be read by the toplevel and what should be read by scanf.
|
|
|
|
This is even more complicated by the one character lookahead that scanf
|
|
|
|
is sometimes obliged to maintain: the lookahead character will be available
|
|
|
|
for the next (scanf) entry, seamingly coming from nowhere.
|
|
|
|
Also no End_of_file is raised when reading from stdin: if not enough
|
|
|
|
characters have been read, we simply ask to read more. *)
|
2006-04-05 08:40:03 -07:00
|
|
|
let stdib = from_ic scan_raise_at_end "stdin" stdin;;
|
2002-12-08 07:16:09 -08:00
|
|
|
|
2002-05-27 15:00:09 -07:00
|
|
|
end;;
|
|
|
|
|
2004-10-04 23:54:45 -07:00
|
|
|
(* Formatted input functions. *)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2006-10-27 02:12:07 -07:00
|
|
|
type ('a, 'b, 'c, 'd) tscanf =
|
|
|
|
('a, Scanning.scanbuf, 'b, 'c, 'a -> 'd, 'd) format6 -> 'c;;
|
|
|
|
|
2006-04-05 04:49:07 -07:00
|
|
|
module Sformat = Printf.Sformat;;
|
|
|
|
|
|
|
|
external string_to_format :
|
2006-10-27 02:12:07 -07:00
|
|
|
string -> ('a, 'b, 'c, 'd, 'e, 'f) format6 = "%identity";;
|
2006-04-05 04:49:07 -07:00
|
|
|
|
2002-06-26 02:32:27 -07:00
|
|
|
(* Reporting errors. *)
|
2002-06-27 02:20:25 -07:00
|
|
|
exception Scan_failure of string;;
|
2002-06-26 02:32:27 -07:00
|
|
|
|
2002-06-27 02:20:25 -07:00
|
|
|
let bad_input s = raise (Scan_failure s);;
|
|
|
|
let bad_input_char c = bad_input (String.make 1 c);;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-06-27 02:20:25 -07:00
|
|
|
let bad_input_escape c =
|
2004-03-10 14:10:54 -08:00
|
|
|
bad_input (Printf.sprintf "illegal escape character %C" c);;
|
2002-06-26 02:32:27 -07:00
|
|
|
|
2002-07-11 15:39:26 -07:00
|
|
|
let scanf_bad_input ib = function
|
|
|
|
| Scan_failure s | Failure s ->
|
2006-10-04 03:02:01 -07:00
|
|
|
let i = Scanning.char_count ib in
|
|
|
|
bad_input (Printf.sprintf "scanf: bad input at char number %i: %s" i s)
|
2002-07-11 15:39:26 -07:00
|
|
|
| x -> raise x;;
|
2002-05-09 11:26:44 -07:00
|
|
|
|
2004-12-06 22:17:12 -08:00
|
|
|
let bad_conversion fmt i c =
|
2002-05-07 00:41:12 -07:00
|
|
|
invalid_arg
|
|
|
|
(Printf.sprintf
|
2005-03-04 06:51:31 -08:00
|
|
|
"scanf: bad conversion %%%c, at char number %i \
|
2006-04-05 04:49:07 -07:00
|
|
|
in format string ``%s''" c i (Sformat.to_string fmt));;
|
2005-03-04 06:51:31 -08:00
|
|
|
|
|
|
|
let incomplete_format fmt =
|
|
|
|
invalid_arg
|
2006-04-05 04:49:07 -07:00
|
|
|
(Printf.sprintf "scanf: premature end of format string ``%s''"
|
|
|
|
(Sformat.to_string fmt));;
|
2003-11-30 14:13:03 -08:00
|
|
|
|
|
|
|
let bad_float () = bad_input "no dot or exponent part found in float token";;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2005-09-20 14:42:44 -07:00
|
|
|
let format_mismatch_err fmt1 fmt2 =
|
2006-10-04 03:02:01 -07:00
|
|
|
Printf.sprintf
|
|
|
|
"format read ``%s'' does not match specification ``%s''" fmt1 fmt2;;
|
2005-09-20 14:42:44 -07:00
|
|
|
|
2004-09-22 02:17:21 -07:00
|
|
|
let format_mismatch fmt1 fmt2 ib =
|
2005-09-20 14:42:44 -07:00
|
|
|
scanf_bad_input ib (Scan_failure (format_mismatch_err fmt1 fmt2));;
|
|
|
|
|
|
|
|
(* Checking that 2 format string are type compatible. *)
|
|
|
|
let compatible_format_type fmt1 fmt2 =
|
2006-04-05 04:49:07 -07:00
|
|
|
Printf.summarize_format_type (string_to_format fmt1) =
|
|
|
|
Printf.summarize_format_type (string_to_format fmt2);;
|
2004-09-22 02:17:21 -07:00
|
|
|
|
2005-07-11 07:49:57 -07:00
|
|
|
(* Checking that [c] is indeed in the input, then skips it.
|
|
|
|
In this case, the character c has been explicitely specified in the
|
2005-07-02 14:10:04 -07:00
|
|
|
format as being mandatory in the input; hence we should fail with
|
|
|
|
End_of_file in case of end_of_input.
|
|
|
|
That's why we use checked_peek_char here. *)
|
2002-10-07 05:20:07 -07:00
|
|
|
let check_char ib c =
|
2005-07-02 14:10:04 -07:00
|
|
|
let ci = Scanning.checked_peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if ci != c then
|
|
|
|
bad_input (Printf.sprintf "looking for %C, found %C" c ci) else
|
|
|
|
Scanning.invalidate_current_char ib;;
|
2002-10-07 05:20:07 -07:00
|
|
|
|
2005-07-11 07:49:57 -07:00
|
|
|
(* Checks that the current char is indeed one of the stopper characters,
|
|
|
|
then skips it.
|
|
|
|
Be careful that if ib has no more character this procedure should
|
|
|
|
just do nothing (since %s@c defaults to the entire rest of the
|
|
|
|
buffer, when no character c can be found in the input). *)
|
|
|
|
let ignore_stoppers stps ib =
|
|
|
|
if stps <> [] && not (Scanning.eof ib) then
|
|
|
|
let ci = Scanning.peek_char ib in
|
|
|
|
if List.memq ci stps then Scanning.invalidate_current_char ib else
|
|
|
|
let sr = String.concat "" (List.map (String.make 1) stps) in
|
|
|
|
bad_input
|
|
|
|
(Printf.sprintf "looking for one of range %S, found %C" sr ci);;
|
|
|
|
|
2002-05-07 09:28:19 -07:00
|
|
|
(* Extracting tokens from ouput token buffer. *)
|
2002-06-27 02:20:25 -07:00
|
|
|
|
|
|
|
let token_char ib = (Scanning.token ib).[0];;
|
|
|
|
|
|
|
|
let token_string = Scanning.token;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-07 07:47:28 -07:00
|
|
|
let token_bool ib =
|
2002-05-07 00:41:12 -07:00
|
|
|
match Scanning.token ib with
|
|
|
|
| "true" -> true
|
|
|
|
| "false" -> false
|
2002-06-27 02:20:25 -07:00
|
|
|
| s -> bad_input ("invalid boolean " ^ s);;
|
2002-05-07 07:47:28 -07:00
|
|
|
|
2002-10-07 05:20:07 -07:00
|
|
|
(* Extract an integer literal token.
|
|
|
|
Since the functions Pervasives.*int*_of_string do not accept a leading +,
|
|
|
|
we skip it if necessary. *)
|
2002-07-25 08:24:58 -07:00
|
|
|
let token_int_literal conv ib =
|
2002-10-07 05:20:07 -07:00
|
|
|
let tok =
|
|
|
|
match conv with
|
|
|
|
| 'd' | 'i' | 'u' -> Scanning.token ib
|
|
|
|
| 'o' -> "0o" ^ Scanning.token ib
|
|
|
|
| 'x' | 'X' -> "0x" ^ Scanning.token ib
|
|
|
|
| 'b' -> "0b" ^ Scanning.token ib
|
|
|
|
| _ -> assert false in
|
|
|
|
let l = String.length tok in
|
2003-07-07 04:13:21 -07:00
|
|
|
if l = 0 || tok.[0] <> '+' then tok else String.sub tok 1 (l - 1);;
|
2002-07-25 08:24:58 -07:00
|
|
|
|
2002-07-28 14:29:42 -07:00
|
|
|
(* All the functions that convert a string to a number raise the exception
|
|
|
|
Failure when the conversion is not possible.
|
|
|
|
This exception is then trapped in kscanf. *)
|
2002-07-25 08:24:58 -07:00
|
|
|
let token_int conv ib = int_of_string (token_int_literal conv ib);;
|
2002-06-27 02:20:25 -07:00
|
|
|
let token_float ib = float_of_string (Scanning.token ib);;
|
2002-05-09 11:26:44 -07:00
|
|
|
|
2002-05-07 09:28:19 -07:00
|
|
|
(* To scan native ints, int32 and int64 integers.
|
2002-07-28 14:29:42 -07:00
|
|
|
We cannot access to conversions to/from strings for those types,
|
2002-05-27 15:00:09 -07:00
|
|
|
Nativeint.of_string, Int32.of_string, and Int64.of_string,
|
2003-11-30 14:13:03 -08:00
|
|
|
since those modules are not available to Scanf.
|
2002-05-27 15:00:09 -07:00
|
|
|
However, we can bind and use the corresponding primitives that are
|
|
|
|
available in the runtime. *)
|
2006-04-05 04:49:07 -07:00
|
|
|
external nativeint_of_string : string -> nativeint
|
|
|
|
= "caml_nativeint_of_string";;
|
|
|
|
external int32_of_string : string -> int32
|
|
|
|
= "caml_int32_of_string";;
|
|
|
|
external int64_of_string : string -> int64
|
|
|
|
= "caml_int64_of_string";;
|
2002-05-07 09:28:19 -07:00
|
|
|
|
2002-07-25 08:24:58 -07:00
|
|
|
let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);;
|
|
|
|
let token_int32 conv ib = int32_of_string (token_int_literal conv ib);;
|
|
|
|
let token_int64 conv ib = int64_of_string (token_int_literal conv ib);;
|
2002-05-07 09:28:19 -07:00
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
(* Scanning numbers. *)
|
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
(* Digits scanning functions suppose that one character has been
|
|
|
|
checked and is available, since they return at end of file with the
|
|
|
|
currently found token selected. The digits scanning functions scan
|
|
|
|
a possibly empty sequence of digits, (hence a successful scanning
|
|
|
|
from one of those functions does not imply that the token is a
|
|
|
|
well-formed number: to get a true number, it is mandatory to check
|
|
|
|
that at least one digit is available before calling a digit
|
|
|
|
scanning function). *)
|
|
|
|
|
|
|
|
(* The decimal case is treated especially for optimization purposes. *)
|
2005-07-01 01:15:02 -07:00
|
|
|
let rec scan_decimal_digits max ib =
|
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
|
|
|
match c with
|
|
|
|
| '0' .. '9' as c ->
|
|
|
|
let max = Scanning.store_char ib c max in
|
|
|
|
scan_decimal_digits max ib
|
|
|
|
| '_' ->
|
|
|
|
let max = Scanning.ignore_char ib max in
|
|
|
|
scan_decimal_digits max ib
|
|
|
|
| _ -> max;;
|
|
|
|
|
|
|
|
let scan_decimal_digits_plus max ib =
|
|
|
|
let c = Scanning.checked_peek_char ib in
|
|
|
|
match c with
|
|
|
|
| '0' .. '9' ->
|
|
|
|
let max = Scanning.store_char ib c max in
|
|
|
|
scan_decimal_digits max ib
|
|
|
|
| c -> bad_input_char c;;
|
|
|
|
|
|
|
|
let scan_digits_plus digitp max ib =
|
|
|
|
(* To scan numbers from other bases, we use a predicate argument to
|
|
|
|
scan_digits. *)
|
|
|
|
let rec scan_digits max =
|
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
|
|
|
match c with
|
2003-07-15 00:25:09 -07:00
|
|
|
| c when digitp c ->
|
|
|
|
let max = Scanning.store_char ib c max in
|
2005-07-01 01:15:02 -07:00
|
|
|
scan_digits max
|
|
|
|
| '_' ->
|
|
|
|
let max = Scanning.ignore_char ib max in
|
|
|
|
scan_digits max
|
2003-07-15 00:25:09 -07:00
|
|
|
| _ -> max in
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
let c = Scanning.checked_peek_char ib in
|
|
|
|
if digitp c then
|
|
|
|
let max = Scanning.store_char ib c max in
|
2005-07-01 01:15:02 -07:00
|
|
|
scan_digits max
|
2004-04-01 07:07:02 -08:00
|
|
|
else bad_input_char c;;
|
|
|
|
|
|
|
|
let is_binary_digit = function
|
2002-05-07 00:41:12 -07:00
|
|
|
| '0' .. '1' -> true
|
2004-04-01 07:07:02 -08:00
|
|
|
| _ -> false;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
let scan_binary_int = scan_digits_plus is_binary_digit;;
|
|
|
|
|
|
|
|
let is_octal_digit = function
|
2002-07-25 08:24:58 -07:00
|
|
|
| '0' .. '7' -> true
|
2004-04-01 07:07:02 -08:00
|
|
|
| _ -> false;;
|
|
|
|
|
|
|
|
let scan_octal_int = scan_digits_plus is_octal_digit;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
let is_hexa_digit = function
|
2002-07-25 08:24:58 -07:00
|
|
|
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
|
2004-04-01 07:07:02 -08:00
|
|
|
| _ -> false;;
|
|
|
|
|
|
|
|
let scan_hexadecimal_int = scan_digits_plus is_hexa_digit;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
(* Scan a decimal integer. *)
|
2005-07-01 01:15:02 -07:00
|
|
|
let scan_unsigned_decimal_int = scan_decimal_digits_plus;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-10-07 05:20:07 -07:00
|
|
|
let scan_sign max ib =
|
|
|
|
let c = Scanning.checked_peek_char ib in
|
|
|
|
match c with
|
|
|
|
| '+' -> Scanning.store_char ib c max
|
|
|
|
| '-' -> Scanning.store_char ib c max
|
|
|
|
| c -> max;;
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
let scan_optionally_signed_decimal_int max ib =
|
|
|
|
let max = scan_sign max ib in
|
|
|
|
scan_unsigned_decimal_int max ib;;
|
|
|
|
|
|
|
|
(* Scan an unsigned integer that could be given in any (common) basis.
|
2002-07-28 14:29:42 -07:00
|
|
|
If digits are prefixed by one of 0x, 0X, 0o, or 0b, the number is
|
2002-07-25 08:24:58 -07:00
|
|
|
assumed to be written respectively in hexadecimal, hexadecimal,
|
2002-05-07 00:41:12 -07:00
|
|
|
octal, or binary. *)
|
|
|
|
let scan_unsigned_int max ib =
|
2002-10-07 05:20:07 -07:00
|
|
|
match Scanning.checked_peek_char ib with
|
2002-05-07 00:41:12 -07:00
|
|
|
| '0' as c ->
|
|
|
|
let max = Scanning.store_char ib c max in
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
let c = Scanning.peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
begin match c with
|
2005-07-01 01:15:02 -07:00
|
|
|
| 'x' | 'X' -> scan_hexadecimal_int (Scanning.store_char ib c max) ib
|
|
|
|
| 'o' -> scan_octal_int (Scanning.store_char ib c max) ib
|
|
|
|
| 'b' -> scan_binary_int (Scanning.store_char ib c max) ib
|
2002-05-07 00:41:12 -07:00
|
|
|
| c -> scan_decimal_digits max ib end
|
2004-04-01 07:07:02 -08:00
|
|
|
| c -> scan_unsigned_decimal_int max ib;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
|
|
|
let scan_optionally_signed_int max ib =
|
|
|
|
let max = scan_sign max ib in
|
|
|
|
scan_unsigned_int max ib;;
|
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
let scan_int_conv conv max ib =
|
2002-05-27 00:08:05 -07:00
|
|
|
match conv with
|
2004-04-01 07:07:02 -08:00
|
|
|
| 'b' -> scan_binary_int max ib
|
2002-05-07 09:28:19 -07:00
|
|
|
| 'd' -> scan_optionally_signed_decimal_int max ib
|
|
|
|
| 'i' -> scan_optionally_signed_int max ib
|
2004-04-01 07:07:02 -08:00
|
|
|
| 'o' -> scan_octal_int max ib
|
2002-05-07 09:28:19 -07:00
|
|
|
| 'u' -> scan_unsigned_decimal_int max ib
|
2004-04-01 07:07:02 -08:00
|
|
|
| 'x' | 'X' -> scan_hexadecimal_int max ib
|
2002-05-07 09:28:19 -07:00
|
|
|
| c -> assert false;;
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
(* Scanning floating point numbers. *)
|
2002-10-07 05:20:07 -07:00
|
|
|
(* Fractional part is optional and can be reduced to 0 digits. *)
|
|
|
|
let scan_frac_part max ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
|
|
|
match c with
|
|
|
|
| '0' .. '9' as c ->
|
|
|
|
scan_decimal_digits (Scanning.store_char ib c max) ib
|
|
|
|
| _ -> max;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-10-07 05:20:07 -07:00
|
|
|
(* Exp part is optional and can be reduced to 0 digits. *)
|
2002-05-07 00:41:12 -07:00
|
|
|
let scan_exp_part max ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
let c = Scanning.peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
match c with
|
|
|
|
| 'e' | 'E' as c ->
|
2003-07-15 00:25:09 -07:00
|
|
|
scan_optionally_signed_decimal_int (Scanning.store_char ib c max) ib
|
2002-05-07 00:41:12 -07:00
|
|
|
| _ -> max;;
|
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
(* Scan the integer part of a floating point number, (not using the
|
|
|
|
Caml lexical convention since the integer part can be empty):
|
|
|
|
an optional sign, followed by a possibly empty sequence of decimal
|
|
|
|
digits (e.g. -.1). *)
|
|
|
|
let scan_int_part max ib =
|
2004-04-01 07:07:02 -08:00
|
|
|
let max = scan_sign max ib in
|
|
|
|
scan_decimal_digits max ib;;
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
let scan_float max ib =
|
2004-04-01 07:07:02 -08:00
|
|
|
let max = scan_int_part max ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
let c = Scanning.peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then max else
|
2002-05-07 00:41:12 -07:00
|
|
|
match c with
|
|
|
|
| '.' ->
|
2002-05-07 09:28:19 -07:00
|
|
|
let max = Scanning.store_char ib c max in
|
|
|
|
let max = scan_frac_part max ib in
|
|
|
|
scan_exp_part max ib
|
2002-05-07 00:41:12 -07:00
|
|
|
| c -> scan_exp_part max ib;;
|
|
|
|
|
2003-07-14 03:04:25 -07:00
|
|
|
let scan_Float max ib =
|
|
|
|
let max = scan_optionally_signed_decimal_int max ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then bad_float () else
|
2003-07-14 03:04:25 -07:00
|
|
|
let c = Scanning.peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then bad_float () else
|
2003-07-14 03:04:25 -07:00
|
|
|
match c with
|
|
|
|
| '.' ->
|
|
|
|
let max = Scanning.store_char ib c max in
|
|
|
|
let max = scan_frac_part max ib in
|
|
|
|
scan_exp_part max ib
|
2003-07-15 00:25:09 -07:00
|
|
|
| 'e' | 'E' ->
|
|
|
|
scan_exp_part max ib
|
2003-07-14 03:04:25 -07:00
|
|
|
| c -> bad_float ();;
|
|
|
|
|
2004-04-01 07:07:02 -08:00
|
|
|
(* Scan a regular string: stops when encountering a space or one of the
|
2002-10-30 15:46:21 -08:00
|
|
|
characters in stp. It also stops when the maximum number of
|
|
|
|
characters has been read.*)
|
2002-05-09 11:26:44 -07:00
|
|
|
let scan_string stp max ib =
|
|
|
|
let rec loop max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if stp == [] then
|
2002-05-09 11:26:44 -07:00
|
|
|
match c with
|
2005-07-01 02:02:02 -07:00
|
|
|
| ' ' | '\t' | '\n' | '\r' -> max
|
2002-05-09 11:26:44 -07:00
|
|
|
| c -> loop (Scanning.store_char ib c max) else
|
2005-07-01 01:15:02 -07:00
|
|
|
if List.memq c stp then Scanning.skip_char ib max else
|
2002-10-07 23:46:15 -07:00
|
|
|
loop (Scanning.store_char ib c max) in
|
2005-07-01 01:15:02 -07:00
|
|
|
loop max;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-20 12:19:15 -07:00
|
|
|
(* Scan a char: peek strictly one character in the input, whatsoever. *)
|
2002-05-07 00:41:12 -07:00
|
|
|
let scan_char max ib =
|
2002-10-07 05:20:07 -07:00
|
|
|
Scanning.store_char ib (Scanning.checked_peek_char ib) max;;
|
2002-05-20 12:19:15 -07:00
|
|
|
|
2004-01-16 07:24:03 -08:00
|
|
|
let char_for_backslash = function
|
|
|
|
| 'n' -> '\010'
|
|
|
|
| 'r' -> '\013'
|
|
|
|
| 'b' -> '\008'
|
|
|
|
| 't' -> '\009'
|
|
|
|
| c -> c
|
|
|
|
|
|
|
|
(* The integer value corresponding to the facial value of a valid
|
2003-11-30 14:13:03 -08:00
|
|
|
decimal digit character. *)
|
|
|
|
let int_value_of_char c = int_of_char c - 48;;
|
|
|
|
|
2002-06-27 02:20:25 -07:00
|
|
|
let char_for_decimal_code c0 c1 c2 =
|
2002-05-20 12:19:15 -07:00
|
|
|
let c =
|
2003-11-30 14:13:03 -08:00
|
|
|
100 * int_value_of_char c0 +
|
|
|
|
10 * int_value_of_char c1 +
|
|
|
|
int_value_of_char c2 in
|
2002-05-20 12:19:15 -07:00
|
|
|
if c < 0 || c > 255
|
2002-06-27 02:20:25 -07:00
|
|
|
then bad_input (Printf.sprintf "bad char \\%c%c%c" c0 c1 c2)
|
2002-05-20 12:19:15 -07:00
|
|
|
else char_of_int c;;
|
|
|
|
|
|
|
|
(* Called when encountering '\\' as starter of a char.
|
|
|
|
Stops before the corresponding '\''. *)
|
|
|
|
let scan_backslash_char max ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then bad_input "a char" else
|
2002-05-07 00:41:12 -07:00
|
|
|
let c = Scanning.peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then bad_input "a char" else
|
2002-05-20 12:19:15 -07:00
|
|
|
match c with
|
|
|
|
| '\\' | '\'' | '"' | 'n' | 't' | 'b' | 'r' (* '"' helping Emacs *) ->
|
|
|
|
Scanning.store_char ib (char_for_backslash c) max
|
|
|
|
| '0' .. '9' as c ->
|
|
|
|
let get_digit () =
|
2005-07-01 01:15:02 -07:00
|
|
|
let c = Scanning.next_char ib in
|
2002-05-20 12:19:15 -07:00
|
|
|
match c with
|
|
|
|
| '0' .. '9' as c -> c
|
2002-06-27 02:20:25 -07:00
|
|
|
| c -> bad_input_escape c in
|
2002-05-20 12:19:15 -07:00
|
|
|
let c0 = c in
|
|
|
|
let c1 = get_digit () in
|
|
|
|
let c2 = get_digit () in
|
2002-06-27 02:20:25 -07:00
|
|
|
Scanning.store_char ib (char_for_decimal_code c0 c1 c2) (max - 2)
|
|
|
|
| c -> bad_input_char c;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-12 10:57:36 -07:00
|
|
|
let scan_Char max ib =
|
2002-05-20 12:19:15 -07:00
|
|
|
let rec loop s max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then bad_input "a char" else
|
2002-10-07 05:20:07 -07:00
|
|
|
let c = Scanning.checked_peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then bad_input "a char" else
|
2002-05-20 12:19:15 -07:00
|
|
|
match c, s with
|
2005-07-01 01:15:02 -07:00
|
|
|
| '\'', 3 -> loop 2 (Scanning.ignore_char ib max)
|
|
|
|
| '\'', 1 -> Scanning.ignore_char ib max
|
|
|
|
| '\\', 2 -> loop 1 (scan_backslash_char (Scanning.ignore_char ib max) ib)
|
2002-05-20 12:19:15 -07:00
|
|
|
| c, 2 -> loop 1 (Scanning.store_char ib c max)
|
2002-06-27 02:20:25 -07:00
|
|
|
| c, _ -> bad_input_escape c in
|
2002-05-20 12:19:15 -07:00
|
|
|
loop 3 max;;
|
|
|
|
|
2002-07-28 14:29:42 -07:00
|
|
|
let scan_String max ib =
|
2002-05-20 12:19:15 -07:00
|
|
|
let rec loop s max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then bad_input "a string" else
|
2002-10-07 05:20:07 -07:00
|
|
|
let c = Scanning.checked_peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then bad_input "a string" else
|
2002-07-28 14:29:42 -07:00
|
|
|
match c, s with
|
|
|
|
| '"', true (* '"' helping Emacs *) ->
|
2005-07-01 01:15:02 -07:00
|
|
|
loop false (Scanning.ignore_char ib max)
|
2002-07-28 14:29:42 -07:00
|
|
|
| '"', false (* '"' helping Emacs *) ->
|
2005-07-01 01:15:02 -07:00
|
|
|
Scanning.ignore_char ib max
|
2002-07-28 14:29:42 -07:00
|
|
|
| '\\', false ->
|
2005-07-01 01:15:02 -07:00
|
|
|
skip_spaces true (Scanning.ignore_char ib max)
|
2003-03-02 15:03:15 -08:00
|
|
|
| c, false -> loop false (Scanning.store_char ib c max)
|
|
|
|
| c, _ -> bad_input_char c
|
|
|
|
and skip_spaces s max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then bad_input "a string" else
|
2003-03-02 15:03:15 -08:00
|
|
|
let c = Scanning.checked_peek_char ib in
|
2005-07-01 01:15:02 -07:00
|
|
|
if Scanning.eof ib then bad_input "a string" else
|
2003-03-02 15:03:15 -08:00
|
|
|
match c, s with
|
2003-05-13 23:30:04 -07:00
|
|
|
| '\n', true
|
|
|
|
| ' ', false ->
|
2005-07-01 01:15:02 -07:00
|
|
|
skip_spaces false (Scanning.ignore_char ib max)
|
2003-05-13 23:30:04 -07:00
|
|
|
| '\\', false -> loop false max
|
2002-07-28 14:29:42 -07:00
|
|
|
| c, false -> loop false (Scanning.store_char ib c max)
|
2003-05-13 23:30:04 -07:00
|
|
|
| _, _ -> loop false (scan_backslash_char (max - 1) ib) in
|
2002-05-12 10:57:36 -07:00
|
|
|
loop true max;;
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
let scan_bool max ib =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max < 4 then bad_input "a boolean" else
|
|
|
|
let c = Scanning.checked_peek_char ib in
|
|
|
|
if Scanning.eof ib then bad_input "a boolean" else
|
2002-05-07 00:41:12 -07:00
|
|
|
let m =
|
2005-07-01 01:15:02 -07:00
|
|
|
match c with
|
2002-05-07 00:41:12 -07:00
|
|
|
| 't' -> 4
|
|
|
|
| 'f' -> 5
|
2005-07-01 01:15:02 -07:00
|
|
|
| _ -> bad_input "a boolean" in
|
2002-05-09 11:26:44 -07:00
|
|
|
scan_string [] (min max m) ib;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Reading char sets in %[...] conversions. *)
|
2002-05-20 12:19:15 -07:00
|
|
|
type char_set =
|
2004-02-04 02:16:25 -08:00
|
|
|
| Pos_set of string (* Positive (regular) set. *)
|
|
|
|
| Neg_set of string (* Negative (complementary) set. *);;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Char sets are read as sub-strings in the format string. *)
|
2002-05-07 00:41:12 -07:00
|
|
|
let read_char_set fmt i =
|
2006-04-05 04:49:07 -07:00
|
|
|
let lim = Sformat.length fmt - 1 in
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
let rec find_in_set j =
|
2004-12-06 22:17:12 -08:00
|
|
|
if j > lim then incomplete_format fmt else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt j with
|
2004-02-04 02:16:25 -08:00
|
|
|
| ']' -> j
|
2006-05-04 05:52:22 -07:00
|
|
|
| c -> find_in_set (succ j)
|
2002-05-07 00:41:12 -07:00
|
|
|
|
|
|
|
and find_set i =
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2006-05-04 05:52:22 -07:00
|
|
|
| ']' -> find_in_set (succ i)
|
2004-02-04 02:16:25 -08:00
|
|
|
| c -> find_in_set i in
|
|
|
|
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2004-02-04 02:16:25 -08:00
|
|
|
| '^' ->
|
2006-05-04 05:52:22 -07:00
|
|
|
let i = succ i in
|
2004-02-04 02:16:25 -08:00
|
|
|
let j = find_set i in
|
2006-04-05 04:49:07 -07:00
|
|
|
j, Neg_set (Sformat.sub fmt i (j - i))
|
2004-02-04 02:16:25 -08:00
|
|
|
| _ ->
|
|
|
|
let j = find_set i in
|
2006-04-05 04:49:07 -07:00
|
|
|
j, Pos_set (Sformat.sub fmt i (j - i));;
|
2004-02-04 02:16:25 -08:00
|
|
|
|
|
|
|
(* Char sets are now represented as bitvects that are represented as
|
|
|
|
byte strings. *)
|
|
|
|
|
|
|
|
(* Bit manipulations into bytes. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let set_bit_of_byte byte idx b =
|
|
|
|
(b lsl idx) lor (byte land (* mask idx *) (lnot (1 lsl idx)));;
|
|
|
|
|
|
|
|
let get_bit_of_byte byte idx = (byte lsr idx) land 1;;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Bit manipulations in vectors of bytes represented as strings. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let set_bit_of_range r c b =
|
|
|
|
let idx = c land 0x7 in
|
|
|
|
let ydx = c lsr 3 in
|
|
|
|
let byte = r.[ydx] in
|
|
|
|
r.[ydx] <- char_of_int (set_bit_of_byte (int_of_char byte) idx b);;
|
|
|
|
|
|
|
|
let get_bit_of_range r c =
|
|
|
|
let idx = c land 0x7 in
|
|
|
|
let ydx = c lsr 3 in
|
|
|
|
let byte = r.[ydx] in
|
|
|
|
get_bit_of_byte (int_of_char byte) idx;;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Char sets represented as bitvects represented as fixed length byte
|
|
|
|
strings. *)
|
|
|
|
(* Create a full or empty set of chars. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let make_range bit =
|
|
|
|
let c = char_of_int (if bit = 0 then 0 else 0xFF) in
|
|
|
|
String.make 32 c;;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Test is a char belongs to a set of chars. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let get_char_in_range r c = get_bit_of_range r (int_of_char c);;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
let bit_not b = (lnot b) land 1;;
|
|
|
|
|
2004-09-22 02:17:21 -07:00
|
|
|
(* Build the bit vector corresponding to the set of characters
|
|
|
|
that belongs to the string argument [set].
|
|
|
|
(In the Scanf module [set] is always a sub-string of the format). *)
|
|
|
|
let make_char_bit_vect bit set =
|
2003-10-27 00:21:04 -08:00
|
|
|
let r = make_range (bit_not bit) in
|
|
|
|
let lim = String.length set - 1 in
|
|
|
|
let rec loop bit rp i =
|
|
|
|
if i <= lim then
|
|
|
|
match set.[i] with
|
|
|
|
| '-' when rp ->
|
2004-09-22 02:17:21 -07:00
|
|
|
(* if i = 0 then rp is false (since the initial call is
|
|
|
|
loop bit false 0). Hence i >= 1 and the following is safe. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let c1 = set.[i - 1] in
|
2006-05-04 05:52:22 -07:00
|
|
|
let i = succ i in
|
2003-10-27 00:21:04 -08:00
|
|
|
if i > lim then loop bit false (i - 1) else
|
|
|
|
let c2 = set.[i] in
|
|
|
|
for j = int_of_char c1 to int_of_char c2 do
|
|
|
|
set_bit_of_range r j bit done;
|
2006-05-04 05:52:22 -07:00
|
|
|
loop bit false (succ i)
|
2003-11-30 14:13:03 -08:00
|
|
|
| c ->
|
2003-10-27 00:21:04 -08:00
|
|
|
set_bit_of_range r (int_of_char set.[i]) bit;
|
2006-05-04 05:52:22 -07:00
|
|
|
loop bit true (succ i) in
|
2003-10-27 00:21:04 -08:00
|
|
|
loop bit false 0;
|
|
|
|
r;;
|
|
|
|
|
2004-02-04 02:16:25 -08:00
|
|
|
(* Compute the predicate on chars corresponding to a char set. *)
|
2003-10-27 00:21:04 -08:00
|
|
|
let make_pred bit set stp =
|
2004-09-22 02:17:21 -07:00
|
|
|
let r = make_char_bit_vect bit set in
|
2003-10-27 00:21:04 -08:00
|
|
|
List.iter
|
|
|
|
(fun c -> set_bit_of_range r (int_of_char c) (bit_not bit)) stp;
|
|
|
|
(fun c -> get_char_in_range r c);;
|
|
|
|
|
2002-05-09 11:26:44 -07:00
|
|
|
let make_setp stp char_set =
|
2002-05-07 00:41:12 -07:00
|
|
|
match char_set with
|
|
|
|
| Pos_set set ->
|
2003-10-27 00:21:04 -08:00
|
|
|
begin match String.length set with
|
|
|
|
| 0 -> (fun c -> 0)
|
|
|
|
| 1 ->
|
|
|
|
let p = set.[0] in
|
|
|
|
(fun c -> if c == p then 1 else 0)
|
|
|
|
| 2 ->
|
|
|
|
let p1 = set.[0] and p2 = set.[1] in
|
|
|
|
(fun c -> if c == p1 || c == p2 then 1 else 0)
|
2004-02-04 02:16:25 -08:00
|
|
|
| 3 ->
|
|
|
|
let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
|
2004-06-04 06:27:24 -07:00
|
|
|
if p2 = '-' then make_pred 1 set stp else
|
2004-02-04 02:16:25 -08:00
|
|
|
(fun c -> if c == p1 || c == p2 || c == p3 then 1 else 0)
|
2003-10-27 00:21:04 -08:00
|
|
|
| n -> make_pred 1 set stp
|
|
|
|
end
|
2002-05-07 00:41:12 -07:00
|
|
|
| Neg_set set ->
|
2003-10-27 00:21:04 -08:00
|
|
|
begin match String.length set with
|
|
|
|
| 0 -> (fun c -> 1)
|
|
|
|
| 1 ->
|
|
|
|
let p = set.[0] in
|
|
|
|
(fun c -> if c != p then 1 else 0)
|
|
|
|
| 2 ->
|
|
|
|
let p1 = set.[0] and p2 = set.[1] in
|
|
|
|
(fun c -> if c != p1 && c != p2 then 1 else 0)
|
2004-02-04 02:16:25 -08:00
|
|
|
| 3 ->
|
|
|
|
let p1 = set.[0] and p2 = set.[1] and p3 = set.[2] in
|
2004-06-04 06:27:24 -07:00
|
|
|
if p2 = '-' then make_pred 0 set stp else
|
2004-02-04 02:16:25 -08:00
|
|
|
(fun c -> if c != p1 && c != p2 && c != p3 then 1 else 0)
|
2003-10-27 00:21:04 -08:00
|
|
|
| n -> make_pred 0 set stp
|
|
|
|
end;;
|
|
|
|
|
|
|
|
let setp_table = Hashtbl.create 7;;
|
|
|
|
|
|
|
|
let add_setp stp char_set setp =
|
|
|
|
let char_set_tbl =
|
|
|
|
try Hashtbl.find setp_table char_set with
|
|
|
|
| Not_found ->
|
|
|
|
let char_set_tbl = Hashtbl.create 3 in
|
|
|
|
Hashtbl.add setp_table char_set char_set_tbl;
|
|
|
|
char_set_tbl in
|
|
|
|
Hashtbl.add char_set_tbl stp setp;;
|
|
|
|
|
|
|
|
let find_setp stp char_set =
|
|
|
|
try Hashtbl.find (Hashtbl.find setp_table char_set) stp with
|
|
|
|
| Not_found ->
|
|
|
|
let setp = make_setp stp char_set in
|
|
|
|
add_setp stp char_set setp;
|
|
|
|
setp;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-09 11:26:44 -07:00
|
|
|
let scan_chars_in_char_set stp char_set max ib =
|
2003-10-27 00:21:04 -08:00
|
|
|
let rec loop_pos1 cp1 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c == cp1
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_pos1 cp1 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop_pos2 cp1 cp2 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c == cp1 || c == cp2
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_pos2 cp1 cp2 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop_pos3 cp1 cp2 cp3 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c == cp1 || c == cp2 || c == cp3
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_pos3 cp1 cp2 cp3 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop_neg1 cp1 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c != cp1
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_neg1 cp1 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop_neg2 cp1 cp2 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c != cp1 && c != cp2
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_neg2 cp1 cp2 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop_neg3 cp1 cp2 cp3 max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
2003-11-30 14:13:03 -08:00
|
|
|
if c != cp1 && c != cp2 && c != cp3
|
2003-10-27 00:21:04 -08:00
|
|
|
then loop_neg3 cp1 cp2 cp3 (Scanning.store_char ib c max)
|
|
|
|
else max
|
|
|
|
and loop setp max =
|
2005-07-01 01:15:02 -07:00
|
|
|
if max = 0 then max else
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if Scanning.eof ib then max else
|
|
|
|
if setp c == 1
|
|
|
|
then loop setp (Scanning.store_char ib c max)
|
|
|
|
else max in
|
2003-10-27 00:21:04 -08:00
|
|
|
|
|
|
|
let max =
|
|
|
|
match char_set with
|
|
|
|
| Pos_set set ->
|
|
|
|
begin match String.length set with
|
|
|
|
| 0 -> loop (fun c -> 0) max
|
|
|
|
| 1 -> loop_pos1 set.[0] max
|
|
|
|
| 2 -> loop_pos2 set.[0] set.[1] max
|
2004-06-04 06:27:24 -07:00
|
|
|
| 3 when set.[1] != '-' -> loop_pos3 set.[0] set.[1] set.[2] max
|
2003-10-27 00:21:04 -08:00
|
|
|
| n -> loop (find_setp stp char_set) max end
|
|
|
|
| Neg_set set ->
|
|
|
|
begin match String.length set with
|
|
|
|
| 0 -> loop (fun c -> 1) max
|
|
|
|
| 1 -> loop_neg1 set.[0] max
|
|
|
|
| 2 -> loop_neg2 set.[0] set.[1] max
|
2004-06-04 06:27:24 -07:00
|
|
|
| 3 when set.[1] != '-' -> loop_neg3 set.[0] set.[1] set.[2] max
|
2003-10-27 00:21:04 -08:00
|
|
|
| n -> loop (find_setp stp char_set) max end in
|
2005-07-11 07:49:57 -07:00
|
|
|
ignore_stoppers stp ib;
|
2003-05-13 23:30:04 -07:00
|
|
|
max;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2003-11-30 14:13:03 -08:00
|
|
|
let get_count t ib =
|
|
|
|
match t with
|
|
|
|
| 'l' -> Scanning.line_count ib
|
|
|
|
| 'n' -> Scanning.char_count ib
|
|
|
|
| _ -> Scanning.token_count ib;;
|
|
|
|
|
2005-07-01 01:15:02 -07:00
|
|
|
let rec skip_whites ib =
|
|
|
|
let c = Scanning.peek_char ib in
|
|
|
|
if not (Scanning.eof ib) then begin
|
|
|
|
match c with
|
|
|
|
| ' ' | '\t' | '\n' | '\r' ->
|
|
|
|
Scanning.invalidate_current_char ib; skip_whites ib
|
|
|
|
| _ -> ()
|
|
|
|
end;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2006-11-06 02:19:12 -08:00
|
|
|
let list_iter_i f l =
|
|
|
|
let rec loop i = function
|
|
|
|
| [] -> ()
|
|
|
|
| [x] -> f i x (* Tail calling [f] *)
|
|
|
|
| x :: xs -> f i x; loop (succ i) xs in
|
|
|
|
loop 0 l;;
|
|
|
|
|
2003-11-30 14:13:03 -08:00
|
|
|
(* The [kscanf] main scanning function.
|
|
|
|
It takes as arguments:
|
|
|
|
- an input buffer [ib] from which to read characters,
|
|
|
|
- an error handling function [ef],
|
|
|
|
- a format [fmt] that specifies what to read in the input,
|
|
|
|
- and a function [f] to pass the tokens read to.
|
|
|
|
|
|
|
|
Then [kscanf] scans the format and the buffer in parallel to find
|
|
|
|
out tokens as specified by the format; when it founds one token, it
|
|
|
|
converts it as specified, remembers the converted value as a future
|
2002-06-27 02:20:25 -07:00
|
|
|
argument to the function [f], and continues scanning.
|
2002-07-28 14:29:42 -07:00
|
|
|
|
|
|
|
If the entire scanning succeeds (i.e. the format string has been
|
|
|
|
exhausted and the buffer has provided tokens according to the
|
|
|
|
format string), the tokens are applied to [f].
|
|
|
|
|
2003-11-30 14:13:03 -08:00
|
|
|
If the scanning or some conversion fails, the main scanning function
|
2002-06-26 02:32:27 -07:00
|
|
|
aborts and applies the scanning buffer and a string that explains
|
2003-11-30 14:13:03 -08:00
|
|
|
the error to the error handling function [ef] (the error continuation). *)
|
2006-11-06 02:19:12 -08:00
|
|
|
let ascanf sc fmt =
|
|
|
|
let ac = Printf.ac_of_format fmt in
|
|
|
|
match ac.Printf.ac_rdrs with
|
|
|
|
| 0 -> Obj.magic (fun f -> sc fmt [||] f)
|
|
|
|
| 1 -> Obj.magic (fun x f -> sc fmt [| Obj.repr x |] f)
|
|
|
|
| 2 -> Obj.magic (fun x y f -> sc fmt [| Obj.repr x; Obj.repr y; |] f)
|
|
|
|
| 3 -> Obj.magic (fun x y z f ->
|
|
|
|
sc fmt [| Obj.repr x; Obj.repr y; Obj.repr z; |] f)
|
|
|
|
| nargs ->
|
|
|
|
let rec loop i args =
|
|
|
|
if i >= nargs then
|
|
|
|
let a = Array.make nargs (Obj.repr 0) in
|
|
|
|
list_iter_i (fun i arg -> a.(nargs - i - 1) <- arg) args;
|
|
|
|
Obj.magic (fun f -> sc fmt a f)
|
|
|
|
else Obj.magic (fun x -> loop (succ i) (x :: args)) in
|
|
|
|
loop 0 [];;
|
|
|
|
|
|
|
|
let scan_format ib ef fmt v f =
|
2006-04-05 04:49:07 -07:00
|
|
|
|
|
|
|
let lim = Sformat.length fmt - 1 in
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2006-11-06 02:19:12 -08:00
|
|
|
let limr = Array.length v - 1 in
|
|
|
|
|
2002-05-09 12:37:07 -07:00
|
|
|
let return v = Obj.magic v () in
|
2002-05-12 11:41:07 -07:00
|
|
|
let delay f x () = f x in
|
2002-05-09 12:37:07 -07:00
|
|
|
let stack f = delay (return f) in
|
2002-12-08 23:13:29 -08:00
|
|
|
let no_stack f x = f in
|
2002-05-09 12:37:07 -07:00
|
|
|
|
2006-11-06 02:19:12 -08:00
|
|
|
let rec scan_fmt ir f i =
|
2002-06-26 02:32:27 -07:00
|
|
|
if i > lim then f else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2006-11-06 02:19:12 -08:00
|
|
|
| ' ' -> skip_whites ib; scan_fmt ir f (succ i)
|
2003-11-30 14:13:03 -08:00
|
|
|
| '%' ->
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_conversion false max_int ir f (succ i)
|
2004-12-06 22:17:12 -08:00
|
|
|
| '@' ->
|
2006-05-04 05:52:22 -07:00
|
|
|
let i = succ i in
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else begin
|
2006-04-05 04:49:07 -07:00
|
|
|
check_char ib (Sformat.get fmt i);
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir f (succ i) end
|
|
|
|
| c -> check_char ib c; scan_fmt ir f (succ i)
|
2002-10-07 05:20:07 -07:00
|
|
|
|
2006-11-06 02:19:12 -08:00
|
|
|
and scan_conversion skip max ir f i =
|
2002-12-08 23:13:29 -08:00
|
|
|
let stack = if skip then no_stack else stack in
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2004-09-22 02:17:21 -07:00
|
|
|
| '%' as conv ->
|
2006-11-06 02:19:12 -08:00
|
|
|
check_char ib conv; scan_fmt ir f (succ i)
|
2006-05-04 05:52:22 -07:00
|
|
|
| 's' ->
|
|
|
|
let i, stp = scan_fmt_stoppers (succ i) in
|
|
|
|
let _x = scan_string stp max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_string ib)) (succ i)
|
2006-05-04 05:52:22 -07:00
|
|
|
| 'S' ->
|
|
|
|
let _x = scan_String max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_string ib)) (succ i)
|
2006-10-27 02:12:07 -07:00
|
|
|
| '[' (* ']' *) ->
|
2006-05-04 05:52:22 -07:00
|
|
|
let i, char_set = read_char_set fmt (succ i) in
|
|
|
|
let i, stp = scan_fmt_stoppers (succ i) in
|
|
|
|
let _x = scan_chars_in_char_set stp char_set max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_string ib)) (succ i)
|
2003-05-13 23:30:04 -07:00
|
|
|
| 'c' when max = 0 ->
|
|
|
|
let c = Scanning.checked_peek_char ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f c) (succ i)
|
2002-05-12 10:57:36 -07:00
|
|
|
| 'c' | 'C' as conv ->
|
2004-12-06 22:17:12 -08:00
|
|
|
if max <> 1 && max <> max_int then bad_conversion fmt i conv else
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x =
|
2003-07-07 04:13:21 -07:00
|
|
|
if conv = 'c' then scan_char max ib else scan_Char max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_char ib)) (succ i)
|
2003-07-01 09:30:12 -07:00
|
|
|
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_int_conv conv max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_int conv ib)) (succ i)
|
2006-05-04 05:52:22 -07:00
|
|
|
| 'N' as conv ->
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (get_count conv ib)) (succ i)
|
2006-05-04 05:52:22 -07:00
|
|
|
| 'f' | 'e' | 'E' | 'g' | 'G' ->
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_float max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_float ib)) (succ i)
|
2003-07-14 03:04:25 -07:00
|
|
|
| 'F' ->
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_Float max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_float ib)) (succ i)
|
2003-07-01 09:30:12 -07:00
|
|
|
| 'B' | 'b' ->
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_bool max ib in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_fmt ir (stack f (token_bool ib)) (succ i)
|
|
|
|
| 'r' ->
|
|
|
|
if ir > limr then assert false else
|
|
|
|
let token = Obj.magic v.(ir) ib in
|
|
|
|
scan_fmt (succ ir) (stack f token) (succ i)
|
2006-01-12 02:18:18 -08:00
|
|
|
| 'l' | 'n' | 'L' as conv ->
|
2006-05-04 05:52:22 -07:00
|
|
|
let i = succ i in
|
2006-11-06 02:19:12 -08:00
|
|
|
if i > lim then scan_fmt ir (stack f (get_count conv ib)) i else begin
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2006-01-12 02:18:18 -08:00
|
|
|
(* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
|
2003-07-01 09:30:12 -07:00
|
|
|
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_int_conv conv max ib in
|
2006-01-12 02:18:18 -08:00
|
|
|
(* Look back to the character that triggered the integer conversion
|
|
|
|
(this character is either 'l', 'n' or 'L'), to find the
|
|
|
|
conversion to apply to the integer token read. *)
|
2006-04-05 04:49:07 -07:00
|
|
|
begin match Sformat.get fmt (i - 1) with
|
2006-11-06 02:19:12 -08:00
|
|
|
| 'l' -> scan_fmt ir (stack f (token_int32 conv ib)) (succ i)
|
|
|
|
| 'n' -> scan_fmt ir (stack f (token_nativeint conv ib)) (succ i)
|
|
|
|
| _ -> scan_fmt ir (stack f (token_int64 conv ib)) (succ i) end
|
2006-01-12 02:18:18 -08:00
|
|
|
(* This is not an integer conversion, but a regular %l, %n or %L. *)
|
2006-11-06 02:19:12 -08:00
|
|
|
| _ -> scan_fmt ir (stack f (get_count conv ib)) i end
|
2004-09-22 02:17:21 -07:00
|
|
|
| '!' ->
|
2006-11-06 02:19:12 -08:00
|
|
|
if Scanning.end_of_input ib then scan_fmt ir f (succ i)
|
2003-07-07 04:13:21 -07:00
|
|
|
else bad_input "end of input not found"
|
2003-11-30 14:13:03 -08:00
|
|
|
| '_' ->
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_conversion true max ir f (succ i)
|
2004-09-22 02:17:21 -07:00
|
|
|
| '0' .. '9' as conv ->
|
2003-11-30 14:13:03 -08:00
|
|
|
let rec read_width accu i =
|
|
|
|
if i > lim then accu, i else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2003-11-30 14:13:03 -08:00
|
|
|
| '0' .. '9' as c ->
|
|
|
|
let accu = 10 * accu + int_value_of_char c in
|
2006-05-04 05:52:22 -07:00
|
|
|
read_width accu (succ i)
|
2003-11-30 14:13:03 -08:00
|
|
|
| _ -> accu, i in
|
2006-05-04 05:52:22 -07:00
|
|
|
let max, i = read_width (int_value_of_char conv) (succ i) in
|
2004-12-06 22:17:12 -08:00
|
|
|
if i > lim then incomplete_format fmt else begin
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2004-09-22 02:17:21 -07:00
|
|
|
| '.' ->
|
2006-05-04 05:52:22 -07:00
|
|
|
let p, i = read_width 0 (succ i) in
|
2006-11-06 02:19:12 -08:00
|
|
|
scan_conversion skip (succ (max + p)) ir f i
|
|
|
|
| _ -> scan_conversion skip max ir f i end
|
2006-10-27 02:12:07 -07:00
|
|
|
| '(' | '{' as conv (* ')' '}' *) ->
|
2004-09-22 02:17:21 -07:00
|
|
|
let i = succ i in
|
2004-12-06 22:17:12 -08:00
|
|
|
let j =
|
2006-05-04 05:52:22 -07:00
|
|
|
Printf.sub_format
|
2006-10-04 03:02:01 -07:00
|
|
|
incomplete_format bad_conversion conv fmt i in
|
|
|
|
let mf = Sformat.sub fmt i (j - 2 - i) in
|
2004-11-30 10:57:04 -08:00
|
|
|
let _x = scan_String max ib in
|
2004-10-04 13:19:44 -07:00
|
|
|
let rf = token_string ib in
|
2006-10-04 03:02:01 -07:00
|
|
|
if not (compatible_format_type rf mf) then format_mismatch rf mf ib else
|
2006-11-06 02:19:12 -08:00
|
|
|
if conv = '{' (* '}' *) then scan_fmt ir (stack f rf) j else
|
|
|
|
let nf = scan_fmt ir (Obj.magic rf) 0 in
|
|
|
|
scan_fmt ir (stack f nf) j
|
2004-12-06 22:17:12 -08:00
|
|
|
| c -> bad_conversion fmt i c
|
2002-05-09 11:26:44 -07:00
|
|
|
|
2002-10-07 05:20:07 -07:00
|
|
|
and scan_fmt_stoppers i =
|
2002-05-09 11:26:44 -07:00
|
|
|
if i > lim then i - 1, [] else
|
2006-04-05 04:49:07 -07:00
|
|
|
match Sformat.get fmt i with
|
2006-05-04 05:52:22 -07:00
|
|
|
| '@' when i < lim -> let i = succ i in i, [Sformat.get fmt i]
|
2004-12-06 22:17:12 -08:00
|
|
|
| '@' when i = lim -> incomplete_format fmt
|
2002-05-09 11:26:44 -07:00
|
|
|
| _ -> i - 1, [] in
|
|
|
|
|
2002-05-07 00:41:12 -07:00
|
|
|
Scanning.reset_token ib;
|
2002-07-11 15:39:26 -07:00
|
|
|
|
2002-06-26 02:32:27 -07:00
|
|
|
let v =
|
2006-11-06 02:19:12 -08:00
|
|
|
try scan_fmt 0 (fun () -> f) 0 with
|
2002-07-11 15:39:26 -07:00
|
|
|
| (Scan_failure _ | Failure _ | End_of_file) as exc ->
|
|
|
|
stack (delay ef ib) exc in
|
2006-11-06 02:19:12 -08:00
|
|
|
return v;;
|
|
|
|
|
|
|
|
let mkscanf ib ef fmt =
|
|
|
|
let sc = scan_format ib ef in
|
|
|
|
ascanf sc fmt;;
|
|
|
|
|
|
|
|
let kscanf ib ef fmt = mkscanf ib ef fmt;;
|
2002-06-26 02:32:27 -07:00
|
|
|
|
2002-07-28 14:29:42 -07:00
|
|
|
let bscanf ib = kscanf ib scanf_bad_input;;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-12 10:57:36 -07:00
|
|
|
let fscanf ic = bscanf (Scanning.from_channel ic);;
|
2002-05-07 00:41:12 -07:00
|
|
|
|
2002-05-12 10:57:36 -07:00
|
|
|
let sscanf s = bscanf (Scanning.from_string s);;
|
2002-06-26 02:32:27 -07:00
|
|
|
|
2002-12-08 07:16:09 -08:00
|
|
|
let scanf fmt = bscanf Scanning.stdib fmt;;
|
2005-09-20 14:42:44 -07:00
|
|
|
|
2006-01-03 10:27:32 -08:00
|
|
|
let bscanf_format ib fmt f =
|
2006-04-05 04:49:07 -07:00
|
|
|
let fmt = Sformat.unsafe_to_string fmt in
|
2005-09-20 14:42:44 -07:00
|
|
|
let fmt1 = ignore (scan_String max_int ib); token_string ib in
|
2006-01-03 10:27:32 -08:00
|
|
|
if not (compatible_format_type fmt1 fmt) then
|
|
|
|
format_mismatch fmt1 fmt ib else
|
2006-10-27 02:12:07 -07:00
|
|
|
f (string_to_format fmt1);;
|
2005-09-20 14:42:44 -07:00
|
|
|
|
2006-01-12 02:18:18 -08:00
|
|
|
let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;;
|
|
|
|
|
2006-10-27 02:12:07 -07:00
|
|
|
let quote_string s =
|
|
|
|
let b = Buffer.create (String.length s + 2) in
|
|
|
|
Buffer.add_char b '\"';
|
|
|
|
Buffer.add_string b s;
|
|
|
|
Buffer.add_char b '\"';
|
|
|
|
Buffer.contents b;;
|
|
|
|
|
|
|
|
let format_from_string s fmt =
|
|
|
|
sscanf_format (quote_string s) fmt (fun x -> x);;
|