Module Sformat is internal to printf. Better typing specifications of functions working with format strings.

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@7374 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Pierre Weis 2006-04-05 11:49:07 +00:00
parent b9b6e0fa2c
commit 176b3c0da8
6 changed files with 248 additions and 135 deletions

View File

@ -910,14 +910,16 @@ and set_tags =
**************************************************************)
module Sformat = Printf.Sformat;;
(* Error messages when processing formats. *)
(* Trailer: giving up at character number ... *)
let giving_up mess fmt i =
"fprintf: " ^ mess ^ " ``" ^ fmt ^ "'', \
"fprintf: " ^ mess ^ " ``" ^ Sformat.to_string fmt ^ "'', \
giving up at character number " ^ string_of_int i ^
(if i < String.length fmt
then " (" ^ String.make 1 fmt.[i] ^ ")."
(if i < Sformat.length fmt
then " (" ^ String.make 1 (Sformat.get fmt i) ^ ")."
else String.make 1 '.');;
(* When an invalid format deserves a special error explanation. *)
@ -963,8 +965,6 @@ let implode_rev s0 = function
| [] -> s0
| l -> String.concat "" (List.rev (s0 :: l));;
external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
(* [fprintf_out] is the printf-like function generator: given the
- [to_s] flag that tells if we are printing into a string,
- the [get_out] function that has to be called at the end of formatting,
@ -975,8 +975,7 @@ external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
applications of [fprintf_out]. *)
let mkprintf to_s get_out =
let rec kprintf k fmt =
let fmt = format_to_string fmt in
let len = String.length fmt in
let len = Sformat.length fmt in
let kpr fmt v =
let ppf = get_out fmt in
@ -996,13 +995,13 @@ let mkprintf to_s get_out =
let rec doprn n i =
if i >= len then Obj.magic (k ppf) else
match fmt.[i] with
match Sformat.get fmt i with
| '%' ->
Printf.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
| '@' ->
let i = succ i in
if i >= len then invalid_format fmt i else
begin match fmt.[i] with
begin match Sformat.get fmt i with
| '[' ->
do_pp_open_box ppf n (succ i)
| ']' ->
@ -1065,7 +1064,7 @@ let mkprintf to_s get_out =
and get_int n i c =
if i >= len then invalid_integer fmt i else
match fmt.[i] with
match Sformat.get fmt i with
| ' ' -> get_int n (succ i) c
| '%' ->
let cont_s n s i = c (format_int_of_string fmt i s) n i
@ -1077,33 +1076,34 @@ let mkprintf to_s get_out =
| _ ->
let rec get j =
if j >= len then invalid_integer fmt j else
match fmt.[j] with
match Sformat.get fmt j with
| '0' .. '9' | '-' -> get (succ j)
| _ ->
let size =
if j = i then size_of_int 0 else
format_int_of_string fmt j (String.sub fmt i (j - i)) in
let s = Sformat.sub fmt i (j - i) in
format_int_of_string fmt j s in
c size n j in
get i
and skip_gt i =
if i >= len then invalid_format fmt i else
match fmt.[i] with
match Sformat.get fmt i with
| ' ' -> skip_gt (succ i)
| '>' -> succ i
| _ -> invalid_format fmt i
and get_box_kind i =
if i >= len then Pp_box, i else
match fmt.[i] with
match Sformat.get fmt i with
| 'h' ->
let i = succ i in
if i >= len then Pp_hbox, i else
begin match fmt.[i] with
begin match Sformat.get fmt i with
| 'o' ->
let i = succ i in
if i >= len then format_invalid_arg "bad box format" fmt i else
begin match fmt.[i] with
begin match Sformat.get fmt i with
| 'v' -> Pp_hovbox, succ i
| c ->
format_invalid_arg
@ -1118,11 +1118,11 @@ let mkprintf to_s get_out =
and get_tag_name n i c =
let rec get accu n i j =
if j >= len
then c (implode_rev (String.sub fmt i (j - i)) accu) n j else
match fmt.[j] with
| '>' -> c (implode_rev (String.sub fmt i (j - i)) accu) n j
then c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j else
match Sformat.get fmt j with
| '>' -> c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j
| '%' ->
let s0 = String.sub fmt i (j - i) in
let s0 = Sformat.sub fmt i (j - i) in
let cont_s n s i = get (s :: s0 :: accu) n i i
and cont_a n printer arg i =
let s =
@ -1146,7 +1146,7 @@ let mkprintf to_s get_out =
and do_pp_break ppf n i =
if i >= len then begin pp_print_space ppf (); doprn n i end else
match fmt.[i] with
match Sformat.get fmt i with
| '<' ->
let rec got_nspaces nspaces n i =
get_int n i (got_offset nspaces)
@ -1158,7 +1158,7 @@ let mkprintf to_s get_out =
and do_pp_open_box ppf n i =
if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else
match fmt.[i] with
match Sformat.get fmt i with
| '<' ->
let kind, i = get_box_kind (succ i) in
let got_size size n i =
@ -1169,7 +1169,7 @@ let mkprintf to_s get_out =
and do_pp_open_tag ppf n i =
if i >= len then begin pp_open_tag ppf ""; doprn n i end else
match fmt.[i] with
match Sformat.get fmt i with
| '<' ->
let got_name tag_name n i =
pp_open_tag ppf tag_name;

View File

@ -13,14 +13,33 @@
(* $Id$ *)
external format_int: string -> int -> string = "caml_format_int"
external format_int32: string -> int32 -> string = "caml_int32_format"
external format_float: string -> float -> string
= "caml_format_float"
external format_int: string -> int -> string
= "caml_format_int"
external format_int32: string -> int32 -> string
= "caml_int32_format"
external format_nativeint: string -> nativeint -> string
= "caml_nativeint_format"
external format_int64: string -> int64 -> string = "caml_int64_format"
external format_float: string -> float -> string = "caml_format_float"
= "caml_nativeint_format"
external format_int64: string -> int64 -> string
= "caml_int64_format"
external format_to_string: ('a, 'b, 'c, 'd) format4 -> string = "%identity"
module Sformat = struct
external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
= "%identity"
external length : ('a, 'b, 'c, 'd) format4 -> int
= "%string_length"
external get : ('a, 'b, 'c, 'd) format4 -> int -> char
= "%string_safe_get"
external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
= "%string_unsafe_get"
(* external set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
= "%string_safe_set"
external unsafe_set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
= "%string_unsafe_set" *)
let sub fmt idx len = String.sub (unsafe_to_string fmt) idx len
let to_string fmt = sub fmt 0 (length fmt)
end;;
type index;;
@ -32,28 +51,33 @@ let succ_index = add_int_index 1;;
(* Litteral position are one-based (hence pred p instead of p). *)
let index_of_litteral_position p = index_of_int (pred p);;
let bad_conversion fmt i c =
let bad_conversion sfmt i c =
invalid_arg
("printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^
string_of_int i ^ " in format string ``" ^ fmt ^ "''");;
string_of_int i ^ " in format string ``" ^ sfmt ^ "''");;
let bad_conversion_format fmt i c =
bad_conversion (Sformat.to_string fmt) i c;;
let incomplete_format fmt =
invalid_arg
("printf: premature end of format string ``" ^ fmt ^ "''");;
("printf: premature end of format string ``" ^
Sformat.to_string fmt ^ "''");;
(* Parses a format to return the specified length and the padding direction. *)
let parse_format fmt =
let parse_format sfmt =
let rec parse neg i =
if i >= String.length fmt then (0, neg) else
match String.unsafe_get fmt i with
if i >= String.length sfmt then (0, neg) else
match String.unsafe_get sfmt i with
| '1'..'9' ->
(int_of_string (String.sub fmt i (String.length fmt - i - 1)),
(int_of_string
(String.sub sfmt i (String.length sfmt - i - 1)),
neg)
| '-' ->
parse true (succ i)
| _ ->
parse neg (succ i) in
try parse false 1 with Failure _ -> bad_conversion fmt 0 's'
try parse false 1 with Failure _ -> bad_conversion sfmt 0 's'
(* Pad a (sub) string into a blank string of length [p],
on the right if [neg] is true, on the left otherwise. *)
@ -68,18 +92,19 @@ let pad_string pad_char p neg s i len =
(* Format a string given a %s format, e.g. %40s or %-20s.
To do: ignore other flags (#, +, etc)? *)
let format_string fmt s =
let (p, neg) = parse_format fmt in
pad_string ' ' p neg s 0 (String.length s)
let format_string sfmt s =
let (p, neg) = parse_format sfmt in
pad_string ' ' p neg s 0 (String.length s);;
(* Extract a %format from [fmt] between [start] and [stop] inclusive.
'*' in the format are replaced by integers taken from the [widths] list. *)
(* Extract a format string out of [fmt] between [start] and [stop] inclusive.
'*' in the format are replaced by integers taken from the [widths] list.
extract_format returns a string. *)
let extract_format fmt start stop widths =
let skip_positional_spec start =
match String.unsafe_get fmt start with
match Sformat.unsafe_get fmt start with
| '0'..'9' ->
let rec skip_int_litteral i =
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '0'..'9' -> skip_int_litteral (succ i)
| '$' -> succ i
| _ -> start in
@ -90,7 +115,7 @@ let extract_format fmt start stop widths =
Buffer.add_char b '%';
let rec fill_format i widths =
if i <= stop then
match (String.unsafe_get fmt i, widths) with
match (Sformat.unsafe_get fmt i, widths) with
| ('*', h :: t) ->
Buffer.add_string b (string_of_int h);
let i = skip_positional_spec (succ i) in
@ -102,10 +127,13 @@ let extract_format fmt start stop widths =
fill_format start (List.rev widths);
Buffer.contents b;;
let format_int_with_conv conv fmt i =
let extract_format_int conv fmt start stop widths =
let sfmt = extract_format fmt start stop widths in
match conv with
| 'n' | 'N' -> fmt.[String.length fmt - 1] <- 'u'; format_int fmt i
| _ -> format_int fmt i
| 'n' | 'N' ->
sfmt. [String.length sfmt - 1] <- 'u';
sfmt
| _ -> sfmt;;
(* Returns the position of the last character of the meta format
string, starting from position [i], inside a given format [fmt].
@ -114,34 +142,36 @@ let format_int_with_conv conv fmt i =
%) (when [conv = '(']). Hence, [sub_format] returns the index of
the character ')' or '}' that ends the meta format, according to
the character [conv]. *)
let sub_format incomplete_format bad_conversion conv fmt i =
let len = String.length fmt in
let sub_format incomplete_format bad_conversion_format conv fmt i =
let len = Sformat.length fmt in
let rec sub_fmt c i =
let close = if c = '(' then ')' else (* '{' *) '}' in
let rec sub j =
if j >= len then incomplete_format fmt else
match fmt.[j] with
match Sformat.get fmt j with
| '%' -> sub_sub (succ j)
| _ -> sub (succ j)
and sub_sub j =
if j >= len then incomplete_format fmt else
match fmt.[j] with
match Sformat.get fmt j with
| '(' | '{' as c ->
let j = sub_fmt c (succ j) in sub (succ j)
| '}' | ')' as c ->
if c = close then j else bad_conversion fmt i c
if c = close then j else bad_conversion_format fmt i c
| _ -> sub (succ j) in
sub i in
sub_fmt conv i;;
let sub_format_for_printf = sub_format incomplete_format bad_conversion;;
let sub_format_for_printf conv =
sub_format incomplete_format bad_conversion_format conv;;
let iter_on_format_args fmt add_conv add_char =
let lim = String.length fmt - 1 in
let lim = Sformat.length fmt - 1 in
let rec scan_flags skip i =
if i > lim then incomplete_format fmt else
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '*' -> scan_flags skip (add_conv skip i 'i')
| '$' -> scan_flags skip (succ i)
| '#' | '-' | ' ' | '+' -> scan_flags skip (succ i)
@ -151,7 +181,7 @@ let iter_on_format_args fmt add_conv add_char =
| _ -> scan_conv skip i
and scan_conv skip i =
if i > lim then incomplete_format fmt else
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '%' | '!' -> succ i
| 's' | 'S' | '[' -> add_conv skip i 's'
| 'c' | 'C' -> add_conv skip i 'c'
@ -162,7 +192,7 @@ let iter_on_format_args fmt add_conv add_char =
| 'l' | 'n' | 'L' as conv ->
let j = succ i in
if j > lim then add_conv skip i 'i' else begin
match fmt.[j] with
match Sformat.get fmt j with
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
add_char (add_conv skip i conv) 'i'
| c -> add_conv skip i 'i' end
@ -172,7 +202,7 @@ let iter_on_format_args fmt add_conv add_char =
let j = sub_format_for_printf conv fmt i in
(* Add the meta specification anyway. *)
let rec loop i =
if i < j - 1 then loop (add_char i fmt.[i]) in
if i < j - 1 then loop (add_char i (Sformat.get fmt i)) in
loop i;
scan_conv skip j
| '(' as conv ->
@ -181,11 +211,11 @@ let iter_on_format_args fmt add_conv add_char =
anyway. *)
scan_fmt (add_conv skip i conv)
| '}' | ')' as conv -> add_conv skip i conv
| conv -> bad_conversion fmt i conv
| conv -> bad_conversion_format fmt i conv
and scan_fmt i =
if i < lim then
if fmt.[i] = '%'
if Sformat.get fmt i = '%'
then scan_fmt (scan_flags false (succ i))
else scan_fmt (succ i)
else i in
@ -197,7 +227,7 @@ let iter_on_format_args fmt add_conv add_char =
It also checks the well-formedness of the format string.
For instance, [summarize_format_type "A number %d\n"] is "%i". *)
let summarize_format_type fmt =
let len = String.length fmt in
let len = Sformat.length fmt in
let b = Buffer.create len in
let add_char i c = Buffer.add_char b c; succ i in
let add_conv skip i c =
@ -278,10 +308,10 @@ type param_spec = Spec_none | Spec_index of index;;
problems: the type would be dependant of the {\em value} of an integer
argument to printf. *)
let scan_positional_spec fmt got_pos n i =
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '0'..'9' as d ->
let rec get_int_litteral accu j =
match String.unsafe_get fmt j with
match Sformat.unsafe_get fmt j with
| '0'..'9' as d ->
get_int_litteral (10 * accu + (int_of_char d - 48)) (succ j)
| '$' ->
@ -320,8 +350,8 @@ let get_index spec n =
"next pos" is the position in [fmt] of the first character following
the %format in [fmt]. *)
(* Note: here, rather than test explicitly against [String.length fmt]
to detect the end of the format, we use [String.unsafe_get] and
(* Note: here, rather than test explicitly against [Sformat.length fmt]
to detect the end of the format, we use [Sformat.unsafe_get] and
rely on the fact that we'll get a "nul" character if we access
one past the end of the string. These "nul" characters are then
caught by the [_ -> bad_conversion] clauses below.
@ -335,7 +365,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
scan_positional_spec fmt got_pos n i
and scan_flags spec n widths i =
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '*' ->
let got_pos wspec i =
let (width : int) = get_arg wspec n in
@ -346,7 +376,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
| _ -> scan_conv spec n widths i
and scan_conv spec n widths i =
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '%' ->
cont_s n "%" (succ i)
| 's' | 'S' as conv ->
@ -364,7 +394,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
cont_s (next_index spec n) s (succ i)
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' | 'N' as conv ->
let (x : int) = get_arg spec n in
let s = format_int_with_conv conv (extract_format fmt pos i widths) x in
let s =
format_int (extract_format_int conv fmt pos i widths) x in
cont_s (next_index spec n) s (succ i)
| 'f' | 'e' | 'E' | 'g' | 'G' | 'F' as conv ->
let (x : float) = get_arg spec n in
@ -387,7 +418,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
let printer = get_arg spec n in
cont_t (next_index spec n) printer (succ i)
| 'l' | 'n' | 'L' as conv ->
begin match String.unsafe_get fmt (succ i) with
begin match Sformat.unsafe_get fmt (succ i) with
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
let s =
match conv with
@ -403,10 +434,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
cont_s (next_index spec n) s (i + 2)
| _ ->
let (x : int) = get_arg spec n in
cont_s
(next_index spec n)
(format_int_with_conv 'n' (extract_format fmt pos i widths) x)
(succ i)
let s = format_int (extract_format_int 'n' fmt pos i widths) x in
cont_s (next_index spec n) s (succ i)
end
| '!' -> cont_f n (succ i)
| '{' | '(' as conv (* ')' '}' *) ->
@ -417,31 +446,31 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
(* Just print the format argument as a specification. *)
cont_s
(next_index spec n)
(summarize_format_type (format_to_string xf))
(summarize_format_type xf)
j else
(* Use the format argument instead of the format specification. *)
cont_m (next_index spec n) xf j
| (* '(' *) ')' ->
cont_s n "" (succ i)
| conv ->
bad_conversion fmt i conv in
bad_conversion_format fmt i conv in
scan_positional n [] (succ pos);;
let mkprintf str get_out outc outs flush k fmt =
let fmt = format_to_string fmt in
(* let fmt = Sformat.length fmt in*)
(* out is global to this invocation of pr, and must be shared by all its
recursive calls (fif) any. *)
let out = get_out fmt in
let rec pr k n fmt v =
let len = String.length fmt in
let len = Sformat.length fmt in
let rec doprn n i =
if i >= len then Obj.magic (k out) else
match String.unsafe_get fmt i with
match Sformat.unsafe_get fmt i with
| '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
| c -> outc out c; doprn n (succ i)
and cont_s n s i =
@ -461,8 +490,8 @@ let mkprintf str get_out outc outs flush k fmt =
and cont_f n i =
flush out; doprn n i
and cont_m n xf i =
let m = add_int_index (nargs_of_format_type (format_to_string xf)) n in
pr (Obj.magic (fun _ -> doprn m i)) n (format_to_string xf) v in
let m = add_int_index (nargs_of_format_type xf) n in
pr (Obj.magic (fun _ -> doprn m i)) n xf v in
doprn n 0 in
@ -481,7 +510,7 @@ let kbprintf k b =
let bprintf b = kbprintf ignore b
let get_buff fmt =
let len = 2 * String.length fmt in
let len = 2 * Sformat.length fmt in
Buffer.create len;;
let get_contents b =

View File

@ -142,15 +142,34 @@ type index;;
external index_of_int : int -> index = "%identity";;
val scan_format : string -> 'a array -> index -> int ->
(index -> string -> int -> 'b) ->
(index -> 'c -> 'd -> int -> 'b) ->
(index -> 'e -> int -> 'b) ->
(index -> int -> 'b) ->
(index -> ('h, 'i, 'j, 'k) format4 -> int -> 'b) -> 'b
module Sformat : sig
external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
= "%identity"
external length : ('a, 'b, 'c, 'd) format4 -> int
= "%string_length"
external get : ('a, 'b, 'c, 'd) format4 -> int -> char
= "%string_safe_get"
external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
= "%string_unsafe_get"
val sub : ('a, 'b, 'c, 'd) format4 -> int -> int -> string
val to_string : ('a, 'b, 'c, 'd) format4 -> string
end
val scan_format : ('a, 'b, 'c, 'd) format4 ->
'e array ->
index ->
int ->
(index -> string -> int -> 'f) ->
(index -> 'g -> 'h -> int -> 'f) ->
(index -> 'i -> int -> 'f) ->
(index -> int -> 'f) ->
(index -> ('j, 'k, 'l, 'm) format4 -> int -> 'f) -> 'f
val sub_format :
(string -> int) -> (string -> int -> char -> int) ->
char -> string -> int -> int
val summarize_format_type : string -> string
val kapr : (string -> Obj.t array -> 'a) -> string -> 'a
(('a, 'b, 'c, 'd) format4 -> int) ->
(('a, 'b, 'c, 'd) format4 -> int -> char -> int) ->
char -> ('a, 'b, 'c, 'd) format4 -> int -> int
val summarize_format_type : ('a, 'b, 'c, 'd) format4 -> string
val kapr :
(('a, 'b, 'c, 'd) format4 -> Obj.t array -> 'e) ->
('a, 'b, 'c, 'd) format4 -> 'e

View File

@ -217,7 +217,11 @@ let from_function = create "function input";;
(* Perform bufferized input to improve efficiency. *)
let file_buffer_size = ref 1024;;
let from_file_channel fname ic =
(* To close a channel at end of input. *)
let scan_close_at_end ic = close_in ic; raise End_of_file;;
(* Scan from an input channel. *)
let from_ic scan_close_ic fname ic =
let len = !file_buffer_size in
let buf = String.create len in
let i = ref 0 in
@ -225,29 +229,90 @@ let from_file_channel fname ic =
let next () =
if !i < !lim then begin let c = buf.[!i] in incr i; c end else begin
lim := input ic buf 0 len;
if !lim = 0 then raise End_of_file else begin
if !lim = 0 then scan_close_ic ic else begin
i := 1;
buf.[0]
end
end in
create fname next;;
let from_file fname = from_file_channel fname (open_in fname);;
let from_file_bin fname = from_file_channel fname (open_in_bin fname);;
let from_ic_close_at_end = from_ic scan_close_at_end;;
let from_input_channel fname ic =
let next () = input_char ic in
create fname next;;
let from_file fname = from_ic_close_at_end fname (open_in fname);;
let from_file_bin fname = from_ic_close_at_end fname (open_in_bin fname);;
let from_channel = from_input_channel "input channel";;
(* Input channel [ic] is not allocated here, hence it may be shared (two
functions of the program may successively read from it). Furthermore, the
user may define more than one scanning buffer reading from the same [ic]
channel.
(* The scanning buffer reading from [stdin].*)
let stdib = from_input_channel "stdin" stdin;;
However, we cannot prevent scanf to use one lookahead character if needed;
this implies that multiple functions alternatively scanning the same [ic]
channel will miss characters from time to time, due to unnoticed look ahead
characters, silently read from [ic] (hence no more available for reading)
and retained inside the scanning buffer for correct scanning from the same
scanning buffer. This phenomenon is even worse in case of multiple
definition of scanning buffers from the same [ic].
Hence, we do bufferize characters to create a scnning buffer from an input
channel in order to preserve the same semantics as other from_* functions
above: two successive calls to the scanner will work appropriately, since
the bufferized character (if any) will be retained inside the scanning
buffer from a call to the next one.
Otherwise, if we do not bufferize characters, we will loose the clearly
correct scanning behaviour even for the simple regular case, when we scan
the (possibly shared) channel [ic] using a unique function, while not
gaining anything for multiple functions reading from [ic] or multiple
allocation of scanning buffers reading from the same [ic].
A more ambitious fix could be to have a memo scanning buffer allocation
for reading from input channel not allocated from within Scanf. *)
let scan_at_end ic = raise End_of_file;;
let from_channel = from_ic scan_at_end "input channel";;
(* The scanning buffer reading from [stdin].
One could try to define stdib as from_channel stdin,
but unfortunately the toplevel interaction would be wrong.
This is due to some kind of ``race condition'' when reading from stdin,
since the interactive compiler and scanf will simultaneously read the
material they need from stdin; then, confusion will result from what should
be read by the toplevel and what should be read by scanf.
This is even more complicated by the one character lookahead that scanf
is sometimes obliged to maintain: the lookahead character will be available
for the next (scanf) entry, seamingly coming from nowhere.
Also no End_of_file is raised when reading from stdin: if not enough
characters have been read, we simply ask to read more. *)
(*let stdib =
let buf = ref ""
and len = ref 0 in
let mk_buff l =
buf := String.create l;
len := l in
let i = ref 0 in
let rec next () =
if !i < !len then begin let c = !buf.[!i] in incr i; c end else
let s = input_line stdin in
let ls = String.length s in
if ls > !len then mk_buff ls;
String.blit s 0 !buf 0 ls;
i := 0;
next () in
create "stdin" next;;
*)
let stdib = from_ic scan_at_end "stdin" stdin;;
end;;
(* Formatted input functions. *)
module Sformat = Printf.Sformat;;
external string_to_format :
string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
(* Reporting errors. *)
exception Scan_failure of string;;
@ -267,11 +332,12 @@ let bad_conversion fmt i c =
invalid_arg
(Printf.sprintf
"scanf: bad conversion %%%c, at char number %i \
in format string ``%s''" c i fmt);;
in format string ``%s''" c i (Sformat.to_string fmt));;
let incomplete_format fmt =
invalid_arg
(Printf.sprintf "scanf: premature end of format string ``%s''" fmt);;
(Printf.sprintf "scanf: premature end of format string ``%s''"
(Sformat.to_string fmt));;
let bad_float () = bad_input "no dot or exponent part found in float token";;
@ -283,7 +349,8 @@ let format_mismatch fmt1 fmt2 ib =
(* Checking that 2 format string are type compatible. *)
let compatible_format_type fmt1 fmt2 =
Printf.summarize_format_type fmt1 = Printf.summarize_format_type fmt2;;
Printf.summarize_format_type (string_to_format fmt1) =
Printf.summarize_format_type (string_to_format fmt2);;
(* Checking that [c] is indeed in the input, then skips it.
In this case, the character c has been explicitely specified in the
@ -347,9 +414,12 @@ let token_float ib = float_of_string (Scanning.token ib);;
since those modules are not available to Scanf.
However, we can bind and use the corresponding primitives that are
available in the runtime. *)
external nativeint_of_string: string -> nativeint = "caml_nativeint_of_string";;
external int32_of_string : string -> int32 = "caml_int32_of_string";;
external int64_of_string : string -> int64 = "caml_int64_of_string";;
external nativeint_of_string : string -> nativeint
= "caml_nativeint_of_string";;
external int32_of_string : string -> int32
= "caml_int32_of_string";;
external int64_of_string : string -> int64
= "caml_int64_of_string";;
let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);;
let token_int32 conv ib = int32_of_string (token_int_literal conv ib);;
@ -648,29 +718,29 @@ type char_set =
(* Char sets are read as sub-strings in the format string. *)
let read_char_set fmt i =
let lim = String.length fmt - 1 in
let lim = Sformat.length fmt - 1 in
let rec find_in_set j =
if j > lim then incomplete_format fmt else
match fmt.[j] with
match Sformat.get fmt j with
| ']' -> j
| c -> find_in_set (j + 1)
and find_set i =
if i > lim then incomplete_format fmt else
match fmt.[i] with
match Sformat.get fmt i with
| ']' -> find_in_set (i + 1)
| c -> find_in_set i in
if i > lim then incomplete_format fmt else
match fmt.[i] with
match Sformat.get fmt i with
| '^' ->
let i = i + 1 in
let j = find_set i in
j, Neg_set (String.sub fmt i (j - i))
j, Neg_set (Sformat.sub fmt i (j - i))
| _ ->
let j = find_set i in
j, Pos_set (String.sub fmt i (j - i));;
j, Pos_set (Sformat.sub fmt i (j - i));;
(* Char sets are now represented as bitvects that are represented as
byte strings. *)
@ -874,11 +944,6 @@ let rec skip_whites ib =
| _ -> ()
end;;
external format_to_string :
('a, 'b, 'c, 'd) format4 -> string = "%identity";;
external string_to_format :
string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
(* The [kscanf] main scanning function.
It takes as arguments:
- an input buffer [ib] from which to read characters,
@ -899,8 +964,8 @@ external string_to_format :
aborts and applies the scanning buffer and a string that explains
the error to the error handling function [ef] (the error continuation). *)
let kscanf ib ef fmt f =
let fmt = format_to_string fmt in
let lim = String.length fmt - 1 in
let lim = Sformat.length fmt - 1 in
let return v = Obj.magic v () in
let delay f x () = f x in
@ -909,7 +974,7 @@ let kscanf ib ef fmt f =
let rec scan_fmt f i =
if i > lim then f else
match fmt.[i] with
match Sformat.get fmt i with
| ' ' -> skip_whites ib; scan_fmt f (i + 1)
| '%' ->
if i > lim then incomplete_format fmt else
@ -917,13 +982,13 @@ let kscanf ib ef fmt f =
| '@' ->
let i = i + 1 in
if i > lim then incomplete_format fmt else begin
check_char ib fmt.[i];
check_char ib (Sformat.get fmt i);
scan_fmt f (i + 1) end
| c -> check_char ib c; scan_fmt f (i + 1)
and scan_conversion skip max f i =
let stack = if skip then no_stack else stack in
match fmt.[i] with
match Sformat.get fmt i with
| '%' as conv ->
check_char ib conv; scan_fmt f (i + 1)
| 'c' when max = 0 ->
@ -961,14 +1026,14 @@ let kscanf ib ef fmt f =
| 'l' | 'n' | 'L' as conv ->
let i = i + 1 in
if i > lim then scan_fmt (stack f (get_count conv ib)) i else begin
match fmt.[i] with
match Sformat.get fmt i with
(* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
let _x = scan_int_conv conv max ib in
(* Look back to the character that triggered the integer conversion
(this character is either 'l', 'n' or 'L'), to find the
conversion to apply to the integer token read. *)
begin match fmt.[i - 1] with
begin match Sformat.get fmt (i - 1) with
| 'l' -> scan_fmt (stack f (token_int32 conv ib)) (i + 1)
| 'n' -> scan_fmt (stack f (token_nativeint conv ib)) (i + 1)
| _ -> scan_fmt (stack f (token_int64 conv ib)) (i + 1) end
@ -985,14 +1050,14 @@ let kscanf ib ef fmt f =
| '0' .. '9' as conv ->
let rec read_width accu i =
if i > lim then accu, i else
match fmt.[i] with
match Sformat.get fmt i with
| '0' .. '9' as c ->
let accu = 10 * accu + int_value_of_char c in
read_width accu (i + 1)
| _ -> accu, i in
let max, i = read_width (int_value_of_char conv) (i + 1) in
if i > lim then incomplete_format fmt else begin
match fmt.[i] with
match Sformat.get fmt i with
| '.' ->
let p, i = read_width 0 (i + 1) in
scan_conversion skip (max + p + 1) f i
@ -1001,7 +1066,7 @@ let kscanf ib ef fmt f =
let i = succ i in
let j =
Printf.sub_format incomplete_format bad_conversion conv fmt i + 1 in
let mf = String.sub fmt i (j - i - 2) in
let mf = Sformat.sub fmt i (j - i - 2) in
let _x = scan_String max ib in
let rf = token_string ib in
if not (compatible_format_type mf rf)
@ -1013,8 +1078,8 @@ let kscanf ib ef fmt f =
and scan_fmt_stoppers i =
if i > lim then i - 1, [] else
match fmt.[i] with
| '@' when i < lim -> let i = i + 1 in i, [fmt.[i]]
match Sformat.get fmt i with
| '@' when i < lim -> let i = i + 1 in i, [Sformat.get fmt i]
| '@' when i = lim -> incomplete_format fmt
| _ -> i - 1, [] in
@ -1035,7 +1100,7 @@ let sscanf s = bscanf (Scanning.from_string s);;
let scanf fmt = bscanf Scanning.stdib fmt;;
let bscanf_format ib fmt f =
let fmt = format_to_string fmt in
let fmt = Sformat.unsafe_to_string fmt in
let fmt1 = ignore (scan_String max_int ib); token_string ib in
if not (compatible_format_type fmt1 fmt) then
format_mismatch fmt1 fmt ib else
@ -1044,4 +1109,4 @@ let bscanf_format ib fmt f =
let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;;
let scan_format s fmt = sscanf_format s fmt (fun x -> x);;
let format_from_string s fmt = sscanf_format s fmt (fun x -> x);;

View File

@ -269,7 +269,7 @@ val sscanf_format :
(('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;;
(** Same as {!Scanf.bscanf}, but inputs from the given string. *)
val scan_format :
val format_from_string :
string -> ('a, 'b, 'c, 'd) format4 -> ('a, 'b, 'c, 'd) format4;;
(** Same as {!Scanf.sscanf_format}, but converts the given string to a format
string. *)

View File

@ -78,4 +78,4 @@ let catch_break on =
(* OCaml version string, must be in the format described in sys.mli. *)
let ocaml_version = "3.10+dev5 (2006-04-05)";;
let ocaml_version = "3.10+dev6 (2006-04-05)";;