Module Sformat is internal to printf. Better typing specifications of functions working with format strings.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@7374 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02master
parent
b9b6e0fa2c
commit
176b3c0da8
|
@ -910,14 +910,16 @@ and set_tags =
|
|||
|
||||
**************************************************************)
|
||||
|
||||
module Sformat = Printf.Sformat;;
|
||||
|
||||
(* Error messages when processing formats. *)
|
||||
|
||||
(* Trailer: giving up at character number ... *)
|
||||
let giving_up mess fmt i =
|
||||
"fprintf: " ^ mess ^ " ``" ^ fmt ^ "'', \
|
||||
"fprintf: " ^ mess ^ " ``" ^ Sformat.to_string fmt ^ "'', \
|
||||
giving up at character number " ^ string_of_int i ^
|
||||
(if i < String.length fmt
|
||||
then " (" ^ String.make 1 fmt.[i] ^ ")."
|
||||
(if i < Sformat.length fmt
|
||||
then " (" ^ String.make 1 (Sformat.get fmt i) ^ ")."
|
||||
else String.make 1 '.');;
|
||||
|
||||
(* When an invalid format deserves a special error explanation. *)
|
||||
|
@ -963,8 +965,6 @@ let implode_rev s0 = function
|
|||
| [] -> s0
|
||||
| l -> String.concat "" (List.rev (s0 :: l));;
|
||||
|
||||
external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
|
||||
|
||||
(* [fprintf_out] is the printf-like function generator: given the
|
||||
- [to_s] flag that tells if we are printing into a string,
|
||||
- the [get_out] function that has to be called at the end of formatting,
|
||||
|
@ -975,8 +975,7 @@ external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";;
|
|||
applications of [fprintf_out]. *)
|
||||
let mkprintf to_s get_out =
|
||||
let rec kprintf k fmt =
|
||||
let fmt = format_to_string fmt in
|
||||
let len = String.length fmt in
|
||||
let len = Sformat.length fmt in
|
||||
|
||||
let kpr fmt v =
|
||||
let ppf = get_out fmt in
|
||||
|
@ -996,13 +995,13 @@ let mkprintf to_s get_out =
|
|||
|
||||
let rec doprn n i =
|
||||
if i >= len then Obj.magic (k ppf) else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '%' ->
|
||||
Printf.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
|
||||
| '@' ->
|
||||
let i = succ i in
|
||||
if i >= len then invalid_format fmt i else
|
||||
begin match fmt.[i] with
|
||||
begin match Sformat.get fmt i with
|
||||
| '[' ->
|
||||
do_pp_open_box ppf n (succ i)
|
||||
| ']' ->
|
||||
|
@ -1065,7 +1064,7 @@ let mkprintf to_s get_out =
|
|||
|
||||
and get_int n i c =
|
||||
if i >= len then invalid_integer fmt i else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| ' ' -> get_int n (succ i) c
|
||||
| '%' ->
|
||||
let cont_s n s i = c (format_int_of_string fmt i s) n i
|
||||
|
@ -1077,33 +1076,34 @@ let mkprintf to_s get_out =
|
|||
| _ ->
|
||||
let rec get j =
|
||||
if j >= len then invalid_integer fmt j else
|
||||
match fmt.[j] with
|
||||
match Sformat.get fmt j with
|
||||
| '0' .. '9' | '-' -> get (succ j)
|
||||
| _ ->
|
||||
let size =
|
||||
if j = i then size_of_int 0 else
|
||||
format_int_of_string fmt j (String.sub fmt i (j - i)) in
|
||||
let s = Sformat.sub fmt i (j - i) in
|
||||
format_int_of_string fmt j s in
|
||||
c size n j in
|
||||
get i
|
||||
|
||||
and skip_gt i =
|
||||
if i >= len then invalid_format fmt i else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| ' ' -> skip_gt (succ i)
|
||||
| '>' -> succ i
|
||||
| _ -> invalid_format fmt i
|
||||
|
||||
and get_box_kind i =
|
||||
if i >= len then Pp_box, i else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| 'h' ->
|
||||
let i = succ i in
|
||||
if i >= len then Pp_hbox, i else
|
||||
begin match fmt.[i] with
|
||||
begin match Sformat.get fmt i with
|
||||
| 'o' ->
|
||||
let i = succ i in
|
||||
if i >= len then format_invalid_arg "bad box format" fmt i else
|
||||
begin match fmt.[i] with
|
||||
begin match Sformat.get fmt i with
|
||||
| 'v' -> Pp_hovbox, succ i
|
||||
| c ->
|
||||
format_invalid_arg
|
||||
|
@ -1118,11 +1118,11 @@ let mkprintf to_s get_out =
|
|||
and get_tag_name n i c =
|
||||
let rec get accu n i j =
|
||||
if j >= len
|
||||
then c (implode_rev (String.sub fmt i (j - i)) accu) n j else
|
||||
match fmt.[j] with
|
||||
| '>' -> c (implode_rev (String.sub fmt i (j - i)) accu) n j
|
||||
then c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j else
|
||||
match Sformat.get fmt j with
|
||||
| '>' -> c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j
|
||||
| '%' ->
|
||||
let s0 = String.sub fmt i (j - i) in
|
||||
let s0 = Sformat.sub fmt i (j - i) in
|
||||
let cont_s n s i = get (s :: s0 :: accu) n i i
|
||||
and cont_a n printer arg i =
|
||||
let s =
|
||||
|
@ -1146,7 +1146,7 @@ let mkprintf to_s get_out =
|
|||
|
||||
and do_pp_break ppf n i =
|
||||
if i >= len then begin pp_print_space ppf (); doprn n i end else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '<' ->
|
||||
let rec got_nspaces nspaces n i =
|
||||
get_int n i (got_offset nspaces)
|
||||
|
@ -1158,7 +1158,7 @@ let mkprintf to_s get_out =
|
|||
|
||||
and do_pp_open_box ppf n i =
|
||||
if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '<' ->
|
||||
let kind, i = get_box_kind (succ i) in
|
||||
let got_size size n i =
|
||||
|
@ -1169,7 +1169,7 @@ let mkprintf to_s get_out =
|
|||
|
||||
and do_pp_open_tag ppf n i =
|
||||
if i >= len then begin pp_open_tag ppf ""; doprn n i end else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '<' ->
|
||||
let got_name tag_name n i =
|
||||
pp_open_tag ppf tag_name;
|
||||
|
|
147
stdlib/printf.ml
147
stdlib/printf.ml
|
@ -13,14 +13,33 @@
|
|||
|
||||
(* $Id$ *)
|
||||
|
||||
external format_int: string -> int -> string = "caml_format_int"
|
||||
external format_int32: string -> int32 -> string = "caml_int32_format"
|
||||
external format_float: string -> float -> string
|
||||
= "caml_format_float"
|
||||
external format_int: string -> int -> string
|
||||
= "caml_format_int"
|
||||
external format_int32: string -> int32 -> string
|
||||
= "caml_int32_format"
|
||||
external format_nativeint: string -> nativeint -> string
|
||||
= "caml_nativeint_format"
|
||||
external format_int64: string -> int64 -> string = "caml_int64_format"
|
||||
external format_float: string -> float -> string = "caml_format_float"
|
||||
= "caml_nativeint_format"
|
||||
external format_int64: string -> int64 -> string
|
||||
= "caml_int64_format"
|
||||
|
||||
external format_to_string: ('a, 'b, 'c, 'd) format4 -> string = "%identity"
|
||||
module Sformat = struct
|
||||
external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
|
||||
= "%identity"
|
||||
external length : ('a, 'b, 'c, 'd) format4 -> int
|
||||
= "%string_length"
|
||||
external get : ('a, 'b, 'c, 'd) format4 -> int -> char
|
||||
= "%string_safe_get"
|
||||
external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
|
||||
= "%string_unsafe_get"
|
||||
(* external set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
|
||||
= "%string_safe_set"
|
||||
external unsafe_set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit
|
||||
= "%string_unsafe_set" *)
|
||||
let sub fmt idx len = String.sub (unsafe_to_string fmt) idx len
|
||||
let to_string fmt = sub fmt 0 (length fmt)
|
||||
end;;
|
||||
|
||||
type index;;
|
||||
|
||||
|
@ -32,28 +51,33 @@ let succ_index = add_int_index 1;;
|
|||
(* Litteral position are one-based (hence pred p instead of p). *)
|
||||
let index_of_litteral_position p = index_of_int (pred p);;
|
||||
|
||||
let bad_conversion fmt i c =
|
||||
let bad_conversion sfmt i c =
|
||||
invalid_arg
|
||||
("printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^
|
||||
string_of_int i ^ " in format string ``" ^ fmt ^ "''");;
|
||||
string_of_int i ^ " in format string ``" ^ sfmt ^ "''");;
|
||||
|
||||
let bad_conversion_format fmt i c =
|
||||
bad_conversion (Sformat.to_string fmt) i c;;
|
||||
|
||||
let incomplete_format fmt =
|
||||
invalid_arg
|
||||
("printf: premature end of format string ``" ^ fmt ^ "''");;
|
||||
("printf: premature end of format string ``" ^
|
||||
Sformat.to_string fmt ^ "''");;
|
||||
|
||||
(* Parses a format to return the specified length and the padding direction. *)
|
||||
let parse_format fmt =
|
||||
let parse_format sfmt =
|
||||
let rec parse neg i =
|
||||
if i >= String.length fmt then (0, neg) else
|
||||
match String.unsafe_get fmt i with
|
||||
if i >= String.length sfmt then (0, neg) else
|
||||
match String.unsafe_get sfmt i with
|
||||
| '1'..'9' ->
|
||||
(int_of_string (String.sub fmt i (String.length fmt - i - 1)),
|
||||
(int_of_string
|
||||
(String.sub sfmt i (String.length sfmt - i - 1)),
|
||||
neg)
|
||||
| '-' ->
|
||||
parse true (succ i)
|
||||
| _ ->
|
||||
parse neg (succ i) in
|
||||
try parse false 1 with Failure _ -> bad_conversion fmt 0 's'
|
||||
try parse false 1 with Failure _ -> bad_conversion sfmt 0 's'
|
||||
|
||||
(* Pad a (sub) string into a blank string of length [p],
|
||||
on the right if [neg] is true, on the left otherwise. *)
|
||||
|
@ -68,18 +92,19 @@ let pad_string pad_char p neg s i len =
|
|||
|
||||
(* Format a string given a %s format, e.g. %40s or %-20s.
|
||||
To do: ignore other flags (#, +, etc)? *)
|
||||
let format_string fmt s =
|
||||
let (p, neg) = parse_format fmt in
|
||||
pad_string ' ' p neg s 0 (String.length s)
|
||||
let format_string sfmt s =
|
||||
let (p, neg) = parse_format sfmt in
|
||||
pad_string ' ' p neg s 0 (String.length s);;
|
||||
|
||||
(* Extract a %format from [fmt] between [start] and [stop] inclusive.
|
||||
'*' in the format are replaced by integers taken from the [widths] list. *)
|
||||
(* Extract a format string out of [fmt] between [start] and [stop] inclusive.
|
||||
'*' in the format are replaced by integers taken from the [widths] list.
|
||||
extract_format returns a string. *)
|
||||
let extract_format fmt start stop widths =
|
||||
let skip_positional_spec start =
|
||||
match String.unsafe_get fmt start with
|
||||
match Sformat.unsafe_get fmt start with
|
||||
| '0'..'9' ->
|
||||
let rec skip_int_litteral i =
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '0'..'9' -> skip_int_litteral (succ i)
|
||||
| '$' -> succ i
|
||||
| _ -> start in
|
||||
|
@ -90,7 +115,7 @@ let extract_format fmt start stop widths =
|
|||
Buffer.add_char b '%';
|
||||
let rec fill_format i widths =
|
||||
if i <= stop then
|
||||
match (String.unsafe_get fmt i, widths) with
|
||||
match (Sformat.unsafe_get fmt i, widths) with
|
||||
| ('*', h :: t) ->
|
||||
Buffer.add_string b (string_of_int h);
|
||||
let i = skip_positional_spec (succ i) in
|
||||
|
@ -102,10 +127,13 @@ let extract_format fmt start stop widths =
|
|||
fill_format start (List.rev widths);
|
||||
Buffer.contents b;;
|
||||
|
||||
let format_int_with_conv conv fmt i =
|
||||
let extract_format_int conv fmt start stop widths =
|
||||
let sfmt = extract_format fmt start stop widths in
|
||||
match conv with
|
||||
| 'n' | 'N' -> fmt.[String.length fmt - 1] <- 'u'; format_int fmt i
|
||||
| _ -> format_int fmt i
|
||||
| 'n' | 'N' ->
|
||||
sfmt. [String.length sfmt - 1] <- 'u';
|
||||
sfmt
|
||||
| _ -> sfmt;;
|
||||
|
||||
(* Returns the position of the last character of the meta format
|
||||
string, starting from position [i], inside a given format [fmt].
|
||||
|
@ -114,34 +142,36 @@ let format_int_with_conv conv fmt i =
|
|||
%) (when [conv = '(']). Hence, [sub_format] returns the index of
|
||||
the character ')' or '}' that ends the meta format, according to
|
||||
the character [conv]. *)
|
||||
let sub_format incomplete_format bad_conversion conv fmt i =
|
||||
let len = String.length fmt in
|
||||
let sub_format incomplete_format bad_conversion_format conv fmt i =
|
||||
let len = Sformat.length fmt in
|
||||
let rec sub_fmt c i =
|
||||
let close = if c = '(' then ')' else (* '{' *) '}' in
|
||||
let rec sub j =
|
||||
if j >= len then incomplete_format fmt else
|
||||
match fmt.[j] with
|
||||
match Sformat.get fmt j with
|
||||
| '%' -> sub_sub (succ j)
|
||||
| _ -> sub (succ j)
|
||||
and sub_sub j =
|
||||
if j >= len then incomplete_format fmt else
|
||||
match fmt.[j] with
|
||||
match Sformat.get fmt j with
|
||||
| '(' | '{' as c ->
|
||||
let j = sub_fmt c (succ j) in sub (succ j)
|
||||
| '}' | ')' as c ->
|
||||
if c = close then j else bad_conversion fmt i c
|
||||
if c = close then j else bad_conversion_format fmt i c
|
||||
| _ -> sub (succ j) in
|
||||
sub i in
|
||||
sub_fmt conv i;;
|
||||
|
||||
let sub_format_for_printf = sub_format incomplete_format bad_conversion;;
|
||||
let sub_format_for_printf conv =
|
||||
sub_format incomplete_format bad_conversion_format conv;;
|
||||
|
||||
let iter_on_format_args fmt add_conv add_char =
|
||||
let lim = String.length fmt - 1 in
|
||||
|
||||
let lim = Sformat.length fmt - 1 in
|
||||
|
||||
let rec scan_flags skip i =
|
||||
if i > lim then incomplete_format fmt else
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '*' -> scan_flags skip (add_conv skip i 'i')
|
||||
| '$' -> scan_flags skip (succ i)
|
||||
| '#' | '-' | ' ' | '+' -> scan_flags skip (succ i)
|
||||
|
@ -151,7 +181,7 @@ let iter_on_format_args fmt add_conv add_char =
|
|||
| _ -> scan_conv skip i
|
||||
and scan_conv skip i =
|
||||
if i > lim then incomplete_format fmt else
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '%' | '!' -> succ i
|
||||
| 's' | 'S' | '[' -> add_conv skip i 's'
|
||||
| 'c' | 'C' -> add_conv skip i 'c'
|
||||
|
@ -162,7 +192,7 @@ let iter_on_format_args fmt add_conv add_char =
|
|||
| 'l' | 'n' | 'L' as conv ->
|
||||
let j = succ i in
|
||||
if j > lim then add_conv skip i 'i' else begin
|
||||
match fmt.[j] with
|
||||
match Sformat.get fmt j with
|
||||
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
|
||||
add_char (add_conv skip i conv) 'i'
|
||||
| c -> add_conv skip i 'i' end
|
||||
|
@ -172,7 +202,7 @@ let iter_on_format_args fmt add_conv add_char =
|
|||
let j = sub_format_for_printf conv fmt i in
|
||||
(* Add the meta specification anyway. *)
|
||||
let rec loop i =
|
||||
if i < j - 1 then loop (add_char i fmt.[i]) in
|
||||
if i < j - 1 then loop (add_char i (Sformat.get fmt i)) in
|
||||
loop i;
|
||||
scan_conv skip j
|
||||
| '(' as conv ->
|
||||
|
@ -181,11 +211,11 @@ let iter_on_format_args fmt add_conv add_char =
|
|||
anyway. *)
|
||||
scan_fmt (add_conv skip i conv)
|
||||
| '}' | ')' as conv -> add_conv skip i conv
|
||||
| conv -> bad_conversion fmt i conv
|
||||
| conv -> bad_conversion_format fmt i conv
|
||||
|
||||
and scan_fmt i =
|
||||
if i < lim then
|
||||
if fmt.[i] = '%'
|
||||
if Sformat.get fmt i = '%'
|
||||
then scan_fmt (scan_flags false (succ i))
|
||||
else scan_fmt (succ i)
|
||||
else i in
|
||||
|
@ -197,7 +227,7 @@ let iter_on_format_args fmt add_conv add_char =
|
|||
It also checks the well-formedness of the format string.
|
||||
For instance, [summarize_format_type "A number %d\n"] is "%i". *)
|
||||
let summarize_format_type fmt =
|
||||
let len = String.length fmt in
|
||||
let len = Sformat.length fmt in
|
||||
let b = Buffer.create len in
|
||||
let add_char i c = Buffer.add_char b c; succ i in
|
||||
let add_conv skip i c =
|
||||
|
@ -278,10 +308,10 @@ type param_spec = Spec_none | Spec_index of index;;
|
|||
problems: the type would be dependant of the {\em value} of an integer
|
||||
argument to printf. *)
|
||||
let scan_positional_spec fmt got_pos n i =
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '0'..'9' as d ->
|
||||
let rec get_int_litteral accu j =
|
||||
match String.unsafe_get fmt j with
|
||||
match Sformat.unsafe_get fmt j with
|
||||
| '0'..'9' as d ->
|
||||
get_int_litteral (10 * accu + (int_of_char d - 48)) (succ j)
|
||||
| '$' ->
|
||||
|
@ -320,8 +350,8 @@ let get_index spec n =
|
|||
"next pos" is the position in [fmt] of the first character following
|
||||
the %format in [fmt]. *)
|
||||
|
||||
(* Note: here, rather than test explicitly against [String.length fmt]
|
||||
to detect the end of the format, we use [String.unsafe_get] and
|
||||
(* Note: here, rather than test explicitly against [Sformat.length fmt]
|
||||
to detect the end of the format, we use [Sformat.unsafe_get] and
|
||||
rely on the fact that we'll get a "nul" character if we access
|
||||
one past the end of the string. These "nul" characters are then
|
||||
caught by the [_ -> bad_conversion] clauses below.
|
||||
|
@ -335,7 +365,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
scan_positional_spec fmt got_pos n i
|
||||
|
||||
and scan_flags spec n widths i =
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '*' ->
|
||||
let got_pos wspec i =
|
||||
let (width : int) = get_arg wspec n in
|
||||
|
@ -346,7 +376,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
| _ -> scan_conv spec n widths i
|
||||
|
||||
and scan_conv spec n widths i =
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '%' ->
|
||||
cont_s n "%" (succ i)
|
||||
| 's' | 'S' as conv ->
|
||||
|
@ -364,7 +394,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
cont_s (next_index spec n) s (succ i)
|
||||
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' | 'N' as conv ->
|
||||
let (x : int) = get_arg spec n in
|
||||
let s = format_int_with_conv conv (extract_format fmt pos i widths) x in
|
||||
let s =
|
||||
format_int (extract_format_int conv fmt pos i widths) x in
|
||||
cont_s (next_index spec n) s (succ i)
|
||||
| 'f' | 'e' | 'E' | 'g' | 'G' | 'F' as conv ->
|
||||
let (x : float) = get_arg spec n in
|
||||
|
@ -387,7 +418,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
let printer = get_arg spec n in
|
||||
cont_t (next_index spec n) printer (succ i)
|
||||
| 'l' | 'n' | 'L' as conv ->
|
||||
begin match String.unsafe_get fmt (succ i) with
|
||||
begin match Sformat.unsafe_get fmt (succ i) with
|
||||
| 'd' | 'i' | 'o' | 'x' | 'X' | 'u' ->
|
||||
let s =
|
||||
match conv with
|
||||
|
@ -403,10 +434,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
cont_s (next_index spec n) s (i + 2)
|
||||
| _ ->
|
||||
let (x : int) = get_arg spec n in
|
||||
cont_s
|
||||
(next_index spec n)
|
||||
(format_int_with_conv 'n' (extract_format fmt pos i widths) x)
|
||||
(succ i)
|
||||
let s = format_int (extract_format_int 'n' fmt pos i widths) x in
|
||||
cont_s (next_index spec n) s (succ i)
|
||||
end
|
||||
| '!' -> cont_f n (succ i)
|
||||
| '{' | '(' as conv (* ')' '}' *) ->
|
||||
|
@ -417,31 +446,31 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
|
|||
(* Just print the format argument as a specification. *)
|
||||
cont_s
|
||||
(next_index spec n)
|
||||
(summarize_format_type (format_to_string xf))
|
||||
(summarize_format_type xf)
|
||||
j else
|
||||
(* Use the format argument instead of the format specification. *)
|
||||
cont_m (next_index spec n) xf j
|
||||
| (* '(' *) ')' ->
|
||||
cont_s n "" (succ i)
|
||||
| conv ->
|
||||
bad_conversion fmt i conv in
|
||||
bad_conversion_format fmt i conv in
|
||||
|
||||
scan_positional n [] (succ pos);;
|
||||
|
||||
let mkprintf str get_out outc outs flush k fmt =
|
||||
|
||||
let fmt = format_to_string fmt in
|
||||
(* let fmt = Sformat.length fmt in*)
|
||||
(* out is global to this invocation of pr, and must be shared by all its
|
||||
recursive calls (fif) any. *)
|
||||
let out = get_out fmt in
|
||||
|
||||
let rec pr k n fmt v =
|
||||
|
||||
let len = String.length fmt in
|
||||
let len = Sformat.length fmt in
|
||||
|
||||
let rec doprn n i =
|
||||
if i >= len then Obj.magic (k out) else
|
||||
match String.unsafe_get fmt i with
|
||||
match Sformat.unsafe_get fmt i with
|
||||
| '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m
|
||||
| c -> outc out c; doprn n (succ i)
|
||||
and cont_s n s i =
|
||||
|
@ -461,8 +490,8 @@ let mkprintf str get_out outc outs flush k fmt =
|
|||
and cont_f n i =
|
||||
flush out; doprn n i
|
||||
and cont_m n xf i =
|
||||
let m = add_int_index (nargs_of_format_type (format_to_string xf)) n in
|
||||
pr (Obj.magic (fun _ -> doprn m i)) n (format_to_string xf) v in
|
||||
let m = add_int_index (nargs_of_format_type xf) n in
|
||||
pr (Obj.magic (fun _ -> doprn m i)) n xf v in
|
||||
|
||||
doprn n 0 in
|
||||
|
||||
|
@ -481,7 +510,7 @@ let kbprintf k b =
|
|||
let bprintf b = kbprintf ignore b
|
||||
|
||||
let get_buff fmt =
|
||||
let len = 2 * String.length fmt in
|
||||
let len = 2 * Sformat.length fmt in
|
||||
Buffer.create len;;
|
||||
|
||||
let get_contents b =
|
||||
|
|
|
@ -142,15 +142,34 @@ type index;;
|
|||
|
||||
external index_of_int : int -> index = "%identity";;
|
||||
|
||||
val scan_format : string -> 'a array -> index -> int ->
|
||||
(index -> string -> int -> 'b) ->
|
||||
(index -> 'c -> 'd -> int -> 'b) ->
|
||||
(index -> 'e -> int -> 'b) ->
|
||||
(index -> int -> 'b) ->
|
||||
(index -> ('h, 'i, 'j, 'k) format4 -> int -> 'b) -> 'b
|
||||
module Sformat : sig
|
||||
external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string
|
||||
= "%identity"
|
||||
external length : ('a, 'b, 'c, 'd) format4 -> int
|
||||
= "%string_length"
|
||||
external get : ('a, 'b, 'c, 'd) format4 -> int -> char
|
||||
= "%string_safe_get"
|
||||
external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char
|
||||
= "%string_unsafe_get"
|
||||
val sub : ('a, 'b, 'c, 'd) format4 -> int -> int -> string
|
||||
val to_string : ('a, 'b, 'c, 'd) format4 -> string
|
||||
end
|
||||
|
||||
val scan_format : ('a, 'b, 'c, 'd) format4 ->
|
||||
'e array ->
|
||||
index ->
|
||||
int ->
|
||||
(index -> string -> int -> 'f) ->
|
||||
(index -> 'g -> 'h -> int -> 'f) ->
|
||||
(index -> 'i -> int -> 'f) ->
|
||||
(index -> int -> 'f) ->
|
||||
(index -> ('j, 'k, 'l, 'm) format4 -> int -> 'f) -> 'f
|
||||
|
||||
val sub_format :
|
||||
(string -> int) -> (string -> int -> char -> int) ->
|
||||
char -> string -> int -> int
|
||||
val summarize_format_type : string -> string
|
||||
val kapr : (string -> Obj.t array -> 'a) -> string -> 'a
|
||||
(('a, 'b, 'c, 'd) format4 -> int) ->
|
||||
(('a, 'b, 'c, 'd) format4 -> int -> char -> int) ->
|
||||
char -> ('a, 'b, 'c, 'd) format4 -> int -> int
|
||||
val summarize_format_type : ('a, 'b, 'c, 'd) format4 -> string
|
||||
val kapr :
|
||||
(('a, 'b, 'c, 'd) format4 -> Obj.t array -> 'e) ->
|
||||
('a, 'b, 'c, 'd) format4 -> 'e
|
||||
|
|
147
stdlib/scanf.ml
147
stdlib/scanf.ml
|
@ -217,7 +217,11 @@ let from_function = create "function input";;
|
|||
(* Perform bufferized input to improve efficiency. *)
|
||||
let file_buffer_size = ref 1024;;
|
||||
|
||||
let from_file_channel fname ic =
|
||||
(* To close a channel at end of input. *)
|
||||
let scan_close_at_end ic = close_in ic; raise End_of_file;;
|
||||
|
||||
(* Scan from an input channel. *)
|
||||
let from_ic scan_close_ic fname ic =
|
||||
let len = !file_buffer_size in
|
||||
let buf = String.create len in
|
||||
let i = ref 0 in
|
||||
|
@ -225,29 +229,90 @@ let from_file_channel fname ic =
|
|||
let next () =
|
||||
if !i < !lim then begin let c = buf.[!i] in incr i; c end else begin
|
||||
lim := input ic buf 0 len;
|
||||
if !lim = 0 then raise End_of_file else begin
|
||||
if !lim = 0 then scan_close_ic ic else begin
|
||||
i := 1;
|
||||
buf.[0]
|
||||
end
|
||||
end in
|
||||
create fname next;;
|
||||
|
||||
let from_file fname = from_file_channel fname (open_in fname);;
|
||||
let from_file_bin fname = from_file_channel fname (open_in_bin fname);;
|
||||
let from_ic_close_at_end = from_ic scan_close_at_end;;
|
||||
|
||||
let from_input_channel fname ic =
|
||||
let next () = input_char ic in
|
||||
create fname next;;
|
||||
let from_file fname = from_ic_close_at_end fname (open_in fname);;
|
||||
let from_file_bin fname = from_ic_close_at_end fname (open_in_bin fname);;
|
||||
|
||||
let from_channel = from_input_channel "input channel";;
|
||||
(* Input channel [ic] is not allocated here, hence it may be shared (two
|
||||
functions of the program may successively read from it). Furthermore, the
|
||||
user may define more than one scanning buffer reading from the same [ic]
|
||||
channel.
|
||||
|
||||
(* The scanning buffer reading from [stdin].*)
|
||||
let stdib = from_input_channel "stdin" stdin;;
|
||||
However, we cannot prevent scanf to use one lookahead character if needed;
|
||||
this implies that multiple functions alternatively scanning the same [ic]
|
||||
channel will miss characters from time to time, due to unnoticed look ahead
|
||||
characters, silently read from [ic] (hence no more available for reading)
|
||||
and retained inside the scanning buffer for correct scanning from the same
|
||||
scanning buffer. This phenomenon is even worse in case of multiple
|
||||
definition of scanning buffers from the same [ic].
|
||||
|
||||
Hence, we do bufferize characters to create a scnning buffer from an input
|
||||
channel in order to preserve the same semantics as other from_* functions
|
||||
above: two successive calls to the scanner will work appropriately, since
|
||||
the bufferized character (if any) will be retained inside the scanning
|
||||
buffer from a call to the next one.
|
||||
|
||||
Otherwise, if we do not bufferize characters, we will loose the clearly
|
||||
correct scanning behaviour even for the simple regular case, when we scan
|
||||
the (possibly shared) channel [ic] using a unique function, while not
|
||||
gaining anything for multiple functions reading from [ic] or multiple
|
||||
allocation of scanning buffers reading from the same [ic].
|
||||
|
||||
A more ambitious fix could be to have a memo scanning buffer allocation
|
||||
for reading from input channel not allocated from within Scanf. *)
|
||||
|
||||
let scan_at_end ic = raise End_of_file;;
|
||||
|
||||
let from_channel = from_ic scan_at_end "input channel";;
|
||||
|
||||
(* The scanning buffer reading from [stdin].
|
||||
One could try to define stdib as from_channel stdin,
|
||||
but unfortunately the toplevel interaction would be wrong.
|
||||
This is due to some kind of ``race condition'' when reading from stdin,
|
||||
since the interactive compiler and scanf will simultaneously read the
|
||||
material they need from stdin; then, confusion will result from what should
|
||||
be read by the toplevel and what should be read by scanf.
|
||||
This is even more complicated by the one character lookahead that scanf
|
||||
is sometimes obliged to maintain: the lookahead character will be available
|
||||
for the next (scanf) entry, seamingly coming from nowhere.
|
||||
Also no End_of_file is raised when reading from stdin: if not enough
|
||||
characters have been read, we simply ask to read more. *)
|
||||
(*let stdib =
|
||||
let buf = ref ""
|
||||
and len = ref 0 in
|
||||
let mk_buff l =
|
||||
buf := String.create l;
|
||||
len := l in
|
||||
let i = ref 0 in
|
||||
let rec next () =
|
||||
if !i < !len then begin let c = !buf.[!i] in incr i; c end else
|
||||
let s = input_line stdin in
|
||||
let ls = String.length s in
|
||||
if ls > !len then mk_buff ls;
|
||||
String.blit s 0 !buf 0 ls;
|
||||
i := 0;
|
||||
next () in
|
||||
create "stdin" next;;
|
||||
*)
|
||||
let stdib = from_ic scan_at_end "stdin" stdin;;
|
||||
|
||||
end;;
|
||||
|
||||
(* Formatted input functions. *)
|
||||
|
||||
module Sformat = Printf.Sformat;;
|
||||
|
||||
external string_to_format :
|
||||
string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
|
||||
|
||||
(* Reporting errors. *)
|
||||
exception Scan_failure of string;;
|
||||
|
||||
|
@ -267,11 +332,12 @@ let bad_conversion fmt i c =
|
|||
invalid_arg
|
||||
(Printf.sprintf
|
||||
"scanf: bad conversion %%%c, at char number %i \
|
||||
in format string ``%s''" c i fmt);;
|
||||
in format string ``%s''" c i (Sformat.to_string fmt));;
|
||||
|
||||
let incomplete_format fmt =
|
||||
invalid_arg
|
||||
(Printf.sprintf "scanf: premature end of format string ``%s''" fmt);;
|
||||
(Printf.sprintf "scanf: premature end of format string ``%s''"
|
||||
(Sformat.to_string fmt));;
|
||||
|
||||
let bad_float () = bad_input "no dot or exponent part found in float token";;
|
||||
|
||||
|
@ -283,7 +349,8 @@ let format_mismatch fmt1 fmt2 ib =
|
|||
|
||||
(* Checking that 2 format string are type compatible. *)
|
||||
let compatible_format_type fmt1 fmt2 =
|
||||
Printf.summarize_format_type fmt1 = Printf.summarize_format_type fmt2;;
|
||||
Printf.summarize_format_type (string_to_format fmt1) =
|
||||
Printf.summarize_format_type (string_to_format fmt2);;
|
||||
|
||||
(* Checking that [c] is indeed in the input, then skips it.
|
||||
In this case, the character c has been explicitely specified in the
|
||||
|
@ -347,9 +414,12 @@ let token_float ib = float_of_string (Scanning.token ib);;
|
|||
since those modules are not available to Scanf.
|
||||
However, we can bind and use the corresponding primitives that are
|
||||
available in the runtime. *)
|
||||
external nativeint_of_string: string -> nativeint = "caml_nativeint_of_string";;
|
||||
external int32_of_string : string -> int32 = "caml_int32_of_string";;
|
||||
external int64_of_string : string -> int64 = "caml_int64_of_string";;
|
||||
external nativeint_of_string : string -> nativeint
|
||||
= "caml_nativeint_of_string";;
|
||||
external int32_of_string : string -> int32
|
||||
= "caml_int32_of_string";;
|
||||
external int64_of_string : string -> int64
|
||||
= "caml_int64_of_string";;
|
||||
|
||||
let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);;
|
||||
let token_int32 conv ib = int32_of_string (token_int_literal conv ib);;
|
||||
|
@ -648,29 +718,29 @@ type char_set =
|
|||
|
||||
(* Char sets are read as sub-strings in the format string. *)
|
||||
let read_char_set fmt i =
|
||||
let lim = String.length fmt - 1 in
|
||||
let lim = Sformat.length fmt - 1 in
|
||||
|
||||
let rec find_in_set j =
|
||||
if j > lim then incomplete_format fmt else
|
||||
match fmt.[j] with
|
||||
match Sformat.get fmt j with
|
||||
| ']' -> j
|
||||
| c -> find_in_set (j + 1)
|
||||
|
||||
and find_set i =
|
||||
if i > lim then incomplete_format fmt else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| ']' -> find_in_set (i + 1)
|
||||
| c -> find_in_set i in
|
||||
|
||||
if i > lim then incomplete_format fmt else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '^' ->
|
||||
let i = i + 1 in
|
||||
let j = find_set i in
|
||||
j, Neg_set (String.sub fmt i (j - i))
|
||||
j, Neg_set (Sformat.sub fmt i (j - i))
|
||||
| _ ->
|
||||
let j = find_set i in
|
||||
j, Pos_set (String.sub fmt i (j - i));;
|
||||
j, Pos_set (Sformat.sub fmt i (j - i));;
|
||||
|
||||
(* Char sets are now represented as bitvects that are represented as
|
||||
byte strings. *)
|
||||
|
@ -874,11 +944,6 @@ let rec skip_whites ib =
|
|||
| _ -> ()
|
||||
end;;
|
||||
|
||||
external format_to_string :
|
||||
('a, 'b, 'c, 'd) format4 -> string = "%identity";;
|
||||
external string_to_format :
|
||||
string -> ('a, 'b, 'c, 'd) format4 = "%identity";;
|
||||
|
||||
(* The [kscanf] main scanning function.
|
||||
It takes as arguments:
|
||||
- an input buffer [ib] from which to read characters,
|
||||
|
@ -899,8 +964,8 @@ external string_to_format :
|
|||
aborts and applies the scanning buffer and a string that explains
|
||||
the error to the error handling function [ef] (the error continuation). *)
|
||||
let kscanf ib ef fmt f =
|
||||
let fmt = format_to_string fmt in
|
||||
let lim = String.length fmt - 1 in
|
||||
|
||||
let lim = Sformat.length fmt - 1 in
|
||||
|
||||
let return v = Obj.magic v () in
|
||||
let delay f x () = f x in
|
||||
|
@ -909,7 +974,7 @@ let kscanf ib ef fmt f =
|
|||
|
||||
let rec scan_fmt f i =
|
||||
if i > lim then f else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| ' ' -> skip_whites ib; scan_fmt f (i + 1)
|
||||
| '%' ->
|
||||
if i > lim then incomplete_format fmt else
|
||||
|
@ -917,13 +982,13 @@ let kscanf ib ef fmt f =
|
|||
| '@' ->
|
||||
let i = i + 1 in
|
||||
if i > lim then incomplete_format fmt else begin
|
||||
check_char ib fmt.[i];
|
||||
check_char ib (Sformat.get fmt i);
|
||||
scan_fmt f (i + 1) end
|
||||
| c -> check_char ib c; scan_fmt f (i + 1)
|
||||
|
||||
and scan_conversion skip max f i =
|
||||
let stack = if skip then no_stack else stack in
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '%' as conv ->
|
||||
check_char ib conv; scan_fmt f (i + 1)
|
||||
| 'c' when max = 0 ->
|
||||
|
@ -961,14 +1026,14 @@ let kscanf ib ef fmt f =
|
|||
| 'l' | 'n' | 'L' as conv ->
|
||||
let i = i + 1 in
|
||||
if i > lim then scan_fmt (stack f (get_count conv ib)) i else begin
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
(* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *)
|
||||
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv ->
|
||||
let _x = scan_int_conv conv max ib in
|
||||
(* Look back to the character that triggered the integer conversion
|
||||
(this character is either 'l', 'n' or 'L'), to find the
|
||||
conversion to apply to the integer token read. *)
|
||||
begin match fmt.[i - 1] with
|
||||
begin match Sformat.get fmt (i - 1) with
|
||||
| 'l' -> scan_fmt (stack f (token_int32 conv ib)) (i + 1)
|
||||
| 'n' -> scan_fmt (stack f (token_nativeint conv ib)) (i + 1)
|
||||
| _ -> scan_fmt (stack f (token_int64 conv ib)) (i + 1) end
|
||||
|
@ -985,14 +1050,14 @@ let kscanf ib ef fmt f =
|
|||
| '0' .. '9' as conv ->
|
||||
let rec read_width accu i =
|
||||
if i > lim then accu, i else
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '0' .. '9' as c ->
|
||||
let accu = 10 * accu + int_value_of_char c in
|
||||
read_width accu (i + 1)
|
||||
| _ -> accu, i in
|
||||
let max, i = read_width (int_value_of_char conv) (i + 1) in
|
||||
if i > lim then incomplete_format fmt else begin
|
||||
match fmt.[i] with
|
||||
match Sformat.get fmt i with
|
||||
| '.' ->
|
||||
let p, i = read_width 0 (i + 1) in
|
||||
scan_conversion skip (max + p + 1) f i
|
||||
|
@ -1001,7 +1066,7 @@ let kscanf ib ef fmt f =
|
|||
let i = succ i in
|
||||
let j =
|
||||
Printf.sub_format incomplete_format bad_conversion conv fmt i + 1 in
|
||||
let mf = String.sub fmt i (j - i - 2) in
|
||||
let mf = Sformat.sub fmt i (j - i - 2) in
|
||||
let _x = scan_String max ib in
|
||||
let rf = token_string ib in
|
||||
if not (compatible_format_type mf rf)
|
||||
|
@ -1013,8 +1078,8 @@ let kscanf ib ef fmt f =
|
|||
|
||||
and scan_fmt_stoppers i =
|
||||
if i > lim then i - 1, [] else
|
||||
match fmt.[i] with
|
||||
| '@' when i < lim -> let i = i + 1 in i, [fmt.[i]]
|
||||
match Sformat.get fmt i with
|
||||
| '@' when i < lim -> let i = i + 1 in i, [Sformat.get fmt i]
|
||||
| '@' when i = lim -> incomplete_format fmt
|
||||
| _ -> i - 1, [] in
|
||||
|
||||
|
@ -1035,7 +1100,7 @@ let sscanf s = bscanf (Scanning.from_string s);;
|
|||
let scanf fmt = bscanf Scanning.stdib fmt;;
|
||||
|
||||
let bscanf_format ib fmt f =
|
||||
let fmt = format_to_string fmt in
|
||||
let fmt = Sformat.unsafe_to_string fmt in
|
||||
let fmt1 = ignore (scan_String max_int ib); token_string ib in
|
||||
if not (compatible_format_type fmt1 fmt) then
|
||||
format_mismatch fmt1 fmt ib else
|
||||
|
@ -1044,4 +1109,4 @@ let bscanf_format ib fmt f =
|
|||
|
||||
let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;;
|
||||
|
||||
let scan_format s fmt = sscanf_format s fmt (fun x -> x);;
|
||||
let format_from_string s fmt = sscanf_format s fmt (fun x -> x);;
|
||||
|
|
|
@ -269,7 +269,7 @@ val sscanf_format :
|
|||
(('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;;
|
||||
(** Same as {!Scanf.bscanf}, but inputs from the given string. *)
|
||||
|
||||
val scan_format :
|
||||
val format_from_string :
|
||||
string -> ('a, 'b, 'c, 'd) format4 -> ('a, 'b, 'c, 'd) format4;;
|
||||
(** Same as {!Scanf.sscanf_format}, but converts the given string to a format
|
||||
string. *)
|
||||
|
|
|
@ -78,4 +78,4 @@ let catch_break on =
|
|||
|
||||
(* OCaml version string, must be in the format described in sys.mli. *)
|
||||
|
||||
let ocaml_version = "3.10+dev5 (2006-04-05)";;
|
||||
let ocaml_version = "3.10+dev6 (2006-04-05)";;
|
||||
|
|
Loading…
Reference in New Issue