diff --git a/stdlib/format.ml b/stdlib/format.ml index 2fabff2e7..f4b519c8d 100644 --- a/stdlib/format.ml +++ b/stdlib/format.ml @@ -910,14 +910,16 @@ and set_tags = **************************************************************) +module Sformat = Printf.Sformat;; + (* Error messages when processing formats. *) (* Trailer: giving up at character number ... *) let giving_up mess fmt i = - "fprintf: " ^ mess ^ " ``" ^ fmt ^ "'', \ + "fprintf: " ^ mess ^ " ``" ^ Sformat.to_string fmt ^ "'', \ giving up at character number " ^ string_of_int i ^ - (if i < String.length fmt - then " (" ^ String.make 1 fmt.[i] ^ ")." + (if i < Sformat.length fmt + then " (" ^ String.make 1 (Sformat.get fmt i) ^ ")." else String.make 1 '.');; (* When an invalid format deserves a special error explanation. *) @@ -963,8 +965,6 @@ let implode_rev s0 = function | [] -> s0 | l -> String.concat "" (List.rev (s0 :: l));; -external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";; - (* [fprintf_out] is the printf-like function generator: given the - [to_s] flag that tells if we are printing into a string, - the [get_out] function that has to be called at the end of formatting, @@ -975,8 +975,7 @@ external format_to_string : ('a, 'b, 'c, 'd) format4 -> string = "%identity";; applications of [fprintf_out]. *) let mkprintf to_s get_out = let rec kprintf k fmt = - let fmt = format_to_string fmt in - let len = String.length fmt in + let len = Sformat.length fmt in let kpr fmt v = let ppf = get_out fmt in @@ -996,13 +995,13 @@ let mkprintf to_s get_out = let rec doprn n i = if i >= len then Obj.magic (k ppf) else - match fmt.[i] with + match Sformat.get fmt i with | '%' -> Printf.scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m | '@' -> let i = succ i in if i >= len then invalid_format fmt i else - begin match fmt.[i] with + begin match Sformat.get fmt i with | '[' -> do_pp_open_box ppf n (succ i) | ']' -> @@ -1065,7 +1064,7 @@ let mkprintf to_s get_out = and get_int n i c = if i >= len then invalid_integer fmt i else - match fmt.[i] with + match Sformat.get fmt i with | ' ' -> get_int n (succ i) c | '%' -> let cont_s n s i = c (format_int_of_string fmt i s) n i @@ -1077,33 +1076,34 @@ let mkprintf to_s get_out = | _ -> let rec get j = if j >= len then invalid_integer fmt j else - match fmt.[j] with + match Sformat.get fmt j with | '0' .. '9' | '-' -> get (succ j) | _ -> let size = if j = i then size_of_int 0 else - format_int_of_string fmt j (String.sub fmt i (j - i)) in + let s = Sformat.sub fmt i (j - i) in + format_int_of_string fmt j s in c size n j in get i and skip_gt i = if i >= len then invalid_format fmt i else - match fmt.[i] with + match Sformat.get fmt i with | ' ' -> skip_gt (succ i) | '>' -> succ i | _ -> invalid_format fmt i and get_box_kind i = if i >= len then Pp_box, i else - match fmt.[i] with + match Sformat.get fmt i with | 'h' -> let i = succ i in if i >= len then Pp_hbox, i else - begin match fmt.[i] with + begin match Sformat.get fmt i with | 'o' -> let i = succ i in if i >= len then format_invalid_arg "bad box format" fmt i else - begin match fmt.[i] with + begin match Sformat.get fmt i with | 'v' -> Pp_hovbox, succ i | c -> format_invalid_arg @@ -1118,11 +1118,11 @@ let mkprintf to_s get_out = and get_tag_name n i c = let rec get accu n i j = if j >= len - then c (implode_rev (String.sub fmt i (j - i)) accu) n j else - match fmt.[j] with - | '>' -> c (implode_rev (String.sub fmt i (j - i)) accu) n j + then c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j else + match Sformat.get fmt j with + | '>' -> c (implode_rev (Sformat.sub fmt i (j - i)) accu) n j | '%' -> - let s0 = String.sub fmt i (j - i) in + let s0 = Sformat.sub fmt i (j - i) in let cont_s n s i = get (s :: s0 :: accu) n i i and cont_a n printer arg i = let s = @@ -1146,7 +1146,7 @@ let mkprintf to_s get_out = and do_pp_break ppf n i = if i >= len then begin pp_print_space ppf (); doprn n i end else - match fmt.[i] with + match Sformat.get fmt i with | '<' -> let rec got_nspaces nspaces n i = get_int n i (got_offset nspaces) @@ -1158,7 +1158,7 @@ let mkprintf to_s get_out = and do_pp_open_box ppf n i = if i >= len then begin pp_open_box_gen ppf 0 Pp_box; doprn n i end else - match fmt.[i] with + match Sformat.get fmt i with | '<' -> let kind, i = get_box_kind (succ i) in let got_size size n i = @@ -1169,7 +1169,7 @@ let mkprintf to_s get_out = and do_pp_open_tag ppf n i = if i >= len then begin pp_open_tag ppf ""; doprn n i end else - match fmt.[i] with + match Sformat.get fmt i with | '<' -> let got_name tag_name n i = pp_open_tag ppf tag_name; diff --git a/stdlib/printf.ml b/stdlib/printf.ml index fe42ec861..1acb41b6e 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -13,14 +13,33 @@ (* $Id$ *) -external format_int: string -> int -> string = "caml_format_int" -external format_int32: string -> int32 -> string = "caml_int32_format" +external format_float: string -> float -> string + = "caml_format_float" +external format_int: string -> int -> string + = "caml_format_int" +external format_int32: string -> int32 -> string + = "caml_int32_format" external format_nativeint: string -> nativeint -> string - = "caml_nativeint_format" -external format_int64: string -> int64 -> string = "caml_int64_format" -external format_float: string -> float -> string = "caml_format_float" + = "caml_nativeint_format" +external format_int64: string -> int64 -> string + = "caml_int64_format" -external format_to_string: ('a, 'b, 'c, 'd) format4 -> string = "%identity" +module Sformat = struct + external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string + = "%identity" + external length : ('a, 'b, 'c, 'd) format4 -> int + = "%string_length" + external get : ('a, 'b, 'c, 'd) format4 -> int -> char + = "%string_safe_get" + external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char + = "%string_unsafe_get" +(* external set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit + = "%string_safe_set" + external unsafe_set : ('a, 'b, 'c, 'd) format4 -> int -> char -> unit + = "%string_unsafe_set" *) + let sub fmt idx len = String.sub (unsafe_to_string fmt) idx len + let to_string fmt = sub fmt 0 (length fmt) +end;; type index;; @@ -32,28 +51,33 @@ let succ_index = add_int_index 1;; (* Litteral position are one-based (hence pred p instead of p). *) let index_of_litteral_position p = index_of_int (pred p);; -let bad_conversion fmt i c = +let bad_conversion sfmt i c = invalid_arg ("printf: bad conversion %" ^ String.make 1 c ^ ", at char number " ^ - string_of_int i ^ " in format string ``" ^ fmt ^ "''");; + string_of_int i ^ " in format string ``" ^ sfmt ^ "''");; + +let bad_conversion_format fmt i c = + bad_conversion (Sformat.to_string fmt) i c;; let incomplete_format fmt = invalid_arg - ("printf: premature end of format string ``" ^ fmt ^ "''");; + ("printf: premature end of format string ``" ^ + Sformat.to_string fmt ^ "''");; (* Parses a format to return the specified length and the padding direction. *) -let parse_format fmt = +let parse_format sfmt = let rec parse neg i = - if i >= String.length fmt then (0, neg) else - match String.unsafe_get fmt i with + if i >= String.length sfmt then (0, neg) else + match String.unsafe_get sfmt i with | '1'..'9' -> - (int_of_string (String.sub fmt i (String.length fmt - i - 1)), + (int_of_string + (String.sub sfmt i (String.length sfmt - i - 1)), neg) | '-' -> parse true (succ i) | _ -> parse neg (succ i) in - try parse false 1 with Failure _ -> bad_conversion fmt 0 's' + try parse false 1 with Failure _ -> bad_conversion sfmt 0 's' (* Pad a (sub) string into a blank string of length [p], on the right if [neg] is true, on the left otherwise. *) @@ -68,18 +92,19 @@ let pad_string pad_char p neg s i len = (* Format a string given a %s format, e.g. %40s or %-20s. To do: ignore other flags (#, +, etc)? *) -let format_string fmt s = - let (p, neg) = parse_format fmt in - pad_string ' ' p neg s 0 (String.length s) +let format_string sfmt s = + let (p, neg) = parse_format sfmt in + pad_string ' ' p neg s 0 (String.length s);; -(* Extract a %format from [fmt] between [start] and [stop] inclusive. - '*' in the format are replaced by integers taken from the [widths] list. *) +(* Extract a format string out of [fmt] between [start] and [stop] inclusive. + '*' in the format are replaced by integers taken from the [widths] list. + extract_format returns a string. *) let extract_format fmt start stop widths = let skip_positional_spec start = - match String.unsafe_get fmt start with + match Sformat.unsafe_get fmt start with | '0'..'9' -> let rec skip_int_litteral i = - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '0'..'9' -> skip_int_litteral (succ i) | '$' -> succ i | _ -> start in @@ -90,7 +115,7 @@ let extract_format fmt start stop widths = Buffer.add_char b '%'; let rec fill_format i widths = if i <= stop then - match (String.unsafe_get fmt i, widths) with + match (Sformat.unsafe_get fmt i, widths) with | ('*', h :: t) -> Buffer.add_string b (string_of_int h); let i = skip_positional_spec (succ i) in @@ -102,10 +127,13 @@ let extract_format fmt start stop widths = fill_format start (List.rev widths); Buffer.contents b;; -let format_int_with_conv conv fmt i = +let extract_format_int conv fmt start stop widths = + let sfmt = extract_format fmt start stop widths in match conv with - | 'n' | 'N' -> fmt.[String.length fmt - 1] <- 'u'; format_int fmt i - | _ -> format_int fmt i + | 'n' | 'N' -> + sfmt. [String.length sfmt - 1] <- 'u'; + sfmt + | _ -> sfmt;; (* Returns the position of the last character of the meta format string, starting from position [i], inside a given format [fmt]. @@ -114,34 +142,36 @@ let format_int_with_conv conv fmt i = %) (when [conv = '(']). Hence, [sub_format] returns the index of the character ')' or '}' that ends the meta format, according to the character [conv]. *) -let sub_format incomplete_format bad_conversion conv fmt i = - let len = String.length fmt in +let sub_format incomplete_format bad_conversion_format conv fmt i = + let len = Sformat.length fmt in let rec sub_fmt c i = let close = if c = '(' then ')' else (* '{' *) '}' in let rec sub j = if j >= len then incomplete_format fmt else - match fmt.[j] with + match Sformat.get fmt j with | '%' -> sub_sub (succ j) | _ -> sub (succ j) and sub_sub j = if j >= len then incomplete_format fmt else - match fmt.[j] with + match Sformat.get fmt j with | '(' | '{' as c -> let j = sub_fmt c (succ j) in sub (succ j) | '}' | ')' as c -> - if c = close then j else bad_conversion fmt i c + if c = close then j else bad_conversion_format fmt i c | _ -> sub (succ j) in sub i in sub_fmt conv i;; -let sub_format_for_printf = sub_format incomplete_format bad_conversion;; +let sub_format_for_printf conv = + sub_format incomplete_format bad_conversion_format conv;; let iter_on_format_args fmt add_conv add_char = - let lim = String.length fmt - 1 in + + let lim = Sformat.length fmt - 1 in let rec scan_flags skip i = if i > lim then incomplete_format fmt else - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '*' -> scan_flags skip (add_conv skip i 'i') | '$' -> scan_flags skip (succ i) | '#' | '-' | ' ' | '+' -> scan_flags skip (succ i) @@ -151,7 +181,7 @@ let iter_on_format_args fmt add_conv add_char = | _ -> scan_conv skip i and scan_conv skip i = if i > lim then incomplete_format fmt else - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '%' | '!' -> succ i | 's' | 'S' | '[' -> add_conv skip i 's' | 'c' | 'C' -> add_conv skip i 'c' @@ -162,7 +192,7 @@ let iter_on_format_args fmt add_conv add_char = | 'l' | 'n' | 'L' as conv -> let j = succ i in if j > lim then add_conv skip i 'i' else begin - match fmt.[j] with + match Sformat.get fmt j with | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> add_char (add_conv skip i conv) 'i' | c -> add_conv skip i 'i' end @@ -172,7 +202,7 @@ let iter_on_format_args fmt add_conv add_char = let j = sub_format_for_printf conv fmt i in (* Add the meta specification anyway. *) let rec loop i = - if i < j - 1 then loop (add_char i fmt.[i]) in + if i < j - 1 then loop (add_char i (Sformat.get fmt i)) in loop i; scan_conv skip j | '(' as conv -> @@ -181,11 +211,11 @@ let iter_on_format_args fmt add_conv add_char = anyway. *) scan_fmt (add_conv skip i conv) | '}' | ')' as conv -> add_conv skip i conv - | conv -> bad_conversion fmt i conv + | conv -> bad_conversion_format fmt i conv and scan_fmt i = if i < lim then - if fmt.[i] = '%' + if Sformat.get fmt i = '%' then scan_fmt (scan_flags false (succ i)) else scan_fmt (succ i) else i in @@ -197,7 +227,7 @@ let iter_on_format_args fmt add_conv add_char = It also checks the well-formedness of the format string. For instance, [summarize_format_type "A number %d\n"] is "%i". *) let summarize_format_type fmt = - let len = String.length fmt in + let len = Sformat.length fmt in let b = Buffer.create len in let add_char i c = Buffer.add_char b c; succ i in let add_conv skip i c = @@ -278,10 +308,10 @@ type param_spec = Spec_none | Spec_index of index;; problems: the type would be dependant of the {\em value} of an integer argument to printf. *) let scan_positional_spec fmt got_pos n i = - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '0'..'9' as d -> let rec get_int_litteral accu j = - match String.unsafe_get fmt j with + match Sformat.unsafe_get fmt j with | '0'..'9' as d -> get_int_litteral (10 * accu + (int_of_char d - 48)) (succ j) | '$' -> @@ -320,8 +350,8 @@ let get_index spec n = "next pos" is the position in [fmt] of the first character following the %format in [fmt]. *) -(* Note: here, rather than test explicitly against [String.length fmt] - to detect the end of the format, we use [String.unsafe_get] and +(* Note: here, rather than test explicitly against [Sformat.length fmt] + to detect the end of the format, we use [Sformat.unsafe_get] and rely on the fact that we'll get a "nul" character if we access one past the end of the string. These "nul" characters are then caught by the [_ -> bad_conversion] clauses below. @@ -335,7 +365,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = scan_positional_spec fmt got_pos n i and scan_flags spec n widths i = - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '*' -> let got_pos wspec i = let (width : int) = get_arg wspec n in @@ -346,7 +376,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = | _ -> scan_conv spec n widths i and scan_conv spec n widths i = - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '%' -> cont_s n "%" (succ i) | 's' | 'S' as conv -> @@ -364,7 +394,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = cont_s (next_index spec n) s (succ i) | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' | 'N' as conv -> let (x : int) = get_arg spec n in - let s = format_int_with_conv conv (extract_format fmt pos i widths) x in + let s = + format_int (extract_format_int conv fmt pos i widths) x in cont_s (next_index spec n) s (succ i) | 'f' | 'e' | 'E' | 'g' | 'G' | 'F' as conv -> let (x : float) = get_arg spec n in @@ -387,7 +418,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = let printer = get_arg spec n in cont_t (next_index spec n) printer (succ i) | 'l' | 'n' | 'L' as conv -> - begin match String.unsafe_get fmt (succ i) with + begin match Sformat.unsafe_get fmt (succ i) with | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> let s = match conv with @@ -403,10 +434,8 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = cont_s (next_index spec n) s (i + 2) | _ -> let (x : int) = get_arg spec n in - cont_s - (next_index spec n) - (format_int_with_conv 'n' (extract_format fmt pos i widths) x) - (succ i) + let s = format_int (extract_format_int 'n' fmt pos i widths) x in + cont_s (next_index spec n) s (succ i) end | '!' -> cont_f n (succ i) | '{' | '(' as conv (* ')' '}' *) -> @@ -417,31 +446,31 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m = (* Just print the format argument as a specification. *) cont_s (next_index spec n) - (summarize_format_type (format_to_string xf)) + (summarize_format_type xf) j else (* Use the format argument instead of the format specification. *) cont_m (next_index spec n) xf j | (* '(' *) ')' -> cont_s n "" (succ i) | conv -> - bad_conversion fmt i conv in + bad_conversion_format fmt i conv in scan_positional n [] (succ pos);; let mkprintf str get_out outc outs flush k fmt = - let fmt = format_to_string fmt in +(* let fmt = Sformat.length fmt in*) (* out is global to this invocation of pr, and must be shared by all its recursive calls (fif) any. *) let out = get_out fmt in let rec pr k n fmt v = - let len = String.length fmt in + let len = Sformat.length fmt in let rec doprn n i = if i >= len then Obj.magic (k out) else - match String.unsafe_get fmt i with + match Sformat.unsafe_get fmt i with | '%' -> scan_format fmt v n i cont_s cont_a cont_t cont_f cont_m | c -> outc out c; doprn n (succ i) and cont_s n s i = @@ -461,8 +490,8 @@ let mkprintf str get_out outc outs flush k fmt = and cont_f n i = flush out; doprn n i and cont_m n xf i = - let m = add_int_index (nargs_of_format_type (format_to_string xf)) n in - pr (Obj.magic (fun _ -> doprn m i)) n (format_to_string xf) v in + let m = add_int_index (nargs_of_format_type xf) n in + pr (Obj.magic (fun _ -> doprn m i)) n xf v in doprn n 0 in @@ -481,7 +510,7 @@ let kbprintf k b = let bprintf b = kbprintf ignore b let get_buff fmt = - let len = 2 * String.length fmt in + let len = 2 * Sformat.length fmt in Buffer.create len;; let get_contents b = diff --git a/stdlib/printf.mli b/stdlib/printf.mli index fe4b8b2f8..d612f4ad3 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -142,15 +142,34 @@ type index;; external index_of_int : int -> index = "%identity";; -val scan_format : string -> 'a array -> index -> int -> - (index -> string -> int -> 'b) -> - (index -> 'c -> 'd -> int -> 'b) -> - (index -> 'e -> int -> 'b) -> - (index -> int -> 'b) -> - (index -> ('h, 'i, 'j, 'k) format4 -> int -> 'b) -> 'b +module Sformat : sig + external unsafe_to_string : ('a, 'b, 'c, 'd) format4 -> string + = "%identity" + external length : ('a, 'b, 'c, 'd) format4 -> int + = "%string_length" + external get : ('a, 'b, 'c, 'd) format4 -> int -> char + = "%string_safe_get" + external unsafe_get : ('a, 'b, 'c, 'd) format4 -> int -> char + = "%string_unsafe_get" + val sub : ('a, 'b, 'c, 'd) format4 -> int -> int -> string + val to_string : ('a, 'b, 'c, 'd) format4 -> string +end + +val scan_format : ('a, 'b, 'c, 'd) format4 -> + 'e array -> + index -> + int -> + (index -> string -> int -> 'f) -> + (index -> 'g -> 'h -> int -> 'f) -> + (index -> 'i -> int -> 'f) -> + (index -> int -> 'f) -> + (index -> ('j, 'k, 'l, 'm) format4 -> int -> 'f) -> 'f val sub_format : - (string -> int) -> (string -> int -> char -> int) -> - char -> string -> int -> int -val summarize_format_type : string -> string -val kapr : (string -> Obj.t array -> 'a) -> string -> 'a + (('a, 'b, 'c, 'd) format4 -> int) -> + (('a, 'b, 'c, 'd) format4 -> int -> char -> int) -> + char -> ('a, 'b, 'c, 'd) format4 -> int -> int +val summarize_format_type : ('a, 'b, 'c, 'd) format4 -> string +val kapr : + (('a, 'b, 'c, 'd) format4 -> Obj.t array -> 'e) -> + ('a, 'b, 'c, 'd) format4 -> 'e diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml index 030075ba3..a0a49c061 100644 --- a/stdlib/scanf.ml +++ b/stdlib/scanf.ml @@ -217,7 +217,11 @@ let from_function = create "function input";; (* Perform bufferized input to improve efficiency. *) let file_buffer_size = ref 1024;; -let from_file_channel fname ic = +(* To close a channel at end of input. *) +let scan_close_at_end ic = close_in ic; raise End_of_file;; + +(* Scan from an input channel. *) +let from_ic scan_close_ic fname ic = let len = !file_buffer_size in let buf = String.create len in let i = ref 0 in @@ -225,29 +229,90 @@ let from_file_channel fname ic = let next () = if !i < !lim then begin let c = buf.[!i] in incr i; c end else begin lim := input ic buf 0 len; - if !lim = 0 then raise End_of_file else begin + if !lim = 0 then scan_close_ic ic else begin i := 1; buf.[0] end end in create fname next;; -let from_file fname = from_file_channel fname (open_in fname);; -let from_file_bin fname = from_file_channel fname (open_in_bin fname);; +let from_ic_close_at_end = from_ic scan_close_at_end;; -let from_input_channel fname ic = - let next () = input_char ic in - create fname next;; +let from_file fname = from_ic_close_at_end fname (open_in fname);; +let from_file_bin fname = from_ic_close_at_end fname (open_in_bin fname);; -let from_channel = from_input_channel "input channel";; +(* Input channel [ic] is not allocated here, hence it may be shared (two + functions of the program may successively read from it). Furthermore, the + user may define more than one scanning buffer reading from the same [ic] + channel. -(* The scanning buffer reading from [stdin].*) -let stdib = from_input_channel "stdin" stdin;; + However, we cannot prevent scanf to use one lookahead character if needed; + this implies that multiple functions alternatively scanning the same [ic] + channel will miss characters from time to time, due to unnoticed look ahead + characters, silently read from [ic] (hence no more available for reading) + and retained inside the scanning buffer for correct scanning from the same + scanning buffer. This phenomenon is even worse in case of multiple + definition of scanning buffers from the same [ic]. + + Hence, we do bufferize characters to create a scnning buffer from an input + channel in order to preserve the same semantics as other from_* functions + above: two successive calls to the scanner will work appropriately, since + the bufferized character (if any) will be retained inside the scanning + buffer from a call to the next one. + + Otherwise, if we do not bufferize characters, we will loose the clearly + correct scanning behaviour even for the simple regular case, when we scan + the (possibly shared) channel [ic] using a unique function, while not + gaining anything for multiple functions reading from [ic] or multiple + allocation of scanning buffers reading from the same [ic]. + + A more ambitious fix could be to have a memo scanning buffer allocation + for reading from input channel not allocated from within Scanf. *) + +let scan_at_end ic = raise End_of_file;; + +let from_channel = from_ic scan_at_end "input channel";; + +(* The scanning buffer reading from [stdin]. + One could try to define stdib as from_channel stdin, + but unfortunately the toplevel interaction would be wrong. + This is due to some kind of ``race condition'' when reading from stdin, + since the interactive compiler and scanf will simultaneously read the + material they need from stdin; then, confusion will result from what should + be read by the toplevel and what should be read by scanf. + This is even more complicated by the one character lookahead that scanf + is sometimes obliged to maintain: the lookahead character will be available + for the next (scanf) entry, seamingly coming from nowhere. + Also no End_of_file is raised when reading from stdin: if not enough + characters have been read, we simply ask to read more. *) +(*let stdib = + let buf = ref "" + and len = ref 0 in + let mk_buff l = + buf := String.create l; + len := l in + let i = ref 0 in + let rec next () = + if !i < !len then begin let c = !buf.[!i] in incr i; c end else + let s = input_line stdin in + let ls = String.length s in + if ls > !len then mk_buff ls; + String.blit s 0 !buf 0 ls; + i := 0; + next () in + create "stdin" next;; +*) +let stdib = from_ic scan_at_end "stdin" stdin;; end;; (* Formatted input functions. *) +module Sformat = Printf.Sformat;; + +external string_to_format : + string -> ('a, 'b, 'c, 'd) format4 = "%identity";; + (* Reporting errors. *) exception Scan_failure of string;; @@ -267,11 +332,12 @@ let bad_conversion fmt i c = invalid_arg (Printf.sprintf "scanf: bad conversion %%%c, at char number %i \ - in format string ``%s''" c i fmt);; + in format string ``%s''" c i (Sformat.to_string fmt));; let incomplete_format fmt = invalid_arg - (Printf.sprintf "scanf: premature end of format string ``%s''" fmt);; + (Printf.sprintf "scanf: premature end of format string ``%s''" + (Sformat.to_string fmt));; let bad_float () = bad_input "no dot or exponent part found in float token";; @@ -283,7 +349,8 @@ let format_mismatch fmt1 fmt2 ib = (* Checking that 2 format string are type compatible. *) let compatible_format_type fmt1 fmt2 = - Printf.summarize_format_type fmt1 = Printf.summarize_format_type fmt2;; + Printf.summarize_format_type (string_to_format fmt1) = + Printf.summarize_format_type (string_to_format fmt2);; (* Checking that [c] is indeed in the input, then skips it. In this case, the character c has been explicitely specified in the @@ -347,9 +414,12 @@ let token_float ib = float_of_string (Scanning.token ib);; since those modules are not available to Scanf. However, we can bind and use the corresponding primitives that are available in the runtime. *) -external nativeint_of_string: string -> nativeint = "caml_nativeint_of_string";; -external int32_of_string : string -> int32 = "caml_int32_of_string";; -external int64_of_string : string -> int64 = "caml_int64_of_string";; +external nativeint_of_string : string -> nativeint + = "caml_nativeint_of_string";; +external int32_of_string : string -> int32 + = "caml_int32_of_string";; +external int64_of_string : string -> int64 + = "caml_int64_of_string";; let token_nativeint conv ib = nativeint_of_string (token_int_literal conv ib);; let token_int32 conv ib = int32_of_string (token_int_literal conv ib);; @@ -648,29 +718,29 @@ type char_set = (* Char sets are read as sub-strings in the format string. *) let read_char_set fmt i = - let lim = String.length fmt - 1 in + let lim = Sformat.length fmt - 1 in let rec find_in_set j = if j > lim then incomplete_format fmt else - match fmt.[j] with + match Sformat.get fmt j with | ']' -> j | c -> find_in_set (j + 1) and find_set i = if i > lim then incomplete_format fmt else - match fmt.[i] with + match Sformat.get fmt i with | ']' -> find_in_set (i + 1) | c -> find_in_set i in if i > lim then incomplete_format fmt else - match fmt.[i] with + match Sformat.get fmt i with | '^' -> let i = i + 1 in let j = find_set i in - j, Neg_set (String.sub fmt i (j - i)) + j, Neg_set (Sformat.sub fmt i (j - i)) | _ -> let j = find_set i in - j, Pos_set (String.sub fmt i (j - i));; + j, Pos_set (Sformat.sub fmt i (j - i));; (* Char sets are now represented as bitvects that are represented as byte strings. *) @@ -874,11 +944,6 @@ let rec skip_whites ib = | _ -> () end;; -external format_to_string : - ('a, 'b, 'c, 'd) format4 -> string = "%identity";; -external string_to_format : - string -> ('a, 'b, 'c, 'd) format4 = "%identity";; - (* The [kscanf] main scanning function. It takes as arguments: - an input buffer [ib] from which to read characters, @@ -899,8 +964,8 @@ external string_to_format : aborts and applies the scanning buffer and a string that explains the error to the error handling function [ef] (the error continuation). *) let kscanf ib ef fmt f = - let fmt = format_to_string fmt in - let lim = String.length fmt - 1 in + + let lim = Sformat.length fmt - 1 in let return v = Obj.magic v () in let delay f x () = f x in @@ -909,7 +974,7 @@ let kscanf ib ef fmt f = let rec scan_fmt f i = if i > lim then f else - match fmt.[i] with + match Sformat.get fmt i with | ' ' -> skip_whites ib; scan_fmt f (i + 1) | '%' -> if i > lim then incomplete_format fmt else @@ -917,13 +982,13 @@ let kscanf ib ef fmt f = | '@' -> let i = i + 1 in if i > lim then incomplete_format fmt else begin - check_char ib fmt.[i]; + check_char ib (Sformat.get fmt i); scan_fmt f (i + 1) end | c -> check_char ib c; scan_fmt f (i + 1) and scan_conversion skip max f i = let stack = if skip then no_stack else stack in - match fmt.[i] with + match Sformat.get fmt i with | '%' as conv -> check_char ib conv; scan_fmt f (i + 1) | 'c' when max = 0 -> @@ -961,14 +1026,14 @@ let kscanf ib ef fmt f = | 'l' | 'n' | 'L' as conv -> let i = i + 1 in if i > lim then scan_fmt (stack f (get_count conv ib)) i else begin - match fmt.[i] with + match Sformat.get fmt i with (* This is in fact an integer conversion (e.g. %ld, %ni, or %Lo). *) | 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as conv -> let _x = scan_int_conv conv max ib in (* Look back to the character that triggered the integer conversion (this character is either 'l', 'n' or 'L'), to find the conversion to apply to the integer token read. *) - begin match fmt.[i - 1] with + begin match Sformat.get fmt (i - 1) with | 'l' -> scan_fmt (stack f (token_int32 conv ib)) (i + 1) | 'n' -> scan_fmt (stack f (token_nativeint conv ib)) (i + 1) | _ -> scan_fmt (stack f (token_int64 conv ib)) (i + 1) end @@ -985,14 +1050,14 @@ let kscanf ib ef fmt f = | '0' .. '9' as conv -> let rec read_width accu i = if i > lim then accu, i else - match fmt.[i] with + match Sformat.get fmt i with | '0' .. '9' as c -> let accu = 10 * accu + int_value_of_char c in read_width accu (i + 1) | _ -> accu, i in let max, i = read_width (int_value_of_char conv) (i + 1) in if i > lim then incomplete_format fmt else begin - match fmt.[i] with + match Sformat.get fmt i with | '.' -> let p, i = read_width 0 (i + 1) in scan_conversion skip (max + p + 1) f i @@ -1001,7 +1066,7 @@ let kscanf ib ef fmt f = let i = succ i in let j = Printf.sub_format incomplete_format bad_conversion conv fmt i + 1 in - let mf = String.sub fmt i (j - i - 2) in + let mf = Sformat.sub fmt i (j - i - 2) in let _x = scan_String max ib in let rf = token_string ib in if not (compatible_format_type mf rf) @@ -1013,8 +1078,8 @@ let kscanf ib ef fmt f = and scan_fmt_stoppers i = if i > lim then i - 1, [] else - match fmt.[i] with - | '@' when i < lim -> let i = i + 1 in i, [fmt.[i]] + match Sformat.get fmt i with + | '@' when i < lim -> let i = i + 1 in i, [Sformat.get fmt i] | '@' when i = lim -> incomplete_format fmt | _ -> i - 1, [] in @@ -1035,7 +1100,7 @@ let sscanf s = bscanf (Scanning.from_string s);; let scanf fmt = bscanf Scanning.stdib fmt;; let bscanf_format ib fmt f = - let fmt = format_to_string fmt in + let fmt = Sformat.unsafe_to_string fmt in let fmt1 = ignore (scan_String max_int ib); token_string ib in if not (compatible_format_type fmt1 fmt) then format_mismatch fmt1 fmt ib else @@ -1044,4 +1109,4 @@ let bscanf_format ib fmt f = let sscanf_format s fmt f = bscanf_format (Scanning.from_string s) fmt f;; -let scan_format s fmt = sscanf_format s fmt (fun x -> x);; +let format_from_string s fmt = sscanf_format s fmt (fun x -> x);; diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli index 646fcccdb..14cd949ed 100644 --- a/stdlib/scanf.mli +++ b/stdlib/scanf.mli @@ -269,7 +269,7 @@ val sscanf_format : (('a, 'b, 'c, 'd) format4 -> 'e) -> 'e;; (** Same as {!Scanf.bscanf}, but inputs from the given string. *) -val scan_format : +val format_from_string : string -> ('a, 'b, 'c, 'd) format4 -> ('a, 'b, 'c, 'd) format4;; (** Same as {!Scanf.sscanf_format}, but converts the given string to a format string. *) diff --git a/stdlib/sys.ml b/stdlib/sys.ml index ab8694733..7ed1a5071 100644 --- a/stdlib/sys.ml +++ b/stdlib/sys.ml @@ -78,4 +78,4 @@ let catch_break on = (* OCaml version string, must be in the format described in sys.mli. *) -let ocaml_version = "3.10+dev5 (2006-04-05)";; +let ocaml_version = "3.10+dev6 (2006-04-05)";;