ocaml/stdlib/scanf.ml

430 lines
14 KiB
OCaml

(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Pierre Weis, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 2002 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU Library General Public License, with *)
(* the special exception on linking described in file ../LICENSE. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(** Formatted input functions. *)
let bad_input ib s =
let i = Scanning.char_count ib in
failwith
(Printf.sprintf
"scanf: bad input at char number %i, while scanning %s" i s);;
let bad_input_buff ib = failwith "scanf: bad input";;
let bad_format fmt i fc =
invalid_arg
(Printf.sprintf
"scanf: bad format %c, at char number %i of format %s" fc i fmt);;
(* Extracting tokens from ouput token buffer. *)
let token_int ib =
let s = Scanning.token ib in
try Pervasives.int_of_string s
with Failure "int_of_string" -> bad_input ib s;;
let token_bool ib =
match Scanning.token ib with
| "true" -> true
| "false" -> false
| s -> bad_input ib ("a boolean, found " ^ s);;
let token_char ib =
(Scanning.token ib).[0];;
let token_float ib =
let s = Scanning.token ib in
float_of_string s;;
let token_string = Scanning.token;;
(* To scan native ints, int32 and int64 integers.
We cannot access to convertion to from strings: Nativeint.of_string,
Int32.of_string, and Int64.of_string, since those module are not
available to scanf. However, we can bind and use the primitives that are
available in the runtime. *)
external nativeint_of_string: string -> nativeint = "nativeint_of_string";;
external int32_of_string : string -> int32 = "int32_of_string";;
external int64_of_string : string -> int64 = "int64_of_string";;
let token_nativeint ib =
let s = Scanning.token ib in
nativeint_of_string s;;
let token_int32 ib =
let s = Scanning.token ib in
int32_of_string s;;
let token_int64 ib =
let s = Scanning.token ib in
int64_of_string s;;
(* Scanning numbers. *)
let scan_sign max ib =
let c = Scanning.peek_char ib in
match c with
| '+' -> Scanning.store_char ib c max
| '-' -> Scanning.store_char ib c max
| c -> max;;
(* Decimal case is optimized. *)
let rec scan_decimal_digits max ib =
if max = 0 || Scanning.end_of_input ib then max else
match Scanning.peek_char ib with
| '0' .. '9' as c ->
let max = Scanning.store_char ib c max in
scan_decimal_digits max ib
| c -> max;;
(* Other cases uses a predicate argument to scan_digits. *)
let rec scan_digits digitp max ib =
if max = 0 || Scanning.end_of_input ib then max else
match Scanning.peek_char ib with
| c when digitp c ->
let max = Scanning.store_char ib c max in
scan_digits digitp max ib
| _ -> max;;
let scan_binary_digits =
let is_binary = function
| '0' .. '1' -> true
| _ -> false in
scan_digits is_binary;;
let scan_octal_digits =
let is_octal = function
| '0' .. '8' -> true
| _ -> false in
scan_digits is_octal;;
let scan_hexadecimal_digits =
let is_hexa = function
| '0' .. '9' | 'a' .. 'f' -> true
| _ -> false in
scan_digits is_hexa;;
let scan_Hexadecimal_digits =
let is_Hexa = function
| '0' .. '9' | 'A' .. 'F' -> true
| _ -> false in
scan_digits is_Hexa;;
(* Decimal integers. *)
let scan_unsigned_decimal_int max ib =
if max = 0 || Scanning.end_of_input ib then bad_input ib "an int" else
scan_decimal_digits max ib;;
let scan_optionally_signed_decimal_int max ib =
let max = scan_sign max ib in
scan_unsigned_decimal_int max ib;;
(* Scan an unsigned integer that could be given in any (common) basis.
If digits are prefixed by 0b for one of x, X, o, b the number is
assumed to be written respectively in hexadecimal, hexadecimal,
octal, or binary. *)
let scan_unsigned_int max ib =
match Scanning.peek_char ib with
| '0' as c ->
let max = Scanning.store_char ib c max in
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
begin match c with
| 'x' -> scan_hexadecimal_digits (Scanning.store_char ib c max) ib
| 'X' -> scan_Hexadecimal_digits (Scanning.store_char ib c max) ib
| 'o' -> scan_octal_digits (Scanning.store_char ib c max) ib
| 'b' -> scan_binary_digits (Scanning.store_char ib c max) ib
| c -> scan_decimal_digits max ib end
| c -> scan_decimal_digits max ib;;
let scan_optionally_signed_int max ib =
let max = scan_sign max ib in
if max = 0 || Scanning.end_of_input ib then bad_input ib "an int" else
scan_unsigned_int max ib;;
let scan_int c max ib =
match c with
| 'd' -> scan_optionally_signed_decimal_int max ib
| 'i' -> scan_optionally_signed_int max ib
| 'o' -> scan_octal_digits max ib
| 'u' -> scan_unsigned_decimal_int max ib
| 'x' -> scan_hexadecimal_digits max ib
| 'X' -> scan_Hexadecimal_digits max ib
| c -> assert false;;
(* Scanning floating point numbers. *)
let scan_frac_part max ib = scan_unsigned_decimal_int max ib;;
let scan_exp_part max ib =
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
match c with
| 'e' | 'E' as c ->
scan_optionally_signed_int (Scanning.store_char ib c max) ib
| _ -> max;;
let scan_float max ib =
let max = scan_optionally_signed_decimal_int max ib in
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
match c with
| '.' ->
let max = Scanning.store_char ib c max in
let max = scan_frac_part max ib in
scan_exp_part max ib
| c -> scan_exp_part max ib;;
let scan_string stp max ib =
let rec loop max =
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
if stp = [] then
match c with
| ' ' | '\t' | '\n' | '\r' -> max
| c -> loop (Scanning.store_char ib c max) else
if List.mem c stp then max else loop (Scanning.store_char ib c max) in
loop max;;
let scan_String stp max ib =
let rec loop esc max =
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
if stp = [] then
match c with
| '"' (* '"' helping Emacs *) ->
let max = Scanning.store_char ib c max in
if esc then loop false max else max
| '\\' ->
if esc then loop false (Scanning.store_char ib c max) else
loop true max
| c -> loop false (Scanning.store_char ib c max) else
if List.mem c stp then max else loop false (Scanning.store_char ib c max) in
loop true max;;
let scan_char max ib =
if max = 0 || Scanning.end_of_input ib then bad_input ib "a char" else
let c = Scanning.peek_char ib in
Scanning.store_char ib c max;;
let scan_Char max ib =
let rec loop esc max =
if max = 0 || Scanning.end_of_input ib then bad_input ib "a char" else
let c = Scanning.peek_char ib in
match c with
| '\'' ->
let max = Scanning.store_char ib c max in
if esc then loop false max else max
| '\\' ->
if esc then loop false (Scanning.store_char ib c max) else
loop true max
| c -> loop false (Scanning.store_char ib c max) in
loop true max;;
let scan_bool max ib =
let m =
match Scanning.peek_char ib with
| 't' -> 4
| 'f' -> 5
| _ -> 0 in
scan_string [] (min max m) ib;;
type char_set = Pos_set of string | Neg_set of string;;
let read_char_set fmt i =
let lim = String.length fmt - 1 in
let rec find_in_set i j =
if j > lim then bad_format fmt j fmt.[lim - 1] else
match fmt.[j] with
| ']' -> String.sub fmt i (j - i), j
| c -> find_in_set i (j + 1)
and find_set_sign i =
if i > lim then bad_format fmt i fmt.[lim - 1] else
match fmt.[i] with
| '^' -> let set, i = find_set (i + 1) in i, Neg_set set
| _ -> let set, i = find_set i in i, Pos_set set
and find_set i =
if i > lim then bad_format fmt i fmt.[lim - 1] else
match fmt.[i] with
| ']' -> find_in_set i (i + 1)
| c -> find_in_set i i in
find_set_sign i;;
let make_setp stp char_set =
let make_predv set =
let v = Array.make 256 false in
let lim = String.length set - 1 in
let rec loop b i =
if i <= lim then
match set.[i] with
| '-' when b ->
(* if i = 0 then b is false (since the initial call is loop false 0)
hence i >= 1 and the following is safe. *)
let c1 = set.[i - 1] in
let i = i + 1 in
if i > lim then loop false (i - 1) else
let c2 = set.[i] in
for j = int_of_char c1 to int_of_char c2 do v.(j) <- true done;
loop false (i + 1)
| c -> v.(int_of_char set.[i]) <- true; loop true (i + 1) in
loop false 0;
v in
match char_set with
| Pos_set set ->
let v = make_predv set in
List.iter (fun c -> v.(int_of_char c) <- false) stp;
(fun c -> v.(int_of_char c))
| Neg_set set ->
let v = make_predv set in
List.iter (fun c -> v.(int_of_char c) <- true) stp;
(fun c -> not (v.(int_of_char c)));;
let scan_chars_in_char_set stp char_set max ib =
let setp = make_setp stp char_set in
let rec loop max ib =
if max = 0 || Scanning.end_of_input ib then max else
let c = Scanning.peek_char ib in
if setp c then loop (Scanning.store_char ib c max) ib else max in
loop max ib;;
let rec skip_whites ib =
if not (Scanning.end_of_input ib) then
match Scanning.peek_char ib with
| ' ' | '\r' | '\t' | '\n' -> Scanning.next_char ib; skip_whites ib
| _ -> ();;
external string_of_format : ('a, 'b, 'c) format -> string = "%identity";;
(* Main scanning function:
it takes an input buffer, a format and a function.
Then it scans the format and the buffer in parallel to find out
values as specified by the format. When it founds some it applies it
to the function f and continue. *)
let bscanf ib (fmt : ('a, Scanning.scanbuf, 'c) format) f =
let fmt = string_of_format fmt in
let lim = String.length fmt - 1 in
let return v = Obj.magic v () in
let delay f x () = f x in
let stack f = delay (return f) in
let rec scan spc f i =
if i > lim then return f else
match fmt.[i] with
| '%' -> scan_width spc f (i + 1)
| '@' as t ->
let i = i + 1 in
if i > lim then bad_format fmt (i - 1) t else begin
match fmt.[i] with
| fc when Scanning.end_of_input ib -> bad_input_buff ib
| '@' as fc when Scanning.peek_char ib = fc ->
Scanning.next_char ib; scan spc f (i + 1)
| fc when Scanning.peek_char ib = fc ->
Scanning.next_char ib; scan false f (i + 1)
| fc -> bad_input_buff ib end
| ' ' | '\r' | '\t' | '\n' -> skip_whites ib; scan spc f (i + 1)
| fc when Scanning.end_of_input ib -> bad_input_buff ib
| fc when Scanning.peek_char ib = fc ->
Scanning.next_char ib; scan spc f (i + 1)
| fc -> bad_input_buff ib
and scan_width spc f i =
if i > lim then bad_format fmt i '%' else
match fmt.[i] with
| '0' .. '9' as c ->
let rec read_width accu i =
if i > lim then accu, i else
match fmt.[i] with
| '0' .. '9' as c ->
let accu = 10 * accu + (int_of_char c - int_of_char '0') in
read_width accu (i + 1)
| _ -> accu, i in
let max, j = read_width 0 i in
scan_conversion spc max f j
| _ -> scan_conversion spc max_int f i
and scan_conversion spc max f i =
if i > lim then bad_format fmt i fmt.[lim - 1] else
match fmt.[i] with
| 'c' | 'C' as conv ->
let x = if conv = 'c' then scan_char max ib else scan_Char max ib in
scan true (stack f (token_char ib)) (i + 1)
| c ->
if spc then skip_whites ib;
match c with
| fc when Scanning.end_of_input ib -> bad_input_buff ib
| '%' as fc when Scanning.peek_char ib = fc ->
Scanning.next_char ib; scan true f (i + 1)
| '%' as fc -> bad_input_buff ib
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' ->
let x = scan_int c max ib in
scan true (stack f (token_int ib)) (i + 1)
| 'f' | 'g' | 'G' | 'e' | 'E' ->
let x = scan_float max ib in
scan true (stack f (token_float ib)) (i + 1)
| 's' | 'S' as conv ->
let i, stp = scan_stoppers (i + 1) in
let x =
if conv = 's'
then scan_string stp max ib
else scan_String stp max ib in
scan true (stack f (token_string ib)) (i + 1)
| 'b' ->
let x = scan_bool max ib in
scan true (stack f (token_bool ib)) (i + 1)
| '[' ->
let i, char_set = read_char_set fmt (i + 1) in
let i, stp = scan_stoppers (i + 1) in
let x = scan_chars_in_char_set stp char_set max ib in
scan true (stack f (token_string ib)) (i + 1)
| 'l' | 'n' | 'L' as t ->
let i = i + 1 in
if i > lim then bad_format fmt (i - 1) t else begin
match fmt.[i] with
| 'd' | 'i' | 'o' | 'u' | 'x' | 'X' as c ->
let x = scan_int c max ib in
begin match t with
| 'l' -> scan true (stack f (token_int32 ib)) (i + 1)
| 'L' -> scan true (stack f (token_int64 ib)) (i + 1)
| _ -> scan true (stack f (token_nativeint ib)) (i + 1) end
| fc -> bad_format fmt i fc end
| 'N' ->
let x = Scanning.char_count ib in
scan true (stack f x) (i + 1)
| 'r' ->
Obj.magic (fun reader arg ->
let x = reader ib arg in
scan spc (stack f x) (succ i))
| c -> bad_format fmt i c
and scan_stoppers i =
if i > lim then i - 1, [] else
match fmt.[i] with
| '@' when i < lim -> let i = i + 1 in i, [fmt.[i]]
| _ -> i - 1, [] in
Scanning.reset_token ib;
scan true (fun () -> f) 0;;
let fscanf ic = bscanf (Scanning.from_channel ic);;
let scanf fmt = fscanf stdin fmt;;
let sscanf s = bscanf (Scanning.from_string s);;