diff --git a/stdlib/format.ml b/stdlib/format.ml index cbbb1a16a..7234b70a7 100644 --- a/stdlib/format.ml +++ b/stdlib/format.ml @@ -705,13 +705,13 @@ let fprintf_out str out ppf format = let print_as = ref None in - let pp_print_as_char ppf c = + let pp_print_as_char c = match !print_as with | None -> pp_print_char ppf c | Some size -> pp_print_as ppf size (String.make 1 c); print_as := None - and pp_print_as_string ppf s = + and pp_print_as_string s = match !print_as with | None -> pp_print_string ppf s | Some size -> @@ -723,6 +723,8 @@ let fprintf_out str out ppf format = Obj.magic (out ()) else match format.[i] with + | '%' -> + Printf.scan_format format i cont_s cont_a cont_t | '@' -> let j = succ i in if j >= limit then invalid_arg ("fprintf: unknown format " ^ format) @@ -762,79 +764,24 @@ let fprintf_out str out ppf format = then invalid_arg ("fprintf: bad print format " ^ format) else print_as := Some size; doprn j - | c -> format_invalid_arg "fprintf: unknown format " c end - | '%' -> - let j = skip_args (succ i) in - begin match format.[j] with - | '%' -> - pp_print_char ppf '%'; - doprn (succ j) - | 's' -> - Obj.magic(fun s -> - if j <= succ i then - pp_print_as_string ppf s - else begin - let p = - try - int_of_string (String.sub format (succ i) (j - i - 1)) - with _ -> - invalid_arg ("fprintf: bad %s format, " ^ format) in - if p > 0 && String.length s < p then begin - pp_print_as_string ppf - (String.make (p - String.length s) ' '); - pp_print_as_string ppf s end else - if p < 0 && String.length s < -p then begin - pp_print_as_string ppf s; - pp_print_as_string ppf - (String.make (-p - String.length s) ' ') end - else pp_print_as_string ppf s - end; - doprn (succ j)) - | 'c' -> - Obj.magic(fun c -> - pp_print_as_char ppf c; - doprn (succ j)) - | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> - Obj.magic(fun n -> - pp_print_as_string ppf - (format_int (String.sub format i (j - i + 1)) n); - doprn (succ j)) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - Obj.magic(fun f -> - pp_print_as_string ppf - (format_float (String.sub format i (j - i + 1)) f); - doprn (succ j)) - | 'b' -> - Obj.magic(fun b -> - pp_print_as_string ppf (string_of_bool b); - doprn (succ j)) - | 'a' -> - if str then - Obj.magic(fun printer arg -> - pp_print_as_string ppf (printer () arg); - doprn (succ j)) - else - Obj.magic(fun printer arg -> - printer ppf arg; - doprn (succ j)) - | 't' -> - if str then - Obj.magic(fun printer -> - pp_print_as_string ppf (printer ()); - doprn (succ j)) - else - Obj.magic(fun printer -> - printer ppf; - doprn (succ j)) - | c -> - format_invalid_arg "fprintf: unknown format " c + | c -> format_invalid_arg "fprintf: unknown format " c end - | c -> pp_print_as_char ppf c; doprn (succ i) + | c -> pp_print_as_char c; doprn (succ i) - and skip_args j = - match format.[j] with - | '0' .. '9' | ' ' | '.' | '-' -> skip_args (succ j) - | c -> j + and cont_s s i = + pp_print_as_string s; doprn i + and cont_a printer arg i = + if str then + pp_print_as_string ((Obj.magic printer) () arg) + else + printer ppf arg; + doprn i + and cont_t printer i = + if str then + pp_print_as_string ((Obj.magic printer) ()) + else + printer ppf; + doprn i and get_int s1 s2 i = if i >= limit then invalid_arg (s1 ^ s2) else diff --git a/stdlib/printf.ml b/stdlib/printf.ml index 7572c8846..8ea17416b 100644 --- a/stdlib/printf.ml +++ b/stdlib/printf.ml @@ -13,178 +13,201 @@ (* $Id$ *) external format_int: string -> int -> string = "format_int" +external format_int32: string -> int32 -> string = "int32_format" +external format_nativeint: string -> nativeint -> string = "nativeint_format" +external format_int64: string -> int64 -> string = "int64_format" external format_float: string -> float -> string = "format_float" -let fprintf outchan format = - let format = (Obj.magic format : string) in - let rec doprn i = - if i >= String.length format then - Obj.magic () - else begin - let c = String.unsafe_get format i in - if c <> '%' then begin - output_char outchan c; - doprn (succ i) - end else begin - let j = skip_args (succ i) in - match String.unsafe_get format j with - '%' -> - output_char outchan '%'; - doprn (succ j) - | 's' -> - Obj.magic(fun s -> - if j <= i+1 then - output_string outchan s - else begin - let p = - try - int_of_string (String.sub format (i+1) (j-i-1)) - with _ -> - invalid_arg "fprintf: bad %s format" in - if p > 0 && String.length s < p then begin - output_string outchan - (String.make (p - String.length s) ' '); - output_string outchan s - end else if p < 0 && String.length s < -p then begin - output_string outchan s; - output_string outchan - (String.make (-p - String.length s) ' ') - end else - output_string outchan s - end; - doprn (succ j)) - | 'c' -> - Obj.magic(fun c -> - output_char outchan c; - doprn (succ j)) +let bad_format fmt pos = + invalid_arg + ("printf: bad format " ^ String.sub fmt pos (String.length fmt - pos)) + +(* Format a string given a %s format, e.g. %40s or %-20s. + To do: ignore other flags (#, +, etc)? *) + +let format_string format s = + let rec parse_format neg i = + if i >= String.length format then (0, neg) else + match String.unsafe_get format i with + | '1'..'9' -> + (int_of_string (String.sub format i (String.length format - i - 1)), + neg) + | '-' -> + parse_format true (succ i) + | _ -> + parse_format neg (succ i) in + let (p, neg) = + try parse_format false 1 with Failure _ -> bad_format format 0 in + if String.length s < p then begin + let res = String.make p ' ' in + if neg + then String.blit s 0 res 0 (String.length s) + else String.blit s 0 res (p - String.length s) (String.length s); + res + end else + s + +(* Extract a %format from [fmt] between [start] and [stop] inclusive. + '*' in the format are replaced by integers taken from the [widths] list. + The function is somewhat optimized for the "no *" case. *) + +let extract_format fmt start stop widths = + match widths with + | [] -> String.sub fmt start (stop - start + 1) + | _ -> + let b = Buffer.create (stop - start + 10) in + let rec fill_format i w = + if i > stop then Buffer.contents b else + match (String.unsafe_get fmt i, w) with + ('*', h::t) -> + Buffer.add_string b (string_of_int h); fill_format (succ i) t + | ('*', []) -> + bad_format fmt start (* should not happen *) + | (c, _) -> + Buffer.add_char b c; fill_format (succ i) w + in fill_format start (List.rev widths) + +(* Decode a %format and act on it. + [fmt] is the printf format style, and [pos] points to a [%] character. + After consuming the appropriate number of arguments and formatting + them, one of the three continuations is called: + [cont_s] for outputting a string (args: string, next pos) + [cont_a] for performing a %a action (args: fn, arg, next pos) + [cont_t] for performing a %t action (args: fn, next pos) + "next pos" is the position in [fmt] of the first character following + the %format in [fmt]. *) + +(* Note: here, rather than test explicitly against [String.length fmt] + to detect the end of the format, we use [String.unsafe_get] and + rely on the fact that we'll get a "nul" character if we access + one past the end of the string. These "nul" characters are then + caught by the [_ -> bad_format] clauses below. + Don't do this at home, kids. *) + +let scan_format fmt pos cont_s cont_a cont_t = + let rec scan_flags widths i = + match String.unsafe_get fmt i with + | '*' -> + Obj.magic(fun w -> scan_flags (w :: widths) (succ i)) + | '0'..'9' | '.' | '#' | '-' | ' ' | '+' -> scan_flags widths (succ i) + | _ -> scan_conv widths i + and scan_conv widths i = + match String.unsafe_get fmt i with + | '%' -> + cont_s "%" (succ i) + | 's' -> + Obj.magic (fun (s: string) -> + if i = succ pos (* optimize for common case %s *) + then cont_s s (succ i) + else cont_s (format_string (extract_format fmt pos i widths) s) + (succ i)) + | 'c' -> + Obj.magic (fun (c: char) -> + cont_s (String.make 1 c) (succ i)) + | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> + Obj.magic(fun (n: int) -> + cont_s (format_int (extract_format fmt pos i widths) n) (succ i)) + | 'f' | 'e' | 'E' | 'g' | 'G' -> + Obj.magic(fun (f: float) -> + cont_s (format_float (extract_format fmt pos i widths) f) (succ i)) + | 'b' -> + Obj.magic(fun (b: bool) -> + cont_s (string_of_bool b) (succ i)) + | 'a' -> + Obj.magic (fun printer arg -> + cont_a printer arg (succ i)) + | 't' -> + Obj.magic (fun printer -> + cont_t printer (succ i)) + | 'l' -> + begin match String.unsafe_get fmt (succ i) with | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> - Obj.magic(fun n -> - output_string outchan - (format_int (String.sub format i (j-i+1)) n); - doprn (succ j)) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - Obj.magic(fun f -> - output_string outchan - (format_float (String.sub format i (j-i+1)) f); - doprn (succ j)) - | 'b' -> - Obj.magic(fun b -> - output_string outchan (string_of_bool b); - doprn (succ j)) - | 'a' -> - Obj.magic(fun printer arg -> - printer outchan arg; - doprn(succ j)) - | 't' -> - Obj.magic(fun printer -> - printer outchan; - doprn(succ j)) - | c -> - invalid_arg ("fprintf: unknown format") - end - end + Obj.magic(fun (n: int32) -> + cont_s (format_int32 (extract_format fmt pos (succ i) widths) n) + (i + 2)) + | _ -> + bad_format fmt pos + end + | 'n' -> + begin match String.unsafe_get fmt (succ i) with + | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> + Obj.magic(fun (n: nativeint) -> + cont_s (format_nativeint + (extract_format fmt pos (succ i) widths) + n) + (i + 2)) + | _ -> + bad_format fmt pos + end + | 'L' -> + begin match String.unsafe_get fmt (succ i) with + | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> + Obj.magic(fun (n: int64) -> + cont_s (format_int64 (extract_format fmt pos (succ i) widths) n) + (i + 2)) + | _ -> + bad_format fmt pos + end + | _ -> + bad_format fmt pos + in scan_flags [] (pos + 1) - and skip_args j = - match String.unsafe_get format j with - '0' .. '9' | ' ' | '.' | '-' -> skip_args (succ j) - | c -> j +(* Application to [fprintf], etc. See also [Format.*printf]. *) +let fprintf chan fmt = + let fmt = (Obj.magic fmt : string) in + let len = String.length fmt in + let rec doprn i = + if i >= len then Obj.magic () else + match String.unsafe_get fmt i with + | '%' -> scan_format fmt i cont_s cont_a cont_t + | c -> output_char chan c; doprn (succ i) + and cont_s s i = + output_string chan s; doprn i + and cont_a printer arg i = + printer chan arg; doprn i + and cont_t printer i = + printer chan; doprn i in doprn 0 let printf fmt = fprintf stdout fmt -and eprintf fmt = fprintf stderr fmt +let eprintf fmt = fprintf stderr fmt -let bprintf_internal tostring buf format = - let format = (Obj.magic format : string) in +let sprintf fmt = + let fmt = (Obj.magic fmt : string) in + let len = String.length fmt in + let dest = Buffer.create (len + 16) in let rec doprn i = - if i >= String.length format then - if tostring then begin - let res = Obj.magic (Buffer.contents buf) in - Buffer.clear buf; (* just in case [bs]printf is partially applied *) - res - end else - Obj.magic () - else begin - let c = String.unsafe_get format i in - if c <> '%' then begin - Buffer.add_char buf c; - doprn (succ i) - end else begin - let j = skip_args (succ i) in - match String.unsafe_get format j with - '%' -> - Buffer.add_char buf '%'; - doprn (succ j) - | 's' -> - Obj.magic(fun s -> - if j <= i+1 then - Buffer.add_string buf s - else begin - let p = - try - int_of_string (String.sub format (i+1) (j-i-1)) - with _ -> - invalid_arg "fprintf: bad %s format" in - if p > 0 && String.length s < p then begin - Buffer.add_string buf - (String.make (p - String.length s) ' '); - Buffer.add_string buf s - end else if p < 0 && String.length s < -p then begin - Buffer.add_string buf s; - Buffer.add_string buf - (String.make (-p - String.length s) ' ') - end else - Buffer.add_string buf s - end; - doprn (succ j)) - | 'c' -> - Obj.magic(fun c -> - Buffer.add_char buf c; - doprn (succ j)) - | 'd' | 'i' | 'o' | 'x' | 'X' | 'u' -> - Obj.magic(fun n -> - Buffer.add_string buf - (format_int (String.sub format i (j-i+1)) n); - doprn (succ j)) - | 'f' | 'e' | 'E' | 'g' | 'G' -> - Obj.magic(fun f -> - Buffer.add_string buf - (format_float (String.sub format i (j-i+1)) f); - doprn (succ j)) - | 'b' -> - Obj.magic(fun b -> - Buffer.add_string buf (string_of_bool b); - doprn (succ j)) - | 'a' -> - if tostring then - Obj.magic(fun printer arg -> - Buffer.add_string buf (printer () arg); - doprn(succ j)) - else - Obj.magic(fun printer arg -> - printer buf arg; - doprn(succ j)) - | 't' -> - if tostring then - Obj.magic(fun printer -> - Buffer.add_string buf (printer ()); - doprn(succ j)) - else - Obj.magic(fun printer -> - printer buf; - doprn(succ j)) - | c -> - invalid_arg ("sprintf: unknown format") - end - end - - and skip_args j = - match String.unsafe_get format j with - '0' .. '9' | ' ' | '.' | '-' -> skip_args (succ j) - | c -> j - + if i >= len then begin + let res = Buffer.contents dest in + Buffer.clear dest; (* just in case sprintf is partially applied *) + Obj.magic res + end else + match String.unsafe_get fmt i with + | '%' -> scan_format fmt i cont_s cont_a cont_t + | c -> Buffer.add_char dest c; doprn (succ i) + and cont_s s i = + Buffer.add_string dest s; doprn i + and cont_a printer arg i = + Buffer.add_string dest (printer () arg); doprn i + and cont_t printer i = + Buffer.add_string dest (printer ()); doprn i + in doprn 0 + +let bprintf dest fmt = + let fmt = (Obj.magic fmt : string) in + let len = String.length fmt in + let rec doprn i = + if i >= len then Obj.magic () else + match String.unsafe_get fmt i with + | '%' -> scan_format fmt i cont_s cont_a cont_t + | c -> Buffer.add_char dest c; doprn (succ i) + and cont_s s i = + Buffer.add_string dest s; doprn i + and cont_a printer arg i = + printer dest arg; doprn i + and cont_t printer i = + printer dest; doprn i in doprn 0 -let bprintf buf fmt = bprintf_internal false buf fmt -let sprintf fmt = bprintf_internal true (Buffer.create 16) fmt diff --git a/stdlib/printf.mli b/stdlib/printf.mli index 2dd6eae6c..809ae04d3 100644 --- a/stdlib/printf.mli +++ b/stdlib/printf.mli @@ -12,7 +12,7 @@ (* $Id$ *) -(** Formatting printing functions. *) +(** Formatted output functions. *) (** [fprintf outchan format arg1 ... argN] formats the arguments [arg1] to [argN] according to the format string [format], @@ -24,7 +24,7 @@ causes conversion and printing of one argument. Conversion specifications consist in the [%] character, followed - by optional flags and field widths, followed by one conversion + by optional flags and field widths, followed by one or two conversion character. The conversion characters and their meanings are: - [d] or [i]: convert an integer argument to signed decimal - [u]: convert an integer argument to unsigned decimal @@ -42,6 +42,12 @@ - [g] or [G]: convert a floating-point argument to decimal notation, in style [f] or [e], [E] (whichever is more compact) - [b]: convert a boolean argument to the string [true] or [false] + - [ld], [li], [lu], [lx], [lX], [lo]: convert an [int32] argument to + the format specified by the second letter (decimal, hexadecimal, etc). + - [nd], [ni], [nu], [nx], [nX], [no]: convert a [nativeint] argument to + the format specified by the second letter. + - [Ld], [Li], [Lu], [Lx], [LX], [Lo]: convert an [int64] argument to + the format specified by the second letter. - [a]: user-defined printer. Takes two arguments and apply the first one to [outchan] (the current output channel) and to the second argument. The first argument must therefore have type @@ -51,8 +57,22 @@ - [t]: same as [%a], but takes only one argument (with type [out_channel -> unit]) and apply it to [outchan]. - [%]: take no argument and output one [%] character. - - Refer to the C library [printf] function for the meaning of - flags and field width specifiers. + + The optional flags include: + - [-]: left-justify the output (default is right justification). + - [+]: for numerical conversions, prefix number with a [+] sign if positive. + - space: for numerical conversions, prefix number with a space if positive. + - [#]: request an alternate formatting style for numbers. + + The field widths are composed of an optional integer literal + indicating the minimal width of the result, possibly followed by + a dot [.] and another integer literal indicating how many digits + follow the decimal point in the [%f], [%e], and [%E] conversions. + For instance, [%6d] prints an integer, prefixing it with spaces to + fill at least 6 characters; and [%.4f] prints a float with 4 + fractional digits. Each or both of the integer literals can also be + specified as a [*], in which case an extra integer argument is taken + to specify the corresponding width or precision. Warning: if too few arguments are provided, for instance because the [printf] function is partially @@ -81,3 +101,11 @@ val sprintf: ('a, unit, string) format -> 'a (see module {!Buffer}). *) val bprintf: Buffer.t -> ('a, Buffer.t, unit) format -> 'a +(*--*) + +(* For system use only. Don't call directly. *) + +val scan_format: + string -> int -> (string -> int -> 'a) + -> ('b ->'c -> int -> 'a) + -> ('e -> int -> 'a) -> 'a