{ (***********************************************************************) (* OCamldoc *) (* *) (* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 2001 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0. *) (* *) (***********************************************************************) (* $Id$ *) (** Generation of html code to display OCaml code. *) open Lexing exception Fatal_error let fatal_error msg = prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error type error = | Illegal_character of char | Unterminated_comment | Unterminated_string | Unterminated_string_in_comment | Keyword_as_label of string ;; exception Error of error * int * int let base_escape_strings = [ ("&", "&") ; ("<", "<") ; (">", ">") ; ] let pre_escape_strings = [ (" ", " ") ; ("\n", "
\n") ; ("\t", "        ") ; ] let pre = ref false let fmt = ref Format.str_formatter (** Escape the strings which would clash with html syntax, and some other strings if we want to get a PRE style.*) let escape s = List.fold_left (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc) s (if !pre then base_escape_strings @ pre_escape_strings else base_escape_strings) (** Escape the strings which would clash with html syntax. *) let escape_base s = List.fold_left (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc) s base_escape_strings (** The output functions *) let print ?(esc=true) s = Format.pp_print_string !fmt (if esc then escape s else s) ;; let print_class ?(esc=true) cl s = print ~esc: false (""^ (if esc then escape s else s)^ "") ;; (** The table of keywords with colors *) let create_hashtable size init = let tbl = Hashtbl.create size in List.iter (fun (key, data) -> Hashtbl.add tbl key data) init; tbl (** The function used to return html code for the given comment body. *) let html_of_comment = ref (fun (s : string) -> "Odoc_ocamlhtml.html_of_comment not initialized") let keyword_table = create_hashtable 149 [ "and", "keyword" ; "as", "keyword" ; "assert", "keyword" ; "begin", "keyword" ; "class", "keyword" ; "constraint", "keyword" ; "do", "keyword" ; "done", "keyword" ; "downto", "keyword" ; "else", "keyword" ; "end", "keyword" ; "exception", "keyword" ; "external", "keyword" ; "false", "keyword" ; "for", "keyword" ; "fun", "keyword" ; "function", "keyword" ; "functor", "keyword" ; "if", "keyword" ; "in", "keyword" ; "include", "keyword" ; "inherit", "keyword" ; "initializer", "keyword" ; "lazy", "keyword" ; "let", "keyword" ; "match", "keyword" ; "method", "keyword" ; "module", "keyword" ; "mutable", "keyword" ; "new", "keyword" ; "object", "keyword" ; "of", "keyword" ; "open", "keyword" ; "or", "keyword" ; "parser", "keyword" ; "private", "keyword" ; "rec", "keyword" ; "sig", "keyword" ; "struct", "keyword" ; "then", "keyword" ; "to", "keyword" ; "true", "keyword" ; "try", "keyword" ; "type", "keyword" ; "val", "keyword" ; "virtual", "keyword" ; "when", "keyword" ; "while", "keyword" ; "with", "keyword" ; "mod", "keyword" ; "land", "keyword" ; "lor", "keyword" ; "lxor", "keyword" ; "lsl", "keyword" ; "lsr", "keyword" ; "asr", "keyword" ; ] let kwsign_class = "keywordsign" let constructor_class = "constructor" let comment_class = "comment" let string_class = "string" let code_class = "code" (** To buffer and print comments *) let margin = ref 0 let comment_buffer = Buffer.create 32 let reset_comment_buffer () = Buffer.reset comment_buffer let store_comment_char = Buffer.add_char comment_buffer let add_comment_string = Buffer.add_string comment_buffer let make_margin () = let rec iter n = if n <= 0 then "" else " "^(iter (n-1)) in iter !margin let print_comment () = let s = Buffer.contents comment_buffer in let len = String.length s in let code = if len < 1 then "(*"^(escape s)^"*)" else match s.[0] with '*' -> ( try let html = !html_of_comment (String.sub s 1 (len-1)) in "
"^(make_margin ())^""^ ""^ "(**"^html^"*)"^ "
" with e -> prerr_endline (Printexc.to_string e); "(*"^(escape s)^"*)" ) | _ -> "(*"^(escape s)^"*)" in print ~esc: false code (** To buffer string literals *) let string_buffer = Buffer.create 32 let reset_string_buffer () = Buffer.reset string_buffer let store_string_char = Buffer.add_char string_buffer let get_stored_string () = let s = Buffer.contents string_buffer in s (** To translate escape sequences *) let char_for_backslash = function | 'n' -> '\010' | 'r' -> '\013' | 'b' -> '\008' | 't' -> '\009' | c -> c let char_for_decimal_code lexbuf i = let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in Char.chr(c land 0xFF) let char_for_hexa_code lexbuf i = let c = 16 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) in Char.chr(c land 0xFF) (** To store the position of the beginning of a string and comment *) let string_start_pos = ref 0;; let comment_start_pos = ref [];; let in_comment () = !comment_start_pos <> [];; (** Error report *) open Format let report_error ppf = function | Illegal_character c -> fprintf ppf "Illegal character (%s)" (Char.escaped c) | Unterminated_comment -> fprintf ppf "Comment not terminated" | Unterminated_string -> fprintf ppf "String literal not terminated" | Unterminated_string_in_comment -> fprintf ppf "This comment contains an unterminated string literal" | Keyword_as_label kwd -> fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd ;; } let blank = [' ' '\010' '\013' '\009' '\012'] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9'] let symbolchar = ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] let decimal_literal = ['0'-'9']+ let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+ let oct_literal = '0' ['o' 'O'] ['0'-'7']+ let bin_literal = '0' ['b' 'B'] ['0'-'1']+ let float_literal = ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)? rule token = parse blank { let s = Lexing.lexeme lexbuf in ( match s with " " -> incr margin | "\t" -> margin := !margin + 8 | "\n" -> margin := 0 | _ -> () ); print s; token lexbuf } | "_" { print "_" ; token lexbuf } | "~" { print "~" ; token lexbuf } | "~" lowercase identchar * ':' { let s = Lexing.lexeme lexbuf in let name = String.sub s 1 (String.length s - 2) in if Hashtbl.mem keyword_table name then raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)); print s ; token lexbuf } | "?" { print "?" ; token lexbuf } | "?" lowercase identchar * ':' { let s = Lexing.lexeme lexbuf in let name = String.sub s 1 (String.length s - 2) in if Hashtbl.mem keyword_table name then raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)); print s ; token lexbuf } | lowercase identchar * { let s = Lexing.lexeme lexbuf in try let cl = Hashtbl.find keyword_table s in (print_class cl s ; token lexbuf ) with Not_found -> (print s ; token lexbuf )} | uppercase identchar * { print_class constructor_class (Lexing.lexeme lexbuf) ; token lexbuf } (* No capitalized keywords *) | decimal_literal | hex_literal | oct_literal | bin_literal { print (Lexing.lexeme lexbuf) ; token lexbuf } | float_literal { print (Lexing.lexeme lexbuf) ; token lexbuf } | "\"" { reset_string_buffer(); let string_start = Lexing.lexeme_start lexbuf in string_start_pos := string_start; string lexbuf; lexbuf.Lexing.lex_start_pos <- string_start - lexbuf.Lexing.lex_abs_pos; print_class string_class ("\""^(get_stored_string())^"\"") ; token lexbuf } | "'" [^ '\\' '\''] "'" { print_class string_class (Lexing.lexeme lexbuf) ; token lexbuf } | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" { print_class string_class (Lexing.lexeme lexbuf ) ; token lexbuf } | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" { print_class string_class (Lexing.lexeme lexbuf ) ; token lexbuf } | "(*" { reset_comment_buffer (); comment_start_pos := [Lexing.lexeme_start lexbuf]; comment lexbuf ; print_comment (); token lexbuf } | "(*)" { reset_comment_buffer (); comment_start_pos := [Lexing.lexeme_start lexbuf]; comment lexbuf ; print_comment (); token lexbuf } | "*)" { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1; lexbuf.Lexing.lex_curr_p <- { lexbuf.Lexing.lex_curr_p with pos_cnum = lexbuf.Lexing.lex_curr_p.pos_cnum - 1 } ; print (Lexing.lexeme lexbuf) ; token lexbuf } | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n") (* # linenum ... *) { print (Lexing.lexeme lexbuf); token lexbuf } | "#" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "&&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "`" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "'" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "(" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ")" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "*" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "," { print (Lexing.lexeme lexbuf) ; token lexbuf } | "??" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "->" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "." { print (Lexing.lexeme lexbuf) ; token lexbuf } | ".." { print (Lexing.lexeme lexbuf) ; token lexbuf } | ":" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "::" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ":=" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ":>" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ";" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ";;" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "<" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "<-" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "=" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "[" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "[|" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "[<" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "]" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "{" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "{<" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "|" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "||" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | "|]" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ">" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ">]" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "}" { print (Lexing.lexeme lexbuf) ; token lexbuf } | ">}" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "!=" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "+" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "-" { print (Lexing.lexeme lexbuf) ; token lexbuf } | "-." { print (Lexing.lexeme lexbuf) ; token lexbuf } | "!" symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | ['~' '?'] symbolchar + { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf } | ['=' '<' '>' '|' '&' '$'] symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | ['@' '^'] symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | ['+' '-'] symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | "**" symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | ['*' '/' '%'] symbolchar * { print (Lexing.lexeme lexbuf) ; token lexbuf } | eof { () } | _ { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]), Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) } and comment = parse "(*" { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos; store_comment_char '('; store_comment_char '*'; comment lexbuf; } | "*)" { match !comment_start_pos with | [] -> assert false | [x] -> comment_start_pos := [] | _ :: l -> store_comment_char '*'; store_comment_char ')'; comment_start_pos := l; comment lexbuf; } (* These filters are useless | "\"" { reset_string_buffer(); string_start_pos := Lexing.lexeme_start lexbuf; store_comment_char '"'; begin try string lexbuf; add_comment_string ((get_stored_string()^"\"")) with Error (Unterminated_string, _, _) -> let st = List.hd !comment_start_pos in raise (Error (Unterminated_string_in_comment, st, st + 2)) end; comment lexbuf } | "'" [^ '\\' '\''] "'" { store_comment_char '\''; store_comment_char (Lexing.lexeme_char lexbuf 1); store_comment_char '\''; comment lexbuf } | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'" { store_comment_char '\''; store_comment_char '\\'; store_comment_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)) ; store_comment_char '\''; comment lexbuf } | "\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] { store_comment_char(char_for_decimal_code lexbuf 1); comment lexbuf } | "\\x" ['0'-'9' 'A'-'Z' 'a'-'z' ] ['0'-'9' 'A'-'Z' 'a'-'z'] { store_comment_char(char_for_hexa_code lexbuf 2); string lexbuf } | "''" { store_comment_char '\''; store_comment_char '\''; comment lexbuf } *) | eof { let st = List.hd !comment_start_pos in raise (Error (Unterminated_comment, st, st + 2)); } | _ { store_comment_char(Lexing.lexeme_char lexbuf 0); comment lexbuf } and string = parse '"' { () } | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] * { string lexbuf } | '\\' ['\\' '"' 'n' 't' 'b' 'r' ] { Buffer.add_string string_buffer (Lexing.lexeme lexbuf) ; string lexbuf } | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] { Buffer.add_string string_buffer (Lexing.lexeme lexbuf) ; string lexbuf } | '\\' 'x' ['0'-'9' 'A'-'Z' 'a'-'z' ] ['0'-'9' 'A'-'Z' 'a'-'z'] { Buffer.add_string string_buffer (Lexing.lexeme lexbuf) ; string lexbuf } | eof { raise (Error (Unterminated_string, !string_start_pos, !string_start_pos+1)) } | _ { store_string_char(Lexing.lexeme_char lexbuf 0); string lexbuf } { let html_of_code b ?(with_pre=true) code = let old_pre = !pre in let old_margin = !margin in let old_comment_buffer = Buffer.contents comment_buffer in let old_string_buffer = Buffer.contents string_buffer in let buf = Buffer.create 256 in let old_fmt = !fmt in fmt := Format.formatter_of_buffer buf ; pre := with_pre; margin := 0; let start = "" in let ending = "" in let html = ( try print ~esc: false start ; let lexbuf = Lexing.from_string code in let _ = token lexbuf in print ~esc: false ending ; Format.pp_print_flush !fmt () ; Buffer.contents buf with _ -> (* flush str_formatter because we already output something in it *) Format.pp_print_flush !fmt () ; start^code^ending ) in pre := old_pre; margin := old_margin ; Buffer.reset comment_buffer; Buffer.add_string comment_buffer old_comment_buffer ; Buffer.reset string_buffer; Buffer.add_string string_buffer old_string_buffer ; fmt := old_fmt ; Buffer.add_string b html }