539 lines
17 KiB
OCaml
539 lines
17 KiB
OCaml
|
|
{
|
|
(***********************************************************************)
|
|
(* OCamldoc *)
|
|
(* *)
|
|
(* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 2001 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
(* under the terms of the Q Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(** Generation of html code to display OCaml code. *)
|
|
open Lexing
|
|
|
|
exception Fatal_error
|
|
|
|
let fatal_error msg =
|
|
prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error
|
|
|
|
type error =
|
|
| Illegal_character of char
|
|
| Unterminated_comment
|
|
| Unterminated_string
|
|
| Unterminated_string_in_comment
|
|
| Keyword_as_label of string
|
|
;;
|
|
|
|
exception Error of error * int * int
|
|
|
|
let base_escape_strings = [
|
|
("&", "&") ;
|
|
("<", "<") ;
|
|
(">", ">") ;
|
|
]
|
|
|
|
let pre_escape_strings = [
|
|
(" ", " ") ;
|
|
("\n", "<br>\n") ;
|
|
("\t", " ") ;
|
|
]
|
|
|
|
|
|
let pre = ref false
|
|
let fmt = ref Format.str_formatter
|
|
|
|
(** Escape the strings which would clash with html syntax,
|
|
and some other strings if we want to get a PRE style.*)
|
|
let escape s =
|
|
List.fold_left
|
|
(fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
|
|
s
|
|
(if !pre then base_escape_strings @ pre_escape_strings else base_escape_strings)
|
|
|
|
(** Escape the strings which would clash with html syntax. *)
|
|
let escape_base s =
|
|
List.fold_left
|
|
(fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
|
|
s
|
|
base_escape_strings
|
|
|
|
(** The output functions *)
|
|
|
|
let print ?(esc=true) s =
|
|
Format.pp_print_string !fmt (if esc then escape s else s)
|
|
;;
|
|
|
|
let print_class ?(esc=true) cl s =
|
|
print ~esc: false ("<span class=\""^cl^"\">"^
|
|
(if esc then escape s else s)^
|
|
"</span>")
|
|
;;
|
|
|
|
(** The table of keywords with colors *)
|
|
let create_hashtable size init =
|
|
let tbl = Hashtbl.create size in
|
|
List.iter (fun (key, data) -> Hashtbl.add tbl key data) init;
|
|
tbl
|
|
|
|
(** The function used to return html code for the given comment body. *)
|
|
let html_of_comment = ref
|
|
(fun (s : string) -> "<b>Odoc_ocamlhtml.html_of_comment not initialized</b>")
|
|
|
|
let keyword_table =
|
|
create_hashtable 149 [
|
|
"and", "keyword" ;
|
|
"as", "keyword" ;
|
|
"assert", "keyword" ;
|
|
"begin", "keyword" ;
|
|
"class", "keyword" ;
|
|
"constraint", "keyword" ;
|
|
"do", "keyword" ;
|
|
"done", "keyword" ;
|
|
"downto", "keyword" ;
|
|
"else", "keyword" ;
|
|
"end", "keyword" ;
|
|
"exception", "keyword" ;
|
|
"external", "keyword" ;
|
|
"false", "keyword" ;
|
|
"for", "keyword" ;
|
|
"fun", "keyword" ;
|
|
"function", "keyword" ;
|
|
"functor", "keyword" ;
|
|
"if", "keyword" ;
|
|
"in", "keyword" ;
|
|
"include", "keyword" ;
|
|
"inherit", "keyword" ;
|
|
"initializer", "keyword" ;
|
|
"lazy", "keyword" ;
|
|
"let", "keyword" ;
|
|
"match", "keyword" ;
|
|
"method", "keyword" ;
|
|
"module", "keyword" ;
|
|
"mutable", "keyword" ;
|
|
"new", "keyword" ;
|
|
"object", "keyword" ;
|
|
"of", "keyword" ;
|
|
"open", "keyword" ;
|
|
"or", "keyword" ;
|
|
"parser", "keyword" ;
|
|
"private", "keyword" ;
|
|
"rec", "keyword" ;
|
|
"sig", "keyword" ;
|
|
"struct", "keyword" ;
|
|
"then", "keyword" ;
|
|
"to", "keyword" ;
|
|
"true", "keyword" ;
|
|
"try", "keyword" ;
|
|
"type", "keyword" ;
|
|
"val", "keyword" ;
|
|
"virtual", "keyword" ;
|
|
"when", "keyword" ;
|
|
"while", "keyword" ;
|
|
"with", "keyword" ;
|
|
|
|
"mod", "keyword" ;
|
|
"land", "keyword" ;
|
|
"lor", "keyword" ;
|
|
"lxor", "keyword" ;
|
|
"lsl", "keyword" ;
|
|
"lsr", "keyword" ;
|
|
"asr", "keyword" ;
|
|
]
|
|
|
|
let kwsign_class = "keywordsign"
|
|
let constructor_class = "constructor"
|
|
let comment_class = "comment"
|
|
let string_class = "string"
|
|
let code_class = "code"
|
|
|
|
|
|
(** To buffer and print comments *)
|
|
|
|
|
|
let margin = ref 0
|
|
|
|
let comment_buffer = Buffer.create 32
|
|
let reset_comment_buffer () = Buffer.reset comment_buffer
|
|
let store_comment_char = Buffer.add_char comment_buffer
|
|
|
|
let make_margin () =
|
|
let rec iter n =
|
|
if n <= 0 then ""
|
|
else " "^(iter (n-1))
|
|
in
|
|
iter !margin
|
|
|
|
let print_comment () =
|
|
let s = Buffer.contents comment_buffer in
|
|
let len = String.length s in
|
|
let code =
|
|
if len < 1 then
|
|
"<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
|
|
else
|
|
match s.[0] with
|
|
'*' ->
|
|
(
|
|
try
|
|
let html = !html_of_comment (String.sub s 1 (len-1)) in
|
|
"</code><table><tr><td>"^(make_margin ())^"</td><td>"^
|
|
"<span class=\""^comment_class^"\">"^
|
|
"(**"^html^"*)"^
|
|
"</span></td></tr></table><code class=\""^code_class^"\">"
|
|
with
|
|
e ->
|
|
prerr_endline (Printexc.to_string e);
|
|
"<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
|
|
)
|
|
| _ ->
|
|
"<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
|
|
in
|
|
print ~esc: false code
|
|
|
|
(** To buffer string literals *)
|
|
|
|
let string_buffer = Buffer.create 32
|
|
let reset_string_buffer () = Buffer.reset string_buffer
|
|
let store_string_char = Buffer.add_char string_buffer
|
|
let get_stored_string () =
|
|
let s = Buffer.contents string_buffer in
|
|
String.escaped s
|
|
|
|
(** To translate escape sequences *)
|
|
|
|
let char_for_backslash =
|
|
match Sys.os_type with
|
|
| "Unix" | "Win32" | "Cygwin" ->
|
|
begin function
|
|
| 'n' -> '\010'
|
|
| 'r' -> '\013'
|
|
| 'b' -> '\008'
|
|
| 't' -> '\009'
|
|
| c -> c
|
|
end
|
|
| "MacOS" ->
|
|
begin function
|
|
| 'n' -> '\013'
|
|
| 'r' -> '\010'
|
|
| 'b' -> '\008'
|
|
| 't' -> '\009'
|
|
| c -> c
|
|
end
|
|
| x -> fatal_error "Lexer: unknown system type"
|
|
|
|
let char_for_decimal_code lexbuf i =
|
|
let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
|
|
10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
|
|
(Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
|
|
Char.chr(c land 0xFF)
|
|
|
|
(** To store the position of the beginning of a string and comment *)
|
|
let string_start_pos = ref 0;;
|
|
let comment_start_pos = ref [];;
|
|
let in_comment () = !comment_start_pos <> [];;
|
|
|
|
(** Error report *)
|
|
|
|
open Format
|
|
|
|
let report_error ppf = function
|
|
| Illegal_character c ->
|
|
fprintf ppf "Illegal character (%s)" (Char.escaped c)
|
|
| Unterminated_comment ->
|
|
fprintf ppf "Comment not terminated"
|
|
| Unterminated_string ->
|
|
fprintf ppf "String literal not terminated"
|
|
| Unterminated_string_in_comment ->
|
|
fprintf ppf "This comment contains an unterminated string literal"
|
|
| Keyword_as_label kwd ->
|
|
fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
|
|
;;
|
|
|
|
}
|
|
|
|
let blank = [' ' '\010' '\013' '\009' '\012']
|
|
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
|
|
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
|
|
let identchar =
|
|
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
|
|
let symbolchar =
|
|
['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
|
|
let decimal_literal = ['0'-'9']+
|
|
let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
|
|
let oct_literal = '0' ['o' 'O'] ['0'-'7']+
|
|
let bin_literal = '0' ['b' 'B'] ['0'-'1']+
|
|
let float_literal =
|
|
['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
|
|
|
|
rule token = parse
|
|
blank
|
|
{
|
|
let s = Lexing.lexeme lexbuf in
|
|
(
|
|
match s with
|
|
" " -> incr margin
|
|
| "\t" -> margin := !margin + 8
|
|
| "\n" -> margin := 0
|
|
| _ -> ()
|
|
);
|
|
print s;
|
|
token lexbuf
|
|
}
|
|
| "_"
|
|
{ print "_" ; token lexbuf }
|
|
| "~" { print "~" ; token lexbuf }
|
|
| "~" lowercase identchar * ':'
|
|
{ let s = Lexing.lexeme lexbuf in
|
|
let name = String.sub s 1 (String.length s - 2) in
|
|
if Hashtbl.mem keyword_table name then
|
|
raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
|
|
Lexing.lexeme_end lexbuf));
|
|
print s ; token lexbuf }
|
|
| "?" { print "?" ; token lexbuf }
|
|
| "?" lowercase identchar * ':'
|
|
{ let s = Lexing.lexeme lexbuf in
|
|
let name = String.sub s 1 (String.length s - 2) in
|
|
if Hashtbl.mem keyword_table name then
|
|
raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
|
|
Lexing.lexeme_end lexbuf));
|
|
print s ; token lexbuf }
|
|
| lowercase identchar *
|
|
{ let s = Lexing.lexeme lexbuf in
|
|
try
|
|
let cl = Hashtbl.find keyword_table s in
|
|
(print_class cl s ; token lexbuf )
|
|
with Not_found ->
|
|
(print s ; token lexbuf )}
|
|
| uppercase identchar *
|
|
{ print_class constructor_class (Lexing.lexeme lexbuf) ; token lexbuf } (* No capitalized keywords *)
|
|
| decimal_literal | hex_literal | oct_literal | bin_literal
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| float_literal
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "\""
|
|
{ reset_string_buffer();
|
|
let string_start = Lexing.lexeme_start lexbuf in
|
|
string_start_pos := string_start;
|
|
string lexbuf;
|
|
lexbuf.Lexing.lex_start_pos <-
|
|
string_start - lexbuf.Lexing.lex_abs_pos;
|
|
print_class string_class ("\""^(get_stored_string())^"\"") ;
|
|
token lexbuf }
|
|
| "'" [^ '\\' '\''] "'"
|
|
{ print_class string_class (Lexing.lexeme lexbuf) ;
|
|
token lexbuf }
|
|
| "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
|
|
{ print_class string_class (Lexing.lexeme lexbuf ) ;
|
|
token lexbuf }
|
|
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
|
|
{ print_class string_class (Lexing.lexeme lexbuf ) ;
|
|
token lexbuf }
|
|
| "(*"
|
|
{
|
|
reset_comment_buffer ();
|
|
comment_start_pos := [Lexing.lexeme_start lexbuf];
|
|
comment lexbuf ;
|
|
print_comment ();
|
|
token lexbuf }
|
|
| "(*)"
|
|
{ reset_comment_buffer ();
|
|
comment_start_pos := [Lexing.lexeme_start lexbuf];
|
|
comment lexbuf ;
|
|
print_comment ();
|
|
token lexbuf
|
|
}
|
|
| "*)"
|
|
{ lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
|
|
print (Lexing.lexeme lexbuf) ;
|
|
token lexbuf
|
|
}
|
|
| "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n")
|
|
(* # linenum ... *)
|
|
{
|
|
print (Lexing.lexeme lexbuf);
|
|
token lexbuf
|
|
}
|
|
| "#" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "&&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "`" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "'" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "(" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ")" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "*" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "," { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "??" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "->" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "." { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ".." { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ":" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "::" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ":=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ":>" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ";" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ";;" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "<-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "[" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "[|" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "[<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "{" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "{<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "|" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "||" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "|]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ">" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ">]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ">}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
|
|
| "!=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "+" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "-." { print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
|
|
| "!" symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ['~' '?'] symbolchar +
|
|
{ print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ['=' '<' '>' '|' '&' '$'] symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ['@' '^'] symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ['+' '-'] symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| "**" symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| ['*' '/' '%'] symbolchar *
|
|
{ print (Lexing.lexeme lexbuf) ; token lexbuf }
|
|
| eof { () }
|
|
| _
|
|
{ raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),
|
|
Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }
|
|
|
|
and comment = parse
|
|
"(*"
|
|
{ comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
|
|
store_comment_char '(';
|
|
store_comment_char '*';
|
|
comment lexbuf;
|
|
}
|
|
| "*)"
|
|
{ match !comment_start_pos with
|
|
| [] -> assert false
|
|
| [x] -> comment_start_pos := []
|
|
| _ :: l ->
|
|
store_comment_char '*';
|
|
store_comment_char ')';
|
|
comment_start_pos := l;
|
|
comment lexbuf;
|
|
}
|
|
| "\""
|
|
{ reset_string_buffer();
|
|
string_start_pos := Lexing.lexeme_start lexbuf;
|
|
store_comment_char '"';
|
|
begin try string lexbuf
|
|
with Error (Unterminated_string, _, _) ->
|
|
let st = List.hd !comment_start_pos in
|
|
raise (Error (Unterminated_string_in_comment, st, st + 2))
|
|
end;
|
|
comment lexbuf }
|
|
| "''"
|
|
{
|
|
store_comment_char '\'';
|
|
store_comment_char '\'';
|
|
comment lexbuf }
|
|
| "'" [^ '\\' '\''] "'"
|
|
{
|
|
store_comment_char '\'';
|
|
store_comment_char (Lexing.lexeme_char lexbuf 1);
|
|
store_comment_char '\'';
|
|
comment lexbuf }
|
|
| "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
|
|
{
|
|
store_comment_char '\'';
|
|
store_comment_char '\\';
|
|
store_comment_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)) ;
|
|
store_comment_char '\'';
|
|
comment lexbuf }
|
|
| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
|
|
{
|
|
store_comment_char '\'';
|
|
store_comment_char '\\';
|
|
store_comment_char(char_for_decimal_code lexbuf 1);
|
|
store_comment_char '\'';
|
|
comment lexbuf }
|
|
| eof
|
|
{ let st = List.hd !comment_start_pos in
|
|
raise (Error (Unterminated_comment, st, st + 2));
|
|
}
|
|
| _
|
|
{ store_comment_char(Lexing.lexeme_char lexbuf 0);
|
|
comment lexbuf }
|
|
|
|
and string = parse
|
|
'"'
|
|
{ () }
|
|
| '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
|
|
{ string lexbuf }
|
|
| '\\' ['\\' '"' 'n' 't' 'b' 'r']
|
|
{ store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
|
|
string lexbuf }
|
|
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
|
|
{ store_string_char(char_for_decimal_code lexbuf 1);
|
|
string lexbuf }
|
|
| eof
|
|
{ raise (Error (Unterminated_string,
|
|
!string_start_pos, !string_start_pos+1)) }
|
|
| _
|
|
{ store_string_char(Lexing.lexeme_char lexbuf 0);
|
|
string lexbuf }
|
|
{
|
|
|
|
let html_of_code ?(with_pre=true) code =
|
|
let old_pre = !pre in
|
|
let old_margin = !margin in
|
|
let old_comment_buffer = Buffer.contents comment_buffer in
|
|
let old_string_buffer = Buffer.contents string_buffer in
|
|
let buf = Buffer.create 256 in
|
|
let old_fmt = !fmt in
|
|
fmt := Format.formatter_of_buffer buf ;
|
|
pre := with_pre;
|
|
margin := 0;
|
|
|
|
|
|
let start = "<code class=\""^code_class^"\">" in
|
|
let ending = "</code>" in
|
|
let html =
|
|
(
|
|
try
|
|
print ~esc: false start ;
|
|
let lexbuf = Lexing.from_string code in
|
|
let _ = token lexbuf in
|
|
print ~esc: false ending ;
|
|
Format.pp_print_flush !fmt () ;
|
|
Buffer.contents buf
|
|
with
|
|
_ ->
|
|
(* flush str_formatter because we already output
|
|
something in it *)
|
|
Format.pp_print_flush !fmt () ;
|
|
start^code^ending
|
|
)
|
|
in
|
|
pre := old_pre;
|
|
margin := old_margin ;
|
|
Buffer.reset comment_buffer;
|
|
Buffer.add_string comment_buffer old_comment_buffer ;
|
|
Buffer.reset string_buffer;
|
|
Buffer.add_string string_buffer old_string_buffer ;
|
|
fmt := old_fmt ;
|
|
|
|
html
|
|
|
|
}
|