522 lines
12 KiB
OCaml
522 lines
12 KiB
OCaml
|
{
|
|||
|
(***********************************************************************)
|
|||
|
(* OCamldoc *)
|
|||
|
(* *)
|
|||
|
(* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *)
|
|||
|
(* *)
|
|||
|
(* Copyright 2001 Institut National de Recherche en Informatique et *)
|
|||
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|||
|
(* under the terms of the Q Public License version 1.0. *)
|
|||
|
(* *)
|
|||
|
(***********************************************************************)
|
|||
|
|
|||
|
(** The lexer for strings, used to build text structures. *)
|
|||
|
|
|||
|
open Lexing
|
|||
|
open Odoc_text_parser
|
|||
|
|
|||
|
(** Number of newlines seen so far in the text being lexed.
    Reset to 0 by [init] and advanced by [incr_cpts]. *)
let line_number : int ref = ref 0

(** Number of characters lexed since the last newline.
    Reset to 0 by [init] and updated by [incr_cpts]. *)
let char_number : int ref = ref 0
|
|||
|
|
|||
|
(** Scratch buffer shared by the helper functions below. *)
let string_buffer = Buffer.create 32

(** Reset the scratch buffer to empty. *)
let reset_string_buffer () = Buffer.reset string_buffer

(** Append a character to the scratch buffer. *)
let ajout_char_string c = Buffer.add_char string_buffer c

(** Append a string to the scratch buffer. *)
let ajout_string s = Buffer.add_string string_buffer s

(** Return the current contents of the scratch buffer (the buffer itself is
    left untouched). *)
let lecture_string () = Buffer.contents string_buffer
|
|||
|
|
|||
|
|
|||
|
(** Holder for the description string.
    According to the original author, it is initialized when the start of a
    special comment is encountered. *)
let description : string ref = ref ""
|
|||
|
|
|||
|
(** Blank characters (space, CR, TAB, form feed) written as a character class
    in regexp-source form.
    NOTE(review): no use of this string is visible in this file; the lexer
    rules rely on the ocamllex [blank] definition instead -- confirm whether
    it is still needed. *)
let blank = "[ \013\009\012]"
|
|||
|
|
|||
|
|
|||
|
(** Debugging aid: print [s] on standard output, followed by a newline, and
    flush. *)
let print_DEBUG s = print_endline s
|
|||
|
|
|||
|
(** Nesting depth of '[' while lexing the text between the begin_code and
    end_code marks; 0 means we are not inside such a text. It is used to
    handle ']' correctly: only the ']' matching the opening mark ends the
    code. *)
let open_brackets : int ref = ref 0

(** [true] while in verbatim mode, where special expressions are handled as
    plain strings. *)
let verb_mode : bool ref = ref false

(** [true] while in latex mode, where special expressions are handled as
    plain strings. *)
let latex_mode : bool ref = ref false

(** [true] while in a shortcut list, so that the end of the shortcut list is
    handled correctly. *)
let shortcut_list_mode : bool ref = ref false

(** [true] while in an element reference. *)
let ele_ref_mode : bool ref = ref false

(** [true] while in a preformatted code string. *)
let code_pre_mode : bool ref = ref false
|
|||
|
|
|||
|
(** Put the lexer state back to its initial value: every mode flag is
    cleared and every counter (bracket depth, line and character numbers) is
    set to 0. *)
let init () =
  List.iter
    (fun flag -> flag := false)
    [ verb_mode; latex_mode; shortcut_list_mode; ele_ref_mode; code_pre_mode ];
  List.iter
    (fun counter -> counter := 0)
    [ open_brackets; line_number; char_number ]
|
|||
|
|
|||
|
(** Update [line_number] and [char_number] according to the lexeme just
    matched in [lexbuf].
    - Without any newline in the lexeme, [char_number] grows by the length of
      the lexeme.
    - Otherwise [line_number] grows by the number of newlines and
      [char_number] becomes the length of the text following the last
      newline. *)
let incr_cpts lexbuf =
  let lexeme = Lexing.lexeme lexbuf in
  let pieces = Str.split_delim (Str.regexp_string "\n") lexeme in
  (* n pieces means n - 1 newlines; an empty list (which should not occur)
     gives -1 and leaves both counters untouched. *)
  let newlines = List.length pieces - 1 in
  if newlines = 0 then
    char_number := !char_number + String.length lexeme
  else if newlines > 0 then begin
    let after_last_newline = List.nth pieces newlines in
    line_number := !line_number + newlines;
    char_number := String.length after_last_newline
  end
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
(** HTML marks, usable as alternatives to the brace-based special strings.
    Tag names are matched case-insensitively, letter by letter. *)

let html_bold = "<" ['b' 'B'] ">"
let html_end_bold = "</" ['b' 'B'] ">"
let html_italic = "<" ['i' 'I'] ">"
let html_end_italic = "</" ['i' 'I'] ">"
let html_title = "<" ['h' 'H'] ['0'-'9']+ ">"
let html_end_title = "</" ['h' 'H'] ['0'-'9']+ ">"
let html_list = "<" ['u' 'U'] ['l' 'L'] ">"
let html_end_list = "</" ['u' 'U'] ['l' 'L'] ">"
let html_enum = "<" ['o' 'O'] ['l' 'L'] ">"
let html_end_enum = "</" ['o' 'O'] ['l' 'L'] ">"
let html_item = "<" ['l' 'L'] ['i' 'I'] ">"
let html_end_item = "</" ['l' 'L'] ['i' 'I'] ">"
let html_code = "<" ['c' 'C'] ['o' 'O'] ['d' 'D'] ['e' 'E'] ">"
let html_end_code = "</" ['c' 'C'] ['o' 'O'] ['d' 'D'] ['e' 'E'] ">"
let html_center =
  "<" ['c' 'C'] ['e' 'E'] ['n' 'N'] ['t' 'T'] ['e' 'E'] ['r' 'R'] ">"
let html_end_center =
  "</" ['c' 'C'] ['e' 'E'] ['n' 'N'] ['t' 'T'] ['e' 'E'] ['r' 'R'] ">"
let html_left = "<" ['l' 'L'] ['e' 'E'] ['f' 'F'] ['t' 'T'] ">"
let html_end_left = "</" ['l' 'L'] ['e' 'E'] ['f' 'F'] ['t' 'T'] ">"
let html_right = "<" ['r' 'R'] ['i' 'I'] ['g' 'G'] ['h' 'H'] ['t' 'T'] ">"
let html_end_right = "</" ['r' 'R'] ['i' 'I'] ['g' 'G'] ['h' 'H'] ['t' 'T'] ">"
|
|||
|
|
|||
|
|
|||
|
(* Blank characters: space, carriage return, tab and form feed. *)
let blank = [' ' '\r' '\t' '\012']

(* Same as [blank], plus the newline character. *)
let blank_nl = [' ' '\r' '\t' '\012' '\n']

(* A label: a lowercase letter followed by letters, digits or underscores. *)
let label = ['a'-'z'] ['a'-'z' 'A'-'Z' '0'-'9' '_']*
|
|||
|
|
|||
|
(** Special strings. *)

(* Closing mark of a text element: the closing brace or one of the HTML end
   tags listed here. [html_end_code] is not part of this set; it belongs to
   [end_code]. *)
let end =
    "}"
  | html_end_bold | html_end_italic
  | html_end_title
  | html_end_list | html_end_enum | html_end_item
  | html_end_center
|
|||
|
(* Title: an opening brace, a level number, an optional ":label" and a
   blank, or the HTML title tag. *)
let begin_title = ("{" ['0'-'9']+ (":" label)? blank_nl) | html_title

(* Text style marks. *)
let begin_bold = ("{b" blank_nl) | html_bold
let begin_emp = "{e" blank_nl
let begin_center = ("{C" blank_nl) | html_center
let begin_left = "{L" blank_nl
let begin_right = "{R" blank_nl
let begin_italic = ("{i" blank_nl) | html_italic

(* Lists, enumerations and their items. *)
let begin_list = "{ul" | html_list
let begin_enum = "{ol" | html_enum
let begin_item = ("{li" blank_nl) | "{- " | html_item

(* Links and latex. *)
let begin_link = "{{:"
let begin_latex = "{%" blank_nl
let end_latex = "%}"

(* Code, preformatted code and verbatim. *)
let begin_code = "[" | html_code
let end_code = "]" | html_end_code
let begin_code_pre = "{["
let end_code_pre = "]}"
let begin_verb = "{v" blank_nl
let end_verb = blank_nl "v}"

(* Marks whose trailing blank is optional. *)
let begin_ele_ref = "{!" blank_nl?
let begin_superscript = "{^" blank_nl?
let begin_subscript = "{_" blank_nl?

(* Shortcut lists: items start on a new line with "- " or "+ ", possibly
   after blanks; a newline followed by at least one blank line ends them. *)
let shortcut_list_item = '\n' blank* "- "
let shortcut_enum_item = '\n' blank* "+ "
let end_shortcut_list = '\n' (blank* '\n')+
|
|||
|
|
|||
|
(** The main entry point: return the next token of the text.

    Every action except the one for [eof] first calls [incr_cpts] to keep
    [line_number] and [char_number] up to date. A special string is turned
    into its token only when the lexer is not inside a construct where it
    must be kept as plain text (verbatim, latex, preformatted code, code
    between brackets, element reference -- the exact set is tested by each
    action); otherwise the lexeme is returned unchanged in a [Char] token.

    The order of the cases matters: when two patterns match a lexeme of the
    same length, the first one wins. *)
rule main = parse
  | "\\{"
  | "\\}"
  | "\\["
  | "\\]"
      {
        incr_cpts lexbuf ;
        (* Escaped special character: drop the backslash. *)
        let s = Lexing.lexeme lexbuf in
        Char (String.sub s 1 1)
      }

  | end
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) then
          Char (Lexing.lexeme lexbuf)
        else
          (
           (* An end mark also closes a pending element reference. *)
           ele_ref_mode := false ;
           END
          )
      }

  | begin_title
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          let s = Lexing.lexeme lexbuf in
          try
            (* Check whether the "{..." or the html_title mark was used. *)
            if s.[0] = '<' then
              (* "<hN>": the level lies between the two-character prefix and
                 the closing character. *)
              let (n, l) = (2, (String.length s - 3)) in
              let s2 = String.sub s n l in
              Title (int_of_string s2, None)
            else
              (* Brace form: strip the opening brace and the trailing blank,
                 leaving "N" or "N:label". *)
              let (n, l) = (1, (String.length s - 2)) in
              let s2 = String.sub s n l in
              (
               try
                 let i = String.index s2 ':' in
                 let s_n = String.sub s2 0 i in
                 let s_label = String.sub s2 (i+1) (l-i-1) in
                 Title (int_of_string s_n, Some s_label)
               with
                 Not_found ->
                   Title (int_of_string s2, None)
              )
          with
            (* Best effort: a level that cannot be extracted or converted
               (for example a number too large for [int_of_string]) falls
               back to a level 1 title without label. *)
            Failure _ | Invalid_argument _ ->
              Title (1, None)
      }

  | begin_bold
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          BOLD
      }

  | begin_italic
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          ITALIC
      }

  | begin_link
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          LINK
      }

  | begin_emp
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          EMP
      }

  | begin_superscript
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          SUPERSCRIPT
      }

  | begin_subscript
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          SUBSCRIPT
      }

  | begin_center
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          CENTER
      }

  | begin_left
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          LEFT
      }

  | begin_right
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          RIGHT
      }

  | begin_list
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          LIST
      }

  | begin_enum
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          ENUM
      }

  | begin_item
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          ITEM
      }

  | begin_latex
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          (
           latex_mode := true;
           LATEX
          )
      }

  | end_latex
      {
        incr_cpts lexbuf ;
        if !verb_mode || (!open_brackets >= 1) || !code_pre_mode ||
          !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          (
           latex_mode := false;
           END_LATEX
          )
      }

  (* An empty code text is kept as plain text. *)
  | begin_code end_code
      {
        incr_cpts lexbuf ;
        Char (Lexing.lexeme lexbuf)
      }

  | begin_code
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          if !open_brackets <= 0 then
            (
             (* Outermost opening mark: the code text starts here. *)
             open_brackets := 1;
             CODE
            )
          else
            (
             (* Nested opening mark: part of the code, but counted so that
                the matching closing mark does not end the code. *)
             incr open_brackets;
             Char (Lexing.lexeme lexbuf)
            )
      }

  | end_code
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          if !open_brackets > 1 then
            (
             (* Closing mark of a nested opening mark: part of the code.
                NOTE(review): "]" is returned even when the lexeme is the
                HTML end-code tag -- confirm this is intended. *)
             decr open_brackets;
             Char "]"
            )
          else
            (
             open_brackets := 0;
             END_CODE
            )
      }

  (* An empty preformatted code text is kept as plain text. *)
  | begin_code_pre end_code_pre
      {
        incr_cpts lexbuf ;
        Char (Lexing.lexeme lexbuf)
      }

  | begin_code_pre
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          (
           code_pre_mode := true;
           CODE_PRE
          )
      }

  | end_code_pre
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          if !code_pre_mode then
            (
             code_pre_mode := false;
             END_CODE_PRE
            )
          else
            Char (Lexing.lexeme lexbuf)
      }

  (* An empty element reference is kept as plain text. *)
  | begin_ele_ref end
      {
        incr_cpts lexbuf ;
        Char (Lexing.lexeme lexbuf)
      }

  | begin_ele_ref
      {
        incr_cpts lexbuf ;
        if !verb_mode || !latex_mode || !code_pre_mode ||
          (!open_brackets >= 1) then
          Char (Lexing.lexeme lexbuf)
        else
          if not !ele_ref_mode then
            (
             ele_ref_mode := true;
             ELE_REF
            )
          else
            (
             Char (Lexing.lexeme lexbuf)
            )
      }

  | begin_verb
      {
        incr_cpts lexbuf ;
        if !latex_mode || (!open_brackets >= 1) || !code_pre_mode ||
          !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          (
           verb_mode := true;
           VERB
          )
      }

  | end_verb
      {
        incr_cpts lexbuf ;
        if !latex_mode || (!open_brackets >= 1) || !code_pre_mode ||
          !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          (
           verb_mode := false;
           END_VERB
          )
      }

  | shortcut_list_item
      {
        incr_cpts lexbuf ;
        if !shortcut_list_mode then
          (
           SHORTCUT_LIST_ITEM
          )
        else
          (
           (* First item: this also opens the shortcut list. *)
           shortcut_list_mode := true;
           BEGIN_SHORTCUT_LIST_ITEM
          )
      }

  | shortcut_enum_item
      {
        incr_cpts lexbuf ;
        if !shortcut_list_mode then
          SHORTCUT_ENUM_ITEM
        else
          (
           (* First item: this also opens the shortcut enumeration. *)
           shortcut_list_mode := true;
           BEGIN_SHORTCUT_ENUM_ITEM
          )
      }

  | end_shortcut_list
      {
        incr_cpts lexbuf ;
        (* Go back one char to re-use the last '\n', so we can restart
           another shortcut list with a single blank line, and not two.
           The line counter is decremented accordingly, since that '\n'
           will be counted again when it is lexed a second time. *)
        lexbuf.Lexing.lex_abs_pos <- lexbuf.Lexing.lex_abs_pos - 1;
        lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
        lexbuf.Lexing.lex_last_pos <- lexbuf.Lexing.lex_last_pos - 1;
        decr line_number ;
        if !shortcut_list_mode then
          (
           shortcut_list_mode := false;
           END_SHORTCUT_LIST
          )
        else
          BLANK_LINE
      }

  | eof { EOF }

  | "{"
      {
        incr_cpts lexbuf ;
        (* An opening brace that starts none of the special strings above.
           NOTE(review): [verb_mode] is not tested here, so a lone opening
           brace in verbatim text yields ERROR -- confirm this is
           intended. *)
        if !latex_mode || (!open_brackets >= 1) || !code_pre_mode ||
          !ele_ref_mode then
          Char (Lexing.lexeme lexbuf)
        else
          ERROR
      }

  | _
      {
        incr_cpts lexbuf ;
        Char (Lexing.lexeme lexbuf)
      }
|
|||
|
|
|||
|
|