correct fontification for strings and comments

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@11289 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Damien Doligez 2011-11-29 15:18:41 +00:00
parent a3aad303be
commit 07a128aea1
1 changed files with 304 additions and 37 deletions

View File

@ -1,30 +1,5 @@
;(***********************************************************************)
;(* *)
;(* OCaml *)
;(* *)
;(* Jacques Garrigue and Ian T Zimmerman *)
;(* *)
;(* Copyright 1997 Institut National de Recherche en Informatique et *)
;(* en Automatique. All rights reserved. This file is distributed *)
;(* under the terms of the GNU General Public License. *)
;(* *)
;(***********************************************************************)
;; caml-font: font-lock support for OCaml files
;;
;; rewrite and clean-up.
;; Changes:
;; - fontify strings and comments using syntactic font lock
;; - define a `font-lock-syntactic-face-function' to fontify ocamldoc comments
;; - fontify infix operators like mod, land, lsl, etc.
;; - fontify line number directives
;; - fontify "failwith" and "invalid_arg" like "raise"
;; - fontify '\x..' character constants
;; - use the regexp-opt function to build regexps (more readable)
;; - use backquote and comma in sexp (more readable)
;; - drop the `caml-quote-char' variable (I don't use caml-light :))
;; - stop doing weird things with faces
;; now with perfect parsing of comments and strings
(require 'font-lock)
@ -48,9 +23,6 @@
(defconst caml-font-lock-keywords
`(
;character literals
("'\\(.\\|\\\\\\([ntbr\"'\\\\]\\|[0-9]\\{3\\}\\|x[0-9A-Fa-f]\\{2\\}\\)\\)'"
. font-lock-string-face)
;modules and constructors
("`?\\<[A-Z][A-Za-z0-9_']*\\>" . font-lock-function-name-face)
;definition
@ -99,14 +71,298 @@
((looking-at "(\\*\\*[^*]") 'caml-font-doccomment-face)
(t 'font-lock-comment-face)))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; In order to correctly fontify an OCaml buffer, it is necessary to
; lex the buffer to tell what is a comment and what is a string.
; We do this incrementally in a hook
; (font-lock-extend-after-change-region-function), which is called
; whenever the buffer changes. It sets the syntax-table property
; on each beginning and end of chars, strings, and comments.
; This mode handles correctly all the strange cases in the following
; OCaml code.
;
; let l' _ = ();;
; let _' _ = ();;
; let l' = ();;
; let b2_' = ();;
; let a'a' = ();;
; let f2 _ _ = ();;
; let f3 _ _ _ = ();;
; let f' _ _ _ _ _ = ();;
; let hello = ();;
;
; (* ==== easy stuff ==== *)
;
; (* a comment *)
; (* "a string" in a comment *)
; (* "another string *)" in a comment *)
; (* not a string '"' in a comment *)
; "a string";;
; '"';; (* not a string *)
;
; (* ==== hard stuff ==== *)
;
; l'"' not not a string ";;
; _'"' also not not a string";;
; f2 0l'"';; (* not not not a string *)
; f2 0_'"';; (* also not not not a string *)
; f3 0.0l'"' not not not not a string ";;
; f3 0.0_'"';; (* not not not not not a string *)
; f2 0b01_'"';; (* not not not a string *)
; f3 0b2_'"' not not not not a string ";;
; f3 0b02_'"';; (* not not not not not a string *)
; '\'';; (* a char *)
; '
; ';; (* a char *)
; '^M
; ';; (* also a char [replace ^M with one CR character] *)
; a'a';; (* not a char *)
; type '
; a' t = X;; (* also not a char *)
;
; (* ==== far-out stuff ==== *)
;
; f'"'" "*) print_endline "hello";;(* \"" ;;
; (* f'"'" "*) print_endline "hello";;(* \"" ;; *)
(defconst caml-font-ident-re
"[A-Za-z_\300-\326\330-\366\370-\377][A-Za-z_\300-\326\330-\366\370-\377'0-9]*"
)
(defconst caml-font-int-re
"\\(0[xX][0-9A-Fa-f][0-9A-Fa-f_]*\\|0[oO][0-7][0-7_]*\\|0[bB][01][01_]*\\)[lLn]?"
)
; decimal integers are folded into the RE for floats to get longest-match
; without using posix-looking-at
(defconst caml-font-decimal-re
"[0-9][0-9_]*\\([lLn]\\|\\.[0-9_]*\\)?\\([eE][+-]?[0-9][0-9_]*\\)?"
)
; match any ident or numeral token
(defconst caml-font-ident-or-num-re
(concat caml-font-ident-re "\\|" caml-font-int-re "\\|" caml-font-decimal-re)
)
; match any char token
(defconst caml-font-char-re
"'\\(\015\012\\|[^\\']\\|\\(\\\\\\([\\'\"ntbr ]\\|[0-9][0-9][0-9]\\|x[0-9A-Fa-f][0-9A-Fa-f]\\)\\)\\)'"
)
; match a quote followed by a newline
(defconst caml-font-quote-newline-re
"'\\(\015\012\\|[\012\015]\\)"
)
; match any token or sequence of tokens that cannot contain a
; quote, double quote, a start of comment, or a newline
; note: this is only to go faster than one character at a time
(defconst caml-font-other-re
"[^A-Za-z_0-9\012\015\300-\326\330-\366\370-\377'\"(]+"
)
; match any sequence of non-special characters in a comment
; note: this is only to go faster than one character at a time
(defconst caml-font-other-comment-re
"[^(*\"'\012\015]+"
)
; match any sequence of non-special characters in a string
; note: this is only to go faster than one character at a time
(defconst caml-font-other-string-re
"[^\\\"\012\015]"
)
; match a newline
(defconst caml-font-newline-re
"\\(\015\012\\|[\012\015]\\)"
)
; Put the 'caml-font-state property with the given state on the
; character before pos. Return nil if it was already there, t if not.
(defun caml-font-put-state (pos state)
(if (equal state (get-text-property (1- pos) 'caml-font-state))
nil
(put-text-property (1- pos) pos 'caml-font-state state)
t)
)
; Same as looking-at, but erase properties 'caml-font-state and
; 'syntax-table from the matched range
(defun caml-font-looking-at (re)
(let ((result (looking-at re)))
(when result
(remove-text-properties (match-beginning 0) (match-end 0)
'(syntax-table nil caml-font-state nil)))
result)
)
; Annotate the buffer starting at point in state (st . depth)
; Set the 'syntax-table property on beginnings and ends of:
; - strings
; - chars
; - comments
; Also set the 'caml-font-state property on each LF character that is
; not preceded by a single quote. The property gives the state of the
; lexer (nil or t) after reading that character.
; Leave the point at a point where the pre-existing 'caml-font-state
; property is consistent with the new parse, or at the end of the buffer.
; depth is the depth of nested comments at this point
; it must be a non-negative integer
; st can be:
; nil -- we are in the base state
; t -- we are within a string
(defun caml-font-annotate (st depth)
(let ((continue t))
(while (and continue (not (eobp)))
(cond
((and (equal st nil) (= depth 0)) ; base state, outside comment
(cond
((caml-font-looking-at caml-font-ident-or-num-re)
(goto-char (match-end 0)))
((caml-font-looking-at caml-font-char-re)
(put-text-property (point) (1+ (point))
'syntax-table (string-to-syntax "|"))
(put-text-property (1- (match-end 0)) (match-end 0)
'syntax-table (string-to-syntax "|"))
(goto-char (match-end 0)))
((caml-font-looking-at caml-font-quote-newline-re)
(goto-char (match-end 0)))
((caml-font-looking-at "\"")
(put-text-property (point) (1+ (point))
'syntax-table (string-to-syntax "|"))
(goto-char (match-end 0))
(setq st t))
((caml-font-looking-at "(\\*")
(put-text-property (point) (1+ (point))
'syntax-table (string-to-syntax "!"))
(goto-char (match-end 0))
(setq depth 1))
((looking-at caml-font-newline-re)
(goto-char (match-end 0))
(setq continue (caml-font-put-state (match-end 0) '(nil . 0))))
((caml-font-looking-at caml-font-other-re)
(goto-char (match-end 0)))
(t
(remove-text-properties (point) (1+ (point))
'(syntax-table nil caml-font-state nil))
(goto-char (1+ (point))))))
((equal st nil) ; base state inside comment
(cond
((caml-font-looking-at "(\\*")
(goto-char (match-end 0))
(setq depth (1+ depth)))
((caml-font-looking-at "\\*)")
(goto-char (match-end 0))
(setq depth (1- depth))
(when (= depth 0)
(put-text-property (1- (point)) (point)
'syntax-table (string-to-syntax "!"))))
((caml-font-looking-at "\"")
(goto-char (match-end 0))
(setq st t))
((caml-font-looking-at caml-font-char-re)
(goto-char (match-end 0)))
((caml-font-looking-at caml-font-quote-newline-re)
(goto-char (match-end 0)))
((caml-font-looking-at "''")
(goto-char (match-end 0)))
((looking-at caml-font-newline-re)
(goto-char (match-end 0))
(setq continue (caml-font-put-state (match-end 0) (cons nil depth))))
((caml-font-looking-at caml-font-other-comment-re)
(goto-char (match-end 0)))
(t
(remove-text-properties (point) (1+ (point))
'(syntax-table nil caml-font-state nil))
(goto-char (1+ (point))))))
(t ; string state inside or outside a comment
(cond
((caml-font-looking-at "\"")
(when (= depth 0)
(put-text-property (point) (1+ (point))
'syntax-table (string-to-syntax "|")))
(goto-char (1+ (point)))
(setq st nil))
((caml-font-looking-at "\\\\[\"\\]")
(goto-char (match-end 0)))
((looking-at caml-font-newline-re)
(goto-char (match-end 0))
(setq continue (caml-font-put-state (match-end 0) (cons t depth))))
((caml-font-looking-at caml-font-other-string-re)
(goto-char (match-end 0)))
(t
(remove-text-properties (point) (1+ (point))
'(syntax-table nil caml-font-state nil))
(goto-char (1+ (point)))))))))
)
; This is the hook function for font-lock-extend-after-change-function
; It finds the nearest saved state at the left of the changed text,
; calls caml-font-annotate to set the 'caml-font-state and 'syntax-table
; properties, then returns the range that was parsed by caml-font-annotate.
(defun caml-font-extend-after-change (beg end &optional old-len)
(save-excursion
(save-match-data
(let ((caml-font-modified (buffer-modified-p))
start-at
end-at
state)
(remove-text-properties beg end '(syntax-table nil caml-font-state nil))
(setq start-at
(or (and (> beg (point-min))
(get-text-property (1- beg) 'caml-font-state)
beg)
(previous-single-property-change beg 'caml-font-state)
(point-min)))
(setq state (or (and (> start-at (point-min))
(get-text-property (1- start-at) 'caml-font-state))
(cons nil 0)))
(goto-char start-at)
(caml-font-annotate (car state) (cdr state))
(setq end-at (point))
(restore-buffer-modified-p caml-font-modified)
(cons start-at end-at))))
)
; We don't use the normal caml-mode syntax table because it contains an
; approximation of strings and comments that interferes with our
; annotations.
(defconst caml-font-syntax-table
(let ((tbl (make-syntax-table)))
(modify-syntax-entry ?' "w" tbl)
(modify-syntax-entry ?_ "w" tbl)
(modify-syntax-entry ?\" "." tbl)
(modify-syntax-entry '(?\300 . ?\326) "w" tbl)
(modify-syntax-entry '(?\330 . ?\366) "w" tbl)
(modify-syntax-entry '(?\370 . ?\377) "w" tbl)
tbl))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; font-lock commands are similar for caml-mode and inferior-caml-mode
(defun caml-font-set-font-lock ()
(setq parse-sexp-lookup-properties t)
(setq font-lock-defaults
'(caml-font-lock-keywords
nil nil nil nil
(font-lock-syntactic-face-function . caml-font-syntactic-face)))
(font-lock-mode 1))
(list
'caml-font-lock-keywords ; keywords
nil ; keywords-only
nil ; case-fold
nil ; syntax-alist
nil ; syntax-begin
(cons 'font-lock-syntax-table caml-font-syntax-table)
'(font-lock-extend-after-change-region-function
. caml-font-extend-after-change)
'(font-lock-syntactic-face-function . caml-font-syntactic-face)
))
(caml-font-extend-after-change (point-min) (point-max) 0)
(font-lock-mode 1)
)
(add-hook 'caml-mode-hook 'caml-font-set-font-lock)
@ -116,11 +372,22 @@
,@caml-font-lock-keywords))
(defun inferior-caml-set-font-lock ()
(setq parse-sexp-lookup-properties t)
(setq font-lock-defaults
'(inferior-caml-font-lock-keywords
nil nil nil nil
(font-lock-syntactic-face-function . caml-font-syntactic-face)))
(font-lock-mode 1))
(list
'inferior-caml-font-lock-keywords ; keywords
nil ; keywords-only
nil ; case-fold
nil ; syntax-alist
nil ; syntax-begin
(cons 'font-lock-syntax-table caml-font-syntax-table)
'(font-lock-extend-after-change-region-function
. caml-font-extend-after-change)
'(font-lock-syntactic-face-function . caml-font-syntactic-face)
))
(caml-font-extend-after-change (point-min) (point-max) 0)
(font-lock-mode 1)
)
(add-hook 'inferior-caml-mode-hooks 'inferior-caml-set-font-lock)
(provide 'caml-font)