From 63ed31c369566e12d39a731c90fe74a4a553e14f Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 16 Sep 2018 09:27:20 +0100 Subject: [PATCH] improve errors --- parsing/lexer.mli | 2 +- parsing/lexer.mll | 38 +++++++++++-------- testsuite/tests/lexing/escape.ml | 11 ++++++ testsuite/tests/lexing/escape.ocaml.reference | 24 ++++++++++++ testsuite/tests/lexing/ocamltests | 1 + .../tests/lexing/uchar_esc.ocaml.reference | 12 +++--- 6 files changed, 66 insertions(+), 22 deletions(-) create mode 100644 testsuite/tests/lexing/escape.ml create mode 100644 testsuite/tests/lexing/escape.ocaml.reference diff --git a/parsing/lexer.mli b/parsing/lexer.mli index 6ae521eeb..f8831e5e0 100644 --- a/parsing/lexer.mli +++ b/parsing/lexer.mli @@ -26,7 +26,7 @@ val skip_hash_bang: Lexing.lexbuf -> unit type error = | Illegal_character of char - | Illegal_escape of string + | Illegal_escape of string * string option | Unterminated_comment of Location.t | Unterminated_string | Unterminated_string_in_comment of Location.t * Location.t diff --git a/parsing/lexer.mll b/parsing/lexer.mll index e7ae97bd2..5498664d7 100644 --- a/parsing/lexer.mll +++ b/parsing/lexer.mll @@ -22,7 +22,7 @@ open Parser type error = | Illegal_character of char - | Illegal_escape of string + | Illegal_escape of string * string option | Unterminated_comment of Location.t | Unterminated_string | Unterminated_string_in_comment of Location.t * Location.t @@ -157,6 +157,10 @@ let char_for_backslash = function | 't' -> '\009' | c -> c +let illegal_escape lexbuf reason = + let error = Illegal_escape (Lexing.lexeme lexbuf, Some reason) in + raise (Error (error, Location.curr lexbuf)) + let char_for_decimal_code lexbuf i = let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + @@ -164,8 +168,9 @@ let char_for_decimal_code lexbuf i = if (c < 0 || c > 255) then if in_comment () then 'x' - else raise (Error(Illegal_escape (Lexing.lexeme lexbuf), - Location.curr lexbuf)) + else + illegal_escape lexbuf + (Printf.sprintf "%d is outside the range of ASCII characters." c) else Char.chr c let char_for_octal_code lexbuf i = @@ -175,8 +180,9 @@ let char_for_octal_code lexbuf i = if (c < 0 || c > 255) then if in_comment () then 'x' - else raise (Error(Illegal_escape (Lexing.lexeme lexbuf), - Location.curr lexbuf)) + else + illegal_escape lexbuf + (Printf.sprintf "o%o (=%d) is outside the range of ASCII characters." c c) else Char.chr c let char_for_hexadecimal_code lexbuf i = @@ -184,20 +190,19 @@ let char_for_hexadecimal_code lexbuf i = Char.chr byte let uchar_for_uchar_escape lexbuf = - let err e = - raise - (Error (Illegal_escape (Lexing.lexeme lexbuf ^ e), Location.curr lexbuf)) - in let len = Lexing.lexeme_end lexbuf - Lexing.lexeme_start lexbuf in let first = 3 (* skip opening \u{ *) in let last = len - 2 (* skip closing } *) in let digit_count = last - first + 1 in match digit_count > 6 with - | true -> err ", too many digits, expected 1 to 6 hexadecimal digits" + | true -> + illegal_escape lexbuf + "too many digits, expected 1 to 6 hexadecimal digits" | false -> let cp = hex_num_value lexbuf ~first ~last in if Uchar.is_valid cp then Uchar.unsafe_of_int cp else - err (", " ^ Printf.sprintf "%X" cp ^ " is not a Unicode scalar value") + illegal_escape lexbuf + (Printf.sprintf "%X is not a Unicode scalar value" cp) (* recover the name from a LABEL or OPTLABEL token *) @@ -255,9 +260,12 @@ open Format let prepare_error loc = function | Illegal_character c -> Location.errorf ~loc "Illegal character (%s)" (Char.escaped c) - | Illegal_escape s -> + | Illegal_escape (s, explanation) -> Location.errorf ~loc - "Illegal backslash escape in string or character (%s)" s + "Illegal backslash escape in string or character (%s)%t" s + (fun ppf -> match explanation with + | None -> () + | Some expl -> fprintf ppf ": %s" expl) | Unterminated_comment _ -> Location.errorf ~loc "Comment not terminated" | Unterminated_string -> @@ -407,7 +415,7 @@ rule token = parse | "\'\\" _ { let l = Lexing.lexeme lexbuf in let esc = String.sub l 1 (String.length l - 1) in - raise (Error(Illegal_escape esc, Location.curr lexbuf)) + raise (Error(Illegal_escape (esc, None), Location.curr lexbuf)) } | "(*" { let s, loc = with_comment_buffer comment lexbuf in @@ -656,7 +664,7 @@ and string = parse | '\\' _ { if not (in_comment ()) then begin (* Should be an error, but we are very lax. - raise (Error (Illegal_escape (Lexing.lexeme lexbuf), + raise (Error (Illegal_escape (Lexing.lexeme lexbuf, None), Location.curr lexbuf)) *) let loc = Location.curr lexbuf in diff --git a/testsuite/tests/lexing/escape.ml b/testsuite/tests/lexing/escape.ml new file mode 100644 index 000000000..bcb753131 --- /dev/null +++ b/testsuite/tests/lexing/escape.ml @@ -0,0 +1,11 @@ +(* TEST + * toplevel +*) + +(* Errors *) + +let invalid = "\99" ;; +let invalid = "\999" ;; +let invalid = "\o777" ;; +let invalid = "\o77" ;; +let invalid = "\o99" ;; diff --git a/testsuite/tests/lexing/escape.ocaml.reference b/testsuite/tests/lexing/escape.ocaml.reference new file mode 100644 index 000000000..75d63aa00 --- /dev/null +++ b/testsuite/tests/lexing/escape.ocaml.reference @@ -0,0 +1,24 @@ +Line 7, characters 15-17: + let invalid = "\99" ;; + ^^ +Warning 14: illegal backslash escape in string. +val invalid : string = "\\99" +Line 1, characters 15-19: + let invalid = "\999" ;; + ^^^^ +Error: Illegal backslash escape in string or character (\999): 999 is outside the range of ASCII characters. +Line 1, characters 15-20: + let invalid = "\o777" ;; + ^^^^^ +Error: Illegal backslash escape in string or character (\o777): o777 (=511) is outside the range of ASCII characters. +Line 1, characters 15-17: + let invalid = "\o77" ;; + ^^ +Warning 14: illegal backslash escape in string. +val invalid : string = "\\o77" +Line 1, characters 15-17: + let invalid = "\o99" ;; + ^^ +Warning 14: illegal backslash escape in string. +val invalid : string = "\\o99" + diff --git a/testsuite/tests/lexing/ocamltests b/testsuite/tests/lexing/ocamltests index c08e182c8..4b21c4a7e 100644 --- a/testsuite/tests/lexing/ocamltests +++ b/testsuite/tests/lexing/ocamltests @@ -1 +1,2 @@ +escape.ml uchar_esc.ml diff --git a/testsuite/tests/lexing/uchar_esc.ocaml.reference b/testsuite/tests/lexing/uchar_esc.ocaml.reference index e85748644..a847bc2ee 100644 --- a/testsuite/tests/lexing/uchar_esc.ocaml.reference +++ b/testsuite/tests/lexing/uchar_esc.ocaml.reference @@ -1,27 +1,27 @@ Line 5, characters 18-27: let invalid_sv = "\u{0D800}" ;; ^^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{0D800}, D800 is not a Unicode scalar value) +Error: Illegal backslash escape in string or character (\u{0D800}): D800 is not a Unicode scalar value Line 1, characters 18-26: let invalid_sv = "\u{D800}" ;; ^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{D800}, D800 is not a Unicode scalar value) +Error: Illegal backslash escape in string or character (\u{D800}): D800 is not a Unicode scalar value Line 1, characters 18-26: let invalid_sv = "\u{D900}" ;; ^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{D900}, D900 is not a Unicode scalar value) +Error: Illegal backslash escape in string or character (\u{D900}): D900 is not a Unicode scalar value Line 1, characters 18-26: let invalid_sv = "\u{DFFF}" ;; ^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{DFFF}, DFFF is not a Unicode scalar value) +Error: Illegal backslash escape in string or character (\u{DFFF}): DFFF is not a Unicode scalar value Line 1, characters 18-28: let invalid_sv = "\u{110000} ;; ^^^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{110000}, 110000 is not a Unicode scalar value) +Error: Illegal backslash escape in string or character (\u{110000}): 110000 is not a Unicode scalar value Line 2, characters 23-35: let too_many_digits = "\u{01234567}" ;; ^^^^^^^^^^^^ -Error: Illegal backslash escape in string or character (\u{01234567}, too many digits, expected 1 to 6 hexadecimal digits) +Error: Illegal backslash escape in string or character (\u{01234567}): too many digits, expected 1 to 6 hexadecimal digits Line 1, characters 21-23: let no_hex_digits = "\u{}" ;; ^^