From 44fff5c92a700f4abd5ec1191830d2d5699f6e97 Mon Sep 17 00:00:00 2001 From: Leo White Date: Tue, 7 Apr 2020 11:47:59 +0100 Subject: [PATCH] Add ppx-only unary operators --- parsing/lexer.mll | 10 ++- parsing/pprintast.ml | 8 +-- .../tests/parsing/extension_operators.ml | 67 +++++++++++++++++++ typing/env.ml | 12 +++- 4 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 testsuite/tests/parsing/extension_operators.ml diff --git a/parsing/lexer.mll b/parsing/lexer.mll index 6d68b59e3..95339044e 100644 --- a/parsing/lexer.mll +++ b/parsing/lexer.mll @@ -331,10 +331,14 @@ let lowercase_latin1 = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase_latin1 = ['A'-'Z' '\192'-'\214' '\216'-'\222'] let identchar_latin1 = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9'] +(* This should be kept in sync with the [is_identchar] function in [env.ml] *) + let symbolchar = ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] let dotsymbolchar = ['!' '$' '%' '&' '*' '+' '-' '/' ':' '=' '>' '?' '@' '^' '|'] +let symbolchar_or_hash = + symbolchar | '#' let kwdopchar = ['$' '&' '*' '+' '-' '/' '<' '=' '>' '@' '^' '|'] @@ -547,9 +551,9 @@ rule token = parse | "-" { MINUS } | "-." { MINUSDOT } - | "!" symbolchar + as op + | "!" symbolchar_or_hash + as op { PREFIXOP op } - | ['~' '?'] symbolchar + as op + | ['~' '?'] symbolchar_or_hash + as op { PREFIXOP op } | ['=' '<' '>' '|' '&' '$'] symbolchar * as op { INFIXOP0 op } @@ -562,7 +566,7 @@ rule token = parse | '%' { PERCENT } | ['*' '/' '%'] symbolchar * as op { INFIXOP3 op } - | '#' (symbolchar | '#') + as op + | '#' symbolchar_or_hash + as op { HASHOP op } | "let" kwdopchar dotsymbolchar * as op { LETOP op } diff --git a/parsing/pprintast.ml b/parsing/pprintast.ml index 442fd6d73..28f528a8f 100644 --- a/parsing/pprintast.ml +++ b/parsing/pprintast.ml @@ -1210,11 +1210,11 @@ and payload ctxt f = function (expression ctxt) e (item_attributes ctxt) attrs | PStr x -> structure ctxt f x - | PTyp x -> pp f ":"; core_type ctxt f x - | PSig x -> pp f ":"; signature ctxt f x - | PPat (x, None) -> pp f "?"; pattern ctxt f x + | PTyp x -> pp f ":@ "; core_type ctxt f x + | PSig x -> pp f ":@ "; signature ctxt f x + | PPat (x, None) -> pp f "?@ "; pattern ctxt f x | PPat (x, Some e) -> - pp f "?"; pattern ctxt f x; + pp f "?@ "; pattern ctxt f x; pp f " when "; expression ctxt f e (* transform [f = fun g h -> ..] to [f g h = ... ] could be improved *) diff --git a/testsuite/tests/parsing/extension_operators.ml b/testsuite/tests/parsing/extension_operators.ml new file mode 100644 index 000000000..683ec5952 --- /dev/null +++ b/testsuite/tests/parsing/extension_operators.ml @@ -0,0 +1,67 @@ +(* TEST + * expect +*) + +let f o x = o##x;; +[%%expect {| +Line 1, characters 13-15: +1 | let f o x = o##x;; + ^^ +Error: '##' is not a valid value identifier. +|}] + +let f x = !#x +[%%expect {| +Line 1, characters 10-12: +1 | let f x = !#x + ^^ +Error: '!#' is not a valid value identifier. +|}] + +let f x = ?#x +[%%expect {| +Line 1, characters 10-12: +1 | let f x = ?#x + ^^ +Error: '?#' is not a valid value identifier. +|}] + +let f x = ~#x +[%%expect {| +Line 1, characters 10-12: +1 | let f x = ~#x + ^^ +Error: '~#' is not a valid value identifier. +|}] + +let f o x = o#-#x +[%%expect {| +Line 1, characters 13-16: +1 | let f o x = o#-#x + ^^^ +Error: '#-#' is not a valid value identifier. +|}] + +let f x = !-#x +[%%expect {| +Line 1, characters 10-13: +1 | let f x = !-#x + ^^^ +Error: '!-#' is not a valid value identifier. +|}] + +let f x = ?-#x +[%%expect {| +Line 1, characters 10-13: +1 | let f x = ?-#x + ^^^ +Error: '?-#' is not a valid value identifier. +|}] + +let f x = ~-#x +[%%expect {| +Line 1, characters 10-13: +1 | let f x = ~-#x + ^^^ +Error: '~-#' is not a valid value identifier. +|}] diff --git a/typing/env.ml b/typing/env.ml index fd46348db..ee293098c 100644 --- a/typing/env.ml +++ b/typing/env.ml @@ -1479,6 +1479,16 @@ let module_declaration_address env id presence md = | Mp_present -> EnvLazy.create_forced (Aident id) +let is_identchar c = + (* This should be kept in sync with the [identchar_latin1] character class + in [lexer.mll] *) + match c with + | 'A'..'Z' | 'a'..'z' | '_' | '\192'..'\214' + | '\216'..'\246' | '\248'..'\255' | '\'' | '0'..'9' -> + true + | _ -> + false + let rec components_of_module_maker {cm_env; cm_freshening_subst; cm_prefixing_subst; cm_path; cm_addr; cm_mty} : _ result = @@ -1655,7 +1665,7 @@ and check_value_name name loc = (* Note: we could also check here general validity of the identifier, to protect against bad identifiers forged by -pp or -ppx preprocessors. *) - if String.length name > 0 && (name.[0] = '#') then + if String.length name > 0 && not (is_identchar name.[0]) then for i = 1 to String.length name - 1 do if name.[i] = '#' then error (Illegal_value_name(loc, name))