Merge pull request #2151 from fpottier/improvements
Parser improvements, with a possible bug fix along the way.master
commit
f6837be875
5
Changes
5
Changes
|
@ -471,8 +471,9 @@ Working version
|
|||
### Internal/compiler-libs changes:
|
||||
|
||||
- GPR#292: use Menhir as the parser generator for the OCaml parser.
|
||||
Satellite GPRs: GPR#1844, GPR#1846, GPR#1853, GPR#1850, GPR#1934
|
||||
(Gabriel Scherer, Nicolás Ojeda Bär, Frédéric Bour and Thomas Refis,
|
||||
Satellite GPRs: GPR#1844, GPR#1846, GPR#1853, GPR#1850, GPR#1934, GPR#2151
|
||||
(Gabriel Scherer, Nicolás Ojeda Bär, Frédéric Bour, Thomas Refis
|
||||
and François Pottier,
|
||||
review by Nicolás Ojeda Bär, Leo White and David Allsopp)
|
||||
|
||||
- GPR#374: use Misc.try_finally for resource cleanup in the compiler
|
||||
|
|
2
Makefile
2
Makefile
|
@ -1318,7 +1318,7 @@ beforedepend:: bytecomp/opcodes.ml
|
|||
|
||||
# Testing the parser -- see parsing/HACKING.adoc
|
||||
|
||||
SOURCE_FILES=$(shell git ls-files '*.ml' '*.mli')
|
||||
SOURCE_FILES=$(shell git ls-files '*.ml' '*.mli' | grep -v boot/menhir/parser)
|
||||
|
||||
AST_FILES=$(addsuffix .ast,$(SOURCE_FILES))
|
||||
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
# updated result. Use it to make permanent changes to the compiler
|
||||
# parser.
|
||||
#
|
||||
# - demote-menhir undoes the effect of promote-menhir. The files in
|
||||
# the boot/ directory that are affected by promote-menhir and are
|
||||
# under version control are restored to their normal state (HEAD).
|
||||
#
|
||||
# - test-menhir builds the parser from parser.mly without storing it
|
||||
# in the boot/ directory, and only checks that the generated parser
|
||||
# builds correctly. Use it to quickly check if a parser.mly change
|
||||
|
@ -44,7 +48,7 @@ MENHIR ?= menhir
|
|||
|
||||
## Menhir compilation flags
|
||||
|
||||
MENHIRFLAGS := --explain --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer\
|
||||
MENHIRFLAGS := --explain --dump --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer \
|
||||
--lalr --strict --table -lg 1 -la 1 \
|
||||
--unused-token COMMENT --unused-token DOCSTRING --unused-token EOL\
|
||||
--unused-token GREATERRBRACKET --fixed-exception
|
||||
|
@ -61,13 +65,13 @@ MENHIRFLAGS := --explain --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer\
|
|||
|
||||
.PHONY: promote-menhir
|
||||
promote-menhir: parsing/parser.mly
|
||||
$(MAKE) import-menhirLib
|
||||
@ $(MAKE) import-menhirLib
|
||||
$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
|
||||
# The generated parser.ml may contain lexer directives containing
|
||||
# the absolute path to Menhir's standard library on the promoter's machine.
|
||||
# This is benign but will generate pointless churn if another developer
|
||||
# rebuilds the same grammar (from the same Menhir version).
|
||||
for f in $(addprefix parser.,ml mli) ; do \
|
||||
@ for f in $(addprefix parser.,ml mli) ; do \
|
||||
sed \
|
||||
's,^#\(.*\)"[^"]*/menhir/standard.mly",#\1"menhir/standard.mly",g' \
|
||||
parsing/$$f \
|
||||
|
@ -83,12 +87,20 @@ promote-menhir: parsing/parser.mly
|
|||
|
||||
.PHONY: import-menhirLib
|
||||
import-menhirLib:
|
||||
mkdir -p boot/menhir
|
||||
cp \
|
||||
@ mkdir -p boot/menhir
|
||||
@ cp \
|
||||
$(addprefix `$(MENHIR) --suggest-menhirLib`/menhirLib.,ml mli) \
|
||||
boot/menhir
|
||||
|
||||
|
||||
## demote-menhir
|
||||
|
||||
DEMOTE:=menhirLib.ml menhirLib.mli parser.ml parser.mli
|
||||
|
||||
.PHONY: demote-menhir
|
||||
demote-menhir:
|
||||
git checkout HEAD -- $(addprefix boot/menhir/,$(DEMOTE))
|
||||
|
||||
## test-menhir
|
||||
|
||||
# This rule assumes that the `parsing/` sources and its dependencies
|
||||
|
@ -133,3 +145,17 @@ depend-menhir:
|
|||
# define in Makefile, so it can only be invoked from the main Makefile
|
||||
|
||||
include .depend.menhir
|
||||
|
||||
## interpret-menhir
|
||||
|
||||
# This rule runs Menhir in interactive mode.
|
||||
# The user can enter sentences, such as:
|
||||
# implementation: TYPE LIDENT EQUAL LIDENT EOF
|
||||
# and see how Menhir interprets them.
|
||||
|
||||
interpret-menhir:
|
||||
@ echo "Please wait, I am building the LALR automaton..."
|
||||
@ $(MENHIR) $(MENHIRFLAGS) parsing/parser.mly \
|
||||
--interpret \
|
||||
--interpret-show-cst \
|
||||
--trace \
|
||||
|
|
|
@ -3513,5 +3513,5 @@ module MakeEngineTable (T : TableFormat.TABLES) = struct
|
|||
end
|
||||
end
|
||||
module StaticVersion = struct
|
||||
let require_20181026 = ()
|
||||
let require_20181113 = ()
|
||||
end
|
||||
|
|
|
@ -1701,5 +1701,5 @@ module MakeEngineTable
|
|||
and type nonterminal = int
|
||||
end
|
||||
module StaticVersion : sig
|
||||
val require_20181026 : unit
|
||||
val require_20181113 : unit
|
||||
end
|
||||
|
|
53303
boot/menhir/parser.ml
53303
boot/menhir/parser.ml
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,54 @@
|
|||
# Conflicts
|
||||
|
||||
Some of the conflicts and issues in the grammar are documented here.
|
||||
|
||||
## A variant type that lists a single atomic type
|
||||
|
||||
Why can't `[t]` be considered a valid atomic type? (A variant type.)
|
||||
|
||||
(This is related to MPR #3835.)
|
||||
|
||||
A class type that begins with `[t] foo` could continue as follows:
|
||||
|
||||
```
|
||||
[t] foo -> <class_type>
|
||||
```
|
||||
|
||||
Here `t` is understood as a variant type,
|
||||
and is used as an actual parameter of the parameterized type `'a foo`.
|
||||
|
||||
Or it could continue as follows:
|
||||
|
||||
```
|
||||
[t] foo
|
||||
```
|
||||
|
||||
Here `t` is a type (there is no variant type)
|
||||
and is used as an actual parameter of the class `['a] foo`.
|
||||
|
||||
After we have read the closing bracket and are looking ahead at `foo`,
|
||||
we need to decide which of the above two situations we have. (The first
|
||||
situation requires a reduction; the second situation requires shifting.)
|
||||
But we cannot decide yet; we would need to look at the arrow `->` beyond
|
||||
`foo` in order to decide. In this example LR(2) is required; in general,
|
||||
`foo` could be replaced with an arbitrary qualified name, so unbounded
|
||||
lookahead is required.
|
||||
|
||||
As a result of this issue, we must abandon the idea that `[t]` could be
|
||||
a well-formed variant type. In the syntax of atomic types, instead of:
|
||||
|
||||
```
|
||||
atomic_type: LBRACKET row_field RBRACKET
|
||||
```
|
||||
|
||||
we must use the more restricted form:
|
||||
|
||||
```
|
||||
atomic_type: LBRACKET tag_field RBRACKET
|
||||
```
|
||||
|
||||
In other words, we rule out exactly the following:
|
||||
|
||||
```
|
||||
atomic_type: LBRACKET atomic_type RBRACKET
|
||||
```
|
|
@ -0,0 +1,20 @@
|
|||
# VIPs
|
||||
|
||||
A VIP is a common syntax error, for which a good error message should be
|
||||
given.
|
||||
|
||||
## Structures versus signatures
|
||||
|
||||
Everything that is allowed in a structure but forbidden in a signature,
|
||||
or vice-versa, is a VIP. For instance, writing:
|
||||
|
||||
```
|
||||
exception A = B
|
||||
```
|
||||
|
||||
is allowed in a structure, but forbidden in a signature. (Here, we might
|
||||
wish to make the error message depend on the lookahead token; the token
|
||||
`=` suggests that the user confuses a structure and a signature.)
|
||||
|
||||
Similarly, writing `struct` where `sig` is expected, or vice-versa, is
|
||||
probably a common mistake.
|
2418
parsing/parser.mly
2418
parsing/parser.mly
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
class ['a, _] foo : object method bar : 'a -> 'a end
|
|
@ -0,0 +1,15 @@
|
|||
(* TEST
|
||||
flags = "-i"
|
||||
* setup-ocamlc.byte-build-env
|
||||
** ocamlc.byte
|
||||
*** check-ocamlc.byte-output
|
||||
*)
|
||||
|
||||
(* This test is valid OCaml code.
|
||||
It uses an anonymous type variable as a formal parameter in a class
|
||||
declaration. This used to be rejected by the parser, even though the
|
||||
printer (ocamlc -i) could in fact produce it. *)
|
||||
|
||||
class ['a, _] foo = object
|
||||
method bar: 'a -> 'a = fun x -> x
|
||||
end
|
|
@ -0,0 +1,2 @@
|
|||
File "arrow_ambiguity.ml", line 29, characters 0-0:
|
||||
Error: Syntax error
|
|
@ -0,0 +1,28 @@
|
|||
(* TEST
|
||||
ocamlc_byte_exit_status = "2"
|
||||
* setup-ocamlc.byte-build-env
|
||||
** ocamlc.byte
|
||||
*** check-ocamlc.byte-output
|
||||
*)
|
||||
|
||||
(* A potential ambiguity arises because the arrow -> is used
|
||||
both in the syntax of core types and module types, and
|
||||
(furthermore) the construction "T with type t = ..." means
|
||||
that a module type can end with a core type. *)
|
||||
|
||||
module type T = sig type t end
|
||||
|
||||
(* This is OK *)
|
||||
module type Foo =
|
||||
(T with type t = int) -> T
|
||||
|
||||
(* This is OK *)
|
||||
module type Bar =
|
||||
T with type t = int -> int
|
||||
|
||||
(* This is not OK.
|
||||
Therefore the shift/reduce conflict on MINUSGREATER
|
||||
must be solved in favor of shifting. This is why
|
||||
MINUSGREATER is declared right-associative. *)
|
||||
module type Bar =
|
||||
T with type t = int -> T
|
|
@ -0,0 +1,4 @@
|
|||
File "constructor_declarations.ml", line 24, characters 2-3:
|
||||
24 | | A of int
|
||||
^
|
||||
Error: Syntax error
|
|
@ -0,0 +1,25 @@
|
|||
(* TEST
|
||||
ocamlc_byte_exit_status = "2"
|
||||
* setup-ocamlc.byte-build-env
|
||||
** ocamlc.byte
|
||||
*** check-ocamlc.byte-output
|
||||
*)
|
||||
|
||||
(* Allowed. *)
|
||||
type t =
|
||||
A of int
|
||||
| B of bool
|
||||
|
||||
(* Allowed. *)
|
||||
type u =
|
||||
| A of int
|
||||
| B of bool
|
||||
|
||||
(* Allowed. *)
|
||||
type v = |
|
||||
|
||||
(* Disallowed, but was allowed in 4.07. *)
|
||||
type w =
|
||||
|
|
||||
| A of int
|
||||
| B of bool
|
|
@ -78,11 +78,24 @@ module Manual : sig
|
|||
|
||||
(** The comment for the class type my_class_type *)
|
||||
class type my_class_type = object
|
||||
(** This is a docstring that OCaml <= 4.07.1 drops.
|
||||
For some reason, when a class type begins with two docstrings,
|
||||
it keeps only the second one.
|
||||
This is fixed by GPR#2151. *)
|
||||
|
||||
(** The comment for variable x. *)
|
||||
val mutable x : int
|
||||
|
||||
(** The commend for method m. *)
|
||||
method m : int -> int
|
||||
|
||||
(** This is a docstring that OCaml <= 4.07.1 misplaces.
|
||||
For some reason, when a class type ends with two docstrings,
|
||||
it keeps both of them, but exchanges their order.
|
||||
This is again fixed by GPR#2151. *)
|
||||
|
||||
(** Another docstring that OCaml <= 4.07.1 misplaces. *)
|
||||
|
||||
end
|
||||
|
||||
(** The comment for module Foo *)
|
||||
|
@ -230,8 +243,13 @@ module Manual :
|
|||
end[@@ocaml.doc " The comment for class my_class "]
|
||||
class type my_class_type =
|
||||
object
|
||||
[@@@ocaml.text
|
||||
" This is a docstring that OCaml <= 4.07.1 drops.\n For some reason, when a class type begins with two docstrings,\n it keeps only the second one.\n This is fixed by GPR#2151. "]
|
||||
val mutable x : int[@@ocaml.doc " The comment for variable x. "]
|
||||
method m : int -> int[@@ocaml.doc " The commend for method m. "]
|
||||
[@@@ocaml.text
|
||||
" This is a docstring that OCaml <= 4.07.1 misplaces.\n For some reason, when a class type ends with two docstrings,\n it keeps both of them, but exchanges their order.\n This is again fixed by GPR#2151. "]
|
||||
[@@@ocaml.text " Another docstring that OCaml <= 4.07.1 misplaces. "]
|
||||
end[@@ocaml.doc " The comment for the class type my_class_type "]
|
||||
module Foo :
|
||||
sig
|
||||
|
@ -294,8 +312,8 @@ module Manual :
|
|||
module type my_module_type = sig val x : int end[@@ocaml.doc
|
||||
" The comment for module type my_module_type. "]
|
||||
end ;;
|
||||
Line 128, characters 12-14:
|
||||
128 | inherit cl
|
||||
Line 141, characters 12-14:
|
||||
141 | inherit cl
|
||||
^^
|
||||
Error: Unbound class cl
|
||||
|}]
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
[
|
||||
structure_item (hash_ambiguity.ml[8,140+0]..[8,140+28])
|
||||
Pstr_class
|
||||
[
|
||||
class_declaration (hash_ambiguity.ml[8,140+0]..[8,140+28])
|
||||
pci_virt = Concrete
|
||||
pci_params =
|
||||
[
|
||||
core_type (hash_ambiguity.ml[8,140+7]..[8,140+9])
|
||||
Ptyp_var a
|
||||
]
|
||||
pci_name = "list" (hash_ambiguity.ml[8,140+11]..[8,140+15])
|
||||
pci_expr =
|
||||
class_expr (hash_ambiguity.ml[8,140+18]..[8,140+28])
|
||||
Pcl_structure
|
||||
class_structure
|
||||
pattern (hash_ambiguity.ml[8,140+24]..[8,140+24]) ghost
|
||||
Ppat_any
|
||||
[]
|
||||
]
|
||||
structure_item (hash_ambiguity.ml[9,169+0]..[9,169+27])
|
||||
Pstr_type Rec
|
||||
[
|
||||
type_declaration "t" (hash_ambiguity.ml[9,169+8]..[9,169+9]) (hash_ambiguity.ml[9,169+0]..[9,169+27])
|
||||
ptype_params =
|
||||
[
|
||||
core_type (hash_ambiguity.ml[9,169+5]..[9,169+7])
|
||||
Ptyp_var a
|
||||
]
|
||||
ptype_cstrs =
|
||||
[]
|
||||
ptype_kind =
|
||||
Ptype_abstract
|
||||
ptype_private = Public
|
||||
ptype_manifest =
|
||||
Some
|
||||
core_type (hash_ambiguity.ml[9,169+12]..[9,169+27])
|
||||
Ptyp_alias "a"
|
||||
core_type (hash_ambiguity.ml[9,169+12]..[9,169+21])
|
||||
Ptyp_class "list" (hash_ambiguity.ml[9,169+17]..[9,169+21])
|
||||
[
|
||||
core_type (hash_ambiguity.ml[9,169+12]..[9,169+15])
|
||||
Ptyp_constr "int" (hash_ambiguity.ml[9,169+12]..[9,169+15])
|
||||
[]
|
||||
]
|
||||
]
|
||||
structure_item (hash_ambiguity.ml[15,425+0]..[15,425+26])
|
||||
Pstr_type Rec
|
||||
[
|
||||
type_declaration "u" (hash_ambiguity.ml[15,425+8]..[15,425+9]) (hash_ambiguity.ml[15,425+0]..[15,425+26])
|
||||
ptype_params =
|
||||
[
|
||||
core_type (hash_ambiguity.ml[15,425+5]..[15,425+7])
|
||||
Ptyp_var a
|
||||
]
|
||||
ptype_cstrs =
|
||||
[]
|
||||
ptype_kind =
|
||||
Ptype_variant
|
||||
[
|
||||
(hash_ambiguity.ml[15,425+12]..[15,425+26])
|
||||
"A" (hash_ambiguity.ml[15,425+12]..[15,425+13])
|
||||
[
|
||||
core_type (hash_ambiguity.ml[15,425+17]..[15,425+26])
|
||||
Ptyp_class "list" (hash_ambiguity.ml[15,425+22]..[15,425+26])
|
||||
[
|
||||
core_type (hash_ambiguity.ml[15,425+17]..[15,425+20])
|
||||
Ptyp_constr "int" (hash_ambiguity.ml[15,425+17]..[15,425+20])
|
||||
[]
|
||||
]
|
||||
]
|
||||
None
|
||||
]
|
||||
ptype_private = Public
|
||||
ptype_manifest =
|
||||
None
|
||||
]
|
||||
structure_item (hash_ambiguity.ml[17,453+0]..[17,453+32])
|
||||
Pstr_type Rec
|
||||
[
|
||||
type_declaration "v" (hash_ambiguity.ml[17,453+8]..[17,453+9]) (hash_ambiguity.ml[17,453+0]..[17,453+32])
|
||||
ptype_params =
|
||||
[
|
||||
core_type (hash_ambiguity.ml[17,453+5]..[17,453+7])
|
||||
Ptyp_var a
|
||||
]
|
||||
ptype_cstrs =
|
||||
[]
|
||||
ptype_kind =
|
||||
Ptype_variant
|
||||
[
|
||||
(hash_ambiguity.ml[17,453+12]..[17,453+32])
|
||||
"A" (hash_ambiguity.ml[17,453+12]..[17,453+13])
|
||||
[
|
||||
core_type (hash_ambiguity.ml[17,453+17]..[17,453+20])
|
||||
Ptyp_constr "int" (hash_ambiguity.ml[17,453+17]..[17,453+20])
|
||||
[]
|
||||
core_type (hash_ambiguity.ml[17,453+23]..[17,453+32])
|
||||
Ptyp_class "list" (hash_ambiguity.ml[17,453+28]..[17,453+32])
|
||||
[
|
||||
core_type (hash_ambiguity.ml[17,453+23]..[17,453+26])
|
||||
Ptyp_constr "int" (hash_ambiguity.ml[17,453+23]..[17,453+26])
|
||||
[]
|
||||
]
|
||||
]
|
||||
None
|
||||
]
|
||||
ptype_private = Public
|
||||
ptype_manifest =
|
||||
None
|
||||
]
|
||||
]
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
(* TEST
|
||||
flags = "-stop-after parsing -dparsetree"
|
||||
* setup-ocamlc.byte-build-env
|
||||
** ocamlc.byte
|
||||
*** check-ocamlc.byte-output
|
||||
*)
|
||||
|
||||
class ['a] list = object end
|
||||
type 'a t = int #list as 'a
|
||||
(* Here, "int #list" must be understood as a type.
|
||||
Another interpretation would be to understand just "int"
|
||||
as a type and view "#list" as a toplevel directive.
|
||||
A syntax error would then be reported at "as". *)
|
||||
|
||||
type 'a u = A of int #list
|
||||
|
||||
type 'a v = A of int * int #list
|
|
@ -1,8 +1,12 @@
|
|||
anonymous_class_parameter.ml
|
||||
arrow_ambiguity.ml
|
||||
attributes.ml
|
||||
broken_invariants.ml
|
||||
constructor_declarations.ml
|
||||
docstrings.ml
|
||||
extended_indexoperators.ml
|
||||
extensions.ml
|
||||
hash_ambiguity.ml
|
||||
int_and_float_with_modifier.ml
|
||||
pr6604_2.ml
|
||||
pr6604_3.ml
|
||||
|
|
|
@ -31,4 +31,8 @@ Line 2, characters 24-27:
|
|||
2 | module type Rejected3 = sig
|
||||
^^^
|
||||
This 'sig' might be unmatched
|
||||
Line 3, characters 7-13:
|
||||
3 | type nonrec t := int
|
||||
^^^^^^
|
||||
Error: Syntax error: nonrec flag not expected.
|
||||
|
||||
|
|
|
@ -30,3 +30,7 @@ module type Rejected3 = sig
|
|||
type t3 := int
|
||||
and u3 = char
|
||||
end;;
|
||||
|
||||
module type Rejected0 = sig
|
||||
type nonrec t := int
|
||||
end;;
|
||||
|
|
Loading…
Reference in New Issue