Merge pull request #2151 from fpottier/improvements

Parser improvements, with a possible bug fix along the way.
master
Gabriel Scherer 2018-11-22 08:23:58 +01:00 committed by GitHub
commit f6837be875
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 29970 additions and 26111 deletions

View File

@ -471,8 +471,9 @@ Working version
### Internal/compiler-libs changes:
- GPR#292: use Menhir as the parser generator for the OCaml parser.
Satellite GPRs: GPR#1844, GPR#1846, GPR#1853, GPR#1850, GPR#1934
(Gabriel Scherer, Nicolás Ojeda Bär, Frédéric Bour and Thomas Refis,
Satellite GPRs: GPR#1844, GPR#1846, GPR#1853, GPR#1850, GPR#1934, GPR#2151
(Gabriel Scherer, Nicolás Ojeda Bär, Frédéric Bour, Thomas Refis
and François Pottier,
review by Nicolás Ojeda Bär, Leo White and David Allsopp)
- GPR#374: use Misc.try_finally for resource cleanup in the compiler

View File

@ -1318,7 +1318,7 @@ beforedepend:: bytecomp/opcodes.ml
# Testing the parser -- see parsing/HACKING.adoc
SOURCE_FILES=$(shell git ls-files '*.ml' '*.mli')
SOURCE_FILES=$(shell git ls-files '*.ml' '*.mli' | grep -v boot/menhir/parser)
AST_FILES=$(addsuffix .ast,$(SOURCE_FILES))

View File

@ -24,6 +24,10 @@
# updated result. Use it to make permanent changes to the compiler
# parser.
#
# - demote-menhir undoes the effect of promote-menhir. The files in
# the boot/ directory that are affected by promote-menhir and are
# under version control are restored to their normal state (HEAD).
#
# - test-menhir builds the parser from parser.mly without storing it
# in the boot/ directory, and only checks that the generated parser
# builds correctly. Use it to quickly check if a parser.mly change
@ -44,7 +48,7 @@ MENHIR ?= menhir
## Menhir compilation flags
MENHIRFLAGS := --explain --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer\
MENHIRFLAGS := --explain --dump --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer \
--lalr --strict --table -lg 1 -la 1 \
--unused-token COMMENT --unused-token DOCSTRING --unused-token EOL\
--unused-token GREATERRBRACKET --fixed-exception
@ -61,13 +65,13 @@ MENHIRFLAGS := --explain --ocamlc "$(CAMLC) $(COMPFLAGS)" --infer\
.PHONY: promote-menhir
promote-menhir: parsing/parser.mly
$(MAKE) import-menhirLib
@ $(MAKE) import-menhirLib
$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# The generated parser.ml may contain lexer directives containing
# the absolute path to Menhir's standard library on the promoter's machine.
# This is benign but will generate pointless churn if another developer
# rebuilds the same grammar (from the same Menhir version).
for f in $(addprefix parser.,ml mli) ; do \
@ for f in $(addprefix parser.,ml mli) ; do \
sed \
's,^#\(.*\)"[^"]*/menhir/standard.mly",#\1"menhir/standard.mly",g' \
parsing/$$f \
@ -83,12 +87,20 @@ promote-menhir: parsing/parser.mly
.PHONY: import-menhirLib
import-menhirLib:
mkdir -p boot/menhir
cp \
@ mkdir -p boot/menhir
@ cp \
$(addprefix `$(MENHIR) --suggest-menhirLib`/menhirLib.,ml mli) \
boot/menhir
## demote-menhir
DEMOTE:=menhirLib.ml menhirLib.mli parser.ml parser.mli
.PHONY: demote-menhir
demote-menhir:
git checkout HEAD -- $(addprefix boot/menhir/,$(DEMOTE))
## test-menhir
# This rule assumes that the `parsing/` sources and its dependencies
@ -133,3 +145,17 @@ depend-menhir:
# define in Makefile, so it can only be invoked from the main Makefile
include .depend.menhir
## interpret-menhir
# This rule runs Menhir in interactive mode.
# The user can enter sentences, such as:
# implementation: TYPE LIDENT EQUAL LIDENT EOF
# and see how Menhir interprets them.
interpret-menhir:
@ echo "Please wait, I am building the LALR automaton..."
@ $(MENHIR) $(MENHIRFLAGS) parsing/parser.mly \
--interpret \
--interpret-show-cst \
--trace \

View File

@ -3513,5 +3513,5 @@ module MakeEngineTable (T : TableFormat.TABLES) = struct
end
end
module StaticVersion = struct
let require_20181026 = ()
let require_20181113 = ()
end

View File

@ -1701,5 +1701,5 @@ module MakeEngineTable
and type nonterminal = int
end
module StaticVersion : sig
val require_20181026 : unit
val require_20181113 : unit
end

File diff suppressed because one or more lines are too long

54
parsing/CONFLICTS.md Normal file
View File

@ -0,0 +1,54 @@
# Conflicts
Some of the conflicts and issues in the grammar are documented here.
## A variant type that lists a single atomic type
Why can't `[t]` be considered a valid atomic type? (A variant type.)
(This is related to MPR #3835.)
A class type that begins with `[t] foo` could continue as follows:
```
[t] foo -> <class_type>
```
Here `t` is understood as a variant type,
and is used as an actual parameter of the parameterized type `'a foo`.
Or it could continue as follows:
```
[t] foo
```
Here `t` is a type (there is no variant type)
and is used as an actual parameter of the class `['a] foo`.
After we have read the closing bracket and are looking ahead at `foo`,
we need to decide which of the above two situations we have. (The first
situation requires a reduction; the second situation requires shifting.)
But we cannot decide yet; we would need to look at the arrow `->` beyond
`foo` in order to decide. In this example LR(2) is required; in general,
`foo` could be replaced with an arbitrary qualified name, so unbounded
lookahead is required.
As a result of this issue, we must abandon the idea that `[t]` could be
a well-formed variant type. In the syntax of atomic types, instead of:
```
atomic_type: LBRACKET row_field RBRACKET
```
we must use the more restricted form:
```
atomic_type: LBRACKET tag_field RBRACKET
```
In other words, we rule out exactly the following:
```
atomic_type: LBRACKET atomic_type RBRACKET
```

20
parsing/VIPs.md Normal file
View File

@ -0,0 +1,20 @@
# VIPs
A VIP is a common syntax error, for which a good error message should be
given.
## Structures versus signatures
Everything that is allowed in a structure but forbidden in a signature,
or vice-versa, is a VIP. For instance, writing:
```
exception A = B
```
is allowed in a structure, but forbidden in a signature. (Here, we might
wish to make the error message depend on the lookahead token; the token
`=` suggests that the user confuses a structure and a signature.)
Similarly, writing `struct` where `sig` is expected, or vice-versa, is
probably a common mistake.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
class ['a, _] foo : object method bar : 'a -> 'a end

View File

@ -0,0 +1,15 @@
(* TEST
flags = "-i"
* setup-ocamlc.byte-build-env
** ocamlc.byte
*** check-ocamlc.byte-output
*)
(* This test is valid OCaml code.
It uses an anonymous type variable as a formal parameter in a class
declaration. This used to be rejected by the parser, even though the
printer (ocamlc -i) could in fact produce it. *)
class ['a, _] foo = object
method bar: 'a -> 'a = fun x -> x
end

View File

@ -0,0 +1,2 @@
File "arrow_ambiguity.ml", line 29, characters 0-0:
Error: Syntax error

View File

@ -0,0 +1,28 @@
(* TEST
ocamlc_byte_exit_status = "2"
* setup-ocamlc.byte-build-env
** ocamlc.byte
*** check-ocamlc.byte-output
*)
(* A potential ambiguity arises because the arrow -> is used
both in the syntax of core types and module types, and
(furthermore) the construction "T with type t = ..." means
that a module type can end with a core type. *)
module type T = sig type t end
(* This is OK *)
module type Foo =
(T with type t = int) -> T
(* This is OK *)
module type Bar =
T with type t = int -> int
(* This is not OK.
Therefore the shift/reduce conflict on MINUSGREATER
must be solved in favor of shifting. This is why
MINUSGREATER is declared right-associative. *)
module type Bar =
T with type t = int -> T

View File

@ -0,0 +1,4 @@
File "constructor_declarations.ml", line 24, characters 2-3:
24 | | A of int
^
Error: Syntax error

View File

@ -0,0 +1,25 @@
(* TEST
ocamlc_byte_exit_status = "2"
* setup-ocamlc.byte-build-env
** ocamlc.byte
*** check-ocamlc.byte-output
*)
(* Allowed. *)
type t =
A of int
| B of bool
(* Allowed. *)
type u =
| A of int
| B of bool
(* Allowed. *)
type v = |
(* Disallowed, but was allowed in 4.07. *)
type w =
|
| A of int
| B of bool

View File

@ -78,11 +78,24 @@ module Manual : sig
(** The comment for the class type my_class_type *)
class type my_class_type = object
(** This is a docstring that OCaml <= 4.07.1 drops.
For some reason, when a class type begins with two docstrings,
it keeps only the second one.
This is fixed by GPR#2151. *)
(** The comment for variable x. *)
val mutable x : int
(** The commend for method m. *)
method m : int -> int
(** This is a docstring that OCaml <= 4.07.1 misplaces.
For some reason, when a class type ends with two docstrings,
it keeps both of them, but exchanges their order.
This is again fixed by GPR#2151. *)
(** Another docstring that OCaml <= 4.07.1 misplaces. *)
end
(** The comment for module Foo *)
@ -230,8 +243,13 @@ module Manual :
end[@@ocaml.doc " The comment for class my_class "]
class type my_class_type =
object
[@@@ocaml.text
" This is a docstring that OCaml <= 4.07.1 drops.\n For some reason, when a class type begins with two docstrings,\n it keeps only the second one.\n This is fixed by GPR#2151. "]
val mutable x : int[@@ocaml.doc " The comment for variable x. "]
method m : int -> int[@@ocaml.doc " The commend for method m. "]
[@@@ocaml.text
" This is a docstring that OCaml <= 4.07.1 misplaces.\n For some reason, when a class type ends with two docstrings,\n it keeps both of them, but exchanges their order.\n This is again fixed by GPR#2151. "]
[@@@ocaml.text " Another docstring that OCaml <= 4.07.1 misplaces. "]
end[@@ocaml.doc " The comment for the class type my_class_type "]
module Foo :
sig
@ -294,8 +312,8 @@ module Manual :
module type my_module_type = sig val x : int end[@@ocaml.doc
" The comment for module type my_module_type. "]
end ;;
Line 128, characters 12-14:
128 | inherit cl
Line 141, characters 12-14:
141 | inherit cl
^^
Error: Unbound class cl
|}]

View File

@ -0,0 +1,113 @@
[
structure_item (hash_ambiguity.ml[8,140+0]..[8,140+28])
Pstr_class
[
class_declaration (hash_ambiguity.ml[8,140+0]..[8,140+28])
pci_virt = Concrete
pci_params =
[
core_type (hash_ambiguity.ml[8,140+7]..[8,140+9])
Ptyp_var a
]
pci_name = "list" (hash_ambiguity.ml[8,140+11]..[8,140+15])
pci_expr =
class_expr (hash_ambiguity.ml[8,140+18]..[8,140+28])
Pcl_structure
class_structure
pattern (hash_ambiguity.ml[8,140+24]..[8,140+24]) ghost
Ppat_any
[]
]
structure_item (hash_ambiguity.ml[9,169+0]..[9,169+27])
Pstr_type Rec
[
type_declaration "t" (hash_ambiguity.ml[9,169+8]..[9,169+9]) (hash_ambiguity.ml[9,169+0]..[9,169+27])
ptype_params =
[
core_type (hash_ambiguity.ml[9,169+5]..[9,169+7])
Ptyp_var a
]
ptype_cstrs =
[]
ptype_kind =
Ptype_abstract
ptype_private = Public
ptype_manifest =
Some
core_type (hash_ambiguity.ml[9,169+12]..[9,169+27])
Ptyp_alias "a"
core_type (hash_ambiguity.ml[9,169+12]..[9,169+21])
Ptyp_class "list" (hash_ambiguity.ml[9,169+17]..[9,169+21])
[
core_type (hash_ambiguity.ml[9,169+12]..[9,169+15])
Ptyp_constr "int" (hash_ambiguity.ml[9,169+12]..[9,169+15])
[]
]
]
structure_item (hash_ambiguity.ml[15,425+0]..[15,425+26])
Pstr_type Rec
[
type_declaration "u" (hash_ambiguity.ml[15,425+8]..[15,425+9]) (hash_ambiguity.ml[15,425+0]..[15,425+26])
ptype_params =
[
core_type (hash_ambiguity.ml[15,425+5]..[15,425+7])
Ptyp_var a
]
ptype_cstrs =
[]
ptype_kind =
Ptype_variant
[
(hash_ambiguity.ml[15,425+12]..[15,425+26])
"A" (hash_ambiguity.ml[15,425+12]..[15,425+13])
[
core_type (hash_ambiguity.ml[15,425+17]..[15,425+26])
Ptyp_class "list" (hash_ambiguity.ml[15,425+22]..[15,425+26])
[
core_type (hash_ambiguity.ml[15,425+17]..[15,425+20])
Ptyp_constr "int" (hash_ambiguity.ml[15,425+17]..[15,425+20])
[]
]
]
None
]
ptype_private = Public
ptype_manifest =
None
]
structure_item (hash_ambiguity.ml[17,453+0]..[17,453+32])
Pstr_type Rec
[
type_declaration "v" (hash_ambiguity.ml[17,453+8]..[17,453+9]) (hash_ambiguity.ml[17,453+0]..[17,453+32])
ptype_params =
[
core_type (hash_ambiguity.ml[17,453+5]..[17,453+7])
Ptyp_var a
]
ptype_cstrs =
[]
ptype_kind =
Ptype_variant
[
(hash_ambiguity.ml[17,453+12]..[17,453+32])
"A" (hash_ambiguity.ml[17,453+12]..[17,453+13])
[
core_type (hash_ambiguity.ml[17,453+17]..[17,453+20])
Ptyp_constr "int" (hash_ambiguity.ml[17,453+17]..[17,453+20])
[]
core_type (hash_ambiguity.ml[17,453+23]..[17,453+32])
Ptyp_class "list" (hash_ambiguity.ml[17,453+28]..[17,453+32])
[
core_type (hash_ambiguity.ml[17,453+23]..[17,453+26])
Ptyp_constr "int" (hash_ambiguity.ml[17,453+23]..[17,453+26])
[]
]
]
None
]
ptype_private = Public
ptype_manifest =
None
]
]

View File

@ -0,0 +1,17 @@
(* TEST
flags = "-stop-after parsing -dparsetree"
* setup-ocamlc.byte-build-env
** ocamlc.byte
*** check-ocamlc.byte-output
*)
class ['a] list = object end
type 'a t = int #list as 'a
(* Here, "int #list" must be understood as a type.
Another interpretation would be to understand just "int"
as a type and view "#list" as a toplevel directive.
A syntax error would then be reported at "as". *)
type 'a u = A of int #list
type 'a v = A of int * int #list

View File

@ -1,8 +1,12 @@
anonymous_class_parameter.ml
arrow_ambiguity.ml
attributes.ml
broken_invariants.ml
constructor_declarations.ml
docstrings.ml
extended_indexoperators.ml
extensions.ml
hash_ambiguity.ml
int_and_float_with_modifier.ml
pr6604_2.ml
pr6604_3.ml

View File

@ -31,4 +31,8 @@ Line 2, characters 24-27:
2 | module type Rejected3 = sig
^^^
This 'sig' might be unmatched
Line 3, characters 7-13:
3 | type nonrec t := int
^^^^^^
Error: Syntax error: nonrec flag not expected.

View File

@ -30,3 +30,7 @@ module type Rejected3 = sig
type t3 := int
and u3 = char
end;;
module type Rejected0 = sig
type nonrec t := int
end;;