ocaml/lex/parser.mly

192 lines
5.0 KiB
OCaml

/***********************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* */
/* Copyright 1996 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the Q Public License version 1.0. */
/* */
/***********************************************************************/
/* The grammar for lexer definitions */
%{
open Syntax
(* Auxiliaries for the parser. *)
let named_regexps =
(Hashtbl.create 13 : (string, regular_expression) Hashtbl.t)
let regexp_for_string s =
let rec re_string n =
if n >= String.length s then Epsilon
else if succ n = String.length s then
Characters (Cset.singleton (Char.code s.[n]))
else
Sequence
(Characters(Cset.singleton (Char.code s.[n])),
re_string (succ n))
in re_string 0
let rec remove_as = function
| Bind (e,_) -> remove_as e
| Epsilon|Eof|Characters _ as e -> e
| Sequence (e1, e2) -> Sequence (remove_as e1, remove_as e2)
| Alternative (e1, e2) -> Alternative (remove_as e1, remove_as e2)
| Repetition e -> Repetition (remove_as e)
let as_cset = function
| Characters s -> s
| _ -> raise Cset.Bad
%}
%token <string> Tident
%token <int> Tchar
%token <string> Tstring
%token <Syntax.location> Taction
%token Trule Tparse Tparse_shortest Tand Tequal Tend Tor Tunderscore Teof
Tlbracket Trbracket
%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas Tsharp
%right Tas
%left Tor
%nonassoc CONCAT
%nonassoc Tmaybe Tstar Tplus
%left Tsharp
%nonassoc Tident Tchar Tstring Tunderscore Teof Tlbracket Tlparen
%start lexer_definition
%type <Syntax.lexer_definition> lexer_definition
%%
lexer_definition:
header named_regexps Trule definition other_definitions header Tend
{ {header = $1;
entrypoints = $4 :: List.rev $5;
trailer = $6} }
;
header:
Taction
{ $1 }
| /*epsilon*/
{ { start_pos = 0; end_pos = 0; start_line = 1; start_col = 0 } }
;
named_regexps:
named_regexps Tlet Tident Tequal regexp
{ Hashtbl.add named_regexps $3 $5 }
| /*epsilon*/
{ () }
;
other_definitions:
other_definitions Tand definition
{ $3::$1 }
| /*epsilon*/
{ [] }
;
definition:
Tident arguments Tequal Tparse entry
{ {name=$1 ; shortest=false ; args=$2 ; clauses=$5} }
| Tident arguments Tequal Tparse_shortest entry
{ {name=$1 ; shortest=true ; args=$2 ; clauses=$5} }
;
arguments:
Tident arguments { $1::$2 }
| /*epsilon*/ { [] }
;
entry:
case rest_of_entry
{ $1::List.rev $2 }
| Tor case rest_of_entry
{ $2::List.rev $3 }
;
rest_of_entry:
rest_of_entry Tor case
{ $3::$1 }
|
{ [] }
;
case:
regexp Taction
{ ($1,$2) }
;
regexp:
Tunderscore
{ Characters Cset.all_chars }
| Teof
{ Eof }
| Tchar
{ Characters (Cset.singleton $1) }
| Tstring
{ regexp_for_string $1 }
| Tlbracket char_class Trbracket
{ Characters $2 }
| regexp Tstar
{ Repetition $1 }
| regexp Tmaybe
{ Alternative(Epsilon, $1) }
| regexp Tplus
{ Sequence(Repetition (remove_as $1), $1) }
| regexp Tsharp regexp
{
let s1 = as_cset $1
and s2 = as_cset $3 in
Characters (Cset.diff s1 s2)
}
| regexp Tor regexp
{ Alternative($1,$3) }
| regexp regexp %prec CONCAT
{ Sequence($1,$2) }
| Tlparen regexp Trparen
{ $2 }
| Tident
{ try
Hashtbl.find named_regexps $1
with Not_found ->
let p = Parsing.symbol_start_pos () in
Printf.eprintf "File \"%s\", line %d, character %d:\n\
Reference to unbound regexp name `%s'.\n"
p.Lexing.pos_fname p.Lexing.pos_lnum
(p.Lexing.pos_cnum - p.Lexing.pos_bol)
$1;
exit 2 }
| regexp Tas ident
{let p1 = Parsing.rhs_start_pos 3
and p2 = Parsing.rhs_end_pos 3 in
let p = {
start_pos = p1.Lexing.pos_cnum ;
end_pos = p2.Lexing.pos_cnum ;
start_line = p1.Lexing.pos_lnum ;
start_col = p1.Lexing.pos_cnum - p1.Lexing.pos_bol ; } in
Bind ($1, ($3, p))}
;
ident:
Tident {$1}
;
char_class:
Tcaret char_class1
{ Cset.complement $2 }
| char_class1
{ $1 }
;
char_class1:
Tchar Tdash Tchar
{ Cset.interval $1 $3 }
| Tchar
{ Cset.singleton $1 }
| char_class1 char_class1 %prec CONCAT
{ Cset.union $1 $2 }
;
%%