Miniml: first try

This commit is contained in:
Ekdohibs 2019-01-15 23:53:45 +01:00
parent f8f186f9ca
commit 5120d27792
6 changed files with 524 additions and 0 deletions

41
miniml/compiler/Makefile Normal file
View File

@ -0,0 +1,41 @@
OBJS=lexer.cmo parser.cmo compile.cmo driver.cmo
GENERATED=lexer.ml parser.ml parser.mli
FLAGS=-annot -g
MENHIR_FLAGS=-v --infer --ocamlc "ocamlc $(FLAGS)"
OCAML=ocamlc
all: miniml
.PHONY: miniml
miniml: $(OBJS)
$(OCAML) $(FLAGS) -o $@ $(OBJS)
.SUFFIXES: .mli .ml .cmi .cmo .mll .mly
.mli.cmi:
$(OCAML) $(FLAGS) -c $<
.ml.cmo:
$(OCAML) $(FLAGS) -c $<
.mll.ml:
ocamllex $<
.mly.ml:
menhir $(MENHIR_FLAGS) $<
.mly.mli:
menhir $(MENHIR_FLAGS) $<
parser.mly: ast.cmi
clean:
rm -f *.cm[iox] *.o *.annot *~ miniml $(GENERATED)
rm -f parser.output parser.automaton parser.conflicts
rm -f .depend
.depend depend:$(GENERATED)
rm -f .depend
ocamldep *.ml *.mli > .depend
include .depend

28
miniml/compiler/ast.mli Normal file
View File

@ -0,0 +1,28 @@
type pattern =
| PVar of string
| PConstructor of string * string list
type typerepr =
| ISum of (string * bool) list
| IRecord of string list
type constant =
| CString of string
| CUnit
type expr =
| EVar of string
| EConstant of constant
| EConstr of string * expr list
| EGetfield of expr * string
| ERecord of (string * expr) list
| ERecordwith of expr * (string * expr) list
| EApply of string * expr list
| EIf of expr * expr * expr
| EChain of expr * expr
| EMatch of expr * (pattern * expr) list
| ELet of pattern * expr * expr
type definition =
| MLet of string * string list * expr
| MTypedef of string * typerepr

207
miniml/compiler/compile.ml Normal file
View File

@ -0,0 +1,207 @@
open Ast
module SMap = Map.Make(String)
let list_max = List.fold_left max 0
let pattern_vars = function
| PVar v -> if v = "_" then 0 else 1
| PConstructor (_, l) -> List.fold_left (+) 0 (List.map (fun v -> if v = "_" then 0 else 1) l)
let rec expr_tempvars = function
| EVar _ -> 0
| EConstant _ -> 0
| EConstr (_, l) -> if l = [] then 0 else 1 + list_max (List.map expr_tempvars l)
| EGetfield (e, _) -> expr_tempvars e
| ERecord l -> 1 + list_max (List.map (fun (_, e) -> expr_tempvars e) l)
| ERecordwith (e, l) ->
max (expr_tempvars e) (1 + list_max (List.map (fun (_, e) -> expr_tempvars e) l))
| EApply (_, l) ->
max (List.length l) (list_max (List.mapi (fun i e -> i + expr_tempvars e) l))
| EIf (e1, e2, e3) -> max (expr_tempvars e1) (max (expr_tempvars e2) (expr_tempvars e3))
| EChain (e1, e2) -> max (expr_tempvars e1) (expr_tempvars e2)
| EMatch (e, l) ->
max (expr_tempvars e) (list_max (List.map (fun (p, e) -> pattern_vars p + expr_tempvars e) l))
| ELet (p, e1, e2) ->
max (expr_tempvars e1) (pattern_vars p + expr_tempvars e2)
let pp_sep_string x ff () = Format.fprintf ff "%s" x
let pp_sep_comma = pp_sep_string ", "
let print_decl ff = function
| MLet (name, args, _) ->
if args = [] then
Format.fprintf ff "value %s;@." name
else
Format.fprintf ff "value %s(%a);@." name
(Format.pp_print_list
~pp_sep:pp_sep_comma
(fun ff x -> Format.fprintf ff "value %s" x)) args
| MTypedef (name, ISum l) ->
let c1 = ref 0 in
let c2 = ref 0 in
List.iter (fun (n, b) ->
let c = if b then c1 else c2 in
Format.fprintf ff "#define tag__%s %d@." n !c; incr c) l;
Format.fprintf ff "@."
| MTypedef (name, IRecord l) ->
List.iteri (fun i n -> Format.fprintf ff "#define field__%s %d@." n i) l;
Format.fprintf ff "@."
let rec print_tempvars ff tv =
if tv > 0 then begin
print_tempvars ff (tv - 1);
Format.fprintf ff "CAMLlocal1(tmp__%d);" (tv - 1)
end
let env_get env v =
try "tmp__" ^ (string_of_int (SMap.find v env)) with Not_found -> v
let rec range a b = if a >= b then [] else a :: range (a + 1) b
let lref = ref 0
let gen_label () =
incr lref; "label" ^ string_of_int !lref
let rec split_pattern_matching = function
| [] -> [], [], None
| (PVar v, e) :: _ -> [], [], Some (v, e)
| (PConstructor (c, l), e) :: r ->
let a, b, v = split_pattern_matching r in
if l = [] then (c, e) :: a, b, v else a, (c, l, e) :: b, v
let fun_env = SMap.singleton "assert" "caml_assert"
let get_fun f = try SMap.find f fun_env with Not_found -> f
let rec print_expr ff env tvindex rf1 rf2 = function
| EVar v -> Format.fprintf ff "%t%s%t" rf1 (env_get env v) rf2
| EConstant _ -> assert false
| EConstr (name, []) -> Format.fprintf ff "%t(Val_long(tag__%s))%t" rf1 name rf2
| EConstr (name, args) ->
Format.fprintf ff "tmp__%d = caml_alloc(%d, tag__%s);@," tvindex (List.length args) name;
List.iteri (fun i e ->
print_expr ff env (tvindex + 1)
(fun ff -> Format.fprintf ff "Store_field(tmp__%d, %d, " tvindex i)
(fun ff -> Format.fprintf ff ");@,")
e) args;
Format.fprintf ff "%ttmp__%d%t" rf1 tvindex rf2
| EGetfield (e, f) ->
print_expr ff env tvindex (fun ff -> Format.fprintf ff "tmp =") (fun ff -> Format.fprintf ff ";@,") e;
Format.fprintf ff "%tField(tmp, field__%s)%t" rf1 f rf2
| ERecord args ->
assert (List.length args > 0);
Format.fprintf ff "tmp__%d = caml_alloc(%d, 0);@," tvindex (List.length args);
List.iter (fun (f, e) ->
print_expr ff env (tvindex + 1)
(fun ff -> Format.fprintf ff "Store_field(tmp__%d, field__%s, " tvindex f)
(fun ff -> Format.fprintf ff ");@,")
e) args;
Format.fprintf ff "%ttmp__%d%t" rf1 tvindex rf2
| ERecordwith (e, args) ->
print_expr ff env tvindex (fun ff -> Format.fprintf ff "tmp__%d =" tvindex) (fun ff -> Format.fprintf ff ";@,") e;
Format.fprintf ff "tmp = caml_alloc(Wosize_val(tmp__%d), 0);@," tvindex;
Format.fprintf ff "for (size_t i = 0; i < Wosize_val(tmp__%d); i++) { Store_field(tmp, i, Field(tmp__%d, i)); }; tmp__%d = tmp;@," tvindex tvindex tvindex;
List.iter (fun (f, e) ->
print_expr ff env (tvindex + 1)
(fun ff -> Format.fprintf ff "Store_field(tmp__%d, field__%s, " tvindex f)
(fun ff -> Format.fprintf ff ");@,")
e) args;
Format.fprintf ff "%ttmp__%d%t" rf1 tvindex rf2
| EApply (f, args) ->
List.iteri (fun i e ->
print_expr ff env (tvindex + i)
(fun ff -> Format.fprintf ff "tmp__%d = " (tvindex + i))
(fun ff -> Format.fprintf ff ";@,")
e) args;
Format.fprintf ff "%t(%s(%a))%t" rf1 (get_fun f)
(Format.pp_print_list ~pp_sep:pp_sep_comma (fun ff x -> Format.fprintf ff "tmp__%d" x))
(range tvindex (tvindex + (List.length args))) rf2
| EIf (e1, e2, e3) ->
print_expr ff env tvindex (fun ff -> Format.fprintf ff "tmp = ") (fun ff -> Format.fprintf ff ";@,") e1;
Format.fprintf ff "@[<v 2>if (tmp != Val_false) {@,";
print_expr ff env tvindex rf1 rf2 e2;
Format.fprintf ff "@]@,@[<v 2>} else {@,";
print_expr ff env tvindex rf1 rf2 e3;
Format.fprintf ff "@]@,}@,"
| EChain (e1, e2) ->
print_expr ff env tvindex (fun ff -> Format.fprintf ff "tmp = ") (fun ff -> Format.fprintf ff ";@,") e1;
print_expr ff env tvindex rf1 rf2 e2;
| EMatch (e, l) ->
print_expr ff env tvindex (fun ff -> Format.fprintf ff "tmp = ") (fun ff -> Format.fprintf ff ";@,") e;
let no_arg, with_arg, default = split_pattern_matching l in
assert (l <> []);
let print_default () =
let (vn, e) = match default with Some vn -> vn | None -> assert false in
if vn = "_" then begin
print_expr ff env tvindex rf1 rf2 e
end else begin
Format.fprintf ff "tmp__%d = tmp;@," tvindex;
print_expr ff (SMap.add vn tvindex env) (tvindex + 1) rf1 rf2 e
end
in
if (no_arg = [] && with_arg = []) then begin
print_default ()
end else begin
let has_def = default <> None in
let lab_def = if has_def then gen_label () else "BUG" in
let do_def ff = if has_def then Format.fprintf ff "goto %s;" lab_def else Format.fprintf ff "assert(0);" in
Format.fprintf ff "@[<v 2>if (Is_long(tmp)) { switch (Int_val(tmp)) {@,";
List.iter (fun (c, e) ->
Format.fprintf ff "@[<v 2>case tag__%s:@," c;
print_expr ff env tvindex rf1 rf2 e;
Format.fprintf ff "@]@,"
) no_arg;
Format.fprintf ff "default: %t@]@,@[<v 2>}} else { switch (Tag_val(tmp)) {@," do_def;
List.iter (fun (c, l, e) ->
Format.fprintf ff "@[<v 2>case tag__%s:@," c;
let rec set_variables tvindex env i = function
| [] -> tvindex, env
| "_" :: l -> set_variables tvindex env (i + 1) l
| x :: l ->
Format.fprintf ff "tmp__%d = Field(tmp, %d);@," tvindex i;
set_variables (tvindex + 1) (SMap.add x tvindex env) (i + 1) l
in
let ntv, nenv = set_variables tvindex env 0 l in
print_expr ff nenv ntv rf1 rf2 e;
Format.fprintf ff "@]@,"
) with_arg;
Format.fprintf ff "default: %t@]@,}}@," do_def;
if has_def then begin
Format.fprintf ff "@[<v 2>if (0) { %s:@," lab_def;
print_default ();
Format.fprintf ff "@]@,}@,"
end
end
| ELet (p, e1, e2) -> print_expr ff env tvindex rf1 rf2 (EMatch (e1, [(p, e2)]))
let print_def ff = function
| MLet (name, args, body) when args <> [] ->
Format.fprintf ff "@[<v 2>value %s(%a) {@," name
(Format.pp_print_list
~pp_sep:pp_sep_comma
(fun ff x -> Format.fprintf ff "value %s" x)) args;
Format.fprintf ff "value tmp;@,";
Format.fprintf ff "CAMLparam%d(%a);@," (List.length args) (Format.pp_print_list ~pp_sep:pp_sep_comma Format.pp_print_string) args;
let tv = expr_tempvars body in
Format.fprintf ff "%a@," print_tempvars tv;
print_expr ff SMap.empty 0
(fun ff -> Format.fprintf ff "CAMLdrop; return ")
(fun ff -> Format.fprintf ff ";@,")
body;
Format.fprintf ff "@]@,}@.@."
| _ -> ()
let print_init ff = function
| MLet (name, [], body) ->
Format.fprintf ff "%s =@," name;
assert false
| _ -> ()
let compile_and_print ff defs =
List.iter (print_decl ff) defs;
List.iter (print_def ff) defs;
Format.fprintf ff "@[<v 2>void init() {@,";
List.iter (print_init ff) defs;
Format.fprintf ff "@]@,}@."

20
miniml/compiler/driver.ml Normal file
View File

@ -0,0 +1,20 @@
open Lexing
let input_file = Sys.argv.(1)
let in_chan = open_in input_file
let lexbuf = Lexing.from_channel in_chan
let report_error filename start_pos end_pos =
let start_col = start_pos.pos_cnum - start_pos.pos_bol + 1 in
let end_col = end_pos.pos_cnum - start_pos.pos_bol + 1 in
Format.eprintf "File \"%s\", line %d, characters %d-%d:\n" filename start_pos.pos_lnum start_col end_col
let defs =
try Parser.definitions Lexer.token lexbuf
with Parser.Error ->
begin
report_error input_file (Lexing.lexeme_start_p lexbuf) (Lexing.lexeme_end_p lexbuf);
Format.eprintf "Syntax error@."; exit 1
end
let () = Compile.compile_and_print Format.std_formatter defs

80
miniml/compiler/lexer.mll Normal file
View File

@ -0,0 +1,80 @@
{
open Parser
open Lexing
exception Lexing_error of string
let kw = [
"else", ELSE;
"if", IF;
"in", IN;
"let", LET;
"match", MATCH;
"of", OF;
"then", THEN;
"type", TYPE;
"with", WITH
]
let keywords = Hashtbl.create (List.length kw)
let () = List.iter (fun (a,b) -> Hashtbl.add keywords a b) kw
let newline lexbuf =
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <-
{ pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum }
}
let digits = ['0'-'9']
let alpha = ['a'-'z'] | ['A'-'Z']
let hex_digit = digits | ['A'-'F'] | ['a'-'f']
let ident_car = alpha | '_' | '\'' | digits
let lident = (['a'-'z'] | '_') ident_car*
let uident = ['A'-'Z'] ident_car*
let integer_literal = '-'? digits (digits | '_')*
let escape_sequence = '\\'
('\\' | '\"' | '\'' | 'n' | 't' | 'b' | 'r' | ' ' |
(digits digits digits) | ('x' hex_digit hex_digit))
let whitespace = [ ' ' '\t' ]
rule token = parse
| whitespace+ { token lexbuf }
| "(*" { comment lexbuf ; token lexbuf }
| "\n" { newline lexbuf ; token lexbuf }
| "(" { LPAREN }
| ")" { RPAREN }
| "{" { LBRACE }
| "}" { RBRACE }
| "," { COMMA }
| ";" { SEMICOLON }
| "=" { EQ }
| "|" { BAR }
| "->" { MINUSGT }
| "." { DOT }
| eof { EOF }
| '\"' { STRING (String.concat "" (string_chars lexbuf)) }
| lident as s { try Hashtbl.find keywords s
with Not_found -> LIDENT s }
| uident as s { UIDENT s }
| _ as c { raise (Lexing_error ("Illegal caracter:" ^ (String.make 1 c))) }
and comment = parse
| "\n" { newline lexbuf; comment lexbuf }
| "*)" { () }
| "(*" { comment lexbuf; comment lexbuf }
| _ { comment lexbuf }
| eof { raise (Lexing_error "Unterminated comment") }
and string_chars = parse
| '\"' { [] }
| '\n' { newline lexbuf; "\n" :: (string_chars lexbuf) }
| [^ '\\' '\"'] as c { (String.make 1 c) :: (string_chars lexbuf)}
| escape_sequence as s { s :: (string_chars lexbuf) }
| '\\' { string_escape_newline lexbuf; string_chars lexbuf }
| _ { raise (Lexing_error "Unrecognized escape sequence") }
and string_escape_newline = parse
| '\n' { newline lexbuf; string_skip_indent lexbuf }
and string_skip_indent = parse
| [' ' '\t']* { }

148
miniml/compiler/parser.mly Normal file
View File

@ -0,0 +1,148 @@
%{
open Ast
%}
%token LPAREN
%token RPAREN
%token COMMA
%token MINUSGT
%token DOT
%token SEMICOLON
%token EQ
%token LBRACE
%token BAR
%token RBRACE
%token ELSE
%token IF
%token IN
%token LET
%token MATCH
%token OF
%token THEN
%token TYPE
%token WITH
%token EOF
%token <string> STRING
%token <string> LIDENT
%token <string> UIDENT
%right MINUSGT
%left BAR
%nonassoc LET MATCH
%right SEMICOLON
%right list_prec
%nonassoc THEN
%nonassoc ELSE
%nonassoc comma_prec
%left COMMA
%left EQ
%nonassoc dot_prec
%nonassoc DOT
%nonassoc RPAREN
%start definitions
%type <Ast.definition list> definitions
%%
list2(X):
| x1 = X; x2 = X { [x1; x2] }
| x = X; l = list2(X) { x :: l }
separated_list2(DELIM, X):
| x1 = X; DELIM; x2 = X { [x1; x2] }
| x = X; DELIM; l = separated_list2(DELIM, X) { x :: l }
comma_separated_list2_(X):
| x1 = X; COMMA; x2 = X { [x2; x1] }
| l = comma_separated_list2_(X); COMMA; x = X { x :: l }
%inline comma_separated_list2(X):
l = comma_separated_list2_(X) { List.rev l }
separated_semi_opt(X):
| x = X; ioption(SEMICOLON) { [x] }
| x = X; SEMICOLON; l = separated_semi_opt(X) { x :: l }
constant:
| s = STRING { CString s }
| LPAREN; RPAREN { CUnit }
record_list_(X):
| f = LIDENT; EQ; x = X %prec list_prec { [(f, x)] }
| l = record_list_(X); SEMICOLON; f = LIDENT; EQ; x = X %prec list_prec
{ (f, x) :: l }
%inline record_list(X):
l = record_list_(X) { List.rev l }
record(X):
LBRACE; l = record_list(X); option(SEMICOLON); RBRACE { l }
pattern:
| x = LIDENT { PVar x }
| c = UIDENT { PConstructor (c, []) }
| c = UIDENT x = LIDENT { PConstructor (c, [x]) }
| c = UIDENT LPAREN l = separated_nonempty_list(COMMA, LIDENT) RPAREN
{ PConstructor (c, l) }
| l = comma_separated_list2(LIDENT) { PConstructor ("", l) }
simple_expr:
| v = LIDENT { EVar v }
| c = constant { EConstant c }
| c = UIDENT %prec dot_prec
{ EConstr (c, []) }
| LPAREN; e = expr; RPAREN { e }
| e = simple_expr; DOT; f = LIDENT
{ EGetfield (e, f) }
| l = record(expr)
{ ERecord l }
| LBRACE; e = simple_expr; WITH; l = record_list(expr); RBRACE
{ ERecordwith (e, l) }
expr:
| e = simple_expr { e }
| f = LIDENT; l = nonempty_list(simple_expr) (* %prec appl_prec *)
{ EApply (f, l) }
| l = comma_separated_list2(expr) %prec comma_prec
{ EConstr ("", l) }
| IF; e1 = expr; THEN; e2 = expr; ELSE; e3 = expr
{ EIf (e1, e2, e3) }
| e1 = expr; SEMICOLON; e2 = expr
{ EChain (e1, e2) }
| MATCH; e = expr; WITH; p = pattern_matching (* %prec MATCH *)
{ EMatch (e, p) }
| LET; p = pattern; EQ; e1 = expr; IN; e2 = expr %prec LET
{ ELet (p, e1, e2) }
%inline pattern_line:
p = pattern; MINUSGT; e = expr
{ (p, e) }
pattern_lines:
| p = pattern_line { [p] }
| p = pattern_line; BAR; l = pattern_lines { p :: l }
%inline pattern_matching:
ioption(BAR); l = pattern_lines { l }
%inline type_representation:
| ioption(BAR); l = separated_nonempty_list(BAR, constr_decl)
{ (ISum l) }
| LBRACE; l = separated_semi_opt(LIDENT); RBRACE
{ (IRecord l) }
constr_decl:
| n = UIDENT { (n, false) }
| n = UIDENT; OF; LPAREN; RPAREN { (n, true) }
definition:
| LET; name = LIDENT; vars = list(LIDENT); EQ; body = expr { MLet (name, vars, body) }
| TYPE; n = LIDENT; EQ; r = type_representation { MTypedef (n, r) }
definitions:
| l = list(definition) EOF { l }