Merge pull request #5353 from Vexu/parser

Self-hosted parser fixes
This commit is contained in:
Vexu 2020-05-16 15:56:03 +03:00 committed by GitHub
commit 758de71d97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 172 additions and 116 deletions

View File

@ -10098,7 +10098,7 @@ FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? Link
VarDecl <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? LinkSection? (EQUAL Expr)? SEMICOLON
ContainerField <- IDENTIFIER (COLON TypeExpr)? (EQUAL Expr)?
ContainerField <- KEYWORD_comptime? IDENTIFIER (COLON TypeExpr)? (EQUAL Expr)?
# *** Block Level ***
Statement
@ -10212,7 +10212,7 @@ PrimaryTypeExpr
/ KEYWORD_error DOT IDENTIFIER
/ KEYWORD_false
/ KEYWORD_null
/ KEYWORD_promise
/ KEYWORD_anyframe
/ KEYWORD_true
/ KEYWORD_undefined
/ KEYWORD_unreachable
@ -10356,7 +10356,7 @@ PrefixOp
PrefixTypeOp
<- QUESTIONMARK
/ KEYWORD_promise MINUSRARROW
/ KEYWORD_anyframe MINUSRARROW
/ ArrayTypeStart (ByteAlign / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
/ PtrTypeStart (KEYWORD_align LPAREN Expr (COLON INTEGER COLON INTEGER)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)*
@ -10503,6 +10503,7 @@ end_of_word <- ![a-zA-Z0-9_] skip
KEYWORD_align <- 'align' end_of_word
KEYWORD_allowzero <- 'allowzero' end_of_word
KEYWORD_and <- 'and' end_of_word
KEYWORD_anyframe <- 'anyframe' end_of_word
KEYWORD_asm <- 'asm' end_of_word
KEYWORD_async <- 'async' end_of_word
KEYWORD_await <- 'await' end_of_word
@ -10529,7 +10530,6 @@ KEYWORD_null <- 'null' end_of_word
KEYWORD_or <- 'or' end_of_word
KEYWORD_orelse <- 'orelse' end_of_word
KEYWORD_packed <- 'packed' end_of_word
KEYWORD_promise <- 'promise' end_of_word
KEYWORD_pub <- 'pub' end_of_word
KEYWORD_resume <- 'resume' end_of_word
KEYWORD_return <- 'return' end_of_word
@ -10556,7 +10556,7 @@ keyword <- KEYWORD_align / KEYWORD_and / KEYWORD_allowzero / KEYWORD_asm
/ KEYWORD_error / KEYWORD_export / KEYWORD_extern / KEYWORD_false
/ KEYWORD_fn / KEYWORD_for / KEYWORD_if / KEYWORD_inline
/ KEYWORD_noalias / KEYWORD_null / KEYWORD_or
/ KEYWORD_orelse / KEYWORD_packed / KEYWORD_promise / KEYWORD_pub
/ KEYWORD_orelse / KEYWORD_packed / KEYWORD_anyframe / KEYWORD_pub
/ KEYWORD_resume / KEYWORD_return / KEYWORD_linksection
/ KEYWORD_struct / KEYWORD_suspend
/ KEYWORD_switch / KEYWORD_test / KEYWORD_threadlocal / KEYWORD_true / KEYWORD_try

View File

@ -986,14 +986,22 @@ pub const Node = struct {
comptime_token: ?TokenIndex,
noalias_token: ?TokenIndex,
name_token: ?TokenIndex,
type_node: *Node,
var_args_token: ?TokenIndex,
param_type: ParamType,
pub const ParamType = union(enum) {
var_type: *Node,
var_args: TokenIndex,
type_expr: *Node,
};
pub fn iterate(self: *ParamDecl, index: usize) ?*Node {
var i = index;
if (i < 1) {
return if (self.var_args_token == null) self.type_node else null;
switch (self.param_type) {
.var_args => return null,
.var_type, .type_expr => |node| return node,
}
}
i -= 1;
@ -1004,12 +1012,17 @@ pub const Node = struct {
if (self.comptime_token) |comptime_token| return comptime_token;
if (self.noalias_token) |noalias_token| return noalias_token;
if (self.name_token) |name_token| return name_token;
return self.type_node.firstToken();
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.firstToken(),
}
}
pub fn lastToken(self: *const ParamDecl) TokenIndex {
if (self.var_args_token) |var_args_token| return var_args_token;
return self.type_node.lastToken();
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.lastToken(),
}
}
};

View File

@ -57,16 +57,10 @@ pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!*Tree {
fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!*Node.Root {
const node = try arena.create(Node.Root);
node.* = .{
.decls = try parseContainerMembers(arena, it, tree),
.eof_token = eatToken(it, .Eof) orelse blk: {
// parseContainerMembers will try to skip as many
// invalid tokens as it can, so this can only be a '}'
const tok = eatToken(it, .RBrace).?;
try tree.errors.push(.{
.ExpectedContainerMembers = .{ .token = tok },
});
break :blk tok;
},
.decls = try parseContainerMembers(arena, it, tree, true),
// parseContainerMembers will try to skip as many
// invalid tokens as it can, so this can only be the EOF
.eof_token = eatToken(it, .Eof).?,
};
return node;
}
@ -75,10 +69,10 @@ fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error
/// <- TestDecl ContainerMembers
/// / TopLevelComptime ContainerMembers
/// / KEYWORD_pub? TopLevelDecl ContainerMembers
/// / KEYWORD_pub? ContainerField COMMA ContainerMembers
/// / KEYWORD_pub? ContainerField
/// / ContainerField COMMA ContainerMembers
/// / ContainerField
/// /
fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !Node.Root.DeclList {
fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree, top_level: bool) !Node.Root.DeclList {
var list = Node.Root.DeclList.init(arena);
var field_state: union(enum) {
@ -205,9 +199,15 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No
// try to continue parsing
const index = it.index;
findNextContainerMember(it);
switch (it.peek().?.id) {
.Eof, .RBrace => break,
const next = it.peek().?.id;
switch (next) {
.Eof => break,
else => {
if (next == .RBrace) {
if (!top_level) break;
_ = nextToken(it);
}
// add error and continue
try tree.errors.push(.{
.ExpectedToken = .{ .token = index, .expected_id = .Comma },
@ -228,12 +228,18 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No
});
}
switch (it.peek().?.id) {
.Eof, .RBrace => break,
const next = it.peek().?.id;
switch (next) {
.Eof => break,
else => {
const index = it.index;
if (next == .RBrace) {
if (!top_level) break;
_ = nextToken(it);
}
// this was likely not supposed to end yet,
// try to find the next declaration
const index = it.index;
findNextContainerMember(it);
try tree.errors.push(.{
.ExpectedContainerMembers = .{ .token = index },
@ -278,7 +284,10 @@ fn findNextContainerMember(it: *TokenIterator) void {
}
},
.LParen, .LBracket, .LBrace => level += 1,
.RParen, .RBracket, .RBrace => {
.RParen, .RBracket => {
if (level != 0) level -= 1;
},
.RBrace => {
if (level == 0) {
// end of container, exit
putBackToken(it, tok.index);
@ -402,20 +411,16 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
fn_node.*.extern_export_inline_token = extern_export_inline_token;
fn_node.*.lib_name = lib_name;
if (eatToken(it, .Semicolon)) |_| return node;
if (parseBlock(arena, it, tree) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
if (try expectNodeRecoverable(arena, it, tree, parseBlock, .{
// since parseBlock only returns error.ParseError on
// a missing '}', we can assume this function was
// supposed to end here.
error.ParseError => return node,
}) |body_node| {
fn_node.body_node = body_node;
return node;
}
try tree.errors.push(.{
.ExpectedSemiOrLBrace = .{ .token = it.index },
});
return error.ParseError;
})) |body_node| {
fn_node.body_node = body_node;
}
return node;
}
if (extern_export_inline_token) |token| {
@ -490,14 +495,11 @@ fn parseFnProto(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
const exclamation_token = eatToken(it, .Bang);
const return_type_expr = (try parseVarType(arena, it, tree)) orelse
(try parseTypeExpr(arena, it, tree)) orelse blk: {
try tree.errors.push(.{
.ExpectedReturnType = .{ .token = it.index },
});
try expectNodeRecoverable(arena, it, tree, parseTypeExpr, .{
// most likely the user forgot to specify the return type.
// Mark return type as invalid and try to continue.
break :blk null;
};
.ExpectedReturnType = .{ .token = it.index },
});
// TODO https://github.com/ziglang/zig/issues/3750
const R = Node.FnProto.ReturnType;
@ -508,9 +510,10 @@ fn parseFnProto(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
else
R{ .Explicit = return_type_expr.? };
const var_args_token = if (params.len > 0)
params.at(params.len - 1).*.cast(Node.ParamDecl).?.var_args_token
else
const var_args_token = if (params.len > 0) blk: {
const param_type = params.at(params.len - 1).*.cast(Node.ParamDecl).?.param_type;
break :blk if (param_type == .var_args) param_type.var_args else null;
} else
null;
const fn_proto_node = try arena.create(Node.FnProto);
@ -707,12 +710,7 @@ fn parseStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!?*No
if (try parseLabeledStatement(arena, it, tree)) |node| return node;
if (try parseSwitchExpr(arena, it, tree)) |node| return node;
if (try parseAssignExpr(arena, it, tree)) |node| {
_ = eatToken(it, .Semicolon) orelse {
try tree.errors.push(.{
.ExpectedToken = .{ .token = it.index, .expected_id = .Semicolon },
});
// pretend we saw a semicolon and continue parsing
};
_ = try expectTokenRecoverable(it, tree, .Semicolon);
return node;
}
@ -727,16 +725,12 @@ fn parseIfStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
const if_prefix = if_node.cast(Node.If).?;
const block_expr = (try parseBlockExpr(arena, it, tree));
const assign_expr = if (block_expr == null) blk: {
break :blk (try parseAssignExpr(arena, it, tree)) orelse null;
} else null;
if (block_expr == null and assign_expr == null) {
try tree.errors.push(.{
const assign_expr = if (block_expr == null)
try expectNode(arena, it, tree, parseAdditionExpr, .{
.ExpectedBlockOrAssignment = .{ .token = it.index },
});
return error.ParseError;
}
})
else
null;
const semicolon = if (assign_expr != null) eatToken(it, .Semicolon) else null;
@ -773,10 +767,9 @@ fn parseIfStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
try tree.errors.push(.{
.ExpectedSemiOrElse = .{ .token = it.index },
});
return error.ParseError;
}
unreachable;
return if_node;
}
/// LabeledStatement <- BlockLabel? (Block / LoopStatement)
@ -882,7 +875,8 @@ fn parseForStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
try tree.errors.push(.{
.ExpectedSemiOrElse = .{ .token = it.index },
});
return null;
return node;
}
return null;
@ -944,7 +938,8 @@ fn parseWhileStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*No
try tree.errors.push(.{
.ExpectedSemiOrElse = .{ .token = it.index },
});
return null;
return node;
}
return null;
@ -956,12 +951,7 @@ fn parseWhileStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*No
fn parseBlockExprStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
if (try parseBlockExpr(arena, it, tree)) |node| return node;
if (try parseAssignExpr(arena, it, tree)) |node| {
_ = eatToken(it, .Semicolon) orelse {
try tree.errors.push(.{
.ExpectedToken = .{ .token = it.index, .expected_id = .Semicolon },
});
// pretend we saw a semicolon and continue parsing
};
_ = try expectTokenRecoverable(it, tree, .Semicolon);
return node;
}
return null;
@ -1478,17 +1468,19 @@ fn parsePrimaryTypeExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*N
return &node.base;
}
if (eatToken(it, .Keyword_error)) |token| {
const period = try expectToken(it, tree, .Period);
const identifier = try expectNode(arena, it, tree, parseIdentifier, .{
const period = try expectTokenRecoverable(it, tree, .Period);
const identifier = try expectNodeRecoverable(arena, it, tree, parseIdentifier, .{
.ExpectedIdentifier = .{ .token = it.index },
});
const global_error_set = try createLiteral(arena, Node.ErrorType, token);
if (period == null or identifier == null) return global_error_set;
const node = try arena.create(Node.InfixOp);
node.* = .{
.op_token = period,
.op_token = period.?,
.lhs = global_error_set,
.op = .Period,
.rhs = identifier,
.rhs = identifier.?,
};
return &node.base;
}
@ -1948,15 +1940,8 @@ fn parseParamDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
.comptime_token = comptime_token,
.noalias_token = noalias_token,
.name_token = name_token,
// TODO: These should be squished into a ParamType enum
.type_node = undefined,
.var_args_token = null,
.param_type = param_type,
};
switch (param_type) {
.VarType => |node| param_decl.type_node = node,
.TypeExpr => |node| param_decl.type_node = node,
.VarArgs => |token| param_decl.var_args_token = token,
}
return &param_decl.base;
}
@ -1964,20 +1949,15 @@ fn parseParamDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
/// <- KEYWORD_var
/// / DOT3
/// / TypeExpr
fn parseParamType(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?ParamType {
if (try parseVarType(arena, it, tree)) |node| return ParamType{ .VarType = node };
if (eatToken(it, .Ellipsis3)) |token| return ParamType{ .VarArgs = token };
if (try parseTypeExpr(arena, it, tree)) |node| return ParamType{ .TypeExpr = node };
fn parseParamType(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?Node.ParamDecl.ParamType {
// TODO cast from tuple to error union is broken
const P = Node.ParamDecl.ParamType;
if (try parseVarType(arena, it, tree)) |node| return P{ .var_type = node };
if (eatToken(it, .Ellipsis3)) |token| return P{ .var_args = token };
if (try parseTypeExpr(arena, it, tree)) |node| return P{ .type_expr = node };
return null;
}
// TODO: Move to ast.Node.ParamDecl.ParamType
const ParamType = union(enum) {
VarType: *Node,
VarArgs: TokenIndex,
TypeExpr: *Node,
};
/// IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
fn parseIfPrefix(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
const if_token = eatToken(it, .Keyword_if) orelse return null;
@ -2778,7 +2758,7 @@ fn parsePtrTypeStart(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
fn parseContainerDeclAuto(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
const node = (try parseContainerDeclType(arena, it, tree)) orelse return null;
const lbrace = try expectToken(it, tree, .LBrace);
const members = try parseContainerMembers(arena, it, tree);
const members = try parseContainerMembers(arena, it, tree, false);
const rbrace = try expectToken(it, tree, .RBrace);
const decl_type = node.cast(Node.ContainerDecl).?;
@ -3250,6 +3230,11 @@ fn eatAnnotatedToken(it: *TokenIterator, id: Token.Id) ?AnnotatedToken {
}
fn expectToken(it: *TokenIterator, tree: *Tree, id: Token.Id) Error!TokenIndex {
return (try expectTokenRecoverable(it, tree, id)) orelse
error.ParseError;
}
fn expectTokenRecoverable(it: *TokenIterator, tree: *Tree, id: Token.Id) !?TokenIndex {
const token = nextToken(it);
if (token.ptr.id != id) {
try tree.errors.push(.{
@ -3257,7 +3242,7 @@ fn expectToken(it: *TokenIterator, tree: *Tree, id: Token.Id) Error!TokenIndex {
});
// go back so that we can recover properly
putBackToken(it, token.index);
return error.ParseError;
return null;
}
return token.index;
}
@ -3297,9 +3282,20 @@ fn expectNode(
parseFn: NodeParseFn,
err: AstError, // if parsing fails
) Error!*Node {
return (try expectNodeRecoverable(arena, it, tree, parseFn, err)) orelse
return error.ParseError;
}
fn expectNodeRecoverable(
arena: *Allocator,
it: *TokenIterator,
tree: *Tree,
parseFn: NodeParseFn,
err: AstError, // if parsing fails
) !?*Node {
return (try parseFn(arena, it, tree)) orelse {
try tree.errors.push(err);
return error.ParseError;
return null;
};
}

View File

@ -148,6 +148,58 @@ test "recovery: invalid parameter" {
});
}
test "recovery: extra '}' at top level" {
try testError(
\\}}}
\\test "" {
\\ a && b;
\\}
, &[_]Error{
.ExpectedContainerMembers,
.ExpectedContainerMembers,
.ExpectedContainerMembers,
.InvalidAnd,
});
}
test "recovery: mismatched bracket at top level" {
try testError(
\\const S = struct {
\\ arr: 128]?G
\\};
, &[_]Error{
.ExpectedToken,
});
}
test "recovery: invalid global error set access" {
try testError(
\\test "" {
\\ error && foo;
\\}
, &[_]Error{
.ExpectedToken,
.ExpectedIdentifier,
.InvalidAnd,
});
}
test "recovery: missing semicolon after if, for, while stmt" {
try testError(
\\test "" {
\\ if (foo) bar
\\ for (foo) |a| bar
\\ while (foo) bar
\\ a && b;
\\}
, &[_]Error{
.ExpectedSemiOrElse,
.ExpectedSemiOrElse,
.ExpectedSemiOrElse,
.InvalidAnd,
});
}
test "zig fmt: top-level fields" {
try testCanonical(
\\a: did_you_know,

View File

@ -2150,10 +2150,9 @@ fn renderParamDecl(
try renderToken(tree, stream, name_token, indent, start_col, Space.None);
try renderToken(tree, stream, tree.nextToken(name_token), indent, start_col, Space.Space); // :
}
if (param_decl.var_args_token) |var_args_token| {
try renderToken(tree, stream, var_args_token, indent, start_col, space);
} else {
try renderExpression(allocator, stream, tree, indent, start_col, param_decl.type_node, space);
switch (param_decl.param_type) {
.var_args => |token| try renderToken(tree, stream, token, indent, start_col, space),
.var_type, .type_expr => |node| try renderExpression(allocator, stream, tree, indent, start_col, node, space),
}
}

View File

@ -501,7 +501,7 @@ fn visitFnDecl(c: *Context, fn_decl: *const ZigClangFunctionDecl) Error!void {
const param = @fieldParentPtr(ast.Node.ParamDecl, "base", p.*);
const param_name = if (param.name_token) |name_tok|
tokenSlice(c, name_tok)
else if (param.var_args_token != null) {
else if (param.param_type == .var_args) {
assert(it.next() == null);
_ = proto_node.params.pop();
break;
@ -4103,8 +4103,7 @@ fn transCreateNodeMacroFn(c: *Context, name: []const u8, ref: *ast.Node, proto_a
.comptime_token = null,
.noalias_token = param.noalias_token,
.name_token = param_name_tok,
.type_node = param.type_node,
.var_args_token = null,
.param_type = param.param_type,
};
try fn_params.push(&param_node.base);
}
@ -4678,8 +4677,7 @@ fn finishTransFnProto(
.comptime_token = null,
.noalias_token = noalias_tok,
.name_token = param_name_tok,
.type_node = type_node,
.var_args_token = null,
.param_type = .{ .type_expr = type_node },
};
try fn_params.push(&param_node.base);
@ -4699,8 +4697,7 @@ fn finishTransFnProto(
.comptime_token = null,
.noalias_token = null,
.name_token = null,
.type_node = undefined, // Note: Accessing this causes an access violation. Need to check .var_args_token first before trying this field
.var_args_token = try appendToken(rp.c, .Ellipsis3, "..."),
.param_type = .{ .var_args = try appendToken(rp.c, .Ellipsis3, "...") }
};
try fn_params.push(&var_arg_node.base);
}
@ -5108,8 +5105,7 @@ fn transMacroFnDefine(c: *Context, it: *CTokenList.Iterator, source: []const u8,
.comptime_token = null,
.noalias_token = null,
.name_token = param_name_tok,
.type_node = &identifier.base,
.var_args_token = null,
.param_type = .{ .type_expr = &identifier.base },
};
try fn_params.push(&param_node.base);

View File

@ -538,8 +538,8 @@ enum ContainerFieldState {
// <- TestDecl ContainerMembers
// / TopLevelComptime ContainerMembers
// / KEYWORD_pub? TopLevelDecl ContainerMembers
// / KEYWORD_comptime? ContainerField COMMA ContainerMembers
// / KEYWORD_comptime? ContainerField
// / ContainerField COMMA ContainerMembers
// / ContainerField
// /
static AstNodeContainerDecl ast_parse_container_members(ParseContext *pc) {
AstNodeContainerDecl res = {};
@ -862,7 +862,7 @@ static AstNode *ast_parse_var_decl(ParseContext *pc) {
return res;
}
// ContainerField <- IDENTIFIER (COLON TypeExpr ByteAlign?)? (EQUAL Expr)?
// ContainerField <- KEYWORD_comptime? IDENTIFIER (COLON TypeExpr ByteAlign?)? (EQUAL Expr)?
static AstNode *ast_parse_container_field(ParseContext *pc) {
Token *identifier = eat_token_if(pc, TokenIdSymbol);
if (identifier == nullptr)