stage2 parser: different memory layout of ParamDecl

Instead of being its own node, it's a struct inside FnProto.
Instead of FnProto having a SinglyLinkedList of ParamDecl nodes,
ParamDecls are appended directly in memory after the FnProto.

throughput: 72.2 MiB/s => 72.9 MiB/s
maxrss: 70 KB => 68 KB

Importantly, the API is improved as well since the data is arranged
linearly in memory.
master
Andrew Kelley 2020-05-20 17:39:54 -04:00
parent 5db9f306ba
commit 7c2c0e36f8
3 changed files with 123 additions and 105 deletions

View File

@ -477,7 +477,6 @@ pub const Node = struct {
ErrorTag,
AsmInput,
AsmOutput,
ParamDecl,
FieldInitializer,
};
@ -533,7 +532,6 @@ pub const Node = struct {
switch (n.id) {
.Root,
.ContainerField,
.ParamDecl,
.Block,
.Payload,
.PointerPayload,
@ -819,7 +817,7 @@ pub const Node = struct {
return @ptrCast(*ContainerDecl, bytes.ptr);
}
pub fn free(self: *Decl, allocator: *mem.Allocator) void {
pub fn free(self: *ContainerDecl, allocator: *mem.Allocator) void {
const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)];
allocator.free(bytes);
}
@ -979,13 +977,14 @@ pub const Node = struct {
}
};
/// The params are directly after the FnProto in memory.
pub const FnProto = struct {
base: Node = Node{ .id = .FnProto },
doc_comments: ?*DocComment,
visib_token: ?TokenIndex,
fn_token: TokenIndex,
name_token: ?TokenIndex,
params: ParamList,
params_len: NodeIndex,
return_type: ReturnType,
var_args_token: ?TokenIndex,
extern_export_inline_token: ?TokenIndex,
@ -997,16 +996,75 @@ pub const Node = struct {
is_extern_prototype: bool = false, // TODO: Remove once extern fn rewriting is
is_async: bool = false, // TODO: remove once async fn rewriting is
pub const ParamList = LinkedList(*Node);
pub const ReturnType = union(enum) {
Explicit: *Node,
InferErrorSet: *Node,
Invalid: TokenIndex,
};
pub const ParamDecl = struct {
doc_comments: ?*DocComment,
comptime_token: ?TokenIndex,
noalias_token: ?TokenIndex,
name_token: ?TokenIndex,
param_type: ParamType,
pub const ParamType = union(enum) {
var_type: *Node,
var_args: TokenIndex,
type_expr: *Node,
};
pub fn iterate(self: *const ParamDecl) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = null };
}
pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node {
var i = it.index;
it.index += 1;
if (i < 1) {
switch (self.param_type) {
.var_args => return null,
.var_type, .type_expr => |node| return node,
}
}
i -= 1;
return null;
}
pub fn firstToken(self: *const ParamDecl) TokenIndex {
if (self.comptime_token) |comptime_token| return comptime_token;
if (self.noalias_token) |noalias_token| return noalias_token;
if (self.name_token) |name_token| return name_token;
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.firstToken(),
}
}
pub fn lastToken(self: *const ParamDecl) TokenIndex {
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.lastToken(),
}
}
};
/// After this the caller must initialize the params list.
pub fn alloc(allocator: *mem.Allocator, params_len: NodeIndex) !*FnProto {
const bytes = try allocator.alignedAlloc(u8, @alignOf(FnProto), sizeInBytes(params_len));
return @ptrCast(*FnProto, bytes.ptr);
}
pub fn free(self: *FnProto, allocator: *mem.Allocator) void {
const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.params_len)];
allocator.free(bytes);
}
pub fn iterate(self: *const FnProto) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = self.params.first };
return .{ .parent_node = &self.base, .index = 0, .node = null };
}
pub fn iterateNext(self: *const FnProto, it: *Node.Iterator) ?*Node {
@ -1018,11 +1076,17 @@ pub const Node = struct {
i -= 1;
}
if (it.node) |param| {
it.index -= 1;
it.node = param.next;
return param.data;
if (i < self.params_len) {
switch (self.paramsConst()[i].param_type) {
.var_type => |n| return n,
.var_args => {
i += 1;
it.index += 1;
},
.type_expr => |n| return n,
}
}
i -= self.params_len;
if (self.align_expr) |align_expr| {
if (i < 1) return align_expr;
@ -1064,6 +1128,20 @@ pub const Node = struct {
.Invalid => |tok| return tok,
}
}
pub fn params(self: *FnProto) []ParamDecl {
const decls_start = @ptrCast([*]u8, self) + @sizeOf(FnProto);
return @ptrCast([*]ParamDecl, decls_start)[0..self.params_len];
}
pub fn paramsConst(self: *const FnProto) []const ParamDecl {
const decls_start = @ptrCast([*]const u8, self) + @sizeOf(FnProto);
return @ptrCast([*]const ParamDecl, decls_start)[0..self.params_len];
}
fn sizeInBytes(params_len: NodeIndex) usize {
return @sizeOf(FnProto) + @sizeOf(ParamDecl) * @as(usize, params_len);
}
};
pub const AnyFrameType = struct {
@ -1102,57 +1180,6 @@ pub const Node = struct {
}
};
pub const ParamDecl = struct {
base: Node = Node{ .id = .ParamDecl },
doc_comments: ?*DocComment,
comptime_token: ?TokenIndex,
noalias_token: ?TokenIndex,
name_token: ?TokenIndex,
param_type: ParamType,
pub const ParamType = union(enum) {
var_type: *Node,
var_args: TokenIndex,
type_expr: *Node,
};
pub fn iterate(self: *const ParamDecl) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = null };
}
pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node {
var i = it.index;
it.index += 1;
if (i < 1) {
switch (self.param_type) {
.var_args => return null,
.var_type, .type_expr => |node| return node,
}
}
i -= 1;
return null;
}
pub fn firstToken(self: *const ParamDecl) TokenIndex {
if (self.comptime_token) |comptime_token| return comptime_token;
if (self.noalias_token) |noalias_token| return noalias_token;
if (self.name_token) |name_token| return name_token;
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.firstToken(),
}
}
pub fn lastToken(self: *const ParamDecl) TokenIndex {
switch (self.param_type) {
.var_args => |tok| return tok,
.var_type, .type_expr => |node| return node.lastToken(),
}
}
};
pub const Block = struct {
base: Node = Node{ .id = .Block },
label: ?TokenIndex,

View File

@ -71,7 +71,7 @@ const Parser = struct {
const eof_token = p.eatToken(.Eof).?;
const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token);
std.mem.copy(*ast.Node, node.decls(), decls);
std.mem.copy(*Node, node.decls(), decls);
return node;
}
@ -96,8 +96,8 @@ const Parser = struct {
/// / ContainerField COMMA ContainerMembers
/// / ContainerField
/// /
fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node {
var list = std.ArrayList(*ast.Node).init(p.gpa);
fn parseContainerMembers(p: *Parser, top_level: bool) ![]*Node {
var list = std.ArrayList(*Node).init(p.gpa);
defer list.deinit();
var field_state: union(enum) {
@ -522,6 +522,7 @@ const Parser = struct {
const name_token = p.eatToken(.Identifier);
const lparen = try p.expectToken(.LParen);
const params = try p.parseParamDeclList(&var_args_token);
defer p.gpa.free(params);
const rparen = try p.expectToken(.RParen);
const align_expr = try p.parseByteAlign();
const section_expr = try p.parseLinkSection();
@ -544,13 +545,13 @@ const Parser = struct {
else
R{ .Explicit = return_type_expr.? };
const fn_proto_node = try p.arena.allocator.create(Node.FnProto);
const fn_proto_node = try Node.FnProto.alloc(&p.arena.allocator, params.len);
fn_proto_node.* = .{
.doc_comments = null,
.visib_token = null,
.fn_token = fn_token,
.name_token = name_token,
.params = params,
.params_len = params.len,
.return_type = return_type,
.var_args_token = var_args_token,
.extern_export_inline_token = null,
@ -562,6 +563,7 @@ const Parser = struct {
.is_extern_prototype = is_extern,
.is_async = is_async,
};
std.mem.copy(Node.FnProto.ParamDecl, fn_proto_node.params(), params);
return &fn_proto_node.base;
}
@ -621,7 +623,7 @@ const Parser = struct {
var type_expr: ?*Node = null;
if (p.eatToken(.Colon)) |_| {
if (p.eatToken(.Keyword_var)) |var_tok| {
const node = try p.arena.allocator.create(ast.Node.VarType);
const node = try p.arena.allocator.create(Node.VarType);
node.* = .{ .token = var_tok };
type_expr = &node.base;
} else {
@ -1936,7 +1938,7 @@ const Parser = struct {
}
/// ParamDecl <- (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType
fn parseParamDecl(p: *Parser) !?*Node {
fn parseParamDecl(p: *Parser, list: *std.ArrayList(Node.FnProto.ParamDecl)) !bool {
const doc_comments = try p.parseDocComment();
const noalias_token = p.eatToken(.Keyword_noalias);
const comptime_token = if (noalias_token == null) p.eatToken(.Keyword_comptime) else null;
@ -1951,31 +1953,30 @@ const Parser = struct {
if (noalias_token == null and
comptime_token == null and
name_token == null and
doc_comments == null) return null;
doc_comments == null) return false;
try p.errors.append(p.gpa, .{
.ExpectedParamType = .{ .token = p.tok_i },
});
return error.ParseError;
};
const param_decl = try p.arena.allocator.create(Node.ParamDecl);
param_decl.* = .{
(try list.addOne()).* = .{
.doc_comments = doc_comments,
.comptime_token = comptime_token,
.noalias_token = noalias_token,
.name_token = name_token,
.param_type = param_type,
};
return &param_decl.base;
return true;
}
/// ParamType
/// <- KEYWORD_var
/// / DOT3
/// / TypeExpr
fn parseParamType(p: *Parser) !?Node.ParamDecl.ParamType {
fn parseParamType(p: *Parser) !?Node.FnProto.ParamDecl.ParamType {
// TODO cast from tuple to error union is broken
const P = Node.ParamDecl.ParamType;
const P = Node.FnProto.ParamDecl.ParamType;
if (try p.parseVarType()) |node| return P{ .var_type = node };
if (p.eatToken(.Ellipsis3)) |token| return P{ .var_args = token };
if (try p.parseTypeExpr()) |node| return P{ .type_expr = node };
@ -2587,7 +2588,7 @@ const Parser = struct {
if (p.eatToken(.Ellipsis2) != null) {
const end_expr = try p.parseExpr();
const sentinel: ?*ast.Node = if (p.eatToken(.Colon) != null)
const sentinel: ?*Node = if (p.eatToken(.Colon) != null)
try p.parseExpr()
else
null;
@ -2616,7 +2617,7 @@ const Parser = struct {
if (p.eatToken(.Period)) |period| {
if (try p.parseIdentifier()) |identifier| {
// TODO: It's a bit weird to return an InfixOp from the SuffixOp parser.
// Should there be an ast.Node.SuffixOp.FieldAccess variant? Or should
// Should there be an Node.SuffixOp.FieldAccess variant? Or should
// this grammar rule be altered?
const node = try p.arena.allocator.create(Node.InfixOp);
node.* = .{
@ -2652,13 +2653,13 @@ const Parser = struct {
/// ExprList <- (Expr COMMA)* Expr?
fn parseFnCallArguments(p: *Parser) !?AnnotatedParamList {
if (p.eatToken(.LParen) == null) return null;
const list = try ListParseFn(Node.FnProto.ParamList, parseExpr)(p);
const list = try ListParseFn(std.SinglyLinkedList(*Node), parseExpr)(p);
const rparen = try p.expectToken(.RParen);
return AnnotatedParamList{ .list = list, .rparen = rparen };
}
const AnnotatedParamList = struct {
list: Node.FnProto.ParamList, // NOTE: may also be any other type SegmentedList(*Node, 2)
list: std.SinglyLinkedList(*Node),
rparen: TokenIndex,
};
@ -2797,14 +2798,14 @@ const Parser = struct {
.lbrace_token = lbrace,
.rbrace_token = rbrace,
};
std.mem.copy(*ast.Node, node.fieldsAndDecls(), members);
std.mem.copy(*Node, node.fieldsAndDecls(), members);
return &node.base;
}
/// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
const ContainerDeclType = struct {
kind_token: TokenIndex,
init_arg_expr: ast.Node.ContainerDecl.InitArg,
init_arg_expr: Node.ContainerDecl.InitArg,
};
/// ContainerDeclType
@ -2893,14 +2894,11 @@ const Parser = struct {
}
/// ParamDeclList <- (ParamDecl COMMA)* ParamDecl?
fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) !Node.FnProto.ParamList {
var list = Node.FnProto.ParamList{};
var list_it = &list.first;
var last: ?*Node = null;
while (try p.parseParamDecl()) |node| {
last = node;
list_it = try p.llpush(*Node, list_it, node);
fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) ![]Node.FnProto.ParamDecl {
var list = std.ArrayList(Node.FnProto.ParamDecl).init(p.gpa);
defer list.deinit();
while (try p.parseParamDecl(&list)) {
switch (p.tokens[p.tok_i].id) {
.Comma => _ = p.nextToken(),
// all possible delimiters
@ -2914,13 +2912,13 @@ const Parser = struct {
},
}
}
if (last) |node| {
const param_type = node.cast(Node.ParamDecl).?.param_type;
if (list.items.len != 0) {
const param_type = list.items[list.items.len - 1].param_type;
if (param_type == .var_args) {
var_args_token.* = param_type.var_args;
}
}
return list;
return list.toOwnedSlice();
}
const NodeParseFn = fn (p: *Parser) Error!?*Node;

View File

@ -1479,13 +1479,11 @@ fn renderExpression(
try renderToken(tree, stream, lparen, indent, start_col, Space.None); // (
// render all on one line, no trailing comma
var it = fn_proto.params.first;
while (it) |param_decl_node_node| : (it = param_decl_node_node.next) {
const param_decl_node = param_decl_node_node.data;
try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl_node, Space.None);
for (fn_proto.params()) |param_decl, i| {
try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl, Space.None);
if (param_decl_node_node.next != null) {
const comma = tree.nextToken(param_decl_node.lastToken());
if (i + 1 < fn_proto.params_len) {
const comma = tree.nextToken(param_decl.lastToken());
try renderToken(tree, stream, comma, indent, start_col, Space.Space); // ,
}
}
@ -1494,11 +1492,9 @@ fn renderExpression(
const new_indent = indent + indent_delta;
try renderToken(tree, stream, lparen, new_indent, start_col, Space.Newline); // (
var it = fn_proto.params.first;
while (it) |param_decl_node_node| : (it = param_decl_node_node.next) {
const param_decl_node = param_decl_node_node.data;
for (fn_proto.params()) |param_decl| {
try stream.writeByteNTimes(' ', new_indent);
try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl_node, Space.Comma);
try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl, Space.Comma);
}
try stream.writeByteNTimes(' ', indent);
}
@ -2079,7 +2075,6 @@ fn renderExpression(
.VarDecl,
.Use,
.TestDecl,
.ParamDecl,
=> unreachable,
}
}
@ -2162,11 +2157,9 @@ fn renderParamDecl(
tree: *ast.Tree,
indent: usize,
start_col: *usize,
base: *ast.Node,
param_decl: ast.Node.FnProto.ParamDecl,
space: Space,
) (@TypeOf(stream).Error || Error)!void {
const param_decl = @fieldParentPtr(ast.Node.ParamDecl, "base", base);
try renderDocComments(tree, stream, param_decl, indent, start_col);
if (param_decl.comptime_token) |comptime_token| {