fields and decls: ArrayList appending, memcpy to ast arena

This makes fields and decl ast nodes part of the Root and ContainerDecl
AST nodes.

Surprisingly, it's a performance regression from using a singly-linked
list for these nodes:

throughput: 76.5 MiB/s => 69.4 MiB/s

However it has much better memory usage:

maxrss: 392 KB => 77 KB

It's also better API for consumers of the parser, since it is a flat
list in memory.
master
Andrew Kelley 2020-05-20 16:13:02 -04:00
parent b1bcdc96ca
commit 64c149ca19
3 changed files with 137 additions and 86 deletions

View File

@ -6,6 +6,7 @@ const mem = std.mem;
const Token = std.zig.Token;
pub const TokenIndex = usize;
pub const NodeIndex = usize;
pub const Tree = struct {
/// Reference to externally-owned data.
@ -616,40 +617,62 @@ pub const Node = struct {
}
}
/// The decls data follows this struct in memory as an array of Node pointers.
pub const Root = struct {
base: Node = Node{ .id = .Root },
decls: DeclList,
eof_token: TokenIndex,
decls_len: NodeIndex,
pub const DeclList = LinkedList(*Node);
/// After this the caller must initialize the decls list.
pub fn create(allocator: *mem.Allocator, decls_len: NodeIndex, eof_token: TokenIndex) !*Root {
const bytes = try allocator.alignedAlloc(u8, @alignOf(Root), sizeInBytes(decls_len));
const self = @ptrCast(*Root, bytes.ptr);
self.* = .{
.eof_token = eof_token,
.decls_len = decls_len,
};
return self;
}
pub fn destroy(self: *Decl, allocator: *mem.Allocator) void {
const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.decls_len)];
allocator.free(bytes);
}
pub fn iterate(self: *const Root) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = self.decls.first };
return .{ .parent_node = &self.base, .index = 0, .node = null };
}
pub fn iterateNext(self: *const Root, it: *Node.Iterator) ?*Node {
const decl = it.node orelse return null;
it.node = decl.next;
return decl.data;
var i = it.index;
it.index += 1;
if (i < self.decls_len) return self.declsConst()[i];
return null;
}
pub fn decls(self: *Root) []*Node {
const decls_start = @ptrCast([*]u8, self) + @sizeOf(Root);
return @ptrCast([*]*Node, decls_start)[0..self.decls_len];
}
pub fn declsConst(self: *const Root) []const *Node {
const decls_start = @ptrCast([*]const u8, self) + @sizeOf(Root);
return @ptrCast([*]const *Node, decls_start)[0..self.decls_len];
}
pub fn firstToken(self: *const Root) TokenIndex {
if (self.decls.first) |first| {
return first.data.firstToken();
} else {
return self.eof_token;
}
if (self.decls_len == 0) return self.eof_token;
return self.declsConst()[0].firstToken();
}
pub fn lastToken(self: *const Root) TokenIndex {
if (self.decls.first) |first| {
var node = first;
while (true) {
node = node.next orelse return node.data.lastToken();
}
} else {
return self.eof_token;
if (self.decls_len == 0) return self.eof_token;
return self.declsConst()[self.decls_len - 1].lastToken();
}
fn sizeInBytes(decls_len: NodeIndex) usize {
return @sizeOf(Root) + @sizeOf(*Node) * @as(usize, decls_len);
}
};
@ -777,14 +800,12 @@ pub const Node = struct {
pub const ContainerDecl = struct {
base: Node = Node{ .id = .ContainerDecl },
layout_token: ?TokenIndex,
kind_token: TokenIndex,
init_arg_expr: InitArg,
fields_and_decls: DeclList,
layout_token: ?TokenIndex,
lbrace_token: TokenIndex,
rbrace_token: TokenIndex,
pub const DeclList = Root.DeclList;
fields_and_decls_len: NodeIndex,
init_arg_expr: InitArg,
pub const InitArg = union(enum) {
None,
@ -792,8 +813,19 @@ pub const Node = struct {
Type: *Node,
};
/// After this the caller must initialize the fields_and_decls list.
pub fn alloc(allocator: *mem.Allocator, fields_and_decls_len: NodeIndex) !*ContainerDecl {
const bytes = try allocator.alignedAlloc(u8, @alignOf(ContainerDecl), sizeInBytes(fields_and_decls_len));
return @ptrCast(*ContainerDecl, bytes.ptr);
}
pub fn free(self: *Decl, allocator: *mem.Allocator) void {
const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)];
allocator.free(bytes);
}
pub fn iterate(self: *const ContainerDecl) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = self.fields_and_decls.first };
return .{ .parent_node = &self.base, .index = 0, .node = null };
}
pub fn iterateNext(self: *const ContainerDecl, it: *Node.Iterator) ?*Node {
@ -808,10 +840,8 @@ pub const Node = struct {
.None, .Enum => {},
}
if (it.node) |child| {
it.node = child.next;
return child.data;
}
if (i < self.fields_and_decls_len) return self.fieldsAndDeclsConst()[i];
i -= self.fields_and_decls_len;
return null;
}
@ -826,6 +856,20 @@ pub const Node = struct {
pub fn lastToken(self: *const ContainerDecl) TokenIndex {
return self.rbrace_token;
}
pub fn fieldsAndDecls(self: *ContainerDecl) []*Node {
const decls_start = @ptrCast([*]u8, self) + @sizeOf(ContainerDecl);
return @ptrCast([*]*Node, decls_start)[0..self.fields_and_decls_len];
}
pub fn fieldsAndDeclsConst(self: *const ContainerDecl) []const *Node {
const decls_start = @ptrCast([*]const u8, self) + @sizeOf(ContainerDecl);
return @ptrCast([*]const *Node, decls_start)[0..self.fields_and_decls_len];
}
fn sizeInBytes(fields_and_decls_len: NodeIndex) usize {
return @sizeOf(ContainerDecl) + @sizeOf(*Node) * @as(usize, fields_and_decls_len);
}
};
pub const ContainerField = struct {
@ -1116,7 +1160,7 @@ pub const Node = struct {
statements: StatementList,
rbrace: TokenIndex,
pub const StatementList = Root.DeclList;
pub const StatementList = LinkedList(*Node);
pub fn iterate(self: *const Block) Node.Iterator {
return .{ .parent_node = &self.base, .index = 0, .node = self.statements.first };
@ -2615,7 +2659,7 @@ pub const Node = struct {
test "iterate" {
var root = Node.Root{
.base = Node{ .id = Node.Id.Root },
.decls = Node.Root.DeclList.init(std.testing.allocator),
.decls_len = 0,
.eof_token = 0,
};
var base = &root.base;

View File

@ -63,16 +63,20 @@ const Parser = struct {
/// Root <- skip ContainerMembers eof
fn parseRoot(p: *Parser) Allocator.Error!*Node.Root {
const node = try p.arena.allocator.create(Node.Root);
node.* = .{
.decls = try parseContainerMembers(p, true),
const decls = try parseContainerMembers(p, true);
defer p.gpa.free(decls);
// parseContainerMembers will try to skip as much
// invalid tokens as it can so this can only be the EOF
.eof_token = p.eatToken(.Eof).?,
};
const eof_token = p.eatToken(.Eof).?;
const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token);
std.mem.copy(*ast.Node, node.decls(), decls);
return node;
}
/// Helper function for appending elements to a singly linked list.
fn llpush(
p: *Parser,
comptime T: type,
@ -92,9 +96,9 @@ const Parser = struct {
/// / ContainerField COMMA ContainerMembers
/// / ContainerField
/// /
fn parseContainerMembers(p: *Parser, top_level: bool) !Node.Root.DeclList {
var list = Node.Root.DeclList{};
var list_it = &list.first;
fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node {
var list = std.ArrayList(*ast.Node).init(p.gpa);
defer list.deinit();
var field_state: union(enum) {
/// no fields have been seen
@ -110,7 +114,7 @@ const Parser = struct {
while (true) {
if (try p.parseContainerDocComments()) |node| {
list_it = try p.llpush(*Node, list_it, node);
try list.append(node);
continue;
}
@ -127,7 +131,7 @@ const Parser = struct {
field_state = .{ .end = node.firstToken() };
}
node.cast(Node.TestDecl).?.doc_comments = doc_comments;
list_it = try p.llpush(*Node, list_it, node);
try list.append(node);
continue;
}
@ -142,7 +146,7 @@ const Parser = struct {
field_state = .{ .end = node.firstToken() };
}
node.cast(Node.Comptime).?.doc_comments = doc_comments;
list_it = try p.llpush(*Node, list_it, node);
try list.append(node);
continue;
}
@ -173,7 +177,7 @@ const Parser = struct {
},
else => unreachable,
}
list_it = try p.llpush(*Node, list_it, node);
try list.append(node);
if (try p.parseAppendedDocComment(node.lastToken())) |appended_comment| {
switch (node.id) {
.FnProto => {},
@ -215,7 +219,7 @@ const Parser = struct {
const field = node.cast(Node.ContainerField).?;
field.doc_comments = doc_comments;
list_it = try p.llpush(*Node, list_it, node);
try list.append(node);
const comma = p.eatToken(.Comma) orelse {
// try to continue parsing
const index = p.tok_i;
@ -275,7 +279,7 @@ const Parser = struct {
}
}
return list;
return list.toOwnedSlice();
}
/// Attempts to find next container member by searching for certain tokens
@ -2778,24 +2782,36 @@ const Parser = struct {
/// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE
fn parseContainerDeclAuto(p: *Parser) !?*Node {
const node = (try p.parseContainerDeclType()) orelse return null;
const container_decl_type = (try p.parseContainerDeclType()) orelse return null;
const lbrace = try p.expectToken(.LBrace);
const members = try p.parseContainerMembers(false);
defer p.gpa.free(members);
const rbrace = try p.expectToken(.RBrace);
const decl_type = node.cast(Node.ContainerDecl).?;
decl_type.fields_and_decls = members;
decl_type.lbrace_token = lbrace;
decl_type.rbrace_token = rbrace;
return node;
const node = try Node.ContainerDecl.alloc(&p.arena.allocator, members.len);
node.* = .{
.layout_token = null,
.kind_token = container_decl_type.kind_token,
.init_arg_expr = container_decl_type.init_arg_expr,
.fields_and_decls_len = members.len,
.lbrace_token = lbrace,
.rbrace_token = rbrace,
};
std.mem.copy(*ast.Node, node.fieldsAndDecls(), members);
return &node.base;
}
/// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
const ContainerDeclType = struct {
kind_token: TokenIndex,
init_arg_expr: ast.Node.ContainerDecl.InitArg,
};
/// ContainerDeclType
/// <- KEYWORD_struct
/// / KEYWORD_enum (LPAREN Expr RPAREN)?
/// / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)?
fn parseContainerDeclType(p: *Parser) !?*Node {
fn parseContainerDeclType(p: *Parser) !?ContainerDeclType {
const kind_token = p.nextToken();
const init_arg_expr = switch (kind_token.ptr.id) {
@ -2838,16 +2854,10 @@ const Parser = struct {
},
};
const node = try p.arena.allocator.create(Node.ContainerDecl);
node.* = .{
.layout_token = null,
return ContainerDeclType{
.kind_token = kind_token.index,
.init_arg_expr = init_arg_expr,
.fields_and_decls = undefined, // set by caller
.lbrace_token = undefined, // set by caller
.rbrace_token = undefined, // set by caller
};
return &node.base;
}
/// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN

View File

@ -79,9 +79,10 @@ fn renderRoot(
}
var start_col: usize = 0;
var it = tree.root_node.decls.first orelse return;
var decl_i: ast.NodeIndex = 0;
const root_decls = tree.root_node.decls();
while (true) {
var decl = it.data;
var decl = root_decls[decl_i];
// This loop does the following:
//
@ -130,14 +131,15 @@ fn renderRoot(
token_index = decl.firstToken();
while (!fmt_active) {
it = it.next orelse {
decl_i += 1;
if (decl_i >= root_decls.len) {
// If there's no next reformatted `decl`, just copy the
// remaining input tokens and bail out.
const start = tree.tokens[copy_start_token_index].start;
try copyFixingWhitespace(stream, tree.source[start..]);
return;
};
decl = it.data;
}
decl = root_decls[decl_i];
var decl_first_token_index = decl.firstToken();
while (token_index < decl_first_token_index) : (token_index += 1) {
@ -178,8 +180,9 @@ fn renderRoot(
}
try renderTopLevelDecl(allocator, stream, tree, 0, &start_col, decl);
it = it.next orelse return;
try renderExtraNewline(tree, stream, &start_col, it.data);
decl_i += 1;
if (decl_i >= root_decls.len) return;
try renderExtraNewline(tree, stream, &start_col, root_decls[decl_i]);
}
}
@ -1189,7 +1192,7 @@ fn renderExpression(
},
}
if (container_decl.fields_and_decls.first == null) {
if (container_decl.fields_and_decls_len == 0) {
try renderToken(tree, stream, container_decl.lbrace_token, indent + indent_delta, start_col, Space.None); // {
return renderToken(tree, stream, container_decl.rbrace_token, indent, start_col, space); // }
}
@ -1203,18 +1206,18 @@ fn renderExpression(
break :blk tree.tokens[maybe_comma].id == .Comma;
};
const fields_and_decls = container_decl.fieldsAndDecls();
// Check if the first declaration and the { are on the same line
const src_has_newline = !tree.tokensOnSameLine(
container_decl.lbrace_token,
container_decl.fields_and_decls.first.?.data.firstToken(),
fields_and_decls[0].firstToken(),
);
// We can only print all the elements in-line if all the
// declarations inside are fields
const src_has_only_fields = blk: {
var it = container_decl.fields_and_decls.first;
while (it) |decl_node| : (it = decl_node.next) {
const decl = decl_node.data;
for (fields_and_decls) |decl| {
if (decl.id != .ContainerField) break :blk false;
}
break :blk true;
@ -1225,14 +1228,12 @@ fn renderExpression(
const new_indent = indent + indent_delta;
try renderToken(tree, stream, container_decl.lbrace_token, new_indent, start_col, .Newline); // {
var it = container_decl.fields_and_decls.first;
while (it) |decl_node| : (it = decl_node.next) {
const decl = decl_node.data;
for (fields_and_decls) |decl, i| {
try stream.writeByteNTimes(' ', new_indent);
try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, .Newline);
if (decl_node.next) |next_decl| {
try renderExtraNewline(tree, stream, start_col, next_decl.data);
if (i + 1 < fields_and_decls.len) {
try renderExtraNewline(tree, stream, start_col, fields_and_decls[i + 1]);
}
}
@ -1245,10 +1246,8 @@ fn renderExpression(
const new_indent = indent + indent_delta;
try stream.writeByteNTimes(' ', new_indent);
var it = container_decl.fields_and_decls.first;
while (it) |decl_node| : (it = decl_node.next) {
const decl = decl_node.data;
const space_after_decl: Space = if (decl_node.next == null) .Newline else .Space;
for (fields_and_decls) |decl, i| {
const space_after_decl: Space = if (i + 1 >= fields_and_decls.len) .Newline else .Space;
try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, space_after_decl);
}
@ -1257,9 +1256,7 @@ fn renderExpression(
// All the declarations on the same line
try renderToken(tree, stream, container_decl.lbrace_token, indent, start_col, .Space); // {
var it = container_decl.fields_and_decls.first;
while (it) |decl_node| : (it = decl_node.next) {
const decl = decl_node.data;
for (fields_and_decls) |decl| {
try renderContainerDecl(allocator, stream, tree, indent, start_col, decl, .Space);
}
}