From 64c149ca19cc299485693acbda325eee51d37f28 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 20 May 2020 16:13:02 -0400
Subject: [PATCH] fields and decls: ArrayList appending, memcpy to ast arena

This makes field and decl AST nodes part of the Root and ContainerDecl
AST nodes: the child node pointers are stored in a flat array placed
directly after the parent struct, in the same arena allocation.

Surprisingly, this is a throughput regression compared to the
singly-linked list previously used for these nodes:

throughput: 76.5 MiB/s => 69.4 MiB/s

However, it has much better memory usage:

maxrss: 392 KB => 77 KB

It is also a better API for consumers of the parser, since the decls
are now a flat list in memory.
---
 lib/std/zig/ast.zig    | 106 +++++++++++++++++++++++++++++------------
 lib/std/zig/parse.zig  |  72 ++++++++++++++++------------
 lib/std/zig/render.zig |  45 ++++++++---------
 3 files changed, 137 insertions(+), 86 deletions(-)

diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig
index 846b6e504..a1b285671 100644
--- a/lib/std/zig/ast.zig
+++ b/lib/std/zig/ast.zig
@@ -6,6 +6,7 @@ const mem = std.mem;
 const Token = std.zig.Token;
 
 pub const TokenIndex = usize;
+pub const NodeIndex = usize;
 
 pub const Tree = struct {
     /// Reference to externally-owned data.
@@ -616,40 +617,62 @@ pub const Node = struct {
         }
     }
 
+    /// The decls data follows this struct in memory as an array of Node pointers.
     pub const Root = struct {
         base: Node = Node{ .id = .Root },
-        decls: DeclList,
         eof_token: TokenIndex,
+        decls_len: NodeIndex,
 
-        pub const DeclList = LinkedList(*Node);
+        /// After this the caller must initialize the decls list.
+        pub fn create(allocator: *mem.Allocator, decls_len: NodeIndex, eof_token: TokenIndex) !*Root {
+            const bytes = try allocator.alignedAlloc(u8, @alignOf(Root), sizeInBytes(decls_len));
+            const self = @ptrCast(*Root, bytes.ptr);
+            self.* = .{
+                .eof_token = eof_token,
+                .decls_len = decls_len,
+            };
+            return self;
+        }
+
+        pub fn destroy(self: *Root, allocator: *mem.Allocator) void {
+            const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.decls_len)];
+            allocator.free(bytes);
+        }
 
         pub fn iterate(self: *const Root) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = self.decls.first };
+            return .{ .parent_node = &self.base, .index = 0, .node = null };
         }
 
         pub fn iterateNext(self: *const Root, it: *Node.Iterator) ?*Node {
-            const decl = it.node orelse return null;
-            it.node = decl.next;
-            return decl.data;
+            var i = it.index;
+            it.index += 1;
+
+            if (i < self.decls_len) return self.declsConst()[i];
+            return null;
+        }
+
+        pub fn decls(self: *Root) []*Node {
+            const decls_start = @ptrCast([*]u8, self) + @sizeOf(Root);
+            return @ptrCast([*]*Node, decls_start)[0..self.decls_len];
+        }
+
+        pub fn declsConst(self: *const Root) []const *Node {
+            const decls_start = @ptrCast([*]const u8, self) + @sizeOf(Root);
+            return @ptrCast([*]const *Node, decls_start)[0..self.decls_len];
         }
 
         pub fn firstToken(self: *const Root) TokenIndex {
-            if (self.decls.first) |first| {
-                return first.data.firstToken();
-            } else {
-                return self.eof_token;
-            }
+            if (self.decls_len == 0) return self.eof_token;
+            return self.declsConst()[0].firstToken();
         }
 
         pub fn lastToken(self: *const Root) TokenIndex {
-            if (self.decls.first) |first| {
-                var node = first;
-                while (true) {
-                    node = node.next orelse return node.data.lastToken();
-                }
-            } else {
-                return self.eof_token;
-            }
+            if (self.decls_len == 0) return self.eof_token;
+            return self.declsConst()[self.decls_len - 1].lastToken();
+        }
+
+        fn sizeInBytes(decls_len: NodeIndex) usize {
+            return @sizeOf(Root) + @sizeOf(*Node) * @as(usize, decls_len);
         }
     };
 
@@ -777,14 +800,12 @@ pub const Node = struct {
 
     pub const ContainerDecl = struct {
         base: Node = Node{ .id = .ContainerDecl },
-        layout_token: ?TokenIndex,
         kind_token: TokenIndex,
-        init_arg_expr: InitArg,
-        fields_and_decls: DeclList,
+        layout_token: ?TokenIndex,
         lbrace_token: TokenIndex,
         rbrace_token: TokenIndex,
-
-        pub const DeclList = Root.DeclList;
+        fields_and_decls_len: NodeIndex,
+        init_arg_expr: InitArg,
 
         pub const InitArg = union(enum) {
             None,
@@ -792,8 +813,19 @@ pub const Node = struct {
             Type: *Node,
         };
 
+        /// After this the caller must initialize the fields_and_decls list.
+        pub fn alloc(allocator: *mem.Allocator, fields_and_decls_len: NodeIndex) !*ContainerDecl {
+            const bytes = try allocator.alignedAlloc(u8, @alignOf(ContainerDecl), sizeInBytes(fields_and_decls_len));
+            return @ptrCast(*ContainerDecl, bytes.ptr);
+        }
+
+        pub fn free(self: *ContainerDecl, allocator: *mem.Allocator) void {
+            const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)];
+            allocator.free(bytes);
+        }
+
         pub fn iterate(self: *const ContainerDecl) Node.Iterator {
-            return .{ .parent_node = &self.base, .index = 0, .node = self.fields_and_decls.first };
+            return .{ .parent_node = &self.base, .index = 0, .node = null };
         }
 
         pub fn iterateNext(self: *const ContainerDecl, it: *Node.Iterator) ?*Node {
@@ -808,10 +840,8 @@ pub const Node = struct {
                 .None, .Enum => {},
             }
 
-            if (it.node) |child| {
-                it.node = child.next;
-                return child.data;
-            }
+            if (i < self.fields_and_decls_len) return self.fieldsAndDeclsConst()[i];
+            i -= self.fields_and_decls_len;
 
             return null;
         }
@@ -826,6 +856,20 @@ pub const Node = struct {
         pub fn lastToken(self: *const ContainerDecl) TokenIndex {
             return self.rbrace_token;
         }
+
+        pub fn fieldsAndDecls(self: *ContainerDecl) []*Node {
+            const decls_start = @ptrCast([*]u8, self) + @sizeOf(ContainerDecl);
+            return @ptrCast([*]*Node, decls_start)[0..self.fields_and_decls_len];
+        }
+
+        pub fn fieldsAndDeclsConst(self: *const ContainerDecl) []const *Node {
+            const decls_start = @ptrCast([*]const u8, self) + @sizeOf(ContainerDecl);
+            return @ptrCast([*]const *Node, decls_start)[0..self.fields_and_decls_len];
+        }
+
+        fn sizeInBytes(fields_and_decls_len: NodeIndex) usize {
+            return @sizeOf(ContainerDecl) + @sizeOf(*Node) * @as(usize, fields_and_decls_len);
+        }
     };
 
     pub const ContainerField = struct {
@@ -1116,7 +1160,7 @@ pub const Node = struct {
         statements: StatementList,
         rbrace: TokenIndex,
 
-        pub const StatementList = Root.DeclList;
+        pub const StatementList = LinkedList(*Node);
 
         pub fn iterate(self: *const Block) Node.Iterator {
             return .{ .parent_node = &self.base, .index = 0, .node = self.statements.first };
@@ -2615,7 +2659,7 @@ pub const Node = struct {
 test "iterate" {
     var root = Node.Root{
         .base = Node{ .id = Node.Id.Root },
-        .decls = Node.Root.DeclList.init(std.testing.allocator),
+        .decls_len = 0,
         .eof_token = 0,
     };
     var base = &root.base;
diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig
index 7abd3b300..3ae72cd02 100644
--- a/lib/std/zig/parse.zig
+++ b/lib/std/zig/parse.zig
@@ -63,16 +63,20 @@ const Parser = struct {
 
     /// Root <- skip ContainerMembers eof
     fn parseRoot(p: *Parser) Allocator.Error!*Node.Root {
-        const node = try p.arena.allocator.create(Node.Root);
-        node.* = .{
-            .decls = try parseContainerMembers(p, true),
-            // parseContainerMembers will try to skip as much
-            // invalid tokens as it can so this can only be the EOF
-            .eof_token = p.eatToken(.Eof).?,
-        };
+        const decls = try parseContainerMembers(p, true);
+        defer p.gpa.free(decls);
+
+        // parseContainerMembers will try to skip as many
+        // invalid tokens as it can, so this can only be the EOF
+        const eof_token = p.eatToken(.Eof).?;
+
+        const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token);
+        std.mem.copy(*ast.Node, node.decls(), decls);
+
         return node;
     }
 
+    /// Helper function for appending elements to a singly linked list.
     fn llpush(
         p: *Parser,
         comptime T: type,
@@ -92,9 +96,9 @@ const Parser = struct {
     ///      / ContainerField COMMA ContainerMembers
     ///      / ContainerField
     ///      /
-    fn parseContainerMembers(p: *Parser, top_level: bool) !Node.Root.DeclList {
-        var list = Node.Root.DeclList{};
-        var list_it = &list.first;
+    fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node {
+        var list = std.ArrayList(*ast.Node).init(p.gpa);
+        defer list.deinit();
 
         var field_state: union(enum) {
             /// no fields have been seen
@@ -110,7 +114,7 @@ const Parser = struct {
 
         while (true) {
            if (try p.parseContainerDocComments()) |node| {
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                continue;
            }
 
@@ -127,7 +131,7 @@ const Parser = struct {
                    field_state = .{ .end = node.firstToken() };
                }
                node.cast(Node.TestDecl).?.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                continue;
            }
 
@@ -142,7 +146,7 @@ const Parser = struct {
                    field_state = .{ .end = node.firstToken() };
                }
                node.cast(Node.Comptime).?.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                continue;
            }
 
@@ -173,7 +177,7 @@ const Parser = struct {
                    },
                    else => unreachable,
                }
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                if (try p.parseAppendedDocComment(node.lastToken())) |appended_comment| {
                    switch (node.id) {
                        .FnProto => {},
@@ -215,7 +219,7 @@ const Parser = struct {
 
                const field = node.cast(Node.ContainerField).?;
                field.doc_comments = doc_comments;
-                list_it = try p.llpush(*Node, list_it, node);
+                try list.append(node);
                const comma = p.eatToken(.Comma) orelse {
                    // try to continue parsing
                    const index = p.tok_i;
@@ -275,7 +279,7 @@ const Parser = struct {
            }
        }
 
-        return list;
+        return list.toOwnedSlice();
    }
 
    /// Attempts to find next container member by searching for certain tokens
@@ -2778,24 +2782,36 @@ const Parser = struct {
 
    /// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE
    fn parseContainerDeclAuto(p: *Parser) !?*Node {
-        const node = (try p.parseContainerDeclType()) orelse return null;
+        const container_decl_type = (try p.parseContainerDeclType()) orelse return null;
        const lbrace = try p.expectToken(.LBrace);
        const members = try p.parseContainerMembers(false);
+        defer p.gpa.free(members);
        const rbrace = try p.expectToken(.RBrace);
 
-        const decl_type = node.cast(Node.ContainerDecl).?;
-        decl_type.fields_and_decls = members;
-        decl_type.lbrace_token = lbrace;
-        decl_type.rbrace_token = rbrace;
-
-        return node;
+        const node = try Node.ContainerDecl.alloc(&p.arena.allocator, members.len);
+        node.* = .{
+            .layout_token = null,
+            .kind_token = container_decl_type.kind_token,
+            .init_arg_expr = container_decl_type.init_arg_expr,
+            .fields_and_decls_len = members.len,
+            .lbrace_token = lbrace,
+            .rbrace_token = rbrace,
+        };
+        std.mem.copy(*ast.Node, node.fieldsAndDecls(), members);
+        return &node.base;
    }
 
+    /// Holds temporary data until we are ready to construct the full ContainerDecl AST node.
+    const ContainerDeclType = struct {
+        kind_token: TokenIndex,
+        init_arg_expr: ast.Node.ContainerDecl.InitArg,
+    };
+
    /// ContainerDeclType
    ///     <- KEYWORD_struct
    ///      / KEYWORD_enum (LPAREN Expr RPAREN)?
    ///      / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)?
-    fn parseContainerDeclType(p: *Parser) !?*Node {
+    fn parseContainerDeclType(p: *Parser) !?ContainerDeclType {
        const kind_token = p.nextToken();
 
        const init_arg_expr = switch (kind_token.ptr.id) {
@@ -2838,16 +2854,10 @@ const Parser = struct {
            },
        };
 
-        const node = try p.arena.allocator.create(Node.ContainerDecl);
-        node.* = .{
-            .layout_token = null,
+        return ContainerDeclType{
            .kind_token = kind_token.index,
            .init_arg_expr = init_arg_expr,
-            .fields_and_decls = undefined, // set by caller
-            .lbrace_token = undefined, // set by caller
-            .rbrace_token = undefined, // set by caller
        };
-        return &node.base;
    }
 
    /// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN
diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig
index bcd7ef488..cfae09dcd 100644
--- a/lib/std/zig/render.zig
+++ b/lib/std/zig/render.zig
@@ -79,9 +79,10 @@ fn renderRoot(
    }
 
    var start_col: usize = 0;
-    var it = tree.root_node.decls.first orelse return;
+    var decl_i: ast.NodeIndex = 0;
+    const root_decls = tree.root_node.decls();
    while (true) {
-        var decl = it.data;
+        var decl = root_decls[decl_i];
 
        // This loop does the following:
        //
@@ -130,14 +131,15 @@ fn renderRoot(
 
            token_index = decl.firstToken();
            while (!fmt_active) {
-                it = it.next orelse {
+                decl_i += 1;
+                if (decl_i >= root_decls.len) {
                    // If there's no next reformatted `decl`, just copy the
                    // remaining input tokens and bail out.
                    const start = tree.tokens[copy_start_token_index].start;
                    try copyFixingWhitespace(stream, tree.source[start..]);
                    return;
-                };
-                decl = it.data;
+                }
+                decl = root_decls[decl_i];
 
                var decl_first_token_index = decl.firstToken();
                while (token_index < decl_first_token_index) : (token_index += 1) {
@@ -178,8 +180,9 @@ fn renderRoot(
        }
 
        try renderTopLevelDecl(allocator, stream, tree, 0, &start_col, decl);
-        it = it.next orelse return;
-        try renderExtraNewline(tree, stream, &start_col, it.data);
+        decl_i += 1;
+        if (decl_i >= root_decls.len) return;
+        try renderExtraNewline(tree, stream, &start_col, root_decls[decl_i]);
    }
 }
 
@@ -1189,7 +1192,7 @@ fn renderExpression(
                },
            }
 
-            if (container_decl.fields_and_decls.first == null) {
+            if (container_decl.fields_and_decls_len == 0) {
                try renderToken(tree, stream, container_decl.lbrace_token, indent + indent_delta, start_col, Space.None); // {
                return renderToken(tree, stream, container_decl.rbrace_token, indent, start_col, space); // }
            }
@@ -1203,18 +1206,18 @@ fn renderExpression(
                break :blk tree.tokens[maybe_comma].id == .Comma;
            };
 
+            const fields_and_decls = container_decl.fieldsAndDecls();
+
            // Check if the first declaration and the { are on the same line
            const src_has_newline = !tree.tokensOnSameLine(
                container_decl.lbrace_token,
-                container_decl.fields_and_decls.first.?.data.firstToken(),
+                fields_and_decls[0].firstToken(),
            );
 
            // We can only print all the elements in-line if all the
            // declarations inside are fields
            const src_has_only_fields = blk: {
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl| {
                    if (decl.id != .ContainerField) break :blk false;
                }
                break :blk true;
            };
@@ -1225,14 +1228,12 @@ fn renderExpression(
                const new_indent = indent + indent_delta;
                try renderToken(tree, stream, container_decl.lbrace_token, new_indent, start_col, .Newline); // {
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl, i| {
                    try stream.writeByteNTimes(' ', new_indent);
                    try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, .Newline);
 
-                    if (decl_node.next) |next_decl| {
-                        try renderExtraNewline(tree, stream, start_col, next_decl.data);
+                    if (i + 1 < fields_and_decls.len) {
+                        try renderExtraNewline(tree, stream, start_col, fields_and_decls[i + 1]);
                    }
                }
 
@@ -1245,10 +1246,8 @@ fn renderExpression(
                const new_indent = indent + indent_delta;
                try stream.writeByteNTimes(' ', new_indent);
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
-                    const space_after_decl: Space = if (decl_node.next == null) .Newline else .Space;
+                for (fields_and_decls) |decl, i| {
+                    const space_after_decl: Space = if (i + 1 >= fields_and_decls.len) .Newline else .Space;
                    try renderContainerDecl(allocator, stream, tree, new_indent, start_col, decl, space_after_decl);
                }
 
@@ -1257,9 +1256,7 @@ fn renderExpression(
                // All the declarations on the same line
                try renderToken(tree, stream, container_decl.lbrace_token, indent, start_col, .Space); // {
 
-                var it = container_decl.fields_and_decls.first;
-                while (it) |decl_node| : (it = decl_node.next) {
-                    const decl = decl_node.data;
+                for (fields_and_decls) |decl| {
                    try renderContainerDecl(allocator, stream, tree, indent, start_col, decl, .Space);
                }
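-- 
For reference, the layout trick that Root.create/decls and ContainerDecl.alloc/fieldsAndDecls
implement in the diff above can be sketched in isolation. The sketch below is illustrative only
and not part of the patch: FlatNode and its members are hypothetical names, it assumes the same
Zig version the diff targets (0.6-era *mem.Allocator and two-argument builtins such as @ptrCast),
and it mirrors the patch's own casting style rather than prescribing it.

const std = @import("std");
const mem = std.mem;

/// Illustrative node type: `children_len` child pointers are stored
/// directly after this struct, in the same allocation, just like
/// Node.Root stores its decls.
const FlatNode = struct {
    children_len: usize,

    /// Allocate the header plus the trailing pointer array in one shot.
    /// The caller initializes the children afterwards.
    fn create(allocator: *mem.Allocator, children_len: usize) !*FlatNode {
        const bytes = try allocator.alignedAlloc(u8, @alignOf(FlatNode), sizeInBytes(children_len));
        const self = @ptrCast(*FlatNode, bytes.ptr);
        self.* = .{ .children_len = children_len };
        return self;
    }

    fn destroy(self: *FlatNode, allocator: *mem.Allocator) void {
        allocator.free(@ptrCast([*]u8, self)[0..sizeInBytes(self.children_len)]);
    }

    /// View the trailing memory as a slice of child pointers.
    fn children(self: *FlatNode) []*FlatNode {
        const start = @ptrCast([*]u8, self) + @sizeOf(FlatNode);
        return @ptrCast([*]*FlatNode, start)[0..self.children_len];
    }

    fn sizeInBytes(children_len: usize) usize {
        return @sizeOf(FlatNode) + @sizeOf(*FlatNode) * children_len;
    }
};

test "trailing child array" {
    // A parser gathers children into a temporary ArrayList first...
    var list = std.ArrayList(*FlatNode).init(std.testing.allocator);
    defer list.deinit();

    // ...then allocates the node at its final size and copies them in,
    // the same shape as parseRoot and parseContainerDeclAuto above.
    const node = try FlatNode.create(std.testing.allocator, list.items.len);
    defer node.destroy(std.testing.allocator);
    mem.copy(*FlatNode, node.children(), list.items);

    std.debug.assert(node.children().len == 0);
}

The point of the layout is that each parent node needs exactly one allocation holding both the
header and its child pointers, which is where the maxrss improvement comes from, and consumers
index a plain []*Node slice instead of walking linked-list nodes.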