From 7c2c0e36f8378d8efaf64d859bdffb91007db82a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 20 May 2020 17:39:54 -0400 Subject: [PATCH] stage2 parser: different memory layout of ParamDecl Instead of being its own node, it's a struct inside FnProto. Instead of FnProto having a SinglyLinkedList of ParamDecl nodes, ParamDecls are appended directly in memory after the FnProto. throughput: 72.2 MiB/s => 72.9 MiB/s maxrss: 70 KB => 68 KB Importantly, the API is improved as well since the data is arranged linearly in memory. --- lib/std/zig/ast.zig | 151 ++++++++++++++++++++++++----------------- lib/std/zig/parse.zig | 56 ++++++++------- lib/std/zig/render.zig | 21 ++---- 3 files changed, 123 insertions(+), 105 deletions(-) diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index a1b285671..b23f0705c 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -477,7 +477,6 @@ pub const Node = struct { ErrorTag, AsmInput, AsmOutput, - ParamDecl, FieldInitializer, }; @@ -533,7 +532,6 @@ pub const Node = struct { switch (n.id) { .Root, .ContainerField, - .ParamDecl, .Block, .Payload, .PointerPayload, @@ -819,7 +817,7 @@ pub const Node = struct { return @ptrCast(*ContainerDecl, bytes.ptr); } - pub fn free(self: *Decl, allocator: *mem.Allocator) void { + pub fn free(self: *ContainerDecl, allocator: *mem.Allocator) void { const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.fields_and_decls_len)]; allocator.free(bytes); } @@ -979,13 +977,14 @@ pub const Node = struct { } }; + /// The params are directly after the FnProto in memory. pub const FnProto = struct { base: Node = Node{ .id = .FnProto }, doc_comments: ?*DocComment, visib_token: ?TokenIndex, fn_token: TokenIndex, name_token: ?TokenIndex, - params: ParamList, + params_len: NodeIndex, return_type: ReturnType, var_args_token: ?TokenIndex, extern_export_inline_token: ?TokenIndex, @@ -997,16 +996,75 @@ pub const Node = struct { is_extern_prototype: bool = false, // TODO: Remove once extern fn rewriting is is_async: bool = false, // TODO: remove once async fn rewriting is - pub const ParamList = LinkedList(*Node); - pub const ReturnType = union(enum) { Explicit: *Node, InferErrorSet: *Node, Invalid: TokenIndex, }; + pub const ParamDecl = struct { + doc_comments: ?*DocComment, + comptime_token: ?TokenIndex, + noalias_token: ?TokenIndex, + name_token: ?TokenIndex, + param_type: ParamType, + + pub const ParamType = union(enum) { + var_type: *Node, + var_args: TokenIndex, + type_expr: *Node, + }; + + pub fn iterate(self: *const ParamDecl) Node.Iterator { + return .{ .parent_node = &self.base, .index = 0, .node = null }; + } + + pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node { + var i = it.index; + it.index += 1; + + if (i < 1) { + switch (self.param_type) { + .var_args => return null, + .var_type, .type_expr => |node| return node, + } + } + i -= 1; + + return null; + } + + pub fn firstToken(self: *const ParamDecl) TokenIndex { + if (self.comptime_token) |comptime_token| return comptime_token; + if (self.noalias_token) |noalias_token| return noalias_token; + if (self.name_token) |name_token| return name_token; + switch (self.param_type) { + .var_args => |tok| return tok, + .var_type, .type_expr => |node| return node.firstToken(), + } + } + + pub fn lastToken(self: *const ParamDecl) TokenIndex { + switch (self.param_type) { + .var_args => |tok| return tok, + .var_type, .type_expr => |node| return node.lastToken(), + } + } + }; + + /// After this the caller must initialize the params list. + pub fn alloc(allocator: *mem.Allocator, params_len: NodeIndex) !*FnProto { + const bytes = try allocator.alignedAlloc(u8, @alignOf(FnProto), sizeInBytes(params_len)); + return @ptrCast(*FnProto, bytes.ptr); + } + + pub fn free(self: *FnProto, allocator: *mem.Allocator) void { + const bytes = @ptrCast([*]u8, self)[0..sizeInBytes(self.params_len)]; + allocator.free(bytes); + } + pub fn iterate(self: *const FnProto) Node.Iterator { - return .{ .parent_node = &self.base, .index = 0, .node = self.params.first }; + return .{ .parent_node = &self.base, .index = 0, .node = null }; } pub fn iterateNext(self: *const FnProto, it: *Node.Iterator) ?*Node { @@ -1018,11 +1076,17 @@ pub const Node = struct { i -= 1; } - if (it.node) |param| { - it.index -= 1; - it.node = param.next; - return param.data; + if (i < self.params_len) { + switch (self.paramsConst()[i].param_type) { + .var_type => |n| return n, + .var_args => { + i += 1; + it.index += 1; + }, + .type_expr => |n| return n, + } } + i -= self.params_len; if (self.align_expr) |align_expr| { if (i < 1) return align_expr; @@ -1064,6 +1128,20 @@ pub const Node = struct { .Invalid => |tok| return tok, } } + + pub fn params(self: *FnProto) []ParamDecl { + const decls_start = @ptrCast([*]u8, self) + @sizeOf(FnProto); + return @ptrCast([*]ParamDecl, decls_start)[0..self.params_len]; + } + + pub fn paramsConst(self: *const FnProto) []const ParamDecl { + const decls_start = @ptrCast([*]const u8, self) + @sizeOf(FnProto); + return @ptrCast([*]const ParamDecl, decls_start)[0..self.params_len]; + } + + fn sizeInBytes(params_len: NodeIndex) usize { + return @sizeOf(FnProto) + @sizeOf(ParamDecl) * @as(usize, params_len); + } }; pub const AnyFrameType = struct { @@ -1102,57 +1180,6 @@ pub const Node = struct { } }; - pub const ParamDecl = struct { - base: Node = Node{ .id = .ParamDecl }, - doc_comments: ?*DocComment, - comptime_token: ?TokenIndex, - noalias_token: ?TokenIndex, - name_token: ?TokenIndex, - param_type: ParamType, - - pub const ParamType = union(enum) { - var_type: *Node, - var_args: TokenIndex, - type_expr: *Node, - }; - - pub fn iterate(self: *const ParamDecl) Node.Iterator { - return .{ .parent_node = &self.base, .index = 0, .node = null }; - } - - pub fn iterateNext(self: *const ParamDecl, it: *Node.Iterator) ?*Node { - var i = it.index; - it.index += 1; - - if (i < 1) { - switch (self.param_type) { - .var_args => return null, - .var_type, .type_expr => |node| return node, - } - } - i -= 1; - - return null; - } - - pub fn firstToken(self: *const ParamDecl) TokenIndex { - if (self.comptime_token) |comptime_token| return comptime_token; - if (self.noalias_token) |noalias_token| return noalias_token; - if (self.name_token) |name_token| return name_token; - switch (self.param_type) { - .var_args => |tok| return tok, - .var_type, .type_expr => |node| return node.firstToken(), - } - } - - pub fn lastToken(self: *const ParamDecl) TokenIndex { - switch (self.param_type) { - .var_args => |tok| return tok, - .var_type, .type_expr => |node| return node.lastToken(), - } - } - }; - pub const Block = struct { base: Node = Node{ .id = .Block }, label: ?TokenIndex, diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 3ae72cd02..8f171805c 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -71,7 +71,7 @@ const Parser = struct { const eof_token = p.eatToken(.Eof).?; const node = try Node.Root.create(&p.arena.allocator, decls.len, eof_token); - std.mem.copy(*ast.Node, node.decls(), decls); + std.mem.copy(*Node, node.decls(), decls); return node; } @@ -96,8 +96,8 @@ const Parser = struct { /// / ContainerField COMMA ContainerMembers /// / ContainerField /// / - fn parseContainerMembers(p: *Parser, top_level: bool) ![]*ast.Node { - var list = std.ArrayList(*ast.Node).init(p.gpa); + fn parseContainerMembers(p: *Parser, top_level: bool) ![]*Node { + var list = std.ArrayList(*Node).init(p.gpa); defer list.deinit(); var field_state: union(enum) { @@ -522,6 +522,7 @@ const Parser = struct { const name_token = p.eatToken(.Identifier); const lparen = try p.expectToken(.LParen); const params = try p.parseParamDeclList(&var_args_token); + defer p.gpa.free(params); const rparen = try p.expectToken(.RParen); const align_expr = try p.parseByteAlign(); const section_expr = try p.parseLinkSection(); @@ -544,13 +545,13 @@ const Parser = struct { else R{ .Explicit = return_type_expr.? }; - const fn_proto_node = try p.arena.allocator.create(Node.FnProto); + const fn_proto_node = try Node.FnProto.alloc(&p.arena.allocator, params.len); fn_proto_node.* = .{ .doc_comments = null, .visib_token = null, .fn_token = fn_token, .name_token = name_token, - .params = params, + .params_len = params.len, .return_type = return_type, .var_args_token = var_args_token, .extern_export_inline_token = null, @@ -562,6 +563,7 @@ const Parser = struct { .is_extern_prototype = is_extern, .is_async = is_async, }; + std.mem.copy(Node.FnProto.ParamDecl, fn_proto_node.params(), params); return &fn_proto_node.base; } @@ -621,7 +623,7 @@ const Parser = struct { var type_expr: ?*Node = null; if (p.eatToken(.Colon)) |_| { if (p.eatToken(.Keyword_var)) |var_tok| { - const node = try p.arena.allocator.create(ast.Node.VarType); + const node = try p.arena.allocator.create(Node.VarType); node.* = .{ .token = var_tok }; type_expr = &node.base; } else { @@ -1936,7 +1938,7 @@ const Parser = struct { } /// ParamDecl <- (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType - fn parseParamDecl(p: *Parser) !?*Node { + fn parseParamDecl(p: *Parser, list: *std.ArrayList(Node.FnProto.ParamDecl)) !bool { const doc_comments = try p.parseDocComment(); const noalias_token = p.eatToken(.Keyword_noalias); const comptime_token = if (noalias_token == null) p.eatToken(.Keyword_comptime) else null; @@ -1951,31 +1953,30 @@ const Parser = struct { if (noalias_token == null and comptime_token == null and name_token == null and - doc_comments == null) return null; + doc_comments == null) return false; try p.errors.append(p.gpa, .{ .ExpectedParamType = .{ .token = p.tok_i }, }); return error.ParseError; }; - const param_decl = try p.arena.allocator.create(Node.ParamDecl); - param_decl.* = .{ + (try list.addOne()).* = .{ .doc_comments = doc_comments, .comptime_token = comptime_token, .noalias_token = noalias_token, .name_token = name_token, .param_type = param_type, }; - return ¶m_decl.base; + return true; } /// ParamType /// <- KEYWORD_var /// / DOT3 /// / TypeExpr - fn parseParamType(p: *Parser) !?Node.ParamDecl.ParamType { + fn parseParamType(p: *Parser) !?Node.FnProto.ParamDecl.ParamType { // TODO cast from tuple to error union is broken - const P = Node.ParamDecl.ParamType; + const P = Node.FnProto.ParamDecl.ParamType; if (try p.parseVarType()) |node| return P{ .var_type = node }; if (p.eatToken(.Ellipsis3)) |token| return P{ .var_args = token }; if (try p.parseTypeExpr()) |node| return P{ .type_expr = node }; @@ -2587,7 +2588,7 @@ const Parser = struct { if (p.eatToken(.Ellipsis2) != null) { const end_expr = try p.parseExpr(); - const sentinel: ?*ast.Node = if (p.eatToken(.Colon) != null) + const sentinel: ?*Node = if (p.eatToken(.Colon) != null) try p.parseExpr() else null; @@ -2616,7 +2617,7 @@ const Parser = struct { if (p.eatToken(.Period)) |period| { if (try p.parseIdentifier()) |identifier| { // TODO: It's a bit weird to return an InfixOp from the SuffixOp parser. - // Should there be an ast.Node.SuffixOp.FieldAccess variant? Or should + // Should there be an Node.SuffixOp.FieldAccess variant? Or should // this grammar rule be altered? const node = try p.arena.allocator.create(Node.InfixOp); node.* = .{ @@ -2652,13 +2653,13 @@ const Parser = struct { /// ExprList <- (Expr COMMA)* Expr? fn parseFnCallArguments(p: *Parser) !?AnnotatedParamList { if (p.eatToken(.LParen) == null) return null; - const list = try ListParseFn(Node.FnProto.ParamList, parseExpr)(p); + const list = try ListParseFn(std.SinglyLinkedList(*Node), parseExpr)(p); const rparen = try p.expectToken(.RParen); return AnnotatedParamList{ .list = list, .rparen = rparen }; } const AnnotatedParamList = struct { - list: Node.FnProto.ParamList, // NOTE: may also be any other type SegmentedList(*Node, 2) + list: std.SinglyLinkedList(*Node), rparen: TokenIndex, }; @@ -2797,14 +2798,14 @@ const Parser = struct { .lbrace_token = lbrace, .rbrace_token = rbrace, }; - std.mem.copy(*ast.Node, node.fieldsAndDecls(), members); + std.mem.copy(*Node, node.fieldsAndDecls(), members); return &node.base; } /// Holds temporary data until we are ready to construct the full ContainerDecl AST node. const ContainerDeclType = struct { kind_token: TokenIndex, - init_arg_expr: ast.Node.ContainerDecl.InitArg, + init_arg_expr: Node.ContainerDecl.InitArg, }; /// ContainerDeclType @@ -2893,14 +2894,11 @@ const Parser = struct { } /// ParamDeclList <- (ParamDecl COMMA)* ParamDecl? - fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) !Node.FnProto.ParamList { - var list = Node.FnProto.ParamList{}; - var list_it = &list.first; - var last: ?*Node = null; - while (try p.parseParamDecl()) |node| { - last = node; - list_it = try p.llpush(*Node, list_it, node); + fn parseParamDeclList(p: *Parser, var_args_token: *?TokenIndex) ![]Node.FnProto.ParamDecl { + var list = std.ArrayList(Node.FnProto.ParamDecl).init(p.gpa); + defer list.deinit(); + while (try p.parseParamDecl(&list)) { switch (p.tokens[p.tok_i].id) { .Comma => _ = p.nextToken(), // all possible delimiters @@ -2914,13 +2912,13 @@ const Parser = struct { }, } } - if (last) |node| { - const param_type = node.cast(Node.ParamDecl).?.param_type; + if (list.items.len != 0) { + const param_type = list.items[list.items.len - 1].param_type; if (param_type == .var_args) { var_args_token.* = param_type.var_args; } } - return list; + return list.toOwnedSlice(); } const NodeParseFn = fn (p: *Parser) Error!?*Node; diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index cfae09dcd..fbdfe51bc 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -1479,13 +1479,11 @@ fn renderExpression( try renderToken(tree, stream, lparen, indent, start_col, Space.None); // ( // render all on one line, no trailing comma - var it = fn_proto.params.first; - while (it) |param_decl_node_node| : (it = param_decl_node_node.next) { - const param_decl_node = param_decl_node_node.data; - try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl_node, Space.None); + for (fn_proto.params()) |param_decl, i| { + try renderParamDecl(allocator, stream, tree, indent, start_col, param_decl, Space.None); - if (param_decl_node_node.next != null) { - const comma = tree.nextToken(param_decl_node.lastToken()); + if (i + 1 < fn_proto.params_len) { + const comma = tree.nextToken(param_decl.lastToken()); try renderToken(tree, stream, comma, indent, start_col, Space.Space); // , } } @@ -1494,11 +1492,9 @@ fn renderExpression( const new_indent = indent + indent_delta; try renderToken(tree, stream, lparen, new_indent, start_col, Space.Newline); // ( - var it = fn_proto.params.first; - while (it) |param_decl_node_node| : (it = param_decl_node_node.next) { - const param_decl_node = param_decl_node_node.data; + for (fn_proto.params()) |param_decl| { try stream.writeByteNTimes(' ', new_indent); - try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl_node, Space.Comma); + try renderParamDecl(allocator, stream, tree, new_indent, start_col, param_decl, Space.Comma); } try stream.writeByteNTimes(' ', indent); } @@ -2079,7 +2075,6 @@ fn renderExpression( .VarDecl, .Use, .TestDecl, - .ParamDecl, => unreachable, } } @@ -2162,11 +2157,9 @@ fn renderParamDecl( tree: *ast.Tree, indent: usize, start_col: *usize, - base: *ast.Node, + param_decl: ast.Node.FnProto.ParamDecl, space: Space, ) (@TypeOf(stream).Error || Error)!void { - const param_decl = @fieldParentPtr(ast.Node.ParamDecl, "base", base); - try renderDocComments(tree, stream, param_decl, indent, start_col); if (param_decl.comptime_token) |comptime_token| {