// JSON parser conforming to RFC8259. // // https://tools.ietf.org/html/rfc8259 const std = @import("index.zig"); const debug = std.debug; const mem = std.mem; const u1 = @IntType(false, 1); const u256 = @IntType(false, 256); // A single token slice into the parent string. // // Use `token.slice()` on the input at the current position to get the current slice. pub const Token = struct { id: Id, // How many bytes do we skip before counting offset: u1, // Whether string contains a \uXXXX sequence and cannot be zero-copied string_has_escape: bool, // Whether number is simple and can be represented by an integer (i.e. no `.` or `e`) number_is_integer: bool, // How many bytes from the current position behind the start of this token is. count: usize, pub const Id = enum { ObjectBegin, ObjectEnd, ArrayBegin, ArrayEnd, String, Number, True, False, Null, }; pub fn init(id: Id, count: usize, offset: u1) Token { return Token{ .id = id, .offset = offset, .string_has_escape = false, .number_is_integer = true, .count = count, }; } pub fn initString(count: usize, has_unicode_escape: bool) Token { return Token{ .id = Id.String, .offset = 0, .string_has_escape = has_unicode_escape, .number_is_integer = true, .count = count, }; } pub fn initNumber(count: usize, number_is_integer: bool) Token { return Token{ .id = Id.Number, .offset = 0, .string_has_escape = false, .number_is_integer = number_is_integer, .count = count, }; } // A marker token is a zero-length pub fn initMarker(id: Id) Token { return Token{ .id = id, .offset = 0, .string_has_escape = false, .number_is_integer = true, .count = 0, }; } // Slice into the underlying input string. pub fn slice(self: *const Token, input: []const u8, i: usize) []const u8 { return input[i + self.offset - self.count .. i + self.offset]; } }; // A small streaming JSON parser. This accepts input one byte at a time and returns tokens as // they are encountered. No copies or allocations are performed during parsing and the entire // parsing state requires ~40-50 bytes of stack space. // // Conforms strictly to RFC8529. // // For a non-byte based wrapper, consider using TokenStream instead. pub const StreamingParser = struct { // Current state state: State, // How many bytes we have counted for the current token count: usize, // What state to follow after parsing a string (either property or value string) after_string_state: State, // What state to follow after parsing a value (either top-level or value end) after_value_state: State, // If we stopped now, would the complete parsed string to now be a valid json string complete: bool, // Current token flags to pass through to the next generated, see Token. string_has_escape: bool, number_is_integer: bool, // Bit-stack for nested object/map literals (max 255 nestings). stack: u256, stack_used: u8, const object_bit = 0; const array_bit = 1; const max_stack_size = @maxValue(u8); pub fn init() StreamingParser { var p: StreamingParser = undefined; p.reset(); return p; } pub fn reset(p: *StreamingParser) void { p.state = State.TopLevelBegin; p.count = 0; // Set before ever read in main transition function p.after_string_state = undefined; p.after_value_state = State.ValueEnd; // handle end of values normally p.stack = 0; p.stack_used = 0; p.complete = false; p.string_has_escape = false; p.number_is_integer = true; } pub const State = enum { // These must be first with these explicit values as we rely on them for indexing the // bit-stack directly and avoiding a branch. ObjectSeparator = 0, ValueEnd = 1, TopLevelBegin, TopLevelEnd, ValueBegin, ValueBeginNoClosing, String, StringUtf8Byte3, StringUtf8Byte2, StringUtf8Byte1, StringEscapeCharacter, StringEscapeHexUnicode4, StringEscapeHexUnicode3, StringEscapeHexUnicode2, StringEscapeHexUnicode1, Number, NumberMaybeDotOrExponent, NumberMaybeDigitOrDotOrExponent, NumberFractionalRequired, NumberFractional, NumberMaybeExponent, NumberExponent, NumberExponentDigitsRequired, NumberExponentDigits, TrueLiteral1, TrueLiteral2, TrueLiteral3, FalseLiteral1, FalseLiteral2, FalseLiteral3, FalseLiteral4, NullLiteral1, NullLiteral2, NullLiteral3, // Only call this function to generate array/object final state. pub fn fromInt(x: var) State { debug.assert(x == 0 or x == 1); const T = @TagType(State); return State(T(x)); } }; pub const Error = error{ InvalidTopLevel, TooManyNestedItems, TooManyClosingItems, InvalidValueBegin, InvalidValueEnd, UnbalancedBrackets, UnbalancedBraces, UnexpectedClosingBracket, UnexpectedClosingBrace, InvalidNumber, InvalidSeparator, InvalidLiteral, InvalidEscapeCharacter, InvalidUnicodeHexSymbol, InvalidUtf8Byte, InvalidTopLevelTrailing, InvalidControlCharacter, }; // Give another byte to the parser and obtain any new tokens. This may (rarely) return two // tokens. token2 is always null if token1 is null. // // There is currently no error recovery on a bad stream. pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void { token1.* = null; token2.* = null; p.count += 1; // unlikely if (try p.transition(c, token1)) { _ = try p.transition(c, token2); } } // Perform a single transition on the state machine and return any possible token. fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool { switch (p.state) { State.TopLevelBegin => switch (c) { '{' => { p.stack <<= 1; p.stack |= object_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ObjectSeparator; token.* = Token.initMarker(Token.Id.ObjectBegin); }, '[' => { p.stack <<= 1; p.stack |= array_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ValueEnd; token.* = Token.initMarker(Token.Id.ArrayBegin); }, '-' => { p.number_is_integer = true; p.state = State.Number; p.after_value_state = State.TopLevelEnd; p.count = 0; }, '0' => { p.number_is_integer = true; p.state = State.NumberMaybeDotOrExponent; p.after_value_state = State.TopLevelEnd; p.count = 0; }, '1'...'9' => { p.number_is_integer = true; p.state = State.NumberMaybeDigitOrDotOrExponent; p.after_value_state = State.TopLevelEnd; p.count = 0; }, '"' => { p.state = State.String; p.after_value_state = State.TopLevelEnd; // We don't actually need the following since after_value_state should override. p.after_string_state = State.ValueEnd; p.string_has_escape = false; p.count = 0; }, 't' => { p.state = State.TrueLiteral1; p.after_value_state = State.TopLevelEnd; p.count = 0; }, 'f' => { p.state = State.FalseLiteral1; p.after_value_state = State.TopLevelEnd; p.count = 0; }, 'n' => { p.state = State.NullLiteral1; p.after_value_state = State.TopLevelEnd; p.count = 0; }, 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidTopLevel; }, }, State.TopLevelEnd => switch (c) { 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidTopLevelTrailing; }, }, State.ValueBegin => switch (c) { // NOTE: These are shared in ValueEnd as well, think we can reorder states to // be a bit clearer and avoid this duplication. '}' => { // unlikely if (p.stack & 1 != object_bit) { return error.UnexpectedClosingBracket; } if (p.stack_used == 0) { return error.TooManyClosingItems; } p.state = State.ValueBegin; p.after_string_state = State.fromInt(p.stack & 1); p.stack >>= 1; p.stack_used -= 1; switch (p.stack_used) { 0 => { p.complete = true; p.state = State.TopLevelEnd; }, else => { p.state = State.ValueEnd; }, } token.* = Token.initMarker(Token.Id.ObjectEnd); }, ']' => { if (p.stack & 1 != array_bit) { return error.UnexpectedClosingBrace; } if (p.stack_used == 0) { return error.TooManyClosingItems; } p.state = State.ValueBegin; p.after_string_state = State.fromInt(p.stack & 1); p.stack >>= 1; p.stack_used -= 1; switch (p.stack_used) { 0 => { p.complete = true; p.state = State.TopLevelEnd; }, else => { p.state = State.ValueEnd; }, } token.* = Token.initMarker(Token.Id.ArrayEnd); }, '{' => { if (p.stack_used == max_stack_size) { return error.TooManyNestedItems; } p.stack <<= 1; p.stack |= object_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ObjectSeparator; token.* = Token.initMarker(Token.Id.ObjectBegin); }, '[' => { if (p.stack_used == max_stack_size) { return error.TooManyNestedItems; } p.stack <<= 1; p.stack |= array_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ValueEnd; token.* = Token.initMarker(Token.Id.ArrayBegin); }, '-' => { p.state = State.Number; p.count = 0; }, '0' => { p.state = State.NumberMaybeDotOrExponent; p.count = 0; }, '1'...'9' => { p.state = State.NumberMaybeDigitOrDotOrExponent; p.count = 0; }, '"' => { p.state = State.String; p.count = 0; }, 't' => { p.state = State.TrueLiteral1; p.count = 0; }, 'f' => { p.state = State.FalseLiteral1; p.count = 0; }, 'n' => { p.state = State.NullLiteral1; p.count = 0; }, 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidValueBegin; }, }, // TODO: A bit of duplication here and in the following state, redo. State.ValueBeginNoClosing => switch (c) { '{' => { if (p.stack_used == max_stack_size) { return error.TooManyNestedItems; } p.stack <<= 1; p.stack |= object_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ObjectSeparator; token.* = Token.initMarker(Token.Id.ObjectBegin); }, '[' => { if (p.stack_used == max_stack_size) { return error.TooManyNestedItems; } p.stack <<= 1; p.stack |= array_bit; p.stack_used += 1; p.state = State.ValueBegin; p.after_string_state = State.ValueEnd; token.* = Token.initMarker(Token.Id.ArrayBegin); }, '-' => { p.state = State.Number; p.count = 0; }, '0' => { p.state = State.NumberMaybeDotOrExponent; p.count = 0; }, '1'...'9' => { p.state = State.NumberMaybeDigitOrDotOrExponent; p.count = 0; }, '"' => { p.state = State.String; p.count = 0; }, 't' => { p.state = State.TrueLiteral1; p.count = 0; }, 'f' => { p.state = State.FalseLiteral1; p.count = 0; }, 'n' => { p.state = State.NullLiteral1; p.count = 0; }, 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidValueBegin; }, }, State.ValueEnd => switch (c) { ',' => { p.after_string_state = State.fromInt(p.stack & 1); p.state = State.ValueBeginNoClosing; }, ']' => { if (p.stack_used == 0) { return error.UnbalancedBrackets; } p.state = State.ValueEnd; p.after_string_state = State.fromInt(p.stack & 1); p.stack >>= 1; p.stack_used -= 1; if (p.stack_used == 0) { p.complete = true; p.state = State.TopLevelEnd; } token.* = Token.initMarker(Token.Id.ArrayEnd); }, '}' => { if (p.stack_used == 0) { return error.UnbalancedBraces; } p.state = State.ValueEnd; p.after_string_state = State.fromInt(p.stack & 1); p.stack >>= 1; p.stack_used -= 1; if (p.stack_used == 0) { p.complete = true; p.state = State.TopLevelEnd; } token.* = Token.initMarker(Token.Id.ObjectEnd); }, 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidValueEnd; }, }, State.ObjectSeparator => switch (c) { ':' => { p.state = State.ValueBegin; p.after_string_state = State.ValueEnd; }, 0x09, 0x0A, 0x0D, 0x20 => { // whitespace }, else => { return error.InvalidSeparator; }, }, State.String => switch (c) { 0x00...0x1F => { return error.InvalidControlCharacter; }, '"' => { p.state = p.after_string_state; if (p.after_value_state == State.TopLevelEnd) { p.state = State.TopLevelEnd; p.complete = true; } token.* = Token.initString(p.count - 1, p.string_has_escape); }, '\\' => { p.state = State.StringEscapeCharacter; }, 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => { // non-control ascii }, 0xC0...0xDF => { p.state = State.StringUtf8Byte1; }, 0xE0...0xEF => { p.state = State.StringUtf8Byte2; }, 0xF0...0xFF => { p.state = State.StringUtf8Byte3; }, else => { return error.InvalidUtf8Byte; }, }, State.StringUtf8Byte3 => switch (c >> 6) { 0b10 => p.state = State.StringUtf8Byte2, else => return error.InvalidUtf8Byte, }, State.StringUtf8Byte2 => switch (c >> 6) { 0b10 => p.state = State.StringUtf8Byte1, else => return error.InvalidUtf8Byte, }, State.StringUtf8Byte1 => switch (c >> 6) { 0b10 => p.state = State.String, else => return error.InvalidUtf8Byte, }, State.StringEscapeCharacter => switch (c) { // NOTE: '/' is allowed as an escaped character but it also is allowed // as unescaped according to the RFC. There is a reported errata which suggests // removing the non-escaped variant but it makes more sense to simply disallow // it as an escape code here. // // The current JSONTestSuite tests rely on both of this behaviour being present // however, so we default to the status quo where both are accepted until this // is further clarified. '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => { p.string_has_escape = true; p.state = State.String; }, 'u' => { p.string_has_escape = true; p.state = State.StringEscapeHexUnicode4; }, else => { return error.InvalidEscapeCharacter; }, }, State.StringEscapeHexUnicode4 => switch (c) { '0'...'9', 'A'...'F', 'a'...'f' => { p.state = State.StringEscapeHexUnicode3; }, else => return error.InvalidUnicodeHexSymbol, }, State.StringEscapeHexUnicode3 => switch (c) { '0'...'9', 'A'...'F', 'a'...'f' => { p.state = State.StringEscapeHexUnicode2; }, else => return error.InvalidUnicodeHexSymbol, }, State.StringEscapeHexUnicode2 => switch (c) { '0'...'9', 'A'...'F', 'a'...'f' => { p.state = State.StringEscapeHexUnicode1; }, else => return error.InvalidUnicodeHexSymbol, }, State.StringEscapeHexUnicode1 => switch (c) { '0'...'9', 'A'...'F', 'a'...'f' => { p.state = State.String; }, else => return error.InvalidUnicodeHexSymbol, }, State.Number => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '0' => { p.state = State.NumberMaybeDotOrExponent; }, '1'...'9' => { p.state = State.NumberMaybeDigitOrDotOrExponent; }, else => { return error.InvalidNumber; }, } }, State.NumberMaybeDotOrExponent => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '.' => { p.number_is_integer = false; p.state = State.NumberFractionalRequired; }, 'e', 'E' => { p.number_is_integer = false; p.state = State.NumberExponent; }, else => { p.state = p.after_value_state; token.* = Token.initNumber(p.count, p.number_is_integer); return true; }, } }, State.NumberMaybeDigitOrDotOrExponent => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '.' => { p.number_is_integer = false; p.state = State.NumberFractionalRequired; }, 'e', 'E' => { p.number_is_integer = false; p.state = State.NumberExponent; }, '0'...'9' => { // another digit }, else => { p.state = p.after_value_state; token.* = Token.initNumber(p.count, p.number_is_integer); return true; }, } }, State.NumberFractionalRequired => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '0'...'9' => { p.state = State.NumberFractional; }, else => { return error.InvalidNumber; }, } }, State.NumberFractional => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '0'...'9' => { // another digit }, 'e', 'E' => { p.number_is_integer = false; p.state = State.NumberExponent; }, else => { p.state = p.after_value_state; token.* = Token.initNumber(p.count, p.number_is_integer); return true; }, } }, State.NumberMaybeExponent => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { 'e', 'E' => { p.number_is_integer = false; p.state = State.NumberExponent; }, else => { p.state = p.after_value_state; token.* = Token.initNumber(p.count, p.number_is_integer); return true; }, } }, State.NumberExponent => switch (c) { '-', '+' => { p.complete = false; p.state = State.NumberExponentDigitsRequired; }, '0'...'9' => { p.complete = p.after_value_state == State.TopLevelEnd; p.state = State.NumberExponentDigits; }, else => { return error.InvalidNumber; }, }, State.NumberExponentDigitsRequired => switch (c) { '0'...'9' => { p.complete = p.after_value_state == State.TopLevelEnd; p.state = State.NumberExponentDigits; }, else => { return error.InvalidNumber; }, }, State.NumberExponentDigits => { p.complete = p.after_value_state == State.TopLevelEnd; switch (c) { '0'...'9' => { // another digit }, else => { p.state = p.after_value_state; token.* = Token.initNumber(p.count, p.number_is_integer); return true; }, } }, State.TrueLiteral1 => switch (c) { 'r' => p.state = State.TrueLiteral2, else => return error.InvalidLiteral, }, State.TrueLiteral2 => switch (c) { 'u' => p.state = State.TrueLiteral3, else => return error.InvalidLiteral, }, State.TrueLiteral3 => switch (c) { 'e' => { p.state = p.after_value_state; p.complete = p.state == State.TopLevelEnd; token.* = Token.init(Token.Id.True, p.count + 1, 1); }, else => { return error.InvalidLiteral; }, }, State.FalseLiteral1 => switch (c) { 'a' => p.state = State.FalseLiteral2, else => return error.InvalidLiteral, }, State.FalseLiteral2 => switch (c) { 'l' => p.state = State.FalseLiteral3, else => return error.InvalidLiteral, }, State.FalseLiteral3 => switch (c) { 's' => p.state = State.FalseLiteral4, else => return error.InvalidLiteral, }, State.FalseLiteral4 => switch (c) { 'e' => { p.state = p.after_value_state; p.complete = p.state == State.TopLevelEnd; token.* = Token.init(Token.Id.False, p.count + 1, 1); }, else => { return error.InvalidLiteral; }, }, State.NullLiteral1 => switch (c) { 'u' => p.state = State.NullLiteral2, else => return error.InvalidLiteral, }, State.NullLiteral2 => switch (c) { 'l' => p.state = State.NullLiteral3, else => return error.InvalidLiteral, }, State.NullLiteral3 => switch (c) { 'l' => { p.state = p.after_value_state; p.complete = p.state == State.TopLevelEnd; token.* = Token.init(Token.Id.Null, p.count + 1, 1); }, else => { return error.InvalidLiteral; }, }, } return false; } }; // A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens. pub const TokenStream = struct { i: usize, slice: []const u8, parser: StreamingParser, token: ?Token, pub fn init(slice: []const u8) TokenStream { return TokenStream{ .i = 0, .slice = slice, .parser = StreamingParser.init(), .token = null, }; } pub fn next(self: *TokenStream) !?Token { if (self.token) |token| { self.token = null; return token; } var t1: ?Token = undefined; var t2: ?Token = undefined; while (self.i < self.slice.len) { try self.parser.feed(self.slice[self.i], &t1, &t2); self.i += 1; if (t1) |token| { self.token = t2; return token; } } if (self.i > self.slice.len) { try self.parser.feed(' ', &t1, &t2); self.i += 1; if (t1) |token| { return token; } } return null; } }; fn checkNext(p: *TokenStream, id: Token.Id) void { const token = (p.next() catch unreachable).?; debug.assert(token.id == id); } test "token" { const s = \\{ \\ "Image": { \\ "Width": 800, \\ "Height": 600, \\ "Title": "View from 15th Floor", \\ "Thumbnail": { \\ "Url": "http://www.example.com/image/481989943", \\ "Height": 125, \\ "Width": 100 \\ }, \\ "Animated" : false, \\ "IDs": [116, 943, 234, 38793] \\ } \\} ; var p = TokenStream.init(s); checkNext(&p, Token.Id.ObjectBegin); checkNext(&p, Token.Id.String); // Image checkNext(&p, Token.Id.ObjectBegin); checkNext(&p, Token.Id.String); // Width checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.String); // Height checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.String); // Title checkNext(&p, Token.Id.String); checkNext(&p, Token.Id.String); // Thumbnail checkNext(&p, Token.Id.ObjectBegin); checkNext(&p, Token.Id.String); // Url checkNext(&p, Token.Id.String); checkNext(&p, Token.Id.String); // Height checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.String); // Width checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.ObjectEnd); checkNext(&p, Token.Id.String); // Animated checkNext(&p, Token.Id.False); checkNext(&p, Token.Id.String); // IDs checkNext(&p, Token.Id.ArrayBegin); checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.Number); checkNext(&p, Token.Id.ArrayEnd); checkNext(&p, Token.Id.ObjectEnd); checkNext(&p, Token.Id.ObjectEnd); debug.assert((try p.next()) == null); } // Validate a JSON string. This does not limit number precision so a decoder may not necessarily // be able to decode the string even if this returns true. pub fn validate(s: []const u8) bool { var p = StreamingParser.init(); for (s) |c, i| { var token1: ?Token = undefined; var token2: ?Token = undefined; p.feed(c, &token1, &token2) catch |err| { return false; }; } return p.complete; } test "json validate" { debug.assert(validate("{}")); } const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; const ArrayList = std.ArrayList; const HashMap = std.HashMap; pub const ValueTree = struct { arena: ArenaAllocator, root: Value, pub fn deinit(self: *ValueTree) void { self.arena.deinit(); } }; pub const ObjectMap = HashMap([]const u8, Value, mem.hash_slice_u8, mem.eql_slice_u8); pub const Value = union(enum) { Null, Bool: bool, Integer: i64, Float: f64, String: []const u8, Array: ArrayList(Value), Object: ObjectMap, pub fn dump(self: *const Value) void { switch (self.*) { Value.Null => { debug.warn("null"); }, Value.Bool => |inner| { debug.warn("{}", inner); }, Value.Integer => |inner| { debug.warn("{}", inner); }, Value.Float => |inner| { debug.warn("{.5}", inner); }, Value.String => |inner| { debug.warn("\"{}\"", inner); }, Value.Array => |inner| { var not_first = false; debug.warn("["); for (inner.toSliceConst()) |value| { if (not_first) { debug.warn(","); } not_first = true; value.dump(); } debug.warn("]"); }, Value.Object => |inner| { var not_first = false; debug.warn("{{"); var it = inner.iterator(); while (it.next()) |entry| { if (not_first) { debug.warn(","); } not_first = true; debug.warn("\"{}\":", entry.key); entry.value.dump(); } debug.warn("}}"); }, } } pub fn dumpIndent(self: *const Value, indent: usize) void { if (indent == 0) { self.dump(); } else { self.dumpIndentLevel(indent, 0); } } fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void { switch (self.*) { Value.Null => { debug.warn("null"); }, Value.Bool => |inner| { debug.warn("{}", inner); }, Value.Integer => |inner| { debug.warn("{}", inner); }, Value.Float => |inner| { debug.warn("{.5}", inner); }, Value.String => |inner| { debug.warn("\"{}\"", inner); }, Value.Array => |inner| { var not_first = false; debug.warn("[\n"); for (inner.toSliceConst()) |value| { if (not_first) { debug.warn(",\n"); } not_first = true; padSpace(level + indent); value.dumpIndentLevel(indent, level + indent); } debug.warn("\n"); padSpace(level); debug.warn("]"); }, Value.Object => |inner| { var not_first = false; debug.warn("{{\n"); var it = inner.iterator(); while (it.next()) |entry| { if (not_first) { debug.warn(",\n"); } not_first = true; padSpace(level + indent); debug.warn("\"{}\": ", entry.key); entry.value.dumpIndentLevel(indent, level + indent); } debug.warn("\n"); padSpace(level); debug.warn("}}"); }, } } fn padSpace(indent: usize) void { var i: usize = 0; while (i < indent) : (i += 1) { debug.warn(" "); } } }; // A non-stream JSON parser which constructs a tree of Value's. pub const Parser = struct { allocator: *Allocator, state: State, copy_strings: bool, // Stores parent nodes and un-combined Values. stack: ArrayList(Value), const State = enum { ObjectKey, ObjectValue, ArrayValue, Simple, }; pub fn init(allocator: *Allocator, copy_strings: bool) Parser { return Parser{ .allocator = allocator, .state = State.Simple, .copy_strings = copy_strings, .stack = ArrayList(Value).init(allocator), }; } pub fn deinit(p: *Parser) void { p.stack.deinit(); } pub fn reset(p: *Parser) void { p.state = State.Simple; p.stack.shrink(0); } pub fn parse(p: *Parser, input: []const u8) !ValueTree { var s = TokenStream.init(input); var arena = ArenaAllocator.init(p.allocator); errdefer arena.deinit(); while (try s.next()) |token| { try p.transition(&arena.allocator, input, s.i - 1, token); } debug.assert(p.stack.len == 1); return ValueTree{ .arena = arena, .root = p.stack.at(0), }; } // Even though p.allocator exists, we take an explicit allocator so that allocation state // can be cleaned up on error correctly during a `parse` on call. fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void { switch (p.state) { State.ObjectKey => switch (token.id) { Token.Id.ObjectEnd => { if (p.stack.len == 1) { return; } var value = p.stack.pop(); try p.pushToParent(value); }, Token.Id.String => { try p.stack.append(try p.parseString(allocator, token, input, i)); p.state = State.ObjectValue; }, else => { unreachable; }, }, State.ObjectValue => { var object = &p.stack.items[p.stack.len - 2].Object; var key = p.stack.items[p.stack.len - 1].String; switch (token.id) { Token.Id.ObjectBegin => { try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); p.state = State.ObjectKey; }, Token.Id.ArrayBegin => { try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); p.state = State.ArrayValue; }, Token.Id.String => { _ = try object.put(key, try p.parseString(allocator, token, input, i)); _ = p.stack.pop(); p.state = State.ObjectKey; }, Token.Id.Number => { _ = try object.put(key, try p.parseNumber(token, input, i)); _ = p.stack.pop(); p.state = State.ObjectKey; }, Token.Id.True => { _ = try object.put(key, Value{ .Bool = true }); _ = p.stack.pop(); p.state = State.ObjectKey; }, Token.Id.False => { _ = try object.put(key, Value{ .Bool = false }); _ = p.stack.pop(); p.state = State.ObjectKey; }, Token.Id.Null => { _ = try object.put(key, Value.Null); _ = p.stack.pop(); p.state = State.ObjectKey; }, Token.Id.ObjectEnd, Token.Id.ArrayEnd => { unreachable; }, } }, State.ArrayValue => { var array = &p.stack.items[p.stack.len - 1].Array; switch (token.id) { Token.Id.ArrayEnd => { if (p.stack.len == 1) { return; } var value = p.stack.pop(); try p.pushToParent(value); }, Token.Id.ObjectBegin => { try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); p.state = State.ObjectKey; }, Token.Id.ArrayBegin => { try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); p.state = State.ArrayValue; }, Token.Id.String => { try array.append(try p.parseString(allocator, token, input, i)); }, Token.Id.Number => { try array.append(try p.parseNumber(token, input, i)); }, Token.Id.True => { try array.append(Value{ .Bool = true }); }, Token.Id.False => { try array.append(Value{ .Bool = false }); }, Token.Id.Null => { try array.append(Value.Null); }, Token.Id.ObjectEnd => { unreachable; }, } }, State.Simple => switch (token.id) { Token.Id.ObjectBegin => { try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); p.state = State.ObjectKey; }, Token.Id.ArrayBegin => { try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); p.state = State.ArrayValue; }, Token.Id.String => { try p.stack.append(try p.parseString(allocator, token, input, i)); }, Token.Id.Number => { try p.stack.append(try p.parseNumber(token, input, i)); }, Token.Id.True => { try p.stack.append(Value{ .Bool = true }); }, Token.Id.False => { try p.stack.append(Value{ .Bool = false }); }, Token.Id.Null => { try p.stack.append(Value.Null); }, Token.Id.ObjectEnd, Token.Id.ArrayEnd => { unreachable; }, }, } } fn pushToParent(p: *Parser, value: *const Value) !void { switch (p.stack.at(p.stack.len - 1)) { // Object Parent -> [ ..., object, , value ] Value.String => |key| { _ = p.stack.pop(); var object = &p.stack.items[p.stack.len - 1].Object; _ = try object.put(key, value); p.state = State.ObjectKey; }, // Array Parent -> [ ..., , value ] Value.Array => |*array| { try array.append(value); p.state = State.ArrayValue; }, else => { unreachable; }, } } fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value { // TODO: We don't strictly have to copy values which do not contain any escape // characters if flagged with the option. const slice = token.slice(input, i); return Value{ .String = try mem.dupe(p.allocator, u8, slice) }; } fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value { return if (token.number_is_integer) Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) } else @panic("TODO: fmt.parseFloat not yet implemented"); } }; test "json parser dynamic" { var p = Parser.init(debug.global_allocator, false); defer p.deinit(); const s = \\{ \\ "Image": { \\ "Width": 800, \\ "Height": 600, \\ "Title": "View from 15th Floor", \\ "Thumbnail": { \\ "Url": "http://www.example.com/image/481989943", \\ "Height": 125, \\ "Width": 100 \\ }, \\ "Animated" : false, \\ "IDs": [116, 943, 234, 38793] \\ } \\} ; var tree = try p.parse(s); defer tree.deinit(); var root = tree.root; var image = root.Object.get("Image").?.value; const width = image.Object.get("Width").?.value; debug.assert(width.Integer == 800); const height = image.Object.get("Height").?.value; debug.assert(height.Integer == 600); const title = image.Object.get("Title").?.value; debug.assert(mem.eql(u8, title.String, "View from 15th Floor")); const animated = image.Object.get("Animated").?.value; debug.assert(animated.Bool == false); }