1390 lines
45 KiB
Zig
1390 lines
45 KiB
Zig
// JSON parser conforming to RFC8259.
|
|
//
|
|
// https://tools.ietf.org/html/rfc8259
|
|
|
|
const std = @import("index.zig");
|
|
const debug = std.debug;
|
|
const mem = std.mem;
|
|
|
|
// A single token slice into the parent string.
|
|
//
|
|
// Use `token.slice()` on the input at the current position to get the current slice.
|
|
pub const Token = struct {
|
|
id: Id,
|
|
// How many bytes do we skip before counting
|
|
offset: u1,
|
|
// Whether string contains a \uXXXX sequence and cannot be zero-copied
|
|
string_has_escape: bool,
|
|
// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
|
|
number_is_integer: bool,
|
|
// How many bytes from the current position behind the start of this token is.
|
|
count: usize,
|
|
|
|
pub const Id = enum {
|
|
ObjectBegin,
|
|
ObjectEnd,
|
|
ArrayBegin,
|
|
ArrayEnd,
|
|
String,
|
|
Number,
|
|
True,
|
|
False,
|
|
Null,
|
|
};
|
|
|
|
pub fn init(id: Id, count: usize, offset: u1) Token {
|
|
return Token{
|
|
.id = id,
|
|
.offset = offset,
|
|
.string_has_escape = false,
|
|
.number_is_integer = true,
|
|
.count = count,
|
|
};
|
|
}
|
|
|
|
pub fn initString(count: usize, has_unicode_escape: bool) Token {
|
|
return Token{
|
|
.id = Id.String,
|
|
.offset = 0,
|
|
.string_has_escape = has_unicode_escape,
|
|
.number_is_integer = true,
|
|
.count = count,
|
|
};
|
|
}
|
|
|
|
pub fn initNumber(count: usize, number_is_integer: bool) Token {
|
|
return Token{
|
|
.id = Id.Number,
|
|
.offset = 0,
|
|
.string_has_escape = false,
|
|
.number_is_integer = number_is_integer,
|
|
.count = count,
|
|
};
|
|
}
|
|
|
|
// A marker token is a zero-length
|
|
pub fn initMarker(id: Id) Token {
|
|
return Token{
|
|
.id = id,
|
|
.offset = 0,
|
|
.string_has_escape = false,
|
|
.number_is_integer = true,
|
|
.count = 0,
|
|
};
|
|
}
|
|
|
|
// Slice into the underlying input string.
|
|
pub fn slice(self: *const Token, input: []const u8, i: usize) []const u8 {
|
|
return input[i + self.offset - self.count .. i + self.offset];
|
|
}
|
|
};
|
|
|
|
// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
|
|
// they are encountered. No copies or allocations are performed during parsing and the entire
|
|
// parsing state requires ~40-50 bytes of stack space.
|
|
//
|
|
// Conforms strictly to RFC8529.
|
|
//
|
|
// For a non-byte based wrapper, consider using TokenStream instead.
|
|
pub const StreamingParser = struct {
|
|
// Current state
|
|
state: State,
|
|
// How many bytes we have counted for the current token
|
|
count: usize,
|
|
// What state to follow after parsing a string (either property or value string)
|
|
after_string_state: State,
|
|
// What state to follow after parsing a value (either top-level or value end)
|
|
after_value_state: State,
|
|
// If we stopped now, would the complete parsed string to now be a valid json string
|
|
complete: bool,
|
|
// Current token flags to pass through to the next generated, see Token.
|
|
string_has_escape: bool,
|
|
number_is_integer: bool,
|
|
|
|
// Bit-stack for nested object/map literals (max 255 nestings).
|
|
stack: u256,
|
|
stack_used: u8,
|
|
|
|
const object_bit = 0;
|
|
const array_bit = 1;
|
|
const max_stack_size = @maxValue(u8);
|
|
|
|
pub fn init() StreamingParser {
|
|
var p: StreamingParser = undefined;
|
|
p.reset();
|
|
return p;
|
|
}
|
|
|
|
pub fn reset(p: *StreamingParser) void {
|
|
p.state = State.TopLevelBegin;
|
|
p.count = 0;
|
|
// Set before ever read in main transition function
|
|
p.after_string_state = undefined;
|
|
p.after_value_state = State.ValueEnd; // handle end of values normally
|
|
p.stack = 0;
|
|
p.stack_used = 0;
|
|
p.complete = false;
|
|
p.string_has_escape = false;
|
|
p.number_is_integer = true;
|
|
}
|
|
|
|
pub const State = enum {
|
|
// These must be first with these explicit values as we rely on them for indexing the
|
|
// bit-stack directly and avoiding a branch.
|
|
ObjectSeparator = 0,
|
|
ValueEnd = 1,
|
|
|
|
TopLevelBegin,
|
|
TopLevelEnd,
|
|
|
|
ValueBegin,
|
|
ValueBeginNoClosing,
|
|
|
|
String,
|
|
StringUtf8Byte3,
|
|
StringUtf8Byte2,
|
|
StringUtf8Byte1,
|
|
StringEscapeCharacter,
|
|
StringEscapeHexUnicode4,
|
|
StringEscapeHexUnicode3,
|
|
StringEscapeHexUnicode2,
|
|
StringEscapeHexUnicode1,
|
|
|
|
Number,
|
|
NumberMaybeDotOrExponent,
|
|
NumberMaybeDigitOrDotOrExponent,
|
|
NumberFractionalRequired,
|
|
NumberFractional,
|
|
NumberMaybeExponent,
|
|
NumberExponent,
|
|
NumberExponentDigitsRequired,
|
|
NumberExponentDigits,
|
|
|
|
TrueLiteral1,
|
|
TrueLiteral2,
|
|
TrueLiteral3,
|
|
|
|
FalseLiteral1,
|
|
FalseLiteral2,
|
|
FalseLiteral3,
|
|
FalseLiteral4,
|
|
|
|
NullLiteral1,
|
|
NullLiteral2,
|
|
NullLiteral3,
|
|
|
|
// Only call this function to generate array/object final state.
|
|
pub fn fromInt(x: var) State {
|
|
debug.assert(x == 0 or x == 1);
|
|
const T = @TagType(State);
|
|
return @intToEnum(State, @intCast(T, x));
|
|
}
|
|
};
|
|
|
|
pub const Error = error{
|
|
InvalidTopLevel,
|
|
TooManyNestedItems,
|
|
TooManyClosingItems,
|
|
InvalidValueBegin,
|
|
InvalidValueEnd,
|
|
UnbalancedBrackets,
|
|
UnbalancedBraces,
|
|
UnexpectedClosingBracket,
|
|
UnexpectedClosingBrace,
|
|
InvalidNumber,
|
|
InvalidSeparator,
|
|
InvalidLiteral,
|
|
InvalidEscapeCharacter,
|
|
InvalidUnicodeHexSymbol,
|
|
InvalidUtf8Byte,
|
|
InvalidTopLevelTrailing,
|
|
InvalidControlCharacter,
|
|
};
|
|
|
|
// Give another byte to the parser and obtain any new tokens. This may (rarely) return two
|
|
// tokens. token2 is always null if token1 is null.
|
|
//
|
|
// There is currently no error recovery on a bad stream.
|
|
pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
|
|
token1.* = null;
|
|
token2.* = null;
|
|
p.count += 1;
|
|
|
|
// unlikely
|
|
if (try p.transition(c, token1)) {
|
|
_ = try p.transition(c, token2);
|
|
}
|
|
}
|
|
|
|
// Perform a single transition on the state machine and return any possible token.
|
|
fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
|
|
switch (p.state) {
|
|
State.TopLevelBegin => switch (c) {
|
|
'{' => {
|
|
p.stack <<= 1;
|
|
p.stack |= object_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ObjectSeparator;
|
|
|
|
token.* = Token.initMarker(Token.Id.ObjectBegin);
|
|
},
|
|
'[' => {
|
|
p.stack <<= 1;
|
|
p.stack |= array_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ValueEnd;
|
|
|
|
token.* = Token.initMarker(Token.Id.ArrayBegin);
|
|
},
|
|
'-' => {
|
|
p.number_is_integer = true;
|
|
p.state = State.Number;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
'0' => {
|
|
p.number_is_integer = true;
|
|
p.state = State.NumberMaybeDotOrExponent;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
'1'...'9' => {
|
|
p.number_is_integer = true;
|
|
p.state = State.NumberMaybeDigitOrDotOrExponent;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
'"' => {
|
|
p.state = State.String;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
// We don't actually need the following since after_value_state should override.
|
|
p.after_string_state = State.ValueEnd;
|
|
p.string_has_escape = false;
|
|
p.count = 0;
|
|
},
|
|
't' => {
|
|
p.state = State.TrueLiteral1;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
'f' => {
|
|
p.state = State.FalseLiteral1;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
'n' => {
|
|
p.state = State.NullLiteral1;
|
|
p.after_value_state = State.TopLevelEnd;
|
|
p.count = 0;
|
|
},
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidTopLevel;
|
|
},
|
|
},
|
|
|
|
State.TopLevelEnd => switch (c) {
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidTopLevelTrailing;
|
|
},
|
|
},
|
|
|
|
State.ValueBegin => switch (c) {
|
|
// NOTE: These are shared in ValueEnd as well, think we can reorder states to
|
|
// be a bit clearer and avoid this duplication.
|
|
'}' => {
|
|
// unlikely
|
|
if (p.stack & 1 != object_bit) {
|
|
return error.UnexpectedClosingBracket;
|
|
}
|
|
if (p.stack_used == 0) {
|
|
return error.TooManyClosingItems;
|
|
}
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.fromInt(p.stack & 1);
|
|
|
|
p.stack >>= 1;
|
|
p.stack_used -= 1;
|
|
|
|
switch (p.stack_used) {
|
|
0 => {
|
|
p.complete = true;
|
|
p.state = State.TopLevelEnd;
|
|
},
|
|
else => {
|
|
p.state = State.ValueEnd;
|
|
},
|
|
}
|
|
|
|
token.* = Token.initMarker(Token.Id.ObjectEnd);
|
|
},
|
|
']' => {
|
|
if (p.stack & 1 != array_bit) {
|
|
return error.UnexpectedClosingBrace;
|
|
}
|
|
if (p.stack_used == 0) {
|
|
return error.TooManyClosingItems;
|
|
}
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.fromInt(p.stack & 1);
|
|
|
|
p.stack >>= 1;
|
|
p.stack_used -= 1;
|
|
|
|
switch (p.stack_used) {
|
|
0 => {
|
|
p.complete = true;
|
|
p.state = State.TopLevelEnd;
|
|
},
|
|
else => {
|
|
p.state = State.ValueEnd;
|
|
},
|
|
}
|
|
|
|
token.* = Token.initMarker(Token.Id.ArrayEnd);
|
|
},
|
|
'{' => {
|
|
if (p.stack_used == max_stack_size) {
|
|
return error.TooManyNestedItems;
|
|
}
|
|
|
|
p.stack <<= 1;
|
|
p.stack |= object_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ObjectSeparator;
|
|
|
|
token.* = Token.initMarker(Token.Id.ObjectBegin);
|
|
},
|
|
'[' => {
|
|
if (p.stack_used == max_stack_size) {
|
|
return error.TooManyNestedItems;
|
|
}
|
|
|
|
p.stack <<= 1;
|
|
p.stack |= array_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ValueEnd;
|
|
|
|
token.* = Token.initMarker(Token.Id.ArrayBegin);
|
|
},
|
|
'-' => {
|
|
p.state = State.Number;
|
|
p.count = 0;
|
|
},
|
|
'0' => {
|
|
p.state = State.NumberMaybeDotOrExponent;
|
|
p.count = 0;
|
|
},
|
|
'1'...'9' => {
|
|
p.state = State.NumberMaybeDigitOrDotOrExponent;
|
|
p.count = 0;
|
|
},
|
|
'"' => {
|
|
p.state = State.String;
|
|
p.count = 0;
|
|
},
|
|
't' => {
|
|
p.state = State.TrueLiteral1;
|
|
p.count = 0;
|
|
},
|
|
'f' => {
|
|
p.state = State.FalseLiteral1;
|
|
p.count = 0;
|
|
},
|
|
'n' => {
|
|
p.state = State.NullLiteral1;
|
|
p.count = 0;
|
|
},
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidValueBegin;
|
|
},
|
|
},
|
|
|
|
// TODO: A bit of duplication here and in the following state, redo.
|
|
State.ValueBeginNoClosing => switch (c) {
|
|
'{' => {
|
|
if (p.stack_used == max_stack_size) {
|
|
return error.TooManyNestedItems;
|
|
}
|
|
|
|
p.stack <<= 1;
|
|
p.stack |= object_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ObjectSeparator;
|
|
|
|
token.* = Token.initMarker(Token.Id.ObjectBegin);
|
|
},
|
|
'[' => {
|
|
if (p.stack_used == max_stack_size) {
|
|
return error.TooManyNestedItems;
|
|
}
|
|
|
|
p.stack <<= 1;
|
|
p.stack |= array_bit;
|
|
p.stack_used += 1;
|
|
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ValueEnd;
|
|
|
|
token.* = Token.initMarker(Token.Id.ArrayBegin);
|
|
},
|
|
'-' => {
|
|
p.state = State.Number;
|
|
p.count = 0;
|
|
},
|
|
'0' => {
|
|
p.state = State.NumberMaybeDotOrExponent;
|
|
p.count = 0;
|
|
},
|
|
'1'...'9' => {
|
|
p.state = State.NumberMaybeDigitOrDotOrExponent;
|
|
p.count = 0;
|
|
},
|
|
'"' => {
|
|
p.state = State.String;
|
|
p.count = 0;
|
|
},
|
|
't' => {
|
|
p.state = State.TrueLiteral1;
|
|
p.count = 0;
|
|
},
|
|
'f' => {
|
|
p.state = State.FalseLiteral1;
|
|
p.count = 0;
|
|
},
|
|
'n' => {
|
|
p.state = State.NullLiteral1;
|
|
p.count = 0;
|
|
},
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidValueBegin;
|
|
},
|
|
},
|
|
|
|
State.ValueEnd => switch (c) {
|
|
',' => {
|
|
p.after_string_state = State.fromInt(p.stack & 1);
|
|
p.state = State.ValueBeginNoClosing;
|
|
},
|
|
']' => {
|
|
if (p.stack_used == 0) {
|
|
return error.UnbalancedBrackets;
|
|
}
|
|
|
|
p.state = State.ValueEnd;
|
|
p.after_string_state = State.fromInt(p.stack & 1);
|
|
|
|
p.stack >>= 1;
|
|
p.stack_used -= 1;
|
|
|
|
if (p.stack_used == 0) {
|
|
p.complete = true;
|
|
p.state = State.TopLevelEnd;
|
|
}
|
|
|
|
token.* = Token.initMarker(Token.Id.ArrayEnd);
|
|
},
|
|
'}' => {
|
|
if (p.stack_used == 0) {
|
|
return error.UnbalancedBraces;
|
|
}
|
|
|
|
p.state = State.ValueEnd;
|
|
p.after_string_state = State.fromInt(p.stack & 1);
|
|
|
|
p.stack >>= 1;
|
|
p.stack_used -= 1;
|
|
|
|
if (p.stack_used == 0) {
|
|
p.complete = true;
|
|
p.state = State.TopLevelEnd;
|
|
}
|
|
|
|
token.* = Token.initMarker(Token.Id.ObjectEnd);
|
|
},
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidValueEnd;
|
|
},
|
|
},
|
|
|
|
State.ObjectSeparator => switch (c) {
|
|
':' => {
|
|
p.state = State.ValueBegin;
|
|
p.after_string_state = State.ValueEnd;
|
|
},
|
|
0x09, 0x0A, 0x0D, 0x20 => {
|
|
// whitespace
|
|
},
|
|
else => {
|
|
return error.InvalidSeparator;
|
|
},
|
|
},
|
|
|
|
State.String => switch (c) {
|
|
0x00...0x1F => {
|
|
return error.InvalidControlCharacter;
|
|
},
|
|
'"' => {
|
|
p.state = p.after_string_state;
|
|
if (p.after_value_state == State.TopLevelEnd) {
|
|
p.state = State.TopLevelEnd;
|
|
p.complete = true;
|
|
}
|
|
|
|
token.* = Token.initString(p.count - 1, p.string_has_escape);
|
|
},
|
|
'\\' => {
|
|
p.state = State.StringEscapeCharacter;
|
|
},
|
|
0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
|
|
// non-control ascii
|
|
},
|
|
0xC0...0xDF => {
|
|
p.state = State.StringUtf8Byte1;
|
|
},
|
|
0xE0...0xEF => {
|
|
p.state = State.StringUtf8Byte2;
|
|
},
|
|
0xF0...0xFF => {
|
|
p.state = State.StringUtf8Byte3;
|
|
},
|
|
else => {
|
|
return error.InvalidUtf8Byte;
|
|
},
|
|
},
|
|
|
|
State.StringUtf8Byte3 => switch (c >> 6) {
|
|
0b10 => p.state = State.StringUtf8Byte2,
|
|
else => return error.InvalidUtf8Byte,
|
|
},
|
|
|
|
State.StringUtf8Byte2 => switch (c >> 6) {
|
|
0b10 => p.state = State.StringUtf8Byte1,
|
|
else => return error.InvalidUtf8Byte,
|
|
},
|
|
|
|
State.StringUtf8Byte1 => switch (c >> 6) {
|
|
0b10 => p.state = State.String,
|
|
else => return error.InvalidUtf8Byte,
|
|
},
|
|
|
|
State.StringEscapeCharacter => switch (c) {
|
|
// NOTE: '/' is allowed as an escaped character but it also is allowed
|
|
// as unescaped according to the RFC. There is a reported errata which suggests
|
|
// removing the non-escaped variant but it makes more sense to simply disallow
|
|
// it as an escape code here.
|
|
//
|
|
// The current JSONTestSuite tests rely on both of this behaviour being present
|
|
// however, so we default to the status quo where both are accepted until this
|
|
// is further clarified.
|
|
'"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
|
|
p.string_has_escape = true;
|
|
p.state = State.String;
|
|
},
|
|
'u' => {
|
|
p.string_has_escape = true;
|
|
p.state = State.StringEscapeHexUnicode4;
|
|
},
|
|
else => {
|
|
return error.InvalidEscapeCharacter;
|
|
},
|
|
},
|
|
|
|
State.StringEscapeHexUnicode4 => switch (c) {
|
|
'0'...'9', 'A'...'F', 'a'...'f' => {
|
|
p.state = State.StringEscapeHexUnicode3;
|
|
},
|
|
else => return error.InvalidUnicodeHexSymbol,
|
|
},
|
|
|
|
State.StringEscapeHexUnicode3 => switch (c) {
|
|
'0'...'9', 'A'...'F', 'a'...'f' => {
|
|
p.state = State.StringEscapeHexUnicode2;
|
|
},
|
|
else => return error.InvalidUnicodeHexSymbol,
|
|
},
|
|
|
|
State.StringEscapeHexUnicode2 => switch (c) {
|
|
'0'...'9', 'A'...'F', 'a'...'f' => {
|
|
p.state = State.StringEscapeHexUnicode1;
|
|
},
|
|
else => return error.InvalidUnicodeHexSymbol,
|
|
},
|
|
|
|
State.StringEscapeHexUnicode1 => switch (c) {
|
|
'0'...'9', 'A'...'F', 'a'...'f' => {
|
|
p.state = State.String;
|
|
},
|
|
else => return error.InvalidUnicodeHexSymbol,
|
|
},
|
|
|
|
State.Number => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'0' => {
|
|
p.state = State.NumberMaybeDotOrExponent;
|
|
},
|
|
'1'...'9' => {
|
|
p.state = State.NumberMaybeDigitOrDotOrExponent;
|
|
},
|
|
else => {
|
|
return error.InvalidNumber;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberMaybeDotOrExponent => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'.' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberFractionalRequired;
|
|
},
|
|
'e', 'E' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberExponent;
|
|
},
|
|
else => {
|
|
p.state = p.after_value_state;
|
|
token.* = Token.initNumber(p.count, p.number_is_integer);
|
|
return true;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberMaybeDigitOrDotOrExponent => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'.' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberFractionalRequired;
|
|
},
|
|
'e', 'E' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberExponent;
|
|
},
|
|
'0'...'9' => {
|
|
// another digit
|
|
},
|
|
else => {
|
|
p.state = p.after_value_state;
|
|
token.* = Token.initNumber(p.count, p.number_is_integer);
|
|
return true;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberFractionalRequired => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'0'...'9' => {
|
|
p.state = State.NumberFractional;
|
|
},
|
|
else => {
|
|
return error.InvalidNumber;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberFractional => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'0'...'9' => {
|
|
// another digit
|
|
},
|
|
'e', 'E' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberExponent;
|
|
},
|
|
else => {
|
|
p.state = p.after_value_state;
|
|
token.* = Token.initNumber(p.count, p.number_is_integer);
|
|
return true;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberMaybeExponent => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'e', 'E' => {
|
|
p.number_is_integer = false;
|
|
p.state = State.NumberExponent;
|
|
},
|
|
else => {
|
|
p.state = p.after_value_state;
|
|
token.* = Token.initNumber(p.count, p.number_is_integer);
|
|
return true;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.NumberExponent => switch (c) {
|
|
'-', '+' => {
|
|
p.complete = false;
|
|
p.state = State.NumberExponentDigitsRequired;
|
|
},
|
|
'0'...'9' => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
p.state = State.NumberExponentDigits;
|
|
},
|
|
else => {
|
|
return error.InvalidNumber;
|
|
},
|
|
},
|
|
|
|
State.NumberExponentDigitsRequired => switch (c) {
|
|
'0'...'9' => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
p.state = State.NumberExponentDigits;
|
|
},
|
|
else => {
|
|
return error.InvalidNumber;
|
|
},
|
|
},
|
|
|
|
State.NumberExponentDigits => {
|
|
p.complete = p.after_value_state == State.TopLevelEnd;
|
|
switch (c) {
|
|
'0'...'9' => {
|
|
// another digit
|
|
},
|
|
else => {
|
|
p.state = p.after_value_state;
|
|
token.* = Token.initNumber(p.count, p.number_is_integer);
|
|
return true;
|
|
},
|
|
}
|
|
},
|
|
|
|
State.TrueLiteral1 => switch (c) {
|
|
'r' => p.state = State.TrueLiteral2,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.TrueLiteral2 => switch (c) {
|
|
'u' => p.state = State.TrueLiteral3,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.TrueLiteral3 => switch (c) {
|
|
'e' => {
|
|
p.state = p.after_value_state;
|
|
p.complete = p.state == State.TopLevelEnd;
|
|
token.* = Token.init(Token.Id.True, p.count + 1, 1);
|
|
},
|
|
else => {
|
|
return error.InvalidLiteral;
|
|
},
|
|
},
|
|
|
|
State.FalseLiteral1 => switch (c) {
|
|
'a' => p.state = State.FalseLiteral2,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.FalseLiteral2 => switch (c) {
|
|
'l' => p.state = State.FalseLiteral3,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.FalseLiteral3 => switch (c) {
|
|
's' => p.state = State.FalseLiteral4,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.FalseLiteral4 => switch (c) {
|
|
'e' => {
|
|
p.state = p.after_value_state;
|
|
p.complete = p.state == State.TopLevelEnd;
|
|
token.* = Token.init(Token.Id.False, p.count + 1, 1);
|
|
},
|
|
else => {
|
|
return error.InvalidLiteral;
|
|
},
|
|
},
|
|
|
|
State.NullLiteral1 => switch (c) {
|
|
'u' => p.state = State.NullLiteral2,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.NullLiteral2 => switch (c) {
|
|
'l' => p.state = State.NullLiteral3,
|
|
else => return error.InvalidLiteral,
|
|
},
|
|
|
|
State.NullLiteral3 => switch (c) {
|
|
'l' => {
|
|
p.state = p.after_value_state;
|
|
p.complete = p.state == State.TopLevelEnd;
|
|
token.* = Token.init(Token.Id.Null, p.count + 1, 1);
|
|
},
|
|
else => {
|
|
return error.InvalidLiteral;
|
|
},
|
|
},
|
|
}
|
|
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
|
|
pub const TokenStream = struct {
|
|
i: usize,
|
|
slice: []const u8,
|
|
parser: StreamingParser,
|
|
token: ?Token,
|
|
|
|
pub fn init(slice: []const u8) TokenStream {
|
|
return TokenStream{
|
|
.i = 0,
|
|
.slice = slice,
|
|
.parser = StreamingParser.init(),
|
|
.token = null,
|
|
};
|
|
}
|
|
|
|
pub fn next(self: *TokenStream) !?Token {
|
|
if (self.token) |token| {
|
|
self.token = null;
|
|
return token;
|
|
}
|
|
|
|
var t1: ?Token = undefined;
|
|
var t2: ?Token = undefined;
|
|
|
|
while (self.i < self.slice.len) {
|
|
try self.parser.feed(self.slice[self.i], &t1, &t2);
|
|
self.i += 1;
|
|
|
|
if (t1) |token| {
|
|
self.token = t2;
|
|
return token;
|
|
}
|
|
}
|
|
|
|
if (self.i > self.slice.len) {
|
|
try self.parser.feed(' ', &t1, &t2);
|
|
self.i += 1;
|
|
|
|
if (t1) |token| {
|
|
return token;
|
|
}
|
|
}
|
|
|
|
return null;
|
|
}
|
|
};
|
|
|
|
fn checkNext(p: *TokenStream, id: Token.Id) void {
|
|
const token = (p.next() catch unreachable).?;
|
|
debug.assert(token.id == id);
|
|
}
|
|
|
|
test "token" {
|
|
const s =
|
|
\\{
|
|
\\ "Image": {
|
|
\\ "Width": 800,
|
|
\\ "Height": 600,
|
|
\\ "Title": "View from 15th Floor",
|
|
\\ "Thumbnail": {
|
|
\\ "Url": "http://www.example.com/image/481989943",
|
|
\\ "Height": 125,
|
|
\\ "Width": 100
|
|
\\ },
|
|
\\ "Animated" : false,
|
|
\\ "IDs": [116, 943, 234, 38793]
|
|
\\ }
|
|
\\}
|
|
;
|
|
|
|
var p = TokenStream.init(s);
|
|
|
|
checkNext(&p, Token.Id.ObjectBegin);
|
|
checkNext(&p, Token.Id.String); // Image
|
|
checkNext(&p, Token.Id.ObjectBegin);
|
|
checkNext(&p, Token.Id.String); // Width
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.String); // Height
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.String); // Title
|
|
checkNext(&p, Token.Id.String);
|
|
checkNext(&p, Token.Id.String); // Thumbnail
|
|
checkNext(&p, Token.Id.ObjectBegin);
|
|
checkNext(&p, Token.Id.String); // Url
|
|
checkNext(&p, Token.Id.String);
|
|
checkNext(&p, Token.Id.String); // Height
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.String); // Width
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.ObjectEnd);
|
|
checkNext(&p, Token.Id.String); // Animated
|
|
checkNext(&p, Token.Id.False);
|
|
checkNext(&p, Token.Id.String); // IDs
|
|
checkNext(&p, Token.Id.ArrayBegin);
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.Number);
|
|
checkNext(&p, Token.Id.ArrayEnd);
|
|
checkNext(&p, Token.Id.ObjectEnd);
|
|
checkNext(&p, Token.Id.ObjectEnd);
|
|
|
|
debug.assert((try p.next()) == null);
|
|
}
|
|
|
|
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
|
|
// be able to decode the string even if this returns true.
|
|
pub fn validate(s: []const u8) bool {
|
|
var p = StreamingParser.init();
|
|
|
|
for (s) |c, i| {
|
|
var token1: ?Token = undefined;
|
|
var token2: ?Token = undefined;
|
|
|
|
p.feed(c, &token1, &token2) catch |err| {
|
|
return false;
|
|
};
|
|
}
|
|
|
|
return p.complete;
|
|
}
|
|
|
|
test "json validate" {
|
|
debug.assert(validate("{}"));
|
|
}
|
|
|
|
const Allocator = std.mem.Allocator;
|
|
const ArenaAllocator = std.heap.ArenaAllocator;
|
|
const ArrayList = std.ArrayList;
|
|
const HashMap = std.HashMap;
|
|
|
|
pub const ValueTree = struct {
|
|
arena: ArenaAllocator,
|
|
root: Value,
|
|
|
|
pub fn deinit(self: *ValueTree) void {
|
|
self.arena.deinit();
|
|
}
|
|
};
|
|
|
|
pub const ObjectMap = HashMap([]const u8, Value, mem.hash_slice_u8, mem.eql_slice_u8);
|
|
|
|
pub const Value = union(enum) {
|
|
Null,
|
|
Bool: bool,
|
|
Integer: i64,
|
|
Float: f64,
|
|
String: []const u8,
|
|
Array: ArrayList(Value),
|
|
Object: ObjectMap,
|
|
|
|
pub fn dump(self: *const Value) void {
|
|
switch (self.*) {
|
|
Value.Null => {
|
|
debug.warn("null");
|
|
},
|
|
Value.Bool => |inner| {
|
|
debug.warn("{}", inner);
|
|
},
|
|
Value.Integer => |inner| {
|
|
debug.warn("{}", inner);
|
|
},
|
|
Value.Float => |inner| {
|
|
debug.warn("{.5}", inner);
|
|
},
|
|
Value.String => |inner| {
|
|
debug.warn("\"{}\"", inner);
|
|
},
|
|
Value.Array => |inner| {
|
|
var not_first = false;
|
|
debug.warn("[");
|
|
for (inner.toSliceConst()) |value| {
|
|
if (not_first) {
|
|
debug.warn(",");
|
|
}
|
|
not_first = true;
|
|
value.dump();
|
|
}
|
|
debug.warn("]");
|
|
},
|
|
Value.Object => |inner| {
|
|
var not_first = false;
|
|
debug.warn("{{");
|
|
var it = inner.iterator();
|
|
|
|
while (it.next()) |entry| {
|
|
if (not_first) {
|
|
debug.warn(",");
|
|
}
|
|
not_first = true;
|
|
debug.warn("\"{}\":", entry.key);
|
|
entry.value.dump();
|
|
}
|
|
debug.warn("}}");
|
|
},
|
|
}
|
|
}
|
|
|
|
pub fn dumpIndent(self: *const Value, indent: usize) void {
|
|
if (indent == 0) {
|
|
self.dump();
|
|
} else {
|
|
self.dumpIndentLevel(indent, 0);
|
|
}
|
|
}
|
|
|
|
fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
|
|
switch (self.*) {
|
|
Value.Null => {
|
|
debug.warn("null");
|
|
},
|
|
Value.Bool => |inner| {
|
|
debug.warn("{}", inner);
|
|
},
|
|
Value.Integer => |inner| {
|
|
debug.warn("{}", inner);
|
|
},
|
|
Value.Float => |inner| {
|
|
debug.warn("{.5}", inner);
|
|
},
|
|
Value.String => |inner| {
|
|
debug.warn("\"{}\"", inner);
|
|
},
|
|
Value.Array => |inner| {
|
|
var not_first = false;
|
|
debug.warn("[\n");
|
|
|
|
for (inner.toSliceConst()) |value| {
|
|
if (not_first) {
|
|
debug.warn(",\n");
|
|
}
|
|
not_first = true;
|
|
padSpace(level + indent);
|
|
value.dumpIndentLevel(indent, level + indent);
|
|
}
|
|
debug.warn("\n");
|
|
padSpace(level);
|
|
debug.warn("]");
|
|
},
|
|
Value.Object => |inner| {
|
|
var not_first = false;
|
|
debug.warn("{{\n");
|
|
var it = inner.iterator();
|
|
|
|
while (it.next()) |entry| {
|
|
if (not_first) {
|
|
debug.warn(",\n");
|
|
}
|
|
not_first = true;
|
|
padSpace(level + indent);
|
|
debug.warn("\"{}\": ", entry.key);
|
|
entry.value.dumpIndentLevel(indent, level + indent);
|
|
}
|
|
debug.warn("\n");
|
|
padSpace(level);
|
|
debug.warn("}}");
|
|
},
|
|
}
|
|
}
|
|
|
|
fn padSpace(indent: usize) void {
|
|
var i: usize = 0;
|
|
while (i < indent) : (i += 1) {
|
|
debug.warn(" ");
|
|
}
|
|
}
|
|
};
|
|
|
|
// A non-stream JSON parser which constructs a tree of Value's.
|
|
pub const Parser = struct {
|
|
allocator: *Allocator,
|
|
state: State,
|
|
copy_strings: bool,
|
|
// Stores parent nodes and un-combined Values.
|
|
stack: ArrayList(Value),
|
|
|
|
const State = enum {
|
|
ObjectKey,
|
|
ObjectValue,
|
|
ArrayValue,
|
|
Simple,
|
|
};
|
|
|
|
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
|
|
return Parser{
|
|
.allocator = allocator,
|
|
.state = State.Simple,
|
|
.copy_strings = copy_strings,
|
|
.stack = ArrayList(Value).init(allocator),
|
|
};
|
|
}
|
|
|
|
pub fn deinit(p: *Parser) void {
|
|
p.stack.deinit();
|
|
}
|
|
|
|
pub fn reset(p: *Parser) void {
|
|
p.state = State.Simple;
|
|
p.stack.shrink(0);
|
|
}
|
|
|
|
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
|
|
var s = TokenStream.init(input);
|
|
|
|
var arena = ArenaAllocator.init(p.allocator);
|
|
errdefer arena.deinit();
|
|
|
|
while (try s.next()) |token| {
|
|
try p.transition(&arena.allocator, input, s.i - 1, token);
|
|
}
|
|
|
|
debug.assert(p.stack.len == 1);
|
|
|
|
return ValueTree{
|
|
.arena = arena,
|
|
.root = p.stack.at(0),
|
|
};
|
|
}
|
|
|
|
// Even though p.allocator exists, we take an explicit allocator so that allocation state
|
|
// can be cleaned up on error correctly during a `parse` on call.
|
|
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
|
|
switch (p.state) {
|
|
State.ObjectKey => switch (token.id) {
|
|
Token.Id.ObjectEnd => {
|
|
if (p.stack.len == 1) {
|
|
return;
|
|
}
|
|
|
|
var value = p.stack.pop();
|
|
try p.pushToParent(value);
|
|
},
|
|
Token.Id.String => {
|
|
try p.stack.append(try p.parseString(allocator, token, input, i));
|
|
p.state = State.ObjectValue;
|
|
},
|
|
else => {
|
|
unreachable;
|
|
},
|
|
},
|
|
State.ObjectValue => {
|
|
var object = &p.stack.items[p.stack.len - 2].Object;
|
|
var key = p.stack.items[p.stack.len - 1].String;
|
|
|
|
switch (token.id) {
|
|
Token.Id.ObjectBegin => {
|
|
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.ArrayBegin => {
|
|
try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
|
|
p.state = State.ArrayValue;
|
|
},
|
|
Token.Id.String => {
|
|
_ = try object.put(key, try p.parseString(allocator, token, input, i));
|
|
_ = p.stack.pop();
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.Number => {
|
|
_ = try object.put(key, try p.parseNumber(token, input, i));
|
|
_ = p.stack.pop();
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.True => {
|
|
_ = try object.put(key, Value{ .Bool = true });
|
|
_ = p.stack.pop();
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.False => {
|
|
_ = try object.put(key, Value{ .Bool = false });
|
|
_ = p.stack.pop();
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.Null => {
|
|
_ = try object.put(key, Value.Null);
|
|
_ = p.stack.pop();
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
|
|
unreachable;
|
|
},
|
|
}
|
|
},
|
|
State.ArrayValue => {
|
|
var array = &p.stack.items[p.stack.len - 1].Array;
|
|
|
|
switch (token.id) {
|
|
Token.Id.ArrayEnd => {
|
|
if (p.stack.len == 1) {
|
|
return;
|
|
}
|
|
|
|
var value = p.stack.pop();
|
|
try p.pushToParent(value);
|
|
},
|
|
Token.Id.ObjectBegin => {
|
|
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.ArrayBegin => {
|
|
try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
|
|
p.state = State.ArrayValue;
|
|
},
|
|
Token.Id.String => {
|
|
try array.append(try p.parseString(allocator, token, input, i));
|
|
},
|
|
Token.Id.Number => {
|
|
try array.append(try p.parseNumber(token, input, i));
|
|
},
|
|
Token.Id.True => {
|
|
try array.append(Value{ .Bool = true });
|
|
},
|
|
Token.Id.False => {
|
|
try array.append(Value{ .Bool = false });
|
|
},
|
|
Token.Id.Null => {
|
|
try array.append(Value.Null);
|
|
},
|
|
Token.Id.ObjectEnd => {
|
|
unreachable;
|
|
},
|
|
}
|
|
},
|
|
State.Simple => switch (token.id) {
|
|
Token.Id.ObjectBegin => {
|
|
try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
|
|
p.state = State.ObjectKey;
|
|
},
|
|
Token.Id.ArrayBegin => {
|
|
try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
|
|
p.state = State.ArrayValue;
|
|
},
|
|
Token.Id.String => {
|
|
try p.stack.append(try p.parseString(allocator, token, input, i));
|
|
},
|
|
Token.Id.Number => {
|
|
try p.stack.append(try p.parseNumber(token, input, i));
|
|
},
|
|
Token.Id.True => {
|
|
try p.stack.append(Value{ .Bool = true });
|
|
},
|
|
Token.Id.False => {
|
|
try p.stack.append(Value{ .Bool = false });
|
|
},
|
|
Token.Id.Null => {
|
|
try p.stack.append(Value.Null);
|
|
},
|
|
Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
|
|
unreachable;
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
fn pushToParent(p: *Parser, value: *const Value) !void {
|
|
switch (p.stack.at(p.stack.len - 1)) {
|
|
// Object Parent -> [ ..., object, <key>, value ]
|
|
Value.String => |key| {
|
|
_ = p.stack.pop();
|
|
|
|
var object = &p.stack.items[p.stack.len - 1].Object;
|
|
_ = try object.put(key, value);
|
|
p.state = State.ObjectKey;
|
|
},
|
|
// Array Parent -> [ ..., <array>, value ]
|
|
Value.Array => |*array| {
|
|
try array.append(value.*);
|
|
p.state = State.ArrayValue;
|
|
},
|
|
else => {
|
|
unreachable;
|
|
},
|
|
}
|
|
}
|
|
|
|
fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
|
|
// TODO: We don't strictly have to copy values which do not contain any escape
|
|
// characters if flagged with the option.
|
|
const slice = token.slice(input, i);
|
|
return Value{ .String = try mem.dupe(p.allocator, u8, slice) };
|
|
}
|
|
|
|
fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value {
|
|
return if (token.number_is_integer)
|
|
Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
|
|
else
|
|
@panic("TODO: fmt.parseFloat not yet implemented");
|
|
}
|
|
};
|
|
|
|
test "json parser dynamic" {
|
|
var p = Parser.init(debug.global_allocator, false);
|
|
defer p.deinit();
|
|
|
|
const s =
|
|
\\{
|
|
\\ "Image": {
|
|
\\ "Width": 800,
|
|
\\ "Height": 600,
|
|
\\ "Title": "View from 15th Floor",
|
|
\\ "Thumbnail": {
|
|
\\ "Url": "http://www.example.com/image/481989943",
|
|
\\ "Height": 125,
|
|
\\ "Width": 100
|
|
\\ },
|
|
\\ "Animated" : false,
|
|
\\ "IDs": [116, 943, 234, 38793]
|
|
\\ }
|
|
\\}
|
|
;
|
|
|
|
var tree = try p.parse(s);
|
|
defer tree.deinit();
|
|
|
|
var root = tree.root;
|
|
|
|
var image = root.Object.get("Image").?.value;
|
|
|
|
const width = image.Object.get("Width").?.value;
|
|
debug.assert(width.Integer == 800);
|
|
|
|
const height = image.Object.get("Height").?.value;
|
|
debug.assert(height.Integer == 600);
|
|
|
|
const title = image.Object.get("Title").?.value;
|
|
debug.assert(mem.eql(u8, title.String, "View from 15th Floor"));
|
|
|
|
const animated = image.Object.get("Animated").?.value;
|
|
debug.assert(animated.Bool == false);
|
|
}
|