Add json.TokenStream (#1062)
This hides some of the low-level parsing details from the StreamingParser. These don't need to be known when parsing a complete slice at once (which is we can usually do). Also, remove `Json` from Parser names. The namespace `json` is sufficient.
This commit is contained in:
parent
f389e53735
commit
e7f141b376
237
std/json.zig
237
std/json.zig
@ -3,6 +3,7 @@
|
|||||||
// https://tools.ietf.org/html/rfc8259
|
// https://tools.ietf.org/html/rfc8259
|
||||||
|
|
||||||
const std = @import("index.zig");
|
const std = @import("index.zig");
|
||||||
|
const debug = std.debug;
|
||||||
const mem = std.mem;
|
const mem = std.mem;
|
||||||
|
|
||||||
const u1 = @IntType(false, 1);
|
const u1 = @IntType(false, 1);
|
||||||
@ -86,7 +87,9 @@ pub const Token = struct {
|
|||||||
// parsing state requires ~40-50 bytes of stack space.
|
// parsing state requires ~40-50 bytes of stack space.
|
||||||
//
|
//
|
||||||
// Conforms strictly to RFC8529.
|
// Conforms strictly to RFC8529.
|
||||||
pub const StreamingJsonParser = struct {
|
//
|
||||||
|
// For a non-byte based wrapper, consider using TokenStream instead.
|
||||||
|
pub const StreamingParser = struct {
|
||||||
// Current state
|
// Current state
|
||||||
state: State,
|
state: State,
|
||||||
// How many bytes we have counted for the current token
|
// How many bytes we have counted for the current token
|
||||||
@ -109,13 +112,13 @@ pub const StreamingJsonParser = struct {
|
|||||||
const array_bit = 1;
|
const array_bit = 1;
|
||||||
const max_stack_size = @maxValue(u8);
|
const max_stack_size = @maxValue(u8);
|
||||||
|
|
||||||
pub fn init() StreamingJsonParser {
|
pub fn init() StreamingParser {
|
||||||
var p: StreamingJsonParser = undefined;
|
var p: StreamingParser = undefined;
|
||||||
p.reset();
|
p.reset();
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset(p: *StreamingJsonParser) void {
|
pub fn reset(p: *StreamingParser) void {
|
||||||
p.state = State.TopLevelBegin;
|
p.state = State.TopLevelBegin;
|
||||||
p.count = 0;
|
p.count = 0;
|
||||||
// Set before ever read in main transition function
|
// Set before ever read in main transition function
|
||||||
@ -175,7 +178,7 @@ pub const StreamingJsonParser = struct {
|
|||||||
|
|
||||||
// Only call this function to generate array/object final state.
|
// Only call this function to generate array/object final state.
|
||||||
pub fn fromInt(x: var) State {
|
pub fn fromInt(x: var) State {
|
||||||
std.debug.assert(x == 0 or x == 1);
|
debug.assert(x == 0 or x == 1);
|
||||||
const T = @TagType(State);
|
const T = @TagType(State);
|
||||||
return State(T(x));
|
return State(T(x));
|
||||||
}
|
}
|
||||||
@ -205,7 +208,7 @@ pub const StreamingJsonParser = struct {
|
|||||||
// tokens. token2 is always null if token1 is null.
|
// tokens. token2 is always null if token1 is null.
|
||||||
//
|
//
|
||||||
// There is currently no error recovery on a bad stream.
|
// There is currently no error recovery on a bad stream.
|
||||||
pub fn feed(p: *StreamingJsonParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
|
pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
|
||||||
token1.* = null;
|
token1.* = null;
|
||||||
token2.* = null;
|
token2.* = null;
|
||||||
p.count += 1;
|
p.count += 1;
|
||||||
@ -217,7 +220,7 @@ pub const StreamingJsonParser = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Perform a single transition on the state machine and return any possible token.
|
// Perform a single transition on the state machine and return any possible token.
|
||||||
fn transition(p: *StreamingJsonParser, c: u8, token: *?Token) Error!bool {
|
fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
|
||||||
switch (p.state) {
|
switch (p.state) {
|
||||||
State.TopLevelBegin => switch (c) {
|
State.TopLevelBegin => switch (c) {
|
||||||
'{' => {
|
'{' => {
|
||||||
@ -852,10 +855,116 @@ pub const StreamingJsonParser = struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
|
||||||
|
pub const TokenStream = struct {
|
||||||
|
i: usize,
|
||||||
|
slice: []const u8,
|
||||||
|
parser: StreamingParser,
|
||||||
|
token: ?Token,
|
||||||
|
|
||||||
|
pub fn init(slice: []const u8) TokenStream {
|
||||||
|
return TokenStream{
|
||||||
|
.i = 0,
|
||||||
|
.slice = slice,
|
||||||
|
.parser = StreamingParser.init(),
|
||||||
|
.token = null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next(self: *TokenStream) !?Token {
|
||||||
|
if (self.token) |token| {
|
||||||
|
self.token = null;
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
var t1: ?Token = undefined;
|
||||||
|
var t2: ?Token = undefined;
|
||||||
|
|
||||||
|
while (self.i < self.slice.len) {
|
||||||
|
try self.parser.feed(self.slice[self.i], &t1, &t2);
|
||||||
|
self.i += 1;
|
||||||
|
|
||||||
|
if (t1) |token| {
|
||||||
|
self.token = t2;
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self.i > self.slice.len) {
|
||||||
|
try self.parser.feed(' ', &t1, &t2);
|
||||||
|
self.i += 1;
|
||||||
|
|
||||||
|
if (t1) |token| {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
fn checkNext(p: *TokenStream, id: Token.Id) void {
|
||||||
|
const token = ??(p.next() catch unreachable);
|
||||||
|
debug.assert(token.id == id);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "token" {
|
||||||
|
const s =
|
||||||
|
\\{
|
||||||
|
\\ "Image": {
|
||||||
|
\\ "Width": 800,
|
||||||
|
\\ "Height": 600,
|
||||||
|
\\ "Title": "View from 15th Floor",
|
||||||
|
\\ "Thumbnail": {
|
||||||
|
\\ "Url": "http://www.example.com/image/481989943",
|
||||||
|
\\ "Height": 125,
|
||||||
|
\\ "Width": 100
|
||||||
|
\\ },
|
||||||
|
\\ "Animated" : false,
|
||||||
|
\\ "IDs": [116, 943, 234, 38793]
|
||||||
|
\\ }
|
||||||
|
\\}
|
||||||
|
;
|
||||||
|
|
||||||
|
var p = TokenStream.init(s);
|
||||||
|
|
||||||
|
checkNext(&p, Token.Id.ObjectBegin);
|
||||||
|
checkNext(&p, Token.Id.String); // Image
|
||||||
|
checkNext(&p, Token.Id.ObjectBegin);
|
||||||
|
checkNext(&p, Token.Id.String); // Width
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.String); // Height
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.String); // Title
|
||||||
|
checkNext(&p, Token.Id.String);
|
||||||
|
checkNext(&p, Token.Id.String); // Thumbnail
|
||||||
|
checkNext(&p, Token.Id.ObjectBegin);
|
||||||
|
checkNext(&p, Token.Id.String); // Url
|
||||||
|
checkNext(&p, Token.Id.String);
|
||||||
|
checkNext(&p, Token.Id.String); // Height
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.String); // Width
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.ObjectEnd);
|
||||||
|
checkNext(&p, Token.Id.String); // Animated
|
||||||
|
checkNext(&p, Token.Id.False);
|
||||||
|
checkNext(&p, Token.Id.String); // IDs
|
||||||
|
checkNext(&p, Token.Id.ArrayBegin);
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.Number);
|
||||||
|
checkNext(&p, Token.Id.ArrayEnd);
|
||||||
|
checkNext(&p, Token.Id.ObjectEnd);
|
||||||
|
checkNext(&p, Token.Id.ObjectEnd);
|
||||||
|
|
||||||
|
debug.assert((try p.next()) == null);
|
||||||
|
}
|
||||||
|
|
||||||
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
|
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
|
||||||
// be able to decode the string even if this returns true.
|
// be able to decode the string even if this returns true.
|
||||||
pub fn validate(s: []const u8) bool {
|
pub fn validate(s: []const u8) bool {
|
||||||
var p = StreamingJsonParser.init();
|
var p = StreamingParser.init();
|
||||||
|
|
||||||
for (s) |c, i| {
|
for (s) |c, i| {
|
||||||
var token1: ?Token = undefined;
|
var token1: ?Token = undefined;
|
||||||
@ -897,46 +1006,46 @@ pub const Value = union(enum) {
|
|||||||
pub fn dump(self: *const Value) void {
|
pub fn dump(self: *const Value) void {
|
||||||
switch (self.*) {
|
switch (self.*) {
|
||||||
Value.Null => {
|
Value.Null => {
|
||||||
std.debug.warn("null");
|
debug.warn("null");
|
||||||
},
|
},
|
||||||
Value.Bool => |inner| {
|
Value.Bool => |inner| {
|
||||||
std.debug.warn("{}", inner);
|
debug.warn("{}", inner);
|
||||||
},
|
},
|
||||||
Value.Integer => |inner| {
|
Value.Integer => |inner| {
|
||||||
std.debug.warn("{}", inner);
|
debug.warn("{}", inner);
|
||||||
},
|
},
|
||||||
Value.Float => |inner| {
|
Value.Float => |inner| {
|
||||||
std.debug.warn("{.5}", inner);
|
debug.warn("{.5}", inner);
|
||||||
},
|
},
|
||||||
Value.String => |inner| {
|
Value.String => |inner| {
|
||||||
std.debug.warn("\"{}\"", inner);
|
debug.warn("\"{}\"", inner);
|
||||||
},
|
},
|
||||||
Value.Array => |inner| {
|
Value.Array => |inner| {
|
||||||
var not_first = false;
|
var not_first = false;
|
||||||
std.debug.warn("[");
|
debug.warn("[");
|
||||||
for (inner.toSliceConst()) |value| {
|
for (inner.toSliceConst()) |value| {
|
||||||
if (not_first) {
|
if (not_first) {
|
||||||
std.debug.warn(",");
|
debug.warn(",");
|
||||||
}
|
}
|
||||||
not_first = true;
|
not_first = true;
|
||||||
value.dump();
|
value.dump();
|
||||||
}
|
}
|
||||||
std.debug.warn("]");
|
debug.warn("]");
|
||||||
},
|
},
|
||||||
Value.Object => |inner| {
|
Value.Object => |inner| {
|
||||||
var not_first = false;
|
var not_first = false;
|
||||||
std.debug.warn("{{");
|
debug.warn("{{");
|
||||||
var it = inner.iterator();
|
var it = inner.iterator();
|
||||||
|
|
||||||
while (it.next()) |entry| {
|
while (it.next()) |entry| {
|
||||||
if (not_first) {
|
if (not_first) {
|
||||||
std.debug.warn(",");
|
debug.warn(",");
|
||||||
}
|
}
|
||||||
not_first = true;
|
not_first = true;
|
||||||
std.debug.warn("\"{}\":", entry.key);
|
debug.warn("\"{}\":", entry.key);
|
||||||
entry.value.dump();
|
entry.value.dump();
|
||||||
}
|
}
|
||||||
std.debug.warn("}}");
|
debug.warn("}}");
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -952,53 +1061,53 @@ pub const Value = union(enum) {
|
|||||||
fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
|
fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
|
||||||
switch (self.*) {
|
switch (self.*) {
|
||||||
Value.Null => {
|
Value.Null => {
|
||||||
std.debug.warn("null");
|
debug.warn("null");
|
||||||
},
|
},
|
||||||
Value.Bool => |inner| {
|
Value.Bool => |inner| {
|
||||||
std.debug.warn("{}", inner);
|
debug.warn("{}", inner);
|
||||||
},
|
},
|
||||||
Value.Integer => |inner| {
|
Value.Integer => |inner| {
|
||||||
std.debug.warn("{}", inner);
|
debug.warn("{}", inner);
|
||||||
},
|
},
|
||||||
Value.Float => |inner| {
|
Value.Float => |inner| {
|
||||||
std.debug.warn("{.5}", inner);
|
debug.warn("{.5}", inner);
|
||||||
},
|
},
|
||||||
Value.String => |inner| {
|
Value.String => |inner| {
|
||||||
std.debug.warn("\"{}\"", inner);
|
debug.warn("\"{}\"", inner);
|
||||||
},
|
},
|
||||||
Value.Array => |inner| {
|
Value.Array => |inner| {
|
||||||
var not_first = false;
|
var not_first = false;
|
||||||
std.debug.warn("[\n");
|
debug.warn("[\n");
|
||||||
|
|
||||||
for (inner.toSliceConst()) |value| {
|
for (inner.toSliceConst()) |value| {
|
||||||
if (not_first) {
|
if (not_first) {
|
||||||
std.debug.warn(",\n");
|
debug.warn(",\n");
|
||||||
}
|
}
|
||||||
not_first = true;
|
not_first = true;
|
||||||
padSpace(level + indent);
|
padSpace(level + indent);
|
||||||
value.dumpIndentLevel(indent, level + indent);
|
value.dumpIndentLevel(indent, level + indent);
|
||||||
}
|
}
|
||||||
std.debug.warn("\n");
|
debug.warn("\n");
|
||||||
padSpace(level);
|
padSpace(level);
|
||||||
std.debug.warn("]");
|
debug.warn("]");
|
||||||
},
|
},
|
||||||
Value.Object => |inner| {
|
Value.Object => |inner| {
|
||||||
var not_first = false;
|
var not_first = false;
|
||||||
std.debug.warn("{{\n");
|
debug.warn("{{\n");
|
||||||
var it = inner.iterator();
|
var it = inner.iterator();
|
||||||
|
|
||||||
while (it.next()) |entry| {
|
while (it.next()) |entry| {
|
||||||
if (not_first) {
|
if (not_first) {
|
||||||
std.debug.warn(",\n");
|
debug.warn(",\n");
|
||||||
}
|
}
|
||||||
not_first = true;
|
not_first = true;
|
||||||
padSpace(level + indent);
|
padSpace(level + indent);
|
||||||
std.debug.warn("\"{}\": ", entry.key);
|
debug.warn("\"{}\": ", entry.key);
|
||||||
entry.value.dumpIndentLevel(indent, level + indent);
|
entry.value.dumpIndentLevel(indent, level + indent);
|
||||||
}
|
}
|
||||||
std.debug.warn("\n");
|
debug.warn("\n");
|
||||||
padSpace(level);
|
padSpace(level);
|
||||||
std.debug.warn("}}");
|
debug.warn("}}");
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1006,13 +1115,13 @@ pub const Value = union(enum) {
|
|||||||
fn padSpace(indent: usize) void {
|
fn padSpace(indent: usize) void {
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i < indent) : (i += 1) {
|
while (i < indent) : (i += 1) {
|
||||||
std.debug.warn(" ");
|
debug.warn(" ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// A non-stream JSON parser which constructs a tree of Value's.
|
// A non-stream JSON parser which constructs a tree of Value's.
|
||||||
pub const JsonParser = struct {
|
pub const Parser = struct {
|
||||||
allocator: *Allocator,
|
allocator: *Allocator,
|
||||||
state: State,
|
state: State,
|
||||||
copy_strings: bool,
|
copy_strings: bool,
|
||||||
@ -1026,8 +1135,8 @@ pub const JsonParser = struct {
|
|||||||
Simple,
|
Simple,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn init(allocator: *Allocator, copy_strings: bool) JsonParser {
|
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
|
||||||
return JsonParser{
|
return Parser{
|
||||||
.allocator = allocator,
|
.allocator = allocator,
|
||||||
.state = State.Simple,
|
.state = State.Simple,
|
||||||
.copy_strings = copy_strings,
|
.copy_strings = copy_strings,
|
||||||
@ -1035,52 +1144,26 @@ pub const JsonParser = struct {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(p: *JsonParser) void {
|
pub fn deinit(p: *Parser) void {
|
||||||
p.stack.deinit();
|
p.stack.deinit();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset(p: *JsonParser) void {
|
pub fn reset(p: *Parser) void {
|
||||||
p.state = State.Simple;
|
p.state = State.Simple;
|
||||||
p.stack.shrink(0);
|
p.stack.shrink(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse(p: *JsonParser, input: []const u8) !ValueTree {
|
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
|
||||||
var mp = StreamingJsonParser.init();
|
var s = TokenStream.init(input);
|
||||||
|
|
||||||
var arena = ArenaAllocator.init(p.allocator);
|
var arena = ArenaAllocator.init(p.allocator);
|
||||||
errdefer arena.deinit();
|
errdefer arena.deinit();
|
||||||
|
|
||||||
for (input) |c, i| {
|
while (try s.next()) |token| {
|
||||||
var mt1: ?Token = undefined;
|
try p.transition(&arena.allocator, input, s.i - 1, token);
|
||||||
var mt2: ?Token = undefined;
|
|
||||||
|
|
||||||
try mp.feed(c, &mt1, &mt2);
|
|
||||||
if (mt1) |t1| {
|
|
||||||
try p.transition(&arena.allocator, input, i, t1);
|
|
||||||
|
|
||||||
if (mt2) |t2| {
|
|
||||||
try p.transition(&arena.allocator, input, i, t2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle top-level lonely number values.
|
debug.assert(p.stack.len == 1);
|
||||||
{
|
|
||||||
const i = input.len;
|
|
||||||
var mt1: ?Token = undefined;
|
|
||||||
var mt2: ?Token = undefined;
|
|
||||||
|
|
||||||
try mp.feed(' ', &mt1, &mt2);
|
|
||||||
if (mt1) |t1| {
|
|
||||||
try p.transition(&arena.allocator, input, i, t1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mp.complete) {
|
|
||||||
return error.IncompleteJsonInput;
|
|
||||||
}
|
|
||||||
|
|
||||||
std.debug.assert(p.stack.len == 1);
|
|
||||||
|
|
||||||
return ValueTree{
|
return ValueTree{
|
||||||
.arena = arena,
|
.arena = arena,
|
||||||
@ -1090,7 +1173,7 @@ pub const JsonParser = struct {
|
|||||||
|
|
||||||
// Even though p.allocator exists, we take an explicit allocator so that allocation state
|
// Even though p.allocator exists, we take an explicit allocator so that allocation state
|
||||||
// can be cleaned up on error correctly during a `parse` on call.
|
// can be cleaned up on error correctly during a `parse` on call.
|
||||||
fn transition(p: *JsonParser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
|
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
|
||||||
switch (p.state) {
|
switch (p.state) {
|
||||||
State.ObjectKey => switch (token.id) {
|
State.ObjectKey => switch (token.id) {
|
||||||
Token.Id.ObjectEnd => {
|
Token.Id.ObjectEnd => {
|
||||||
@ -1223,7 +1306,7 @@ pub const JsonParser = struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pushToParent(p: *JsonParser, value: *const Value) !void {
|
fn pushToParent(p: *Parser, value: *const Value) !void {
|
||||||
switch (p.stack.at(p.stack.len - 1)) {
|
switch (p.stack.at(p.stack.len - 1)) {
|
||||||
// Object Parent -> [ ..., object, <key>, value ]
|
// Object Parent -> [ ..., object, <key>, value ]
|
||||||
Value.String => |key| {
|
Value.String => |key| {
|
||||||
@ -1244,14 +1327,14 @@ pub const JsonParser = struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parseString(p: *JsonParser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
|
fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
|
||||||
// TODO: We don't strictly have to copy values which do not contain any escape
|
// TODO: We don't strictly have to copy values which do not contain any escape
|
||||||
// characters if flagged with the option.
|
// characters if flagged with the option.
|
||||||
const slice = token.slice(input, i);
|
const slice = token.slice(input, i);
|
||||||
return Value{ .String = try mem.dupe(p.allocator, u8, slice) };
|
return Value{ .String = try mem.dupe(p.allocator, u8, slice) };
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parseNumber(p: *JsonParser, token: *const Token, input: []const u8, i: usize) !Value {
|
fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value {
|
||||||
return if (token.number_is_integer)
|
return if (token.number_is_integer)
|
||||||
Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
|
Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
|
||||||
else
|
else
|
||||||
@ -1259,10 +1342,8 @@ pub const JsonParser = struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const debug = std.debug;
|
|
||||||
|
|
||||||
test "json parser dynamic" {
|
test "json parser dynamic" {
|
||||||
var p = JsonParser.init(std.debug.global_allocator, false);
|
var p = Parser.init(debug.global_allocator, false);
|
||||||
defer p.deinit();
|
defer p.deinit();
|
||||||
|
|
||||||
const s =
|
const s =
|
||||||
|
Loading…
x
Reference in New Issue
Block a user