Add json.TokenStream (#1062)
This hides some of the low-level parsing details of the StreamingParser. These don't need to be known when parsing a complete slice at once (which is what we can usually do). Also, remove `Json` from Parser names. The namespace `json` is sufficient.
This commit is contained in:
parent
f389e53735
commit
e7f141b376
237
std/json.zig
237
std/json.zig
@ -3,6 +3,7 @@
|
||||
// https://tools.ietf.org/html/rfc8259
|
||||
|
||||
const std = @import("index.zig");
|
||||
const debug = std.debug;
|
||||
const mem = std.mem;
|
||||
|
||||
const u1 = @IntType(false, 1);
|
||||
@ -86,7 +87,9 @@ pub const Token = struct {
|
||||
// parsing state requires ~40-50 bytes of stack space.
|
||||
//
|
||||
// Conforms strictly to RFC8259.
|
||||
pub const StreamingJsonParser = struct {
|
||||
//
|
||||
// For a non-byte based wrapper, consider using TokenStream instead.
|
||||
pub const StreamingParser = struct {
|
||||
// Current state
|
||||
state: State,
|
||||
// How many bytes we have counted for the current token
|
||||
@ -109,13 +112,13 @@ pub const StreamingJsonParser = struct {
|
||||
const array_bit = 1;
|
||||
const max_stack_size = @maxValue(u8);
|
||||
|
||||
pub fn init() StreamingJsonParser {
|
||||
var p: StreamingJsonParser = undefined;
|
||||
pub fn init() StreamingParser {
|
||||
var p: StreamingParser = undefined;
|
||||
p.reset();
|
||||
return p;
|
||||
}
|
||||
|
||||
pub fn reset(p: *StreamingJsonParser) void {
|
||||
pub fn reset(p: *StreamingParser) void {
|
||||
p.state = State.TopLevelBegin;
|
||||
p.count = 0;
|
||||
// Set before ever read in main transition function
|
||||
@ -175,7 +178,7 @@ pub const StreamingJsonParser = struct {
|
||||
|
||||
// Only call this function to generate array/object final state.
|
||||
pub fn fromInt(x: var) State {
|
||||
std.debug.assert(x == 0 or x == 1);
|
||||
debug.assert(x == 0 or x == 1);
|
||||
const T = @TagType(State);
|
||||
return State(T(x));
|
||||
}
|
||||
@ -205,7 +208,7 @@ pub const StreamingJsonParser = struct {
|
||||
// tokens. token2 is always null if token1 is null.
|
||||
//
|
||||
// There is currently no error recovery on a bad stream.
|
||||
pub fn feed(p: *StreamingJsonParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
|
||||
pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
|
||||
token1.* = null;
|
||||
token2.* = null;
|
||||
p.count += 1;
|
||||
@ -217,7 +220,7 @@ pub const StreamingJsonParser = struct {
|
||||
}
|
||||
|
||||
// Perform a single transition on the state machine and return any possible token.
|
||||
fn transition(p: *StreamingJsonParser, c: u8, token: *?Token) Error!bool {
|
||||
fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
|
||||
switch (p.state) {
|
||||
State.TopLevelBegin => switch (c) {
|
||||
'{' => {
|
||||
@ -852,10 +855,116 @@ pub const StreamingJsonParser = struct {
|
||||
}
|
||||
};
|
||||
|
||||
// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
//
// The complete input must be available up front as a single slice. Bytes are fed to the
// wrapped StreamingParser one at a time and tokens are handed back on demand via `next`.
pub const TokenStream = struct {
    // Index of the next byte of `slice` to feed. Set to `slice.len + 1` once the
    // end-of-input flush has been performed, so that the flush happens exactly once.
    i: usize,
    // The complete input being tokenized.
    slice: []const u8,
    // The byte-at-a-time parser that produces the tokens.
    parser: StreamingParser,
    // Second token of a feed that produced two tokens; returned by the following `next` call.
    token: ?Token,

    // Create a stream positioned at the start of `slice` with a freshly initialized parser.
    pub fn init(slice: []const u8) TokenStream {
        return TokenStream{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    // Return the next token, or null once the input has been consumed and flushed.
    // Any error reported by `StreamingParser.feed` is propagated to the caller.
    //
    // NOTE(review): reaching the end of the slice yields null without consulting
    // `parser.complete`; callers that must reject truncated input should check it.
    pub fn next(self: *TokenStream) !?Token {
        // A previous feed produced two tokens; hand out the one that was held back.
        if (self.token) |token| {
            self.token = null;
            return token;
        }

        // `feed` resets both out-parameters to null before doing anything else, so these
        // are always written before they are read below.
        var t1: ?Token = undefined;
        var t2: ?Token = undefined;

        while (self.i < self.slice.len) {
            try self.parser.feed(self.slice[self.i], &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                self.token = t2;
                return token;
            }
        }

        // A lonely top-level number is only terminated by the byte that follows it, so
        // once the slice is exhausted feed a single whitespace byte to flush it out.
        // The loop above leaves `i == slice.len`; the previous `>` comparison could
        // therefore never be true and the trailing number was silently dropped.
        if (self.i == self.slice.len) {
            try self.parser.feed(' ', &t1, &t2);
            // Step past the end so later calls neither re-enter the loop nor flush again.
            self.i += 1;

            if (t1) |token| {
                return token;
            }
        }

        return null;
    }
};
|
||||
|
||||
// Test helper: pull one token from `p` and assert that it has the expected `id`.
// A tokenizer error or an exhausted stream is treated as unreachable.
fn checkNext(p: *TokenStream, id: Token.Id) void {
    const maybe_token = p.next() catch unreachable;
    const actual = ??maybe_token;
    debug.assert(actual.id == id);
}
|
||||
|
||||
// Tokenizes a nested sample document and checks the exact sequence of token ids produced.
test "token" {
    const s =
        \\{
        \\ "Image": {
        \\ "Width": 800,
        \\ "Height": 600,
        \\ "Title": "View from 15th Floor",
        \\ "Thumbnail": {
        \\ "Url": "http://www.example.com/image/481989943",
        \\ "Height": 125,
        \\ "Width": 100
        \\ },
        \\ "Animated" : false,
        \\ "IDs": [116, 943, 234, 38793]
        \\ }
        \\}
    ;

    // Expected token ids, in document order.
    const expected_ids = []Token.Id{
        Token.Id.ObjectBegin,
        Token.Id.String, // Image
        Token.Id.ObjectBegin,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Title
        Token.Id.String,
        Token.Id.String, // Thumbnail
        Token.Id.ObjectBegin,
        Token.Id.String, // Url
        Token.Id.String,
        Token.Id.String, // Height
        Token.Id.Number,
        Token.Id.String, // Width
        Token.Id.Number,
        Token.Id.ObjectEnd,
        Token.Id.String, // Animated
        Token.Id.False,
        Token.Id.String, // IDs
        Token.Id.ArrayBegin,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.Number,
        Token.Id.ArrayEnd,
        Token.Id.ObjectEnd,
        Token.Id.ObjectEnd,
    };

    var p = TokenStream.init(s);

    for (expected_ids) |id| {
        checkNext(&p, id);
    }

    // Nothing may follow the closing brace.
    debug.assert((try p.next()) == null);
}
|
||||
|
||||
// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
|
||||
// be able to decode the string even if this returns true.
|
||||
pub fn validate(s: []const u8) bool {
|
||||
var p = StreamingJsonParser.init();
|
||||
var p = StreamingParser.init();
|
||||
|
||||
for (s) |c, i| {
|
||||
var token1: ?Token = undefined;
|
||||
@ -897,46 +1006,46 @@ pub const Value = union(enum) {
|
||||
pub fn dump(self: *const Value) void {
|
||||
switch (self.*) {
|
||||
Value.Null => {
|
||||
std.debug.warn("null");
|
||||
debug.warn("null");
|
||||
},
|
||||
Value.Bool => |inner| {
|
||||
std.debug.warn("{}", inner);
|
||||
debug.warn("{}", inner);
|
||||
},
|
||||
Value.Integer => |inner| {
|
||||
std.debug.warn("{}", inner);
|
||||
debug.warn("{}", inner);
|
||||
},
|
||||
Value.Float => |inner| {
|
||||
std.debug.warn("{.5}", inner);
|
||||
debug.warn("{.5}", inner);
|
||||
},
|
||||
Value.String => |inner| {
|
||||
std.debug.warn("\"{}\"", inner);
|
||||
debug.warn("\"{}\"", inner);
|
||||
},
|
||||
Value.Array => |inner| {
|
||||
var not_first = false;
|
||||
std.debug.warn("[");
|
||||
debug.warn("[");
|
||||
for (inner.toSliceConst()) |value| {
|
||||
if (not_first) {
|
||||
std.debug.warn(",");
|
||||
debug.warn(",");
|
||||
}
|
||||
not_first = true;
|
||||
value.dump();
|
||||
}
|
||||
std.debug.warn("]");
|
||||
debug.warn("]");
|
||||
},
|
||||
Value.Object => |inner| {
|
||||
var not_first = false;
|
||||
std.debug.warn("{{");
|
||||
debug.warn("{{");
|
||||
var it = inner.iterator();
|
||||
|
||||
while (it.next()) |entry| {
|
||||
if (not_first) {
|
||||
std.debug.warn(",");
|
||||
debug.warn(",");
|
||||
}
|
||||
not_first = true;
|
||||
std.debug.warn("\"{}\":", entry.key);
|
||||
debug.warn("\"{}\":", entry.key);
|
||||
entry.value.dump();
|
||||
}
|
||||
std.debug.warn("}}");
|
||||
debug.warn("}}");
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -952,53 +1061,53 @@ pub const Value = union(enum) {
|
||||
fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
|
||||
switch (self.*) {
|
||||
Value.Null => {
|
||||
std.debug.warn("null");
|
||||
debug.warn("null");
|
||||
},
|
||||
Value.Bool => |inner| {
|
||||
std.debug.warn("{}", inner);
|
||||
debug.warn("{}", inner);
|
||||
},
|
||||
Value.Integer => |inner| {
|
||||
std.debug.warn("{}", inner);
|
||||
debug.warn("{}", inner);
|
||||
},
|
||||
Value.Float => |inner| {
|
||||
std.debug.warn("{.5}", inner);
|
||||
debug.warn("{.5}", inner);
|
||||
},
|
||||
Value.String => |inner| {
|
||||
std.debug.warn("\"{}\"", inner);
|
||||
debug.warn("\"{}\"", inner);
|
||||
},
|
||||
Value.Array => |inner| {
|
||||
var not_first = false;
|
||||
std.debug.warn("[\n");
|
||||
debug.warn("[\n");
|
||||
|
||||
for (inner.toSliceConst()) |value| {
|
||||
if (not_first) {
|
||||
std.debug.warn(",\n");
|
||||
debug.warn(",\n");
|
||||
}
|
||||
not_first = true;
|
||||
padSpace(level + indent);
|
||||
value.dumpIndentLevel(indent, level + indent);
|
||||
}
|
||||
std.debug.warn("\n");
|
||||
debug.warn("\n");
|
||||
padSpace(level);
|
||||
std.debug.warn("]");
|
||||
debug.warn("]");
|
||||
},
|
||||
Value.Object => |inner| {
|
||||
var not_first = false;
|
||||
std.debug.warn("{{\n");
|
||||
debug.warn("{{\n");
|
||||
var it = inner.iterator();
|
||||
|
||||
while (it.next()) |entry| {
|
||||
if (not_first) {
|
||||
std.debug.warn(",\n");
|
||||
debug.warn(",\n");
|
||||
}
|
||||
not_first = true;
|
||||
padSpace(level + indent);
|
||||
std.debug.warn("\"{}\": ", entry.key);
|
||||
debug.warn("\"{}\": ", entry.key);
|
||||
entry.value.dumpIndentLevel(indent, level + indent);
|
||||
}
|
||||
std.debug.warn("\n");
|
||||
debug.warn("\n");
|
||||
padSpace(level);
|
||||
std.debug.warn("}}");
|
||||
debug.warn("}}");
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -1006,13 +1115,13 @@ pub const Value = union(enum) {
|
||||
fn padSpace(indent: usize) void {
|
||||
var i: usize = 0;
|
||||
while (i < indent) : (i += 1) {
|
||||
std.debug.warn(" ");
|
||||
debug.warn(" ");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// A non-stream JSON parser which constructs a tree of Value's.
|
||||
pub const JsonParser = struct {
|
||||
pub const Parser = struct {
|
||||
allocator: *Allocator,
|
||||
state: State,
|
||||
copy_strings: bool,
|
||||
@ -1026,8 +1135,8 @@ pub const JsonParser = struct {
|
||||
Simple,
|
||||
};
|
||||
|
||||
pub fn init(allocator: *Allocator, copy_strings: bool) JsonParser {
|
||||
return JsonParser{
|
||||
pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
|
||||
return Parser{
|
||||
.allocator = allocator,
|
||||
.state = State.Simple,
|
||||
.copy_strings = copy_strings,
|
||||
@ -1035,52 +1144,26 @@ pub const JsonParser = struct {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn deinit(p: *JsonParser) void {
|
||||
pub fn deinit(p: *Parser) void {
|
||||
p.stack.deinit();
|
||||
}
|
||||
|
||||
pub fn reset(p: *JsonParser) void {
|
||||
pub fn reset(p: *Parser) void {
|
||||
p.state = State.Simple;
|
||||
p.stack.shrink(0);
|
||||
}
|
||||
|
||||
pub fn parse(p: *JsonParser, input: []const u8) !ValueTree {
|
||||
var mp = StreamingJsonParser.init();
|
||||
pub fn parse(p: *Parser, input: []const u8) !ValueTree {
|
||||
var s = TokenStream.init(input);
|
||||
|
||||
var arena = ArenaAllocator.init(p.allocator);
|
||||
errdefer arena.deinit();
|
||||
|
||||
for (input) |c, i| {
|
||||
var mt1: ?Token = undefined;
|
||||
var mt2: ?Token = undefined;
|
||||
|
||||
try mp.feed(c, &mt1, &mt2);
|
||||
if (mt1) |t1| {
|
||||
try p.transition(&arena.allocator, input, i, t1);
|
||||
|
||||
if (mt2) |t2| {
|
||||
try p.transition(&arena.allocator, input, i, t2);
|
||||
}
|
||||
}
|
||||
while (try s.next()) |token| {
|
||||
try p.transition(&arena.allocator, input, s.i - 1, token);
|
||||
}
|
||||
|
||||
// Handle top-level lonely number values.
|
||||
{
|
||||
const i = input.len;
|
||||
var mt1: ?Token = undefined;
|
||||
var mt2: ?Token = undefined;
|
||||
|
||||
try mp.feed(' ', &mt1, &mt2);
|
||||
if (mt1) |t1| {
|
||||
try p.transition(&arena.allocator, input, i, t1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!mp.complete) {
|
||||
return error.IncompleteJsonInput;
|
||||
}
|
||||
|
||||
std.debug.assert(p.stack.len == 1);
|
||||
debug.assert(p.stack.len == 1);
|
||||
|
||||
return ValueTree{
|
||||
.arena = arena,
|
||||
@ -1090,7 +1173,7 @@ pub const JsonParser = struct {
|
||||
|
||||
// Even though p.allocator exists, we take an explicit allocator so that allocation state
|
||||
// can be cleaned up correctly on error during a call to `parse`.
|
||||
fn transition(p: *JsonParser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
|
||||
fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
|
||||
switch (p.state) {
|
||||
State.ObjectKey => switch (token.id) {
|
||||
Token.Id.ObjectEnd => {
|
||||
@ -1223,7 +1306,7 @@ pub const JsonParser = struct {
|
||||
}
|
||||
}
|
||||
|
||||
fn pushToParent(p: *JsonParser, value: *const Value) !void {
|
||||
fn pushToParent(p: *Parser, value: *const Value) !void {
|
||||
switch (p.stack.at(p.stack.len - 1)) {
|
||||
// Object Parent -> [ ..., object, <key>, value ]
|
||||
Value.String => |key| {
|
||||
@ -1244,14 +1327,14 @@ pub const JsonParser = struct {
|
||||
}
|
||||
}
|
||||
|
||||
fn parseString(p: *JsonParser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
|
||||
fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
|
||||
// TODO: We don't strictly have to copy values which do not contain any escape
|
||||
// characters if flagged with the option.
|
||||
const slice = token.slice(input, i);
|
||||
return Value{ .String = try mem.dupe(p.allocator, u8, slice) };
|
||||
}
|
||||
|
||||
fn parseNumber(p: *JsonParser, token: *const Token, input: []const u8, i: usize) !Value {
|
||||
fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value {
|
||||
return if (token.number_is_integer)
|
||||
Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
|
||||
else
|
||||
@ -1259,10 +1342,8 @@ pub const JsonParser = struct {
|
||||
}
|
||||
};
|
||||
|
||||
const debug = std.debug;
|
||||
|
||||
test "json parser dynamic" {
|
||||
var p = JsonParser.init(std.debug.global_allocator, false);
|
||||
var p = Parser.init(debug.global_allocator, false);
|
||||
defer p.deinit();
|
||||
|
||||
const s =
|
||||
|
Loading…
x
Reference in New Issue
Block a user