1266 lines
30 KiB
C++
1266 lines
30 KiB
C++
/*
|
|
* Copyright (c) 2012, 2013 Chad Austin
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use, copy,
|
|
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
* of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <limits.h>
|
|
#include <ostream>
|
|
#include <algorithm>
|
|
#include <cstdio>
|
|
#include <limits>
|
|
|
|
#include <string> // for error messages. kill someday?
|
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
#define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1)
|
|
#define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
|
#else
|
|
#define SAJSON_LIKELY(x) x
|
|
#define SAJSON_UNLIKELY(x) x
|
|
#endif
|
|
|
|
namespace sajson {
|
|
enum type {
|
|
TYPE_INTEGER = 0,
|
|
TYPE_DOUBLE = 1,
|
|
TYPE_NULL = 2,
|
|
TYPE_FALSE = 3,
|
|
TYPE_TRUE = 4,
|
|
TYPE_STRING = 5,
|
|
TYPE_ARRAY = 6,
|
|
TYPE_OBJECT = 7,
|
|
};
|
|
|
|
inline std::ostream& operator<<(std::ostream &os, type t){
|
|
switch(t){
|
|
case TYPE_INTEGER:
|
|
return os<<"<integer>";
|
|
case TYPE_DOUBLE:
|
|
return os<<"<double>";
|
|
case TYPE_NULL:
|
|
return os<<"<null>";
|
|
case TYPE_FALSE:
|
|
return os<<"<false>";
|
|
case TYPE_TRUE:
|
|
return os<<"<true>";
|
|
case TYPE_STRING:
|
|
return os<<"<string>";
|
|
case TYPE_ARRAY:
|
|
return os<<"<array>";
|
|
case TYPE_OBJECT:
|
|
return os<<"<object>";
|
|
default:
|
|
return os<<"<unknown type";
|
|
}
|
|
}
|
|
|
|
static const size_t TYPE_BITS = 3;
|
|
static const size_t TYPE_SHIFT = sizeof(size_t) * 8 - TYPE_BITS;
|
|
static const size_t TYPE_MASK = (1<<TYPE_BITS) - 1;
|
|
static const size_t VALUE_MASK = size_t(-1)>>TYPE_BITS;
|
|
|
|
static const size_t ROOT_MARKER = size_t(-1) &VALUE_MASK;
|
|
|
|
inline type get_element_type(size_t s){
|
|
return static_cast<type>((s>>TYPE_SHIFT) & TYPE_MASK);
|
|
}
|
|
|
|
inline size_t get_element_value(size_t s){
|
|
return s & VALUE_MASK;
|
|
}
|
|
|
|
inline size_t make_element(type t, size_t value){
|
|
//assert(value & VALUE_MASK == 0);
|
|
//value &= VALUE_MASK;
|
|
return value | (static_cast<size_t>(t)<<TYPE_SHIFT);
|
|
}
|
|
|
|
class string {
|
|
public:
|
|
string(const char *text, size_t length)
|
|
: text(text)
|
|
, _length(length)
|
|
{}
|
|
|
|
const char* data() const {
|
|
return text;
|
|
}
|
|
|
|
size_t length() const {
|
|
return _length;
|
|
}
|
|
|
|
std::string as_string() const {
|
|
return std::string(text, text + _length);
|
|
}
|
|
|
|
private:
|
|
const char*const text;
|
|
const size_t _length;
|
|
|
|
string(); /*=delete*/
|
|
};
|
|
|
|
class literal: public string {
|
|
public:
|
|
explicit literal(const char *text)
|
|
: string(text, strlen(text))
|
|
{}
|
|
};
|
|
|
|
struct object_key_record
|
|
{
|
|
size_t key_start;
|
|
size_t key_end;
|
|
size_t value;
|
|
};
|
|
|
|
struct object_key_comparator
|
|
{
|
|
object_key_comparator(const char *object_data)
|
|
: data(object_data)
|
|
{
|
|
}
|
|
|
|
bool operator()(const object_key_record &lhs, const string &rhs) const {
|
|
const size_t lhs_length = lhs.key_end - lhs.key_start;
|
|
const size_t rhs_length = rhs.length();
|
|
if(lhs_length < rhs_length){
|
|
return true;
|
|
} else if(lhs_length > rhs_length){
|
|
return false;
|
|
}
|
|
return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
|
|
}
|
|
|
|
bool operator()(const string &lhs, const object_key_record &rhs) const {
|
|
return !(*this)(rhs, lhs);
|
|
}
|
|
|
|
bool operator()(const object_key_record &lhs, const
|
|
object_key_record &rhs)
|
|
{
|
|
const size_t lhs_length = lhs.key_end - lhs.key_start;
|
|
const size_t rhs_length = rhs.key_end - rhs.key_start;
|
|
if(lhs_length < rhs_length){
|
|
return true;
|
|
} else if(lhs_length > rhs_length){
|
|
return false;
|
|
}
|
|
return memcmp(data + lhs.key_start, data + rhs.key_start,
|
|
lhs_length) < 0;
|
|
}
|
|
|
|
const char *data;
|
|
};
|
|
|
|
class refcount {
|
|
public:
|
|
refcount()
|
|
: pn(new size_t(1))
|
|
{}
|
|
|
|
refcount(const refcount &rc)
|
|
: pn(rc.pn)
|
|
{
|
|
++*pn;
|
|
}
|
|
|
|
~refcount(){
|
|
if(--*pn == 0){
|
|
delete pn;
|
|
}
|
|
}
|
|
|
|
size_t count() const {
|
|
return *pn;
|
|
}
|
|
|
|
private:
|
|
size_t *pn;
|
|
|
|
refcount& operator=(const refcount&);
|
|
};
|
|
|
|
class mutable_string_view {
|
|
public:
|
|
mutable_string_view()
|
|
: length(0)
|
|
, data(0)
|
|
{}
|
|
|
|
mutable_string_view(const literal &s)
|
|
: length(s.length())
|
|
{
|
|
data = new char[length];
|
|
memcpy(data, s.data(), length);
|
|
}
|
|
|
|
mutable_string_view(const string &s)
|
|
: length(s.length())
|
|
{
|
|
data = new char[length];
|
|
memcpy(data, s.data(), length);
|
|
}
|
|
|
|
~mutable_string_view(){
|
|
if(uses.count() == 1){
|
|
delete[] data;
|
|
}
|
|
}
|
|
|
|
size_t get_length() const {
|
|
return length;
|
|
}
|
|
|
|
char* get_data() const {
|
|
return data;
|
|
}
|
|
|
|
private:
|
|
refcount uses;
|
|
size_t length;
|
|
char *data;
|
|
};
|
|
|
|
union integer_storage {
|
|
int i;
|
|
size_t u;
|
|
};
|
|
// TODO: reinstate with c++03 implementation
|
|
//static_assert(sizeof(integer_storage) == sizeof(size_t), "integer_storage must have same size as one structure slot");
|
|
|
|
union double_storage {
|
|
enum {
|
|
word_length = sizeof(double) / sizeof(size_t)
|
|
};
|
|
|
|
#if defined(_M_IX86) || defined(__i386__) || defined(_X86_)
|
|
static double load(const size_t *location){
|
|
return *reinterpret_cast<const double*>(location);
|
|
}
|
|
static void store(size_t *location, double value){
|
|
*reinterpret_cast<double*>(location) = value;
|
|
}
|
|
#else
|
|
static double load(const size_t *location){
|
|
double_storage s;
|
|
for(unsigned i = 0; i < double_storage::word_length; ++i){
|
|
s.u[i] = location[i];
|
|
}
|
|
return s.d;
|
|
}
|
|
|
|
static void store(size_t *location, double value){
|
|
double_storage ns;
|
|
ns.d = value;
|
|
|
|
for(int i = 0; i < ns.word_length; ++i){
|
|
location[i] = ns.u[i];
|
|
}
|
|
}
|
|
|
|
double d;
|
|
size_t u[word_length];
|
|
#endif
|
|
};
|
|
// TODO: reinstate with c++03 implementation
|
|
//static_assert(sizeof(double_storage) == sizeof(double), "double_storage should have same size as double");
|
|
|
|
class value {
|
|
public:
|
|
explicit value(type value_type, const size_t *payload, const char *text)
|
|
: value_type(value_type)
|
|
, payload(payload)
|
|
, text(text)
|
|
{}
|
|
|
|
type get_type() const {
|
|
return value_type;
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_ARRAY or TYPE_OBJECT
|
|
size_t get_length() const {
|
|
return payload[0];
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_ARRAY
|
|
value get_array_element(size_t index) const {
|
|
size_t element = payload[1 + index];
|
|
return value(get_element_type(element), payload +
|
|
get_element_value(element),
|
|
text);
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_OBJECT
|
|
string get_object_key(size_t index) const {
|
|
const size_t *s = payload + 1 + index * 3;
|
|
return string(text + s[0], s[1] - s[0]);
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_OBJECT
|
|
value get_object_value(size_t index) const {
|
|
size_t element = payload[3 + index * 3];
|
|
return value(get_element_type(element), payload +
|
|
get_element_value(element),
|
|
text);
|
|
}
|
|
|
|
|
|
// valid iff get_type() is TYPE_OBJECT
|
|
// return get_length() if there is no such key
|
|
size_t find_object_key(const string &key) const {
|
|
const object_key_record *start =
|
|
reinterpret_cast<const object_key_record*>
|
|
(payload + 1);
|
|
const object_key_record *end = start + get_length();
|
|
const object_key_record *i = std::lower_bound(start, end, key,
|
|
object_key_comparator(text));
|
|
return (i != end
|
|
&& (i->key_end - i->key_start) == key.length()
|
|
&& memcmp(key.data(), text + i->key_start,
|
|
key.length()) == 0) ? i - start : get_length();
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_INTEGER
|
|
int get_integer_value() const {
|
|
integer_storage s;
|
|
s.u = payload[0];
|
|
return s.i;
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_DOUBLE
|
|
double get_double_value() const {
|
|
return double_storage::load(payload);
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_INTEGER or TYPE_DOUBLE
|
|
double get_number_value() const {
|
|
if(get_type() == TYPE_INTEGER){
|
|
return get_integer_value();
|
|
} else {
|
|
return get_double_value();
|
|
}
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_STRING
|
|
size_t get_string_length() const {
|
|
return payload[1] - payload[0];
|
|
}
|
|
|
|
// valid iff get_type() is TYPE_STRING
|
|
std::string as_string() const {
|
|
return std::string(text + payload[0], text + payload[1]);
|
|
}
|
|
|
|
private:
|
|
const type value_type;
|
|
const size_t*const payload;
|
|
const char*const text;
|
|
|
|
};
|
|
|
|
class document {
|
|
public:
|
|
explicit document(mutable_string_view &input, const size_t *structure,
|
|
type root_type, const size_t *root, size_t error_line,
|
|
size_t error_column,
|
|
const std::string &error_message)
|
|
: input(input)
|
|
, structure(structure)
|
|
, root_type(root_type)
|
|
, root(root)
|
|
, error_line(error_line)
|
|
, error_column(error_column)
|
|
, error_message(error_message)
|
|
{}
|
|
|
|
~document(){
|
|
delete[] structure;
|
|
}
|
|
|
|
bool is_valid() const {
|
|
return !!structure;
|
|
}
|
|
|
|
value get_root() const {
|
|
return value(root_type, root, input.get_data());
|
|
}
|
|
|
|
size_t get_error_line() const {
|
|
return error_line;
|
|
}
|
|
|
|
size_t get_error_column() const {
|
|
return error_column;
|
|
}
|
|
|
|
std::string get_error_message() const {
|
|
return error_message;
|
|
}
|
|
|
|
private:
|
|
mutable_string_view input;
|
|
const size_t*const structure;
|
|
const type root_type;
|
|
const size_t*const root;
|
|
const size_t error_line;
|
|
const size_t error_column;
|
|
const std::string error_message;
|
|
};
|
|
|
|
class parser {
|
|
public:
|
|
parser(const mutable_string_view &msv, size_t *structure)
|
|
: input(msv)
|
|
, input_end(input.get_data() + input.get_length())
|
|
, structure(structure)
|
|
, p(input.get_data())
|
|
, temp(structure)
|
|
, root_type(TYPE_NULL)
|
|
, out(structure + input.get_length())
|
|
, error_line(0)
|
|
, error_column(0)
|
|
{}
|
|
|
|
document get_document(){
|
|
if(parse()){
|
|
return document(input, structure, root_type, out, 0, 0,
|
|
std::string());
|
|
} else {
|
|
delete[] structure;
|
|
return document(input, 0, TYPE_NULL, 0, error_line, error_column,
|
|
error_message);
|
|
}
|
|
}
|
|
|
|
private:
|
|
struct error_result {
|
|
operator bool() const {
|
|
return false;
|
|
}
|
|
};
|
|
|
|
struct parse_result {
|
|
parse_result(error_result)
|
|
: success(false)
|
|
{}
|
|
|
|
parse_result(type t)
|
|
: success(true)
|
|
, value_type(t)
|
|
{}
|
|
|
|
bool operator!() const {
|
|
return !success;
|
|
}
|
|
|
|
bool success;
|
|
type value_type;
|
|
};
|
|
|
|
bool at_eof(){
|
|
return p == input_end;
|
|
}
|
|
|
|
char peek_structure(){
|
|
for(;;){
|
|
if(p == input_end){
|
|
// 0 is never legal as a structural character in json text so treat it as eof
|
|
return 0;
|
|
}
|
|
switch(*p){
|
|
case 0x20:
|
|
case 0x09:
|
|
case 0x0A:
|
|
case 0x0D:
|
|
++p;
|
|
continue;
|
|
default:
|
|
return *p;
|
|
}
|
|
}
|
|
}
|
|
|
|
error_result error(const char *message){
|
|
error_line = 1;
|
|
error_column = 1;
|
|
error_message = message;
|
|
return error_result();
|
|
}
|
|
|
|
bool parse(){
|
|
char c = peek_structure();
|
|
if(c == 0){
|
|
return error("no root element");
|
|
}
|
|
|
|
type current_structure_type;
|
|
if(c == '['){
|
|
current_structure_type = TYPE_ARRAY;
|
|
} else if(c == '{'){
|
|
current_structure_type = TYPE_OBJECT;
|
|
} else if(c == '"'){
|
|
current_structure_type = TYPE_STRING;
|
|
} else if((c >= '0' && c <= '9') || c == '-'){
|
|
current_structure_type = TYPE_INTEGER;
|
|
} else if(c == 'f'){
|
|
current_structure_type = TYPE_FALSE;
|
|
} else if(c == 't'){
|
|
current_structure_type = TYPE_TRUE;
|
|
/*} else if(c == 'u' && input.get_length() == 9 &&
|
|
strncmp(input.get_data(), "undefined", 9) == 0){
|
|
return true;*/
|
|
} else {
|
|
return error("document root must be a json value");
|
|
}
|
|
|
|
size_t *current_base = temp;
|
|
*temp++ = make_element(current_structure_type, ROOT_MARKER);
|
|
|
|
parse_result result = error_result();
|
|
|
|
if(current_structure_type == TYPE_STRING){
|
|
out += 2;
|
|
result = parse_string();
|
|
if(!result){
|
|
return result.success;
|
|
}
|
|
root_type = result.value_type;
|
|
*temp++ = make_element(result.value_type, out - current_base - 1);
|
|
goto done;
|
|
}
|
|
if(current_structure_type == TYPE_INTEGER){
|
|
out += 2;
|
|
result = parse_number();
|
|
if(!result){
|
|
return result.success;
|
|
}
|
|
root_type = result.value_type;
|
|
*temp++ = make_element(result.value_type, out - current_base - 1);
|
|
goto done;
|
|
}
|
|
if(current_structure_type == TYPE_FALSE){
|
|
result = parse_false();
|
|
if(!result){
|
|
return result.success;
|
|
}
|
|
root_type = result.value_type;
|
|
*temp++ = make_element(result.value_type, out - current_base - 1);
|
|
goto done;
|
|
}
|
|
if(current_structure_type == TYPE_TRUE){
|
|
result = parse_true();
|
|
if(!result){
|
|
return result.success;
|
|
}
|
|
root_type = result.value_type;
|
|
*temp++ = make_element(result.value_type, out - current_base - 1);
|
|
goto done;
|
|
}
|
|
|
|
++p;
|
|
|
|
for(;;){
|
|
char closing_bracket =
|
|
(current_structure_type == TYPE_OBJECT ? '}' : ']');
|
|
|
|
c = peek_structure();
|
|
if(temp > current_base + 1){
|
|
if(c != closing_bracket){
|
|
if(c == ','){
|
|
++p;
|
|
c = peek_structure();
|
|
} else {
|
|
return error("expected ,");
|
|
}
|
|
}
|
|
}
|
|
|
|
if(current_structure_type == TYPE_OBJECT && c != '}'){
|
|
if(c != '"'){
|
|
return error("object key must be quoted");
|
|
}
|
|
result = parse_string(temp);
|
|
if(peek_structure() != ':'){
|
|
return error("expected :");
|
|
}
|
|
++p;
|
|
temp += 2;
|
|
}
|
|
|
|
switch(peek_structure()){
|
|
type next_type;
|
|
parse_result (parser::*structure_installer)(size_t *base);
|
|
|
|
case 0:
|
|
return error("unexpected end of input a");
|
|
case 'n':
|
|
result = parse_null();
|
|
break;
|
|
case 'f':
|
|
result = parse_false();
|
|
break;
|
|
case 't':
|
|
result = parse_true();
|
|
break;
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
case '-':
|
|
result = parse_number();
|
|
break;
|
|
case '"':
|
|
result = parse_string();
|
|
break;
|
|
|
|
case '[':
|
|
next_type = TYPE_ARRAY;
|
|
goto push;
|
|
case '{':
|
|
next_type = TYPE_OBJECT;
|
|
goto push;
|
|
push: {
|
|
++p;
|
|
size_t *previous_base = current_base;
|
|
current_base = temp;
|
|
*temp++ = make_element(current_structure_type,
|
|
previous_base - structure);
|
|
current_structure_type = next_type;
|
|
continue;
|
|
}
|
|
|
|
case ']':
|
|
if(current_structure_type == TYPE_ARRAY){
|
|
structure_installer = &parser::install_array;
|
|
goto pop;
|
|
} else {
|
|
return error("expected }");
|
|
}
|
|
case '}':
|
|
if(current_structure_type == TYPE_OBJECT){
|
|
structure_installer = &parser::install_object;
|
|
goto pop;
|
|
} else {
|
|
return error("expected ]");
|
|
}
|
|
pop: {
|
|
++p;
|
|
size_t element = *current_base;
|
|
result = (this->*structure_installer)(current_base + 1);
|
|
size_t parent = get_element_value(element);
|
|
if(parent == ROOT_MARKER){
|
|
root_type = result.value_type;
|
|
goto done;
|
|
}
|
|
temp = current_base;
|
|
current_base = structure + parent;
|
|
current_structure_type = get_element_type(element);
|
|
break;
|
|
}
|
|
default:
|
|
printf("%c\n", *p);
|
|
return error("cannot parse unknown value");
|
|
}
|
|
|
|
if(!result){
|
|
return result.success;
|
|
}
|
|
|
|
*temp++ = make_element(result.value_type, out - current_base - 1);
|
|
}
|
|
|
|
done:
|
|
if(0 == peek_structure()){
|
|
return true;
|
|
} else {
|
|
return error("expected end of input");
|
|
}
|
|
}
|
|
|
|
bool has_remaining_characters(ptrdiff_t remaining){
|
|
return input_end - p >= remaining;
|
|
}
|
|
|
|
parse_result parse_null(){
|
|
if(SAJSON_UNLIKELY(!has_remaining_characters(4))){
|
|
return error("unexpected end of input b");
|
|
}
|
|
char p1 = p[1];
|
|
char p2 = p[2];
|
|
char p3 = p[3];
|
|
if(SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')){
|
|
return error("expected 'null'");
|
|
}
|
|
p += 4;
|
|
return TYPE_NULL;
|
|
}
|
|
|
|
parse_result parse_false(){
|
|
if(SAJSON_UNLIKELY(!has_remaining_characters(5))){
|
|
return error("unexpected end of input c");
|
|
}
|
|
char p1 = p[1];
|
|
char p2 = p[2];
|
|
char p3 = p[3];
|
|
char p4 = p[4];
|
|
if(SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')){
|
|
return error("expected 'false'");
|
|
}
|
|
p += 5;
|
|
return TYPE_FALSE;
|
|
}
|
|
|
|
parse_result parse_true(){
|
|
if(SAJSON_UNLIKELY(!has_remaining_characters(4))){
|
|
return error("unexpected end of input d");
|
|
}
|
|
char p1 = p[1];
|
|
char p2 = p[2];
|
|
char p3 = p[3];
|
|
if(SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')){
|
|
return error("expected 'true'");
|
|
}
|
|
p += 4;
|
|
return TYPE_TRUE;
|
|
}
|
|
|
|
static double pow10(int exponent){
|
|
if(exponent > 308){
|
|
return std::numeric_limits<double>::infinity();
|
|
} else if(exponent < -323){
|
|
return 0.0;
|
|
}
|
|
static const double constants[] = {
|
|
1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
|
|
1e-315, 1e-314,
|
|
1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, 1e-306,
|
|
1e-305, 1e-304,
|
|
1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, 1e-297, 1e-296,
|
|
1e-295, 1e-294,
|
|
1e-293, 1e-292, 1e-291, 1e-290, 1e-289, 1e-288, 1e-287, 1e-286,
|
|
1e-285, 1e-284,
|
|
1e-283, 1e-282, 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276,
|
|
1e-275, 1e-274,
|
|
1e-273, 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266,
|
|
1e-265, 1e-264,
|
|
1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256,
|
|
1e-255, 1e-254,
|
|
1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246,
|
|
1e-245, 1e-244,
|
|
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236,
|
|
1e-235, 1e-234,
|
|
1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
|
|
1e-225, 1e-224,
|
|
1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217, 1e-216,
|
|
1e-215, 1e-214,
|
|
1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208, 1e-207, 1e-206,
|
|
1e-205, 1e-204,
|
|
1e-203, 1e-202, 1e-201, 1e-200, 1e-199, 1e-198, 1e-197, 1e-196,
|
|
1e-195, 1e-194,
|
|
1e-193, 1e-192, 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186,
|
|
1e-185, 1e-184,
|
|
1e-183, 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176,
|
|
1e-175, 1e-174,
|
|
1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166,
|
|
1e-165, 1e-164,
|
|
1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156,
|
|
1e-155, 1e-154,
|
|
1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146,
|
|
1e-145, 1e-144,
|
|
1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
|
|
1e-135, 1e-134,
|
|
1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127, 1e-126,
|
|
1e-125, 1e-124,
|
|
1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118, 1e-117, 1e-116,
|
|
1e-115, 1e-114,
|
|
1e-113, 1e-112, 1e-111, 1e-110, 1e-109, 1e-108, 1e-107, 1e-106,
|
|
1e-105, 1e-104,
|
|
1e-103, 1e-102, 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95,
|
|
1e-94, 1e-93,
|
|
1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83,
|
|
1e-82, 1e-81,
|
|
1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71,
|
|
1e-70, 1e-69,
|
|
1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59,
|
|
1e-58, 1e-57,
|
|
1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47,
|
|
1e-46, 1e-45,
|
|
1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35,
|
|
1e-34, 1e-33,
|
|
1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23,
|
|
1e-22, 1e-21,
|
|
1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11,
|
|
1e-10, 1e-9,
|
|
1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3,
|
|
1e4, 1e5, 1e6, 1e7,
|
|
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
|
|
1e20, 1e21,
|
|
1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32,
|
|
1e33, 1e34, 1e35,
|
|
1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46,
|
|
1e47, 1e48, 1e49,
|
|
1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60,
|
|
1e61, 1e62, 1e63,
|
|
1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74,
|
|
1e75, 1e76, 1e77,
|
|
1e78, 1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88,
|
|
1e89, 1e90, 1e91,
|
|
1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, 1e100, 1e101, 1e102,
|
|
1e103, 1e104,
|
|
1e105, 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114,
|
|
1e115, 1e116,
|
|
1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126,
|
|
1e127, 1e128,
|
|
1e129, 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138,
|
|
1e139, 1e140,
|
|
1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150,
|
|
1e151, 1e152,
|
|
1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, 1e160, 1e161, 1e162,
|
|
1e163, 1e164,
|
|
1e165, 1e166, 1e167, 1e168, 1e169, 1e170, 1e171, 1e172, 1e173, 1e174,
|
|
1e175, 1e176,
|
|
1e177, 1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186,
|
|
1e187, 1e188,
|
|
1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198,
|
|
1e199, 1e200,
|
|
1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, 1e210,
|
|
1e211, 1e212,
|
|
1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222,
|
|
1e223, 1e224,
|
|
1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232, 1e233, 1e234,
|
|
1e235, 1e236,
|
|
1e237, 1e238, 1e239, 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246,
|
|
1e247, 1e248,
|
|
1e249, 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258,
|
|
1e259, 1e260,
|
|
1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 1e270,
|
|
1e271, 1e272,
|
|
1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 1e280, 1e281, 1e282,
|
|
1e283, 1e284,
|
|
1e285, 1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294,
|
|
1e295, 1e296,
|
|
1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306,
|
|
1e307, 1e308
|
|
};
|
|
return constants[exponent + 323];
|
|
}
|
|
|
|
parse_result parse_number(){
|
|
bool negative = false;
|
|
if('-' == *p){
|
|
++p;
|
|
negative = true;
|
|
|
|
if(at_eof()){
|
|
return error("unexpected end of input 1");
|
|
}
|
|
}
|
|
|
|
bool try_double = false;
|
|
|
|
int i = 0;
|
|
double d =
|
|
0.0; // gcc complains that d might be used uninitialized which isn't true. appease the warning anyway.
|
|
for(;;){
|
|
char c = *p;
|
|
if(c < '0' || c > '9'){
|
|
break;
|
|
}
|
|
|
|
char digit = c - '0';
|
|
|
|
if(SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)){
|
|
// TODO: could split this into two loops
|
|
try_double = true;
|
|
d = i;
|
|
}
|
|
if(SAJSON_UNLIKELY(try_double)){
|
|
d = 10.0 * d + digit;
|
|
} else {
|
|
i = 10 * i + digit;
|
|
}
|
|
|
|
++p;
|
|
if(at_eof()){
|
|
if(root_type == TYPE_ARRAY || root_type == TYPE_OBJECT){
|
|
return error("unexpected end of input 2");
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
int exponent = 0;
|
|
|
|
if(!at_eof() && '.' == *p){
|
|
if(!try_double){
|
|
try_double = true;
|
|
d = i;
|
|
}
|
|
++p;
|
|
if(at_eof()){
|
|
return error("unexpected end of input 3");
|
|
}
|
|
for(;;){
|
|
char c = *p;
|
|
if(c < '0' || c > '9'){
|
|
break;
|
|
}
|
|
d = d * 10 + (c - '0');
|
|
--exponent;
|
|
|
|
++p;
|
|
if(at_eof()){
|
|
if(root_type == TYPE_ARRAY || root_type == TYPE_OBJECT){
|
|
return error("unexpected end of input 4");
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if(!at_eof()){
|
|
char e = *p;
|
|
if('e' == e || 'E' == e){
|
|
if(!try_double){
|
|
try_double = true;
|
|
d = i;
|
|
}
|
|
++p;
|
|
if(at_eof()){
|
|
return error("unexpected end of input 5");
|
|
}
|
|
|
|
bool negativeExponent = false;
|
|
if('-' == *p){
|
|
++p;
|
|
negativeExponent = true;
|
|
if(at_eof()){
|
|
return error("unexpected end of input 6");
|
|
}
|
|
} else if('+' == *p){
|
|
++p;
|
|
if(at_eof()){
|
|
return error("unexpected end of input 7");
|
|
}
|
|
}
|
|
|
|
int exp = 0;
|
|
for(;;){
|
|
char c = *p;
|
|
if(c < '0' || c > '9'){
|
|
break;
|
|
}
|
|
|
|
exp = 10 * exp + (c - '0');
|
|
|
|
++p;
|
|
if(at_eof()){
|
|
if(root_type == TYPE_ARRAY || root_type == TYPE_OBJECT){
|
|
return error("unexpected end of input 8");
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
exponent += (negativeExponent ? -exp : exp);
|
|
}
|
|
}
|
|
|
|
if(exponent){
|
|
assert(try_double);
|
|
d *= pow10(exponent);
|
|
}
|
|
|
|
if(negative){
|
|
if(try_double){
|
|
d = -d;
|
|
} else {
|
|
i = -i;
|
|
}
|
|
}
|
|
|
|
if(try_double){
|
|
out -= double_storage::word_length;
|
|
double_storage::store(out, d);
|
|
return TYPE_DOUBLE;
|
|
} else {
|
|
integer_storage is;
|
|
is.i = i;
|
|
|
|
*--out = is.u;
|
|
return TYPE_INTEGER;
|
|
}
|
|
}
|
|
|
|
parse_result install_array(size_t *array_base){
|
|
const size_t length = temp - array_base;
|
|
size_t*const new_base = out - length - 1;
|
|
while(temp > array_base){
|
|
// I think this addition is legal because the tag bits are at the top?
|
|
*(--out) = *(--temp) + (array_base - new_base);
|
|
}
|
|
*(--out) = length;
|
|
|
|
return TYPE_ARRAY;
|
|
}
|
|
|
|
parse_result install_object(size_t *object_base){
|
|
const size_t length = (temp - object_base) / 3;
|
|
object_key_record *oir =
|
|
reinterpret_cast<object_key_record*>(object_base);
|
|
std::sort(
|
|
oir,
|
|
oir + length,
|
|
object_key_comparator(input.get_data()));
|
|
|
|
size_t*const new_base = out - length * 3 - 1;
|
|
size_t i = length;
|
|
while(i--){
|
|
// I think this addition is legal because the tag bits are at the top?
|
|
*(--out) = *(--temp) + (object_base - new_base);
|
|
*(--out) = *(--temp);
|
|
*(--out) = *(--temp);
|
|
}
|
|
*(--out) = length;
|
|
|
|
return TYPE_OBJECT;
|
|
}
|
|
|
|
parse_result parse_string(size_t *tag = 0){
|
|
if(!tag){
|
|
out -= 2;
|
|
tag = out;
|
|
}
|
|
|
|
++p; // "
|
|
size_t start = p - input.get_data();
|
|
for(;;){
|
|
if(SAJSON_UNLIKELY(p >= input_end)){
|
|
return error("unexpected end of input e");
|
|
}
|
|
|
|
if(SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)){
|
|
return error("illegal unprintable codepoint in string");
|
|
}
|
|
|
|
switch(*p){
|
|
case '"':
|
|
tag[0] = start;
|
|
tag[1] = p - input.get_data();
|
|
++p;
|
|
return TYPE_STRING;
|
|
|
|
case '\\':
|
|
return parse_string_slow(tag, start);
|
|
|
|
default:
|
|
++p;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
parse_result read_hex(unsigned &u){
|
|
unsigned v = 0;
|
|
int i = 4;
|
|
while(i--){
|
|
unsigned char c = *p++;
|
|
if(c >= '0' && c <= '9'){
|
|
c -= '0';
|
|
} else if(c >= 'a' && c <= 'f'){
|
|
c = c - 'a' + 10;
|
|
} else if(c >= 'A' && c <= 'F'){
|
|
c = c - 'A' + 10;
|
|
} else {
|
|
return error("invalid character in unicode escape");
|
|
}
|
|
v = (v<<4) + c;
|
|
}
|
|
|
|
u = v;
|
|
return TYPE_NULL; // ???
|
|
}
|
|
|
|
void write_utf8(unsigned codepoint, char* &end){
|
|
if(codepoint < 0x80){
|
|
*end++ = codepoint;
|
|
} else if(codepoint < 0x800){
|
|
*end++ = 0xC0 | (codepoint>>6);
|
|
*end++ = 0x80 | (codepoint & 0x3F);
|
|
} else if(codepoint < 0x10000){
|
|
*end++ = 0xE0 | (codepoint>>12);
|
|
*end++ = 0x80 | ((codepoint>>6) & 0x3F);
|
|
*end++ = 0x80 | (codepoint & 0x3F);
|
|
} else {
|
|
assert(codepoint < 0x200000);
|
|
*end++ = 0xF0 | (codepoint>>18);
|
|
*end++ = 0x80 | ((codepoint>>12) & 0x3F);
|
|
*end++ = 0x80 | ((codepoint>>6) & 0x3F);
|
|
*end++ = 0x80 | (codepoint & 0x3F);
|
|
}
|
|
}
|
|
|
|
parse_result parse_string_slow(size_t *tag, size_t start){
|
|
char *end = p;
|
|
|
|
for(;;){
|
|
if(SAJSON_UNLIKELY(p >= input_end)){
|
|
return error("unexpected end of input f");
|
|
}
|
|
|
|
if(SAJSON_UNLIKELY(*p < 0x20)){
|
|
return error("illegal unprintable codepoint in string");
|
|
}
|
|
|
|
switch(*p){
|
|
case '"':
|
|
tag[0] = start;
|
|
tag[1] = end - input.get_data();
|
|
++p;
|
|
return TYPE_STRING;
|
|
|
|
case '\\':
|
|
++p;
|
|
if(SAJSON_UNLIKELY(p >= input_end)){
|
|
return error("unexpected end of input g");
|
|
}
|
|
|
|
char replacement;
|
|
switch(*p){
|
|
case '"':
|
|
replacement = '"';
|
|
goto replace;
|
|
case '\\':
|
|
replacement = '\\';
|
|
goto replace;
|
|
case '/':
|
|
replacement = '/';
|
|
goto replace;
|
|
case 'b':
|
|
replacement = '\b';
|
|
goto replace;
|
|
case 'f':
|
|
replacement = '\f';
|
|
goto replace;
|
|
case 'n':
|
|
replacement = '\n';
|
|
goto replace;
|
|
case 'r':
|
|
replacement = '\r';
|
|
goto replace;
|
|
case 't':
|
|
replacement = '\t';
|
|
goto replace;
|
|
replace:
|
|
*end++ = replacement;
|
|
++p;
|
|
break;
|
|
case 'u': {
|
|
++p;
|
|
if(SAJSON_UNLIKELY(!has_remaining_characters(4))){
|
|
return error("unexpected end of input h");
|
|
}
|
|
unsigned u =
|
|
0; // gcc's complaining that this could be used uninitialized. wrong.
|
|
parse_result result = read_hex(u);
|
|
if(!result){
|
|
return result;
|
|
}
|
|
if(u >= 0xD800 && u <= 0xDBFF){
|
|
if(SAJSON_UNLIKELY(!has_remaining_characters(6))){
|
|
return error(
|
|
"unexpected end of input during UTF-16 surrogate pair");
|
|
}
|
|
char p0 = p[0];
|
|
char p1 = p[1];
|
|
if(p0 != '\\' || p1 != 'u'){
|
|
return error("expected \\u");
|
|
}
|
|
p += 2;
|
|
unsigned v =
|
|
0; // gcc's complaining that this could be used uninitialized. wrong.
|
|
result = read_hex(v);
|
|
if(!result){
|
|
return result;
|
|
}
|
|
|
|
if(v < 0xDC00 || v > 0xDFFF){
|
|
return error("invalid UTF-16 trail surrogate");
|
|
}
|
|
u = 0x10000 + (((u - 0xD800)<<10) | (v - 0xDC00));
|
|
}
|
|
write_utf8(u, end);
|
|
break;
|
|
}
|
|
default:
|
|
return error("unknown escape");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
*end++ = *p++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
mutable_string_view input;
|
|
char*const input_end;
|
|
size_t*const structure;
|
|
|
|
char *p;
|
|
size_t *temp;
|
|
type root_type;
|
|
size_t *out;
|
|
size_t error_line;
|
|
size_t error_column;
|
|
std::string error_message;
|
|
};
|
|
|
|
template<typename StringType>
|
|
document parse(const StringType &string){
|
|
mutable_string_view ms(string);
|
|
|
|
size_t length = string.length();
|
|
size_t *structure = new size_t[length + 2];
|
|
|
|
return parser(ms, structure).get_document();
|
|
}
|
|
}
|
|
// vim: set noet ts=4 sw=4:
|