131 lines
3.7 KiB
Lua
131 lines
3.7 KiB
Lua
--[[
|
|
Licensed according to the included 'LICENSE' document
|
|
Author: Thomas Harning Jr <harningt@gmail.com>
|
|
]]
|
|
local lpeg = require("lpeg")
|
|
local util = require("json.decode.util")
|
|
local merge = require("json.util").merge
|
|
|
|
local tonumber = tonumber
|
|
local string_char = require("string").char
|
|
local floor = require("math").floor
|
|
local table_concat = require("table").concat
|
|
|
|
local error = error
|
|
module("json.decode.strings")
|
|
-- Build an error-raising callback for one category of string-decoding failure.
--
-- item: human-readable description of the problem; it becomes the prefix of
--       the error message.
-- Returns a function(data, index) that asks util.get_invalid_character_info
-- for the position details of `index` inside `data` and then raises an error
-- of the form:  <item> in string [<quoted char>] @ <line>:<line index>
-- The returned function never returns normally.
local function get_error(item)
    local template = item .. " in string [%q] @ %i:%i"
    local function raise(data, index)
        -- Any further values returned by the helper are not needed here.
        local line, line_index, bad_char = util.get_invalid_character_info(data, index)
        local message = template:format(bad_char, line, line_index)
        error(message)
    end
    return raise
end
|
|
|
|
-- Error raisers, one per failure category reported while decoding a string.
-- Each is a function(data, index) produced by get_error and raises when called.
local bad_unicode, bad_hex, bad_character, bad_escape =
    get_error("Illegal unicode escape"),
    get_error("Illegal hex escape"),
    get_error("Illegal character"),
    get_error("Illegal escape")
|
|
|
|
-- Maps the character that follows a backslash in an encoded string to the
-- text it decodes to. Used as the lookup table of the doSimpleSub capture.
local knownReplacements = {
    -- Quote, backslash and slash decode to themselves.
    ["'"] = "'",
    ['"'] = '"',
    ['\\'] = '\\',
    ['/'] = '/',
    -- Control-character escapes.
    b = '\b',
    f = '\f',
    n = '\n',
    r = '\r',
    t = '\t',
    v = '\v',
    -- Written as a plain 'z' on purpose. The literal '\z' evaluates to "z"
    -- on Lua 5.1, but on Lua 5.2+ and LuaJIT "\z" is the "skip following
    -- whitespace" escape, so the literal silently becomes the empty string.
    -- Spelling the value out keeps the Lua 5.1 result on every runtime.
    z = 'z'
}
|
|
|
|
-- Encode one 16-bit code unit, supplied as two hexadecimal byte strings, as a
-- UTF-8 byte sequence. Bit layout follows the table at
-- http://da.wikipedia.org/wiki/UTF-8
--
-- code1: hex text of the high byte (e.g. "20")
-- code2: hex text of the low byte  (e.g. "ac")
-- Returns a 1-, 2- or 3-byte string depending on the magnitude of the value.
local function utf8DecodeUnicode(code1, code2)
    local hi = tonumber(code1, 16)
    local lo = tonumber(code2, 16)
    if hi == 0 and lo < 0x80 then
        -- Below 0x80: the value is its own single-byte encoding.
        return string_char(lo)
    elseif hi < 0x08 then
        -- Below 0x0800: two bytes, 110xxxxx 10xxxxxx.
        local lead = 0xC0 + hi * 4 + floor(lo / 64)
        local tail = 0x80 + lo % 64
        return string_char(lead, tail)
    else
        -- Everything else: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
        local lead = 0xE0 + floor(hi / 16)
        local middle = 0x80 + (hi % 16) * 4 + floor(lo / 64)
        local tail = 0x80 + lo % 64
        return string_char(lead, middle, tail)
    end
end
|
|
|
|
-- Turn the hexadecimal text of a \x escape into the single byte it denotes.
--
-- code: hex digits as a string (e.g. "41")
-- Returns a one-character string holding that byte value.
local function decodeX(code)
    return string_char(tonumber(code, 16))
end
|
|
|
|
-- Building blocks for the text that may follow a backslash.

-- Single-character escapes: capture the character and look up its
-- replacement in knownReplacements.
local simpleEscapeChar = lpeg.S("'\"\\/bfnrtvz")
local doSimpleSub = lpeg.C(simpleEscapeChar) / knownReplacements

-- One captured util.hexpair; shared by the \u and \x forms below.
local hexPairCapture = lpeg.C(util.hexpair)

-- 'u' followed by two captured hex pairs; if they are missing, the second
-- alternative invokes bad_unicode, which raises.
local doUniSub = lpeg.P('u') * (hexPairCapture * hexPairCapture + lpeg.P(bad_unicode))

-- 'x' followed by one captured hex pair; if it is missing, the second
-- alternative invokes bad_hex, which raises.
local doXSub = lpeg.P('x') * (hexPairCapture + lpeg.P(bad_hex))
|
|
|
|
-- Look-ahead used by the default options: the character after the backslash
-- must be one of these escape letters, otherwise buildCapture's bad_escape
-- alternative is taken.
local defaultEscapeCheck = #lpeg.S('bfnrtv/\\"xu\'z')

-- Option values used when the caller supplies none; caller-supplied options
-- are merged over these in buildCapture.
local defaultOptions = {
    -- false: single-quoted strings are accepted in addition to double-quoted
    strict_quotes = false,
    -- converts the two hex pairs of a \u escape into output text
    decodeUnicode = utf8DecodeUnicode,
    -- predicate applied before an escape is interpreted
    escapeCheck = defaultEscapeCheck,
    -- false: no extra escape pattern is appended to the built-in ones
    additionalEscapes = false,
    -- no characters are rejected inside the string body
    badChars = ''
}
|
|
|
|
-- The 'default' preset is deliberately nil: buildCapture then uses
-- defaultOptions directly instead of merging into a fresh table.
default = nil

-- Escape letters accepted by the 'strict' preset; unlike the defaults, the
-- x, ' and z escapes are not in this set.
local strictEscapeCheck = #lpeg.S('bfnrtv/\\"u')

-- 'strict' preset. Fields not listed here come from defaultOptions when the
-- preset is merged in buildCapture.
strict = {
    -- only double-quoted strings are accepted
    strict_quotes = true,
    -- only the escape letters in strictEscapeCheck may follow a backslash
    escapeCheck = strictEscapeCheck,
    -- no additional escapes
    additionalEscapes = false,
    -- these raw control characters are rejected inside a string body
    badChars = '\b\f\n\r\t\v'
}
|
|
|
|
-- Build the pattern for one string literal delimited by `quote`.
--
-- quote:       the delimiter character, as a string
-- badChars:    string of characters that may not appear unescaped in the body
-- escapeMatch: pattern applied to whatever follows a backslash
-- Returns a pattern matching  quote body quote  whose capture is the body
-- with every escape substituted (substitution capture).
local function buildCaptureString(quote, badChars, escapeMatch)
    local delimiter = lpeg.P(quote)

    -- Any character except a backslash, a forbidden character or the delimiter.
    local plainChar = 1 - lpeg.S("\\" .. badChars .. quote)
    -- A backslash, replaced by nothing, followed by the escape's replacement.
    local escapedChar = (lpeg.P("\\") / "") * escapeMatch
    -- Anything else that is not the closing delimiter: raise via bad_character.
    local illegalChar = -#delimiter * lpeg.P(bad_character)

    local body = (plainChar + escapedChar + illegalChar)^0
    return delimiter * lpeg.Cs(body) * delimiter
end
|
|
|
|
-- Assemble the complete string pattern for a set of options.
--
-- options: table of overrides merged over defaultOptions, or nil/false to
--          use defaultOptions as-is.
-- Returns a pattern that matches a string in any of the accepted quote styles
-- and captures its decoded contents.
local function buildCapture(options)
    local opts = defaultOptions
    if options then
        opts = merge({}, defaultOptions, options) or defaultOptions
    end

    -- What may follow a backslash: simple escapes first, then \x, then \u.
    local escapes = doSimpleSub + doXSub / decodeX + doUniSub / opts.decodeUnicode
    if opts.additionalEscapes then
        escapes = escapes + opts.additionalEscapes
    end
    if opts.escapeCheck then
        -- Escapes that fail the check fall through to bad_escape, which raises.
        escapes = opts.escapeCheck * escapes + lpeg.P(bad_escape)
    end

    -- Double quotes are always accepted; single quotes only when not strict.
    local quoteChars = opts.strict_quotes and { '"' } or { '"', "'" }

    local capture = buildCaptureString(quoteChars[1], opts.badChars, escapes)
    for i = 2, #quoteChars do
        capture = capture + buildCaptureString(quoteChars[i], opts.badChars, escapes)
    end
    return capture
end
|
|
|
|
-- Register the type name this module contributes with the shared decode
-- utilities. Takes no arguments and returns nothing.
function register_types()
    local type_name = "STRING"
    util.register_type(type_name)
end
|
|
|
|
-- Install the string rule into a grammar table.
--
-- options:        string-decoding options handed to buildCapture (may be nil)
-- global_options: accepted but not used by this function
-- grammar:        grammar table; receives the string rule under the key
--                 util.types.STRING, and a reference to that rule is appended
--                 to its "VALUE" entry via util.append_grammar_item
function load_types(options, global_options, grammar)
    local string_rule = buildCapture(options)
    local rule_id = util.types.STRING
    grammar[rule_id] = string_rule
    util.append_grammar_item(grammar, "VALUE", lpeg.V(rule_id))
end
|