New SSCSM minifier
This commit is contained in:
parent
2249e49f1b
commit
0019930f27
@ -122,6 +122,9 @@ function sscsm.register(def)
|
||||
error('Invalid "code" parameter passed to sscsm.register_csm.', 2)
|
||||
end
|
||||
|
||||
if block_colon then
|
||||
def.code = "local minetest=core;" .. def.code
|
||||
end
|
||||
def.code = sscsm.minify_code(def.code)
|
||||
if (#def.name + #def.code) > 65300 then
|
||||
error("The code (or name) passed to sscsm.register_csm is too large."
|
||||
|
@ -1,8 +1,8 @@
|
||||
--
|
||||
-- A primitive code minifier
|
||||
-- A primitive-ish code minifier
|
||||
--
|
||||
-- Copyright © 2019-2021 by luk3yx
|
||||
-- Copyright © 2020-2021 MultiCraft Development Team
|
||||
-- Copyright © 2023 by luk3yx
|
||||
-- Copyright © 2023 MultiCraft Development Team
|
||||
--
|
||||
-- This program is free software; you can redistribute it and/or modify
|
||||
-- it under the terms of the GNU Lesser General Public License as published by
|
||||
@ -19,114 +19,437 @@
|
||||
-- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
--
|
||||
|
||||
-- Find multiple patterns
|
||||
local function find_multiple(text, ...)
|
||||
local n = select('#', ...)
|
||||
local s, e, pattern
|
||||
for i = 1, n do
|
||||
local p = select(i, ...)
|
||||
local s2, e2 = text:find(p)
|
||||
if s2 and (not s or s2 < s) then
|
||||
s, e, pattern = s2, e2 or s2, p
|
||||
local find, sub, byte = string.find, string.sub, string.byte
|
||||
local QUOTE, APOSTROPHE, LBRACKET, BACKSLASH, SPACE, TAB, CR, NEWLINE, HYPHEN,
|
||||
DOT, EQUALS, COLON = byte('"\'[\\ \t\r\n-.=:', 1, -1)
|
||||
|
||||
local function set(...)
|
||||
local res = {}
|
||||
for _, item in ipairs({...}) do
|
||||
res[item] = true
|
||||
end
|
||||
end
|
||||
return s, e, pattern
|
||||
return res
|
||||
end
|
||||
|
||||
-- Matches
|
||||
-- These take 2-3 arguments (code, res, char) and should return code and res.
|
||||
local matches = {
|
||||
-- Handle multi-line strings
|
||||
['%[=*%['] = function(code, res, char)
|
||||
res = res .. char
|
||||
char = char:sub(2, -2)
|
||||
local s, e = code:find(']' .. char .. ']', nil, true)
|
||||
if not s or not e then return code, res end
|
||||
return code:sub(e + 1), res .. code:sub(1, e)
|
||||
end,
|
||||
local whitespace_bytes = set(SPACE, TAB, CR, NEWLINE)
|
||||
|
||||
-- Handle regular comments
|
||||
['--'] = function(code, res, char)
|
||||
local s, e = code:find('\n', nil, true)
|
||||
if not s or not e then return '', res end
|
||||
local function parse_long_string(code, idx, lineno)
|
||||
local marker_start, marker_end = find(code, "^%[=*%[", idx)
|
||||
if not marker_start then
|
||||
return nil, lineno
|
||||
end
|
||||
|
||||
-- Don't remove copyright or license information.
|
||||
if e >= 7 then
|
||||
local first_word = (code:match('^[ \t]*(%w+)') or ''):lower()
|
||||
if first_word == 'copyright' or first_word == 'license' then
|
||||
return code:sub(s), res .. char .. code:sub(1, s - 1)
|
||||
local end_symbol = "]" .. ("="):rep(marker_end - marker_start - 1) .. "]"
|
||||
local _, end_idx = find(code, end_symbol, marker_end + 1, true)
|
||||
end_idx = end_idx or #code + 1
|
||||
|
||||
for i = marker_end + 1, end_idx - 1 do
|
||||
if byte(code, i) == NEWLINE then
|
||||
lineno = lineno + 1
|
||||
end
|
||||
end
|
||||
|
||||
-- Shift trailing spaces back
|
||||
local spaces = res:match('(%s*)$') or ''
|
||||
return spaces .. code:sub(s), res:sub(1, #res - #spaces)
|
||||
end,
|
||||
return end_idx, lineno
|
||||
end
|
||||
|
||||
-- Handle multi-line comments
|
||||
['%-%-%[=*%['] = function(code, res, char)
|
||||
char = char:sub(4, -2)
|
||||
local s, e = code:find(']' .. char .. ']', nil, true)
|
||||
if not s or not e then return code, res end
|
||||
local function tokenise(code)
|
||||
local last_idx = 1
|
||||
local lineno = 1
|
||||
local next_token
|
||||
return function()
|
||||
-- Return the next token if set
|
||||
if next_token ~= nil then
|
||||
local res = next_token
|
||||
next_token = nil
|
||||
return res, lineno
|
||||
end
|
||||
|
||||
-- Shift trailing spaces back
|
||||
local spaces = res:match('(%s*)$') or ''
|
||||
return spaces .. code:sub(e + 1), res:sub(1, #res - #spaces)
|
||||
end,
|
||||
local start_idx, end_idx = find(code, "[%(%):%.\n\"' \t\r%+%-%*/~=<>|;%[%],%{%}#%%%^]", last_idx)
|
||||
if not start_idx then
|
||||
-- HACK: Get the last token
|
||||
if byte(code, -1) == NEWLINE then return end
|
||||
code = code .. "\n"
|
||||
start_idx, end_idx = #code, #code
|
||||
end
|
||||
|
||||
-- Handle quoted text
|
||||
['"'] = function(code, res, char)
|
||||
res = res .. char
|
||||
local symbol = byte(code, start_idx)
|
||||
|
||||
-- Handle backslashes
|
||||
if symbol == QUOTE or symbol == APOSTROPHE then
|
||||
-- Quoted strings
|
||||
local new_symbol
|
||||
repeat
|
||||
local _, e, pattern = find_multiple(code, '\\', char)
|
||||
if pattern == char then
|
||||
res = res .. code:sub(1, e)
|
||||
code = code:sub(e + 1)
|
||||
elseif pattern then
|
||||
res = res .. code:sub(1, e + 1)
|
||||
code = code:sub(e + 2)
|
||||
end_idx = end_idx + 1
|
||||
new_symbol = byte(code, end_idx)
|
||||
-- Skip over backslashes
|
||||
if new_symbol == BACKSLASH then
|
||||
end_idx = end_idx + 1
|
||||
end
|
||||
until not pattern or pattern == char
|
||||
|
||||
return code, res
|
||||
end,
|
||||
|
||||
['%s*[\r\n]%s*'] = function(code, res, char)
|
||||
return code, res .. '\n'
|
||||
end,
|
||||
|
||||
['[ \t]+'] = function(code, res, char)
|
||||
return code, res .. ' '
|
||||
end,
|
||||
}
|
||||
|
||||
-- Give the functions alternate names
|
||||
matches["'"] = matches['"']
|
||||
|
||||
-- The actual transpiler
|
||||
return function(code)
|
||||
assert(type(code) == 'string')
|
||||
|
||||
local res = ''
|
||||
|
||||
-- Split the code by "tokens"
|
||||
until symbol == new_symbol or new_symbol == nil
|
||||
elseif whitespace_bytes[symbol] then
|
||||
-- Consume spaces/tabs
|
||||
while true do
|
||||
-- Search for special characters
|
||||
local s, e, pattern = find_multiple(code, '[\'"\\]', '%-%-%[=*%[',
|
||||
'%-%-', '%[=*%[', '%s*[\r\n]%s*', '[ \t]+')
|
||||
if not s then break end
|
||||
if symbol == NEWLINE then lineno = lineno + 1 end
|
||||
|
||||
-- Add non-matching characters
|
||||
res = res .. code:sub(1, math.max(s - 1, 0))
|
||||
symbol = byte(code, end_idx + 1)
|
||||
if not whitespace_bytes[symbol] then break end
|
||||
end_idx = end_idx + 1
|
||||
end
|
||||
elseif symbol == LBRACKET then
|
||||
-- Long strings (potentially)
|
||||
end_idx, lineno = parse_long_string(code, start_idx, lineno)
|
||||
if not end_idx then end_idx = start_idx end
|
||||
elseif symbol == HYPHEN and byte(code, start_idx + 1) == HYPHEN then
|
||||
-- Comments
|
||||
end_idx, lineno = parse_long_string(code, start_idx + 2, lineno)
|
||||
if not end_idx then
|
||||
end_idx = find(code, "\n", start_idx, true) or #code + 1
|
||||
lineno = lineno + 1
|
||||
end
|
||||
elseif symbol == DOT and byte(code, start_idx + 1) == DOT then
|
||||
-- Varargs and string concatenation
|
||||
end_idx = byte(code, start_idx + 2) == DOT and start_idx + 2 or start_idx + 1
|
||||
elseif (symbol == COLON or symbol == EQUALS) and byte(code, start_idx + 1) == symbol then
|
||||
-- == and goto labels
|
||||
end_idx = start_idx + 1
|
||||
end
|
||||
next_token = sub(code, start_idx, end_idx)
|
||||
|
||||
-- Call the correct function
|
||||
local char = code:sub(s, e)
|
||||
local func = matches[char] or matches[pattern]
|
||||
assert(func, 'No function found for pattern!')
|
||||
code, res = func(code:sub(e + 1), res, char)
|
||||
local res = sub(code, last_idx, start_idx - 1)
|
||||
if res == "" and next_token then
|
||||
res, next_token = next_token, nil
|
||||
end
|
||||
last_idx = end_idx + 1
|
||||
return res, lineno
|
||||
end
|
||||
end
|
||||
|
||||
return (res .. code):trim()
|
||||
-- local string_starting_tokens = set(QUOTE, APOSTROPHE, LBRACKET)
|
||||
|
||||
-- Lookup sets used by the minifier below. Each one maps a token string to
-- true so membership can be tested with a plain table index.

-- Reserved words; process_table refuses these as bare identifier keys.
local keywords = set(
    "and", "break", "do", "else", "elseif", "end",
    "for", "function", "if", "in", "local", "not",
    "or", "repeat", "return", "then", "until", "while"
)

-- Tokens after which process_block descends into a nested block.
local scope_begin = set(
    "then", "do", "repeat"
)

-- Tokens that terminate the block process_block is currently handling.
local scope_end = set(
    "end", "until", "elseif"
)

-- Tokens after which an expression necessarily continues, so process_block
-- does not peek ahead for the end of the expression.
local operators = set(
    "+", "-", "*", "/", "^", "%",
    "=", "==", "~=",
    "<", "<<", "<=",
    ">", ">>", ">=",
    "&", "|", "~", "//",
    "and", "or", "..", "#", "not"
)
|
||||
|
||||
-- Creates an empty child scope. Lookups that miss in the child fall through
-- to `scope` via __index; assignments only ever touch the child.
local function copy_scope(scope)
    local child = {}
    local meta = {__index = scope}
    return setmetatable(child, meta)
end
|
||||
|
||||
local function minify(code)
|
||||
-- Number of short names handed out so far. The counter is shared by every
-- scope, so a generated name is never reused within one minify() call.
local locals_count = 0

-- Allocates the next short name ("_" followed by the counter in lowercase
-- hexadecimal), records it in `scope` as the replacement for `name` and
-- returns it.
local function new_var_name(scope, name)
    local id = locals_count + 1
    locals_count = id

    local short_name = string.format("_%x", id)
    scope[name] = short_name
    return short_name
end
|
||||
|
||||
|
||||
-- Token source with whitespace and comments filtered out.
local raw_token_iterator = tokenise(code)

-- Set whenever whitespace or a comment was skipped; write_token() consults
-- and clears it to decide whether a separating space has to be emitted.
local need_whitespace = false

-- One-token pushback buffer: a token stored here is returned by the next
-- call to token_iterator() instead of reading from the tokeniser.
local next_token

-- Line number of the most recent token read from the tokeniser, reported
-- again when a pushed-back token is returned.
local lineno_cache = -1

-- Returns the next significant token and its line number, or nothing once
-- the input is exhausted.
local function token_iterator()
    local pending = next_token
    if pending ~= nil then
        next_token = nil
        return pending, lineno_cache
    end

    while true do
        local token, lineno = raw_token_iterator()
        if token == nil then
            return
        end

        -- Whitespace tokens start with a whitespace byte, comments with "--"
        local b1, b2 = byte(token, 1, 2)
        if whitespace_bytes[b1] or (b1 == HYPHEN and b2 == HYPHEN) then
            need_whitespace = true
        else
            lineno_cache = lineno
            return token, lineno
        end
    end
end
|
||||
|
||||
-- Output buffer (one entry per emitted token or separating space) and the
-- most recently written token.
local res = {}
local last_token = ""

-- Appends `token` to the output, preceded by a single space when whitespace
-- was skipped since the previous write (need_whitespace, declared earlier in
-- the enclosing function) and gluing the two tokens together would change
-- how the result is lexed.
--
-- A space is kept between:
--   * two tokens that would merge into one name/number (`local x`),
--   * a token ending in a digit and `..` (`1 .. x` must not become `1..x`),
--   * two minus signs (`- -x` must not become the comment `--x`),
--   * a token ending in "." and a token starting with "." (`x .. ...`,
--     `1. .. x`), which would otherwise be re-split differently,
--   * "[" and a long-bracket string (`t[ [[k]] ]` must not become `t[[[k]]]`).
local function write_token(token)
    if need_whitespace then
        need_whitespace = false

        local prev = last_token
        if (token:find("^[A-Za-z0-9_]") and prev:find("[A-Za-z0-9_]$")) or
                (token == ".." and prev:find("[0-9]$")) or
                (token == "-" and prev == "-") or
                (token:find("^%.") and prev:find("%.$")) or
                (prev == "[" and token:find("^%[=*%[")) then
            res[#res + 1] = " "
        end
    end

    res[#res + 1] = token
    last_token = token
end
|
||||
|
||||
-- Expects to be called after the ( token
|
||||
local process_block
|
||||
-- Minifies a function's parameter list and body. The opening "(" must
-- already have been read (but not written) by the caller.
--
-- old_scope: scope of the enclosing block. It is not modified; parameters
--            are registered in a child scope and the body gets a further
--            child scope of its own.
--
-- Raises an error when the parameter list is malformed or the input ends
-- before the closing ")".
local function process_function(old_scope)
    local scope = copy_scope(old_scope)
    write_token("(")

    -- token_iterator() returns nothing at EOF, so keep the line of the last
    -- token that was actually read for use in the EOF error message.
    local last_lineno
    local token, lineno = token_iterator()
    while token do
        last_lineno = lineno
        if token == ")" then
            write_token(")")
            process_block(copy_scope(scope), "function")
            return
        end
        assert(not scope_begin[token] and not scope_end[token], token)

        -- Every parameter except the vararg marker gets a short name
        if token ~= "..." then
            token = new_var_name(scope, token)
        end
        write_token(token)

        token, lineno = token_iterator()
        if token == "," then
            write_token(",")
            last_lineno = lineno
            token, lineno = token_iterator()
        elseif token and token ~= ")" then
            error(("Unexpected token %q on line %d"):format(token, lineno))
        end
        -- A nil token (EOF) ends the loop and is reported below
    end

    error("Unexpected EOF on line " .. tostring(last_lineno or "?"))
end
|
||||
|
||||
-- Handles a `local` statement; the `local` keyword itself has already been
-- read by the caller. Covers both `local function name(...)` and
-- `local a, b = x, y`.
--
-- For variable lists the short names are only registered in `scope` after
-- the initialisers were processed, so the right-hand side is still resolved
-- against the bindings that existed before the declaration.
local function process_declaration(scope)
    write_token("local")

    local token = token_iterator()
    if token == "function" then
        write_token("function")
        local fn_name = token_iterator()
        write_token(new_var_name(scope, fn_name))
        assert(token_iterator() == "(", "Expected ( after local function")
        process_function(scope)
        return
    end

    -- Positions in `res` that hold the (not yet renamed) variable names
    local name_slots = {}
    local reading_names = true
    while reading_names do
        -- Variable name
        write_token(token)
        name_slots[#name_slots + 1] = #res

        token = token_iterator()
        if token == "," then
            write_token(token)
            token = token_iterator()
        else
            reading_names = false
            if token == "=" then
                write_token("=")

                -- At most one initialiser expression per declared name
                local total = #name_slots
                for i = 1, total do
                    process_block(scope, "local")
                    if i < total then
                        token = token_iterator()
                        if token ~= "," then
                            next_token = token
                            break
                        end
                        write_token(",")
                    end
                end
            else
                -- Declaration without initialiser: hand the token back
                next_token = token
            end
        end
    end

    -- Swap the original names in the output for freshly allocated ones
    for i = 1, #name_slots do
        local slot = name_slots[i]
        res[slot] = new_var_name(scope, res[slot])
    end
end
|
||||
|
||||
-- Minifies a table constructor. The opening "{" has already been read by
-- the caller; this function consumes everything up to and including the
-- matching "}".
--
-- Field keys written as `name =` are kept verbatim, while bare identifiers
-- used as values are replaced through `scope`. A trailing "," directly
-- before "}" is dropped from the output.
--
-- Raises an error on an unexpected separator or when the input ends inside
-- the constructor.
local function process_table(scope)
    write_token("{")
    while true do
        local token = token_iterator()
        if token == nil then
            error("Unexpected EOF inside table constructor")
        end

        -- Key
        local expect_value = false
        -- "function" and "not" look like identifiers but start an expression
        -- (`{function() end}`, `{not x}`), so they take the generic branch.
        if token ~= "function" and token ~= "not" and
                token:find("^[A-Za-z_][A-Za-z0-9_]*$") then
            assert(not keywords[token])

            -- If this is a variable name then swap it with the scope
            next_token = token_iterator()
            expect_value = next_token == "="
            if expect_value then
                write_token(token)
            else
                write_token(scope[token] or token)
                if next_token ~= "," and next_token ~= "}" then
                    -- The lookahead above already consumed any whitespace
                    -- after the identifier, so request it again explicitly.
                    need_whitespace = true
                    process_block(scope, "{")
                end
            end
        elseif token == "[" then
            write_token("[")
            process_block(scope, "[")
            expect_value = true
        elseif token == "}" then
            if res[#res] == "," then
                -- Drop the trailing comma by overwriting it
                res[#res] = "}"
            else
                write_token(token)
            end
            break
        else
            next_token = token
            process_block(scope, "{")
        end

        -- Separator (, or =)
        local lineno
        token, lineno = token_iterator()
        if expect_value then
            assert(token == "=")
            write_token("=")
            process_block(scope, "{")

            token, lineno = token_iterator()
        end

        if token == nil then
            error("Unexpected EOF inside table constructor")
        end
        write_token(token)

        if token == "}" then
            break
        elseif token ~= "," then
            error(("Unexpected token %q on line %d, expected ','"):format(token, lineno))
        end
    end
end
|
||||
|
||||
-- Minifies tokens until the construct opened by `block_start` is closed or
-- the input runs out.
--
-- scope:       maps original local names to their short replacements for
--              the block being processed.
-- block_start: the token that opened this block ("then", "do", "repeat",
--              "function", "(", "[", "{"), "local" when a single expression
--              is being parsed, or nil for the top-level chunk.
function process_block(scope, block_start)
    -- In expression mode the function returns as soon as the expression ends
    local in_expression = block_start == "local" or block_start == "{"

    -- Scope holding the loop variables of a `for` whose `do` is still pending
    local pending_for_scope

    for token, lineno in token_iterator do
        if whitespace_bytes[byte(token)] then
            if last_token and last_token:find("[A-Za-z0-9_]$") then
                need_whitespace = true
            end

        elseif token == "function" then
            -- Process functions
            -- TODO: Maybe make this verify syntax?
            write_token("function")
            local is_first = true
            for part in token_iterator do
                if part == "(" then
                    process_function(scope)
                    break
                end

                -- Only the first name component can be a renamed local
                local name = part
                if is_first then
                    name = scope[part] or part
                    is_first = false
                end
                write_token(name)
            end

        elseif token == "local" then
            -- Process local declarations
            assert(block_start ~= "local")
            process_declaration(scope)

        elseif token == "for" then
            assert(block_start ~= "local")
            write_token("for")
            pending_for_scope = copy_scope(scope)
            -- Remember where the statement started for error messages
            pending_for_scope["?"] = lineno
            for part in token_iterator do
                if part == "in" or part == "=" then
                    write_token(part)
                    break
                end

                if part == "," then
                    write_token(part)
                else
                    write_token(new_var_name(pending_for_scope, part))
                end
            end

        elseif scope_begin[token] then
            write_token(token)
            if not pending_for_scope then
                process_block(copy_scope(scope), token)
            elseif token == "do" then
                -- The loop body sees the loop variables
                process_block(pending_for_scope, token)
                pending_for_scope = nil
            else
                error(("Unexpected token %q on line %d (for statement on line %d)"):format(token, lineno, pending_for_scope["?"]))
            end

        elseif scope_end[token] then
            if token == "elseif" then
                assert(block_start == "then", "Mismatched elseif")
            elseif block_start == "repeat" then
                assert(token == "until", "Mismatched until")
                write_token(token)
                -- The condition is parsed as an expression in this scope
                process_block(scope, "local")
                return
            elseif not (token == "end" and block_start ~= nil and
                    block_start ~= "(" and block_start ~= "local") then
                error("Mismatched end on line " .. lineno)
            end

            write_token(token)
            return

        -- Use recursion with brackets if required
        elseif (token == "(" or token == "[") and
                (token == block_start or in_expression) then
            write_token(token)
            process_block(scope, token)

        elseif (block_start == "(" and token == ")") or
                (block_start == "[" and token == "]") then
            write_token(token)
            return

        elseif token == "else" then
            assert(block_start == "then", "Mismatched else")

            -- Reset the scope
            for var in pairs(scope) do
                scope[var] = nil
            end
            write_token(token)

        elseif token == "{" then
            -- Parse tables
            process_table(scope)

        elseif token == "." or token == ":" then
            -- Parse attributes
            write_token(token)

            local attr = assert(token_iterator(), "Expected attribute, got EOF")
            if attr:find("^[A-Za-z0-9_]") then
                -- Attribute (or decimal place on a number)
                write_token(attr)
            else
                -- Not an attribute (for example 1.)
                next_token = attr
            end

        else
            write_token(scope[token] or token)
        end

        -- Peek at the next token
        if in_expression and not operators[token] then
            local upcoming = token_iterator()
            next_token = upcoming

            if upcoming == nil or upcoming == "," then
                return
            end
            if block_start == "{" and upcoming == "}" then
                return
            end
            if upcoming ~= "and" and upcoming ~= "or" and
                    upcoming:find("^[A-Za-z0-9_]") then
                return
            end
        end
    end
end
|
||||
|
||||
-- Process the whole chunk starting with an empty top-level scope
process_block({})

-- Anything still left in the token stream was not consumed by the chunk
local leftover, leftover_line = token_iterator()
if leftover then
    local message = string.format("Unexpected data %q on line %d, expected EOF",
        leftover, leftover_line)
    error(message)
end

return table.concat(res)
|
||||
end
|
||||
|
||||
return minify
|
||||
|
Loading…
x
Reference in New Issue
Block a user