1
0

New SSCSM minifier

This commit is contained in:
luk3yx 2023-02-21 21:56:27 +13:00
parent 2249e49f1b
commit 0019930f27
2 changed files with 429 additions and 103 deletions

View File

@ -122,6 +122,9 @@ function sscsm.register(def)
error('Invalid "code" parameter passed to sscsm.register_csm.', 2)
end
if block_colon then
def.code = "local minetest=core;" .. def.code
end
def.code = sscsm.minify_code(def.code)
if (#def.name + #def.code) > 65300 then
error("The code (or name) passed to sscsm.register_csm is too large."

View File

@ -1,8 +1,8 @@
--
-- A primitive code minifier
-- A primitive-ish code minifier
--
-- Copyright © 2019-2021 by luk3yx
-- Copyright © 2020-2021 MultiCraft Development Team
-- Copyright © 2023 by luk3yx
-- Copyright © 2023 MultiCraft Development Team
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published by
@ -19,114 +19,437 @@
-- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
--
-- Find multiple patterns
local function find_multiple(text, ...)
local n = select('#', ...)
local s, e, pattern
for i = 1, n do
local p = select(i, ...)
local s2, e2 = text:find(p)
if s2 and (not s or s2 < s) then
s, e, pattern = s2, e2 or s2, p
end
local find, sub, byte = string.find, string.sub, string.byte
local QUOTE, APOSTROPHE, LBRACKET, BACKSLASH, SPACE, TAB, CR, NEWLINE, HYPHEN,
DOT, EQUALS, COLON = byte('"\'[\\ \t\r\n-.=:', 1, -1)
local function set(...)
local res = {}
for _, item in ipairs({...}) do
res[item] = true
end
return s, e, pattern
return res
end
-- Matches
-- These take 2-3 arguments (code, res, char) and should return code and res.
local matches = {
-- Handle multi-line strings
['%[=*%['] = function(code, res, char)
res = res .. char
char = char:sub(2, -2)
local s, e = code:find(']' .. char .. ']', nil, true)
if not s or not e then return code, res end
return code:sub(e + 1), res .. code:sub(1, e)
end,
local whitespace_bytes = set(SPACE, TAB, CR, NEWLINE)
-- Handle regular comments
['--'] = function(code, res, char)
local s, e = code:find('\n', nil, true)
if not s or not e then return '', res end
local function parse_long_string(code, idx, lineno)
local marker_start, marker_end = find(code, "^%[=*%[", idx)
if not marker_start then
return nil, lineno
end
-- Don't remove copyright or license information.
if e >= 7 then
local first_word = (code:match('^[ \t]*(%w+)') or ''):lower()
if first_word == 'copyright' or first_word == 'license' then
return code:sub(s), res .. char .. code:sub(1, s - 1)
local end_symbol = "]" .. ("="):rep(marker_end - marker_start - 1) .. "]"
local _, end_idx = find(code, end_symbol, marker_end + 1, true)
end_idx = end_idx or #code + 1
for i = marker_end + 1, end_idx - 1 do
if byte(code, i) == NEWLINE then
lineno = lineno + 1
end
end
return end_idx, lineno
end
-- Splits Lua source code into raw tokens.
--
-- Returns an iterator function. Each call returns `token, lineno`, or nothing
-- once the input is exhausted. Tokens are either "words" (runs of characters
-- containing none of the delimiter characters below) or delimiter tokens:
-- quoted strings, runs of whitespace, long strings, comments, "..", "...",
-- "==", "::" or a single delimiter character.
--
-- `lineno` is the line on which the returned token starts. The line counter
-- is sampled before the delimiter is consumed, so a word that is followed by
-- one or more newlines is still reported on its own line.
local function tokenise(code)
    local last_idx = 1
    local lineno = 1
    -- Delimiter token (and its line) held back while the word that preceded
    -- it is returned first
    local next_token, next_lineno
    return function()
        -- Return the buffered delimiter token if set
        if next_token ~= nil then
            local res, res_lineno = next_token, next_lineno
            next_token, next_lineno = nil, nil
            return res, res_lineno
        end

        local start_idx, end_idx = find(code, "[%(%):%.\n\"' \t\r%+%-%*/~=<>|;%[%],%{%}#%%%^]", last_idx)
        if not start_idx then
            -- HACK: Get the last token by appending a newline so that the
            -- final word is terminated by a delimiter
            if byte(code, -1) == NEWLINE then return end
            code = code .. "\n"
            start_idx, end_idx = #code, #code
        end

        -- Words cannot contain newlines, so the word (if any) and the
        -- delimiter both start on the current line
        local token_lineno = lineno

        local symbol = byte(code, start_idx)
        if symbol == QUOTE or symbol == APOSTROPHE then
            -- Quoted strings
            local new_symbol
            repeat
                end_idx = end_idx + 1
                new_symbol = byte(code, end_idx)
                -- Skip over the character following a backslash
                if new_symbol == BACKSLASH then
                    end_idx = end_idx + 1
                end
            until symbol == new_symbol or new_symbol == nil
        elseif whitespace_bytes[symbol] then
            -- Consume the whole run of whitespace, counting newlines
            while true do
                if symbol == NEWLINE then lineno = lineno + 1 end
                symbol = byte(code, end_idx + 1)
                if not whitespace_bytes[symbol] then break end
                end_idx = end_idx + 1
            end
        elseif symbol == LBRACKET then
            -- Long strings (potentially); a plain "[" stays a 1-char token
            end_idx, lineno = parse_long_string(code, start_idx, lineno)
            if not end_idx then end_idx = start_idx end
        elseif symbol == HYPHEN and byte(code, start_idx + 1) == HYPHEN then
            -- Comments: try a long comment first, then fall back to a line
            -- comment that runs up to and including the next newline
            end_idx, lineno = parse_long_string(code, start_idx + 2, lineno)
            if not end_idx then
                end_idx = find(code, "\n", start_idx, true)
                if end_idx then
                    lineno = lineno + 1
                else
                    -- Comment runs to the end of the input: no newline to count
                    end_idx = #code + 1
                end
            end
        elseif symbol == DOT and byte(code, start_idx + 1) == DOT then
            -- Varargs and string concatenation
            end_idx = byte(code, start_idx + 2) == DOT and start_idx + 2 or start_idx + 1
        elseif (symbol == COLON or symbol == EQUALS) and byte(code, start_idx + 1) == symbol then
            -- == and goto labels
            end_idx = start_idx + 1
        end

        local delimiter = sub(code, start_idx, end_idx)
        local word = sub(code, last_idx, start_idx - 1)
        last_idx = end_idx + 1

        -- No word before the delimiter: return the delimiter straight away
        if word == "" then
            return delimiter, token_lineno
        end

        -- Otherwise return the word now and the delimiter on the next call
        next_token, next_lineno = delimiter, token_lineno
        return word, token_lineno
    end
end
-- local string_starting_tokens = set(QUOTE, APOSTROPHE, LBRACKET)
-- Words that are rejected as bare table keys by process_table
local keywords = set(
    "and", "break", "do", "else", "elseif", "end", "for", "function", "if",
    "in", "local", "not", "or", "repeat", "return", "then", "until", "while"
)

-- Tokens that open a nested block in process_block
local scope_begin = set("then", "do", "repeat")

-- Tokens that close (or, for "elseif", hand over) the current block
local scope_end = set("end", "until", "elseif")

-- Tokens after which process_block does not peek ahead for the end of an
-- expression.
-- NOTE(review): tokenise only appears to merge "..", "...", "==" and "::"
-- into multi-character tokens, so entries such as "~=" or "<=" may never be
-- matched as a single token - confirm.
local operators = set(
    -- Arithmetic
    "+", "-", "*", "/", "//", "^", "%",
    -- Assignment and comparison
    "=", "==", "~=", "<", "<=", ">", ">=",
    -- Bitwise
    "<<", ">>", "&", "|", "~",
    -- Logical, concatenation and length
    "and", "or", "not", "..", "#"
)
-- Creates a child scope: an empty table whose missing keys are looked up in
-- `scope`, so names added to the child do not leak into the parent.
local function copy_scope(scope)
    local child = {}
    local meta = {__index = scope}
    return setmetatable(child, meta)
end
local function minify(code)
local locals_count = 0
-- Allocates the next short name ("_" followed by the running counter in
-- lowercase hex), records it in `scope` under `name` and returns it.
local function new_var_name(scope, name)
    local index = locals_count + 1
    locals_count = index
    local short_name = string.format("_%x", index)
    scope[name] = short_name
    return short_name
end
-- Remove whitespace and comments when iterating
local raw_token_iterator = tokenise(code)
local need_whitespace = false
local next_token
local lineno_cache = -1
-- Returns the next significant token and its line number, or nothing when
-- the raw tokeniser is exhausted. A token pushed back through `next_token`
-- is returned first, paired with the last line number seen. Whitespace and
-- comment tokens are dropped, but set `need_whitespace` so write_token can
-- re-insert a separating space where one is required.
local function token_iterator()
    local pending = next_token
    if pending ~= nil then
        next_token = nil
        return pending, lineno_cache
    end

    while true do
        local token, lineno = raw_token_iterator()
        if token == nil then
            return
        end

        local first_byte = byte(token, 1)
        local is_comment = first_byte == HYPHEN and byte(token, 2) == HYPHEN
        if whitespace_bytes[first_byte] or is_comment then
            need_whitespace = true
        else
            lineno_cache = lineno
            return token, lineno
        end
    end
end
-- Write tokens and any whitespace if necessary
local res = {}
local last_token = ""
local function write_token(token)
if need_whitespace then
need_whitespace = false
if (token:find("^[A-Za-z0-9_]") and last_token:find("[A-Za-z0-9_]$")) or
(token == ".." and last_token:find("[0-9]$") or
(token == "-" and last_token == "-")) then
res[#res + 1] = " "
end
end
-- Shift trailing spaces back
local spaces = res:match('(%s*)$') or ''
return spaces .. code:sub(s), res:sub(1, #res - #spaces)
end,
-- Handle multi-line comments
['%-%-%[=*%['] = function(code, res, char)
char = char:sub(4, -2)
local s, e = code:find(']' .. char .. ']', nil, true)
if not s or not e then return code, res end
-- Shift trailing spaces back
local spaces = res:match('(%s*)$') or ''
return spaces .. code:sub(e + 1), res:sub(1, #res - #spaces)
end,
-- Handle quoted text
['"'] = function(code, res, char)
res = res .. char
-- Handle backslashes
repeat
local _, e, pattern = find_multiple(code, '\\', char)
if pattern == char then
res = res .. code:sub(1, e)
code = code:sub(e + 1)
elseif pattern then
res = res .. code:sub(1, e + 1)
code = code:sub(e + 2)
end
until not pattern or pattern == char
return code, res
end,
['%s*[\r\n]%s*'] = function(code, res, char)
return code, res .. '\n'
end,
['[ \t]+'] = function(code, res, char)
return code, res .. ' '
end,
}
-- Give the functions alternate names
matches["'"] = matches['"']
-- The actual transpiler
return function(code)
assert(type(code) == 'string')
local res = ''
-- Split the code by "tokens"
while true do
-- Search for special characters
local s, e, pattern = find_multiple(code, '[\'"\\]', '%-%-%[=*%[',
'%-%-', '%[=*%[', '%s*[\r\n]%s*', '[ \t]+')
if not s then break end
-- Add non-matching characters
res = res .. code:sub(1, math.max(s - 1, 0))
-- Call the correct function
local char = code:sub(s, e)
local func = matches[char] or matches[pattern]
assert(func, 'No function found for pattern!')
code, res = func(code:sub(e + 1), res, char)
res[#res + 1] = token
last_token = token
end
return (res .. code):trim()
-- Expects to be called after the ( token
local process_block
-- Processes a function's parameter list and body.
-- Expects to be called right after the "(" token has been consumed.
--
-- Every parameter except "..." is renamed in a fresh child of `old_scope`,
-- then the body is handed to process_block with block type "function".
-- Raises an error on an unexpected token or when the input ends before the
-- closing ")" is seen.
local function process_function(old_scope)
    local scope = copy_scope(old_scope)
    write_token("(")
    local token, lineno = token_iterator()
    while token do
        if token == ")" then
            write_token(")")
            process_block(copy_scope(scope), "function")
            return
        end

        -- Block keywords cannot be parameter names
        assert(not scope_begin[token] and not scope_end[token], token)
        if token ~= "..." then
            token = new_var_name(scope, token)
        end
        write_token(token)

        token, lineno = token_iterator()
        if token == "," then
            write_token(",")
            token, lineno = token_iterator()
        elseif token ~= nil and token ~= ")" then
            error(("Unexpected token %q on line %d"):format(token, lineno))
        end
        -- A nil token (EOF) falls out of the loop into the error below
    end

    -- token_iterator returns no line number at EOF, so report the line of
    -- the last token that was read instead
    error("Unexpected EOF on line " .. lineno_cache)
end
-- Processes a "local" statement; the "local" keyword itself has already been
-- consumed by the caller.
--
-- "local function name(" registers the new name before the body is
-- processed. For "local a, b = ..." the names are written out unchanged
-- first and only swapped for their short names after the right-hand side has
-- been processed, so the values are resolved against the previous scope.
local function process_declaration(scope)
    write_token("local")
    local name = token_iterator()

    if name == "function" then
        write_token("function")
        local func_name = token_iterator()
        write_token(new_var_name(scope, func_name))
        local opening = token_iterator()
        assert(opening == "(", "Expected ( after local function")
        process_function(scope)
        return
    end

    -- Positions in `res` of the declared names
    local name_slots = {}
    local parsing_names = true
    while parsing_names do
        -- Variable name
        write_token(name)
        name_slots[#name_slots + 1] = #res

        local separator = token_iterator()
        if separator == "," then
            write_token(separator)
            name = token_iterator()
        else
            parsing_names = false
            if separator == "=" then
                write_token("=")
                -- Process at most one value expression per declared name
                local pending = #name_slots
                while pending > 0 do
                    process_block(scope, "local")
                    pending = pending - 1
                    if pending > 0 then
                        local comma = token_iterator()
                        if comma ~= "," then
                            -- Fewer values than names: push the token back
                            next_token = comma
                            break
                        end
                        write_token(",")
                    end
                end
            else
                -- Declaration without values: push the token back
                next_token = separator
            end
        end
    end

    -- Now that the values are written, rename the declared variables
    for i = 1, #name_slots do
        local slot = name_slots[i]
        res[slot] = new_var_name(scope, res[slot])
    end
end
-- Processes a table constructor; the opening "{" has already been consumed
-- by the caller.
--
-- `name = value` keys are written unchanged, while bare names used as values
-- are swapped for their renamed local if `scope` has one. A trailing comma
-- before the closing "}" is dropped. Raises an error on an unexpected
-- separator or when the input ends inside the table.
local function process_table(scope)
    write_token("{")
    while true do
        local token = token_iterator()
        if token == nil then
            error("Unexpected EOF in table on line " .. lineno_cache)
        end

        -- Key
        local expect_value = false
        -- "not" looks like an identifier but starts an expression
        -- (for example {not x}), so it is handled by the generic branch
        if token ~= "function" and token ~= "not" and
                token:find("^[A-Za-z_][A-Za-z0-9_]*$") then
            assert(not keywords[token])

            -- If this is a variable name then swap it with the scope
            next_token = token_iterator()
            expect_value = next_token == "="
            if expect_value then
                write_token(token)
            else
                write_token(scope[token] or token)
                if next_token ~= "," and next_token ~= "}" then
                    -- write_token has just cleared the flag that was set
                    -- while peeking, so restore it for the pushed-back token
                    need_whitespace = true
                    process_block(scope, "{")
                end
            end
        elseif token == "[" then
            write_token("[")
            process_block(scope, "[")
            expect_value = true
        elseif token == "}" then
            -- Drop a trailing comma
            if res[#res] == "," then
                res[#res] = "}"
            else
                write_token(token)
            end
            break
        else
            next_token = token
            process_block(scope, "{")
        end

        -- Separator (, or =)
        local lineno
        token, lineno = token_iterator()
        if expect_value then
            assert(token == "=")
            write_token("=")
            process_block(scope, "{")
            token, lineno = token_iterator()
        end

        if token == nil then
            error("Unexpected EOF in table on line " .. lineno_cache)
        end

        write_token(token)
        if token == "}" then
            break
        elseif token ~= "," then
            error(("Unexpected token %q on line %d, expected ','"):format(token, lineno))
        end
    end
end
-- Processes tokens until the current block or expression ends, writing them
-- out with local variable names swapped for the short names held in `scope`.
--
-- `block_start` is the token that opened this block. In this file it is
-- called with nil (top level), "function", "then", "do", "repeat", "(", "[",
-- "{" (a table value) or "local" (a single expression, also used for the
-- condition after "until"). It decides which closing tokens are accepted and
-- whether the function peeks ahead to detect the end of an expression.
--
-- Returns nothing. Returns early on the matching closing token, or simply
-- falls out of the loop when the token iterator is exhausted.
function process_block(scope, block_start)
    -- Scope holding the loop variables of a "for" whose "do" has not been
    -- reached yet
    local for_scope
    for token, lineno in token_iterator do
        local token_first_byte = byte(token)
        -- NOTE(review): token_iterator appears to filter out whitespace
        -- tokens, so this branch looks unreachable - confirm.
        if whitespace_bytes[token_first_byte] then
            if last_token and last_token:find("[A-Za-z0-9_]$") then
                need_whitespace = true
            end
        elseif token == "function" then
            -- Process functions
            -- TODO: Maybe make this verify syntax?
            write_token("function")
            -- Copy the (optional) function name up to the "(". Only the
            -- first name token is looked up in the scope.
            local first = true
            for token2 in token_iterator do
                if token2 == "(" then
                    process_function(scope)
                    break
                end
                write_token(first and scope[token2] or token2)
                first = false
            end
        elseif token == "local" then
            -- Process local declarations
            assert(block_start ~= "local")
            process_declaration(scope)
        elseif token == "for" then
            assert(block_start ~= "local")
            write_token("for")
            for_scope = copy_scope(scope)
            -- Remember where the loop started for the error message below
            for_scope["?"] = lineno
            -- Rename the loop variables up to "in" or "="
            for token2 in token_iterator do
                if token2 == "," then
                    write_token(token2)
                elseif token2 == "in" or token2 == "=" then
                    write_token(token2)
                    break
                else
                    write_token(new_var_name(for_scope, token2))
                end
            end
        elseif scope_begin[token] then
            write_token(token)
            if not for_scope then
                -- Ordinary block: process it in a child scope
                process_block(copy_scope(scope), token)
            elseif token ~= "do" then
                error(("Unexpected token %q on line %d (for statement on line %d)"):format(token, lineno, for_scope["?"]))
            else
                -- Body of the pending "for": use the scope that holds its
                -- loop variables
                process_block(for_scope, token)
                for_scope = nil
            end
        elseif scope_end[token] then
            if token == "elseif" then
                assert(block_start == "then", "Mismatched elseif")
            elseif block_start == "repeat" then
                assert(token == "until", "Mismatched until")
                write_token(token)
                -- The condition is processed as a single expression using
                -- the scope of the repeat block
                process_block(scope, "local")
                return
            elseif token ~= "end" or block_start == nil or block_start == "(" or block_start == "local" then
                error("Mismatched end on line " .. lineno)
            end
            write_token(token)
            return
        -- Use recursion with brackets if required
        elseif (token == "(" or token == "[") and
                (token == block_start or block_start == "{" or block_start == "local") then
            write_token(token)
            process_block(scope, token)
        elseif (token == ")" and block_start == "(") or
                (token == "]" and block_start == "[") then
            -- Matching closing bracket: this nested block is finished
            write_token(token)
            return
        elseif token == "else" then
            assert(block_start == "then", "Mismatched else")
            -- Reset the scope
            for var in pairs(scope) do
                scope[var] = nil
            end
            write_token(token)
        elseif token == "{" then
            -- Parse tables
            process_table(scope)
        elseif token == "." or token == ":" then
            -- Parse attributes
            write_token(token)
            local new_token = assert(token_iterator(), "Expected attribute, got EOF")
            if new_token:find("^[A-Za-z0-9_]") then
                -- Attribute (or decimal place on a number); written as-is
                -- without a scope lookup
                write_token(new_token)
            else
                -- Not an attribute (for example 1.)
                next_token = new_token
            end
        else
            -- Anything else: swap in the renamed local if there is one
            write_token(scope[token] or token)
        end

        -- Peek at the next token
        -- In expression mode ("local" or "{") the expression ends at EOF, at
        -- a comma, at the "}" closing a table, or at a word other than
        -- "and"/"or". The peeked token stays in next_token either way.
        if (block_start == "local" or block_start == "{") and not operators[token] then
            next_token = token_iterator()
            if next_token == nil or next_token == "," or
                    (block_start == "{" and next_token == "}") or (next_token ~= "and" and
                    next_token ~= "or" and next_token:find("^[A-Za-z0-9_]")) then
                return
            end
        end
    end
end
process_block({})
local token, lineno = token_iterator()
if token then
error(("Unexpected data %q on line %d, expected EOF"):format(token, lineno))
end
return table.concat(res)
end
return minify