--
-- A primitive-ish code minifier
--
-- Copyright © 2023 by luk3yx
-- Copyright © 2023 MultiCraft Development Team
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published by
-- [...] (lines 9-18 of the licence header are outside the context of the patch
-- this file was reconstructed from and are intentionally not reproduced here)
-- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
--

-- Provenance: reconstructed from the "+" side of the builtin/game/sscsm/minify.lua
-- hunks of commit 0019930f27a0e859b585d1ea569044114edb7038 ("New SSCSM minifier",
-- luk3yx, 2023-02-21).
--
-- Companion change from the same commit, in builtin/game/sscsm/init.lua
-- (sscsm.register, immediately before the call to sscsm.minify_code):
--
--     if block_colon then
--         def.code = "local minetest=core;" .. def.code
--     end

local find, sub, byte = string.find, string.sub, string.byte

-- Byte values of the characters the tokeniser has to special-case.
local QUOTE, APOSTROPHE, LBRACKET, BACKSLASH, SPACE, TAB, CR, NEWLINE, HYPHEN,
	DOT, EQUALS, COLON = byte('"\'[\\ \t\r\n-.=:', 1, -1)

-- Builds a lookup table mapping every argument to true.
local function set(...)
	local res = {}
	for _, item in ipairs({...}) do
		res[item] = true
	end
	return res
end

local whitespace_bytes = set(SPACE, TAB, CR, NEWLINE)

-- Tries to read a long bracket ("[[ ... ]]", "[==[ ... ]==]") starting exactly
-- at idx.
--
-- Returns the index of the last byte of the closing bracket and the updated
-- line number, or nil and the unchanged line number if there is no opening
-- long bracket at idx. If the closing bracket is missing the returned index
-- is #code + 1 (i.e. the string runs to the end of the input).
local function parse_long_string(code, idx, lineno)
	local marker_start, marker_end = find(code, "^%[=*%[", idx)
	if not marker_start then
		return nil, lineno
	end

	-- The closing bracket has as many "=" as the opening one
	local end_symbol = "]" .. ("="):rep(marker_end - marker_start - 1) .. "]"
	local _, end_idx = find(code, end_symbol, marker_end + 1, true)
	end_idx = end_idx or #code + 1

	-- Keep the line counter in sync with newlines inside the long string
	for i = marker_end + 1, end_idx - 1 do
		if byte(code, i) == NEWLINE then
			lineno = lineno + 1
		end
	end

	return end_idx, lineno
end

-- Returns an iterator over the raw tokens of code.
--
-- Every call yields the token text and the tokeniser's current line counter.
-- Whitespace runs, comments, quoted strings and long strings are each returned
-- as one token; "..", "...", "==" and "::" are merged, every other symbol is a
-- one-byte token and the text between two symbols is returned verbatim.
-- NOTE: the line counter has already advanced past the symbol that follows a
-- word token, so it can be one line ahead for the last word on a line.
local function tokenise(code)
	local last_idx = 1
	local lineno = 1
	local next_token
	return function()
		-- Return the next token if set
		if next_token ~= nil then
			local res = next_token
			next_token = nil
			return res, lineno
		end

		local start_idx, end_idx = find(code, "[%(%):%.\n\"' \t\r%+%-%*/~=<>|;%[%],%{%}#%%%^]", last_idx)
		if not start_idx then
			-- HACK: Get the last token by appending a newline so that the
			-- trailing word is followed by a symbol like any other word
			if byte(code, -1) == NEWLINE then return end
			code = code .. "\n"
			start_idx, end_idx = #code, #code
		end

		local symbol = byte(code, start_idx)

		if symbol == QUOTE or symbol == APOSTROPHE then
			-- Quoted strings
			local new_symbol
			repeat
				end_idx = end_idx + 1
				new_symbol = byte(code, end_idx)
				-- Skip over backslashes (and the byte they escape)
				if new_symbol == BACKSLASH then
					end_idx = end_idx + 1
				end
			until symbol == new_symbol or new_symbol == nil
		elseif whitespace_bytes[symbol] then
			-- Consume spaces/tabs/newlines as a single token
			while true do
				if symbol == NEWLINE then lineno = lineno + 1 end

				symbol = byte(code, end_idx + 1)
				if not whitespace_bytes[symbol] then break end
				end_idx = end_idx + 1
			end
		elseif symbol == LBRACKET then
			-- Long strings (potentially)
			end_idx, lineno = parse_long_string(code, start_idx, lineno)
			if not end_idx then end_idx = start_idx end
		elseif symbol == HYPHEN and byte(code, start_idx + 1) == HYPHEN then
			-- Comments
			end_idx, lineno = parse_long_string(code, start_idx + 2, lineno)
			if not end_idx then
				-- Line comment: the token includes the terminating newline.
				-- Only count a line if there actually is a newline (a comment
				-- on the very last line may end at EOF instead).
				end_idx = find(code, "\n", start_idx, true)
				if end_idx then
					lineno = lineno + 1
				else
					end_idx = #code + 1
				end
			end
		elseif symbol == DOT and byte(code, start_idx + 1) == DOT then
			-- Varargs and string concatenation
			end_idx = byte(code, start_idx + 2) == DOT and start_idx + 2 or start_idx + 1
		elseif (symbol == COLON or symbol == EQUALS) and byte(code, start_idx + 1) == symbol then
			-- == and goto labels
			end_idx = start_idx + 1
		end
		next_token = sub(code, start_idx, end_idx)

		-- Text between the previous symbol and this one (may be empty, in
		-- which case the symbol itself is returned straight away)
		local res = sub(code, last_idx, start_idx - 1)
		if res == "" and next_token then
			res, next_token = next_token, nil
		end
		last_idx = end_idx + 1
		return res, lineno
	end
end

local keywords = set("and", "break", "do", "else", "elseif", "end", "for",
	"function", "if", "in", "local", "not", "or", "repeat", "return", "then",
	"until", "while")
-- Tokens that open a new block (and therefore a new variable scope)
local scope_begin = set("then", "do", "repeat")
-- Tokens that close the current block
local scope_end = set("end", "until", "elseif")
-- Tokens after which an expression cannot end yet
local operators = set("+", "-", "*", "/", "^", "%", "=", "==", "~=", "<", "<<",
	"<=", ">", ">>", ">=", "&", "|", "~", "//", "and", "or", "..", "#", "not")

-- Creates a child scope: lookups fall through to the parent scope, writes stay
-- in the child.
local function copy_scope(scope)
	return setmetatable({}, {__index = scope})
end

-- Minifies Lua source code.
--
-- Whitespace and comments are dropped (a single space is re-inserted only
-- where two adjacent tokens would otherwise merge) and local variables,
-- function parameters and for-loop variables are renamed to "_<hex counter>".
--
-- code: the Lua source to minify.
-- Returns the minified source as a string.
-- Raises an error for the mismatched blocks / unexpected tokens / premature
-- EOF cases the processing functions below detect. This is not a full syntax
-- check.
local function minify(code)
	local locals_count = 0

	-- Allocates a new short name for the local `name` and records it in scope
	local function new_var_name(scope, name)
		locals_count = locals_count + 1
		local var = ("_%x"):format(locals_count)
		scope[name] = var
		return var
	end


	-- Remove whitespace and comments when iterating
	local raw_token_iterator = tokenise(code)
	local need_whitespace = false
	-- One-token pushback buffer shared by all of the processing functions
	local next_token
	-- Line number of the most recent real token, also used for EOF errors
	local lineno_cache = -1
	local function token_iterator()
		if next_token ~= nil then
			local token = next_token
			next_token = nil
			return token, lineno_cache
		end

		for token, lineno in raw_token_iterator do
			local first_byte = byte(token, 1)
			if whitespace_bytes[first_byte] or
					(first_byte == HYPHEN and byte(token, 2) == HYPHEN) then
				-- Remember that the source had a separator here
				need_whitespace = true
			else
				lineno_cache = lineno
				return token, lineno
			end
		end
	end

	-- Write tokens and any whitespace if necessary
	local res = {}
	local last_token = ""
	local function write_token(token)
		if need_whitespace then
			need_whitespace = false
			-- A space is only kept where dropping it would change the tokens:
			--  * two words/numbers next to each other
			--  * a number followed by ".." (would become a malformed number)
			--  * "- -" (would become a comment)
			--  * "[" followed by a long string (would become a longer or
			--    different long bracket, e.g. t[ [[k]] ] -> t[[[k]]])
			if (token:find("^[A-Za-z0-9_]") and last_token:find("[A-Za-z0-9_]$")) or
					(token == ".." and last_token:find("[0-9]$")) or
					(token == "-" and last_token == "-") or
					(last_token == "[" and token:find("^%[[=%[]")) then
				res[#res + 1] = " "
			end
		end

		res[#res + 1] = token
		last_token = token
	end

	-- Processes a parameter list and the function body that follows it.
	-- Expects to be called after the ( token
	local process_block
	local function process_function(old_scope)
		local scope = copy_scope(old_scope)
		write_token("(")
		local token, lineno = token_iterator()
		while token do
			if token == ")" then
				write_token(")")
				process_block(copy_scope(scope), "function")
				return
			end
			assert(not scope_begin[token] and not scope_end[token], token)

			if token ~= "..." then
				token = new_var_name(scope, token)
			end
			write_token(token)

			token, lineno = token_iterator()
			if token == "," then
				write_token(",")
				token, lineno = token_iterator()
			elseif token ~= nil and token ~= ")" then
				error(("Unexpected token %q on line %d"):format(token, lineno))
			end
			-- token == nil (EOF) leaves the loop and is reported below
		end

		-- lineno is nil at EOF, so report the line of the last token instead
		error("Unexpected EOF on line " .. lineno_cache)
	end

	-- Processes a "local" statement (the "local" token has been consumed).
	-- The new names only replace the originals after the initialisers have
	-- been written so that "local x = x" still refers to the outer x.
	local function process_declaration(scope)
		write_token("local")

		local token = token_iterator()
		if token == nil then
			error("Unexpected EOF after 'local' on line " .. lineno_cache)
		end

		if token == "function" then
			write_token("function")
			write_token(new_var_name(scope, token_iterator()))
			assert(token_iterator() == "(", "Expected ( after local function")
			process_function(scope)
			return
		end

		-- Indexes into res of the (not yet renamed) variable names
		local var_names = {}
		local var_count = 0
		while true do
			-- Variable name
			write_token(token)
			var_names[#var_names + 1] = #res
			var_count = var_count + 1

			token = token_iterator()
			if token == "," then
				write_token(token)
				token = token_iterator()
			elseif token == "=" then
				write_token("=")
				-- At most one initialiser expression per declared variable
				for i = 1, var_count do
					process_block(scope, "local")
					if i < var_count then
						token = token_iterator()
						if token ~= "," then
							next_token = token
							break
						end
						write_token(",")
					end
				end
				break
			else
				-- Declaration without initialiser: hand the token back
				next_token = token
				break
			end
		end

		for _, res_idx in ipairs(var_names) do
			res[res_idx] = new_var_name(scope, res[res_idx])
		end
	end

	-- Processes a table constructor (the "{" token has been consumed)
	local function process_table(scope)
		write_token("{")
		while true do
			local token = token_iterator()
			if token == nil then
				error("Unexpected EOF in table on line " .. lineno_cache)
			end

			-- Key
			local expect_value = false
			-- "function" and "not" look like identifiers but start an
			-- expression (e.g. {not x}); they go to the generic branch below
			if token ~= "function" and token ~= "not" and
					token:find("^[A-Za-z_][A-Za-z0-9_]*$") then
				assert(not keywords[token])

				-- If this is a variable name then swap it with the scope
				next_token = token_iterator()
				expect_value = next_token == "="
				if expect_value then
					-- name = value: the name is a key and must not be renamed
					write_token(token)
				else
					write_token(scope[token] or token)
					if next_token ~= "," and next_token ~= "}" then
						need_whitespace = true
						process_block(scope, "{")
					end
				end
			elseif token == "[" then
				write_token("[")
				process_block(scope, "[")
				expect_value = true
			elseif token == "}" then
				-- Drop a trailing separator
				if res[#res] == "," then
					res[#res] = "}"
				else
					write_token(token)
				end
				break
			else
				next_token = token
				process_block(scope, "{")
			end

			-- Separator (, or =)
			local lineno
			token, lineno = token_iterator()
			if expect_value then
				assert(token == "=")
				write_token("=")
				process_block(scope, "{")

				token, lineno = token_iterator()
			end

			if token == nil then
				error("Unexpected EOF in table on line " .. lineno_cache)
			end

			write_token(token)

			if token == "}" then
				break
			elseif token ~= "," then
				error(("Unexpected token %q on line %d, expected ','"):format(token, lineno))
			end
		end
	end

	-- Processes tokens until the construct that was opened by block_start ends.
	--
	-- block_start is nil for the top level, "function"/"then"/"do"/"repeat"
	-- for statement blocks, "(" or "[" for bracketed expressions, and "local"
	-- or "{" for a single expression (local initialiser, until condition or
	-- table value) which ends as soon as the next token cannot continue it.
	function process_block(scope, block_start)
		-- Scope holding the loop variables between "for" and its "do"
		local for_scope
		for token, lineno in token_iterator do
			-- token_iterator never yields whitespace or comments, so there is
			-- nothing to skip here
			if token == "function" then
				-- Process functions
				-- TODO: Maybe make this verify syntax?
				write_token("function")
				local first = true
				for token2 in token_iterator do
					if token2 == "(" then
						process_function(scope)
						break
					end
					-- Only the first part of a.b.c can be a renamed local
					write_token(first and scope[token2] or token2)
					first = false
				end
			elseif token == "local" then
				-- Process local declarations
				assert(block_start ~= "local")
				process_declaration(scope)
			elseif token == "for" then
				assert(block_start ~= "local")
				write_token("for")
				for_scope = copy_scope(scope)
				-- "?" cannot clash with a variable name; it stores the line
				-- of the for statement for error messages
				for_scope["?"] = lineno
				for token2 in token_iterator do
					if token2 == "," then
						write_token(token2)
					elseif token2 == "in" or token2 == "=" then
						write_token(token2)
						break
					else
						write_token(new_var_name(for_scope, token2))
					end
				end
			elseif scope_begin[token] then
				write_token(token)
				if not for_scope then
					process_block(copy_scope(scope), token)
				elseif token ~= "do" then
					error(("Unexpected token %q on line %d (for statement on line %d)"):format(token, lineno, for_scope["?"]))
				else
					process_block(for_scope, token)
					for_scope = nil
				end
			elseif scope_end[token] then
				if token == "elseif" then
					assert(block_start == "then", "Mismatched elseif")
				elseif block_start == "repeat" then
					assert(token == "until", "Mismatched until")
					write_token(token)
					-- The until condition can still see the block's locals
					process_block(scope, "local")
					return
				elseif token ~= "end" or block_start == nil or block_start == "(" or block_start == "local" then
					error("Mismatched end on line " .. lineno)
				end

				write_token(token)
				return

			-- Use recursion with brackets if required
			elseif (token == "(" or token == "[") and
					(token == block_start or block_start == "{" or block_start == "local") then
				write_token(token)
				process_block(scope, token)
			elseif (token == ")" and block_start == "(") or
					(token == "]" and block_start == "[") then
				write_token(token)
				return

			elseif token == "else" then
				assert(block_start == "then", "Mismatched else")

				-- Reset the scope
				for var in pairs(scope) do
					scope[var] = nil
				end
				write_token(token)

			elseif token == "{" then
				-- Parse tables
				process_table(scope)

			elseif token == "." or token == ":" then
				-- Parse attributes
				write_token(token)

				local new_token = assert(token_iterator(), "Expected attribute, got EOF")
				if new_token:find("^[A-Za-z0-9_]") then
					-- Attribute (or decimal place on a number)
					write_token(new_token)
				else
					-- Not an attribute (for example 1.)
					next_token = new_token
				end
			else
				write_token(scope[token] or token)
			end

			-- Peek at the next token: single-expression blocks end when the
			-- next token cannot continue the expression
			if (block_start == "local" or block_start == "{") and not operators[token] then
				next_token = token_iterator()
				if next_token == nil or next_token == "," or
						(block_start == "{" and next_token == "}") or (next_token ~= "and" and
						next_token ~= "or" and next_token:find("^[A-Za-z0-9_]")) then
					return
				end
			end
		end
	end

	process_block({})
	local token, lineno = token_iterator()
	if token then
		error(("Unexpected data %q on line %d, expected EOF"):format(token, lineno))
	end

	return table.concat(res)
end

return minify