diff --git a/.gitignore b/.gitignore index d002f61..d67a716 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ # generated by B3D test character.b3d.lua # generated by Lua logfile test -logfile.test.lua \ No newline at end of file +logfile.test.lua +# generated by SQLite3 persistence test +database.test.sqlite3 \ No newline at end of file diff --git a/Readme.md b/Readme.md index b5d4fab..226b470 100644 --- a/Readme.md +++ b/Readme.md @@ -36,6 +36,48 @@ persistence.lua_log_file.new(mod.get_resource"logfile.test.lua", {}, false) This will prevent strings from being referenced, possibly bloating file size, but saving memory. +#### SQLite3 Database Persistence + +Uses a SQLite3 database to persistently store a Lua table. **Experimental.** Obtaining it is a bit trickier, as it requires access to the `lsqlite3` library, which may be passed: + +```lua +local modlib_sqlite3 = persistence.sqlite3(require"lsqlite3") +``` + +(assuming `require` is that of an insecure environment if Minetest is used) + +Alternatively, if you are not running Minetest, mod security is disabled, you have (temporarily) provided `require` globally, or added `modlib` to `secure.trusted_mods`, you can simply do the following: + +```lua +local modlib_sqlite3 = persistence.sqlite3() +``` + +Modlib will then simply call `require"lsqlite3"` for you. + +Then, you can proceed to create a new database: + +```lua +local database = modlib_sqlite3.new(mod.get_resource"database.test.sqlite3", {}) +-- Create or load +database:init() +-- Use it +database:set_root("key", {nested = true}) +database:close() +``` + +It uses a similar API to Lua log files: + +* `new(filename, root)` - without `reference_strings` however (strings aren't referenced currently) +* `init` +* `set` +* `set_root` +* `rewrite` +* `close` + +The advantage over Lua log files is that the SQLite3 database keeps disk usage minimal. Unused tables are dropped from the database immediately through reference counting. 
The downside is that reference counting, combined with the overhead of using SQLite3, takes time, making updates on the SQLite3 database slower than Lua log file updates (which just append to an append-only file). +As simple and fast reference counting doesn't handle cycles, an additional `collectgarbage` stop-the-world method performing a full garbage collection on the database is provided which is called during `init`. +The method `defragment_ids` should not have to be used in practice (if it has to be, it happens automatically) and should be used solely for debugging purposes (neater IDs). + ### Bluon Binary Lua object notation. **Experimental.** Handling of subnormal numbers (very small floats) may be broken. diff --git a/persistence.lua b/persistence.lua index e5a88c1..a00a7b3 100644 --- a/persistence.lua +++ b/persistence.lua @@ -1,199 +1,17 @@ --- Localize globals -local assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type - = assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type - --- Set environment -local _ENV = {} -setfenv(1, _ENV) - -lua_log_file = { - -- default value - reference_strings = true -} - --- Note: keys may not be marked as weak references: garbage collected log files wouldn't close the file: --- The `__gc` metamethod doesn't work for tables in Lua 5.1; a hack using `newproxy` would be needed --- See https://stackoverflow.com/questions/27426704/lua-5-1-workaround-for-gc-metamethod-for-tables) --- Therefore, :close() must be called on log files to remove them from the `files` table -local files = {} -local metatable = {__index = lua_log_file} - -function lua_log_file.new(file_path, root, reference_strings) - local self = setmetatable({ - file_path = assert(file_path), - root = root, - reference_strings = reference_strings - }, metatable) - if minetest then - files[self] = true +-- TODO consider moving serializers in this namespace +local function load(module_name) + 
return assert(loadfile(modlib.mod.get_resource(modlib.modname, "persistence", module_name .. ".lua"))) +end +local _ENV = setmetatable({}, {__index = function(_ENV, module_name) + if module_name == "lua_log_file" then + local module = load(module_name)() + _ENV[module_name] = module + return module end - return self -end - -local function set_references(self, table) - -- Weak table keys to allow the collection of dead reference tables - -- TODO garbage collect strings in the references table - self.references = setmetatable(table, {__mode = "k"}) -end - -function lua_log_file:load() - -- Bytecode is blocked by the engine - local read = assert(loadfile(self.file_path)) - -- math.huge is serialized to inf - local env = {inf = math.huge} - setfenv(read, env) - read() - env.R = env.R or {{}} - local reference_count = #env.R - for ref in pairs(env.R) do - if ref > reference_count then - -- Ensure reference count always has the value of the largest reference - -- in case of "holes" (nil values) in the reference list - reference_count = ref - end + if module_name == "sqlite3" then + local module = load(module_name) + _ENV[module_name] = module + return module end - self.reference_count = reference_count - self.root = env.R[1] - set_references(self, {}) -end - -function lua_log_file:open() - self.file = io.open(self.file_path, "a+") -end - -function lua_log_file:init() - if modlib.file.exists(self.file_path) then - self:load() - self:_rewrite() - self:open() - return - end - self:open() - self.root = {} - self:_write() -end - -function lua_log_file:log(statement) - self.file:write(statement) - self.file:write"\n" -end - -function lua_log_file:flush() - self.file:flush() -end - -function lua_log_file:close() - self.file:close() - self.file = nil - files[self] = nil -end - -if minetest then - minetest.register_on_shutdown(function() - for self in pairs(files) do - self.file:close() - end - end) -end - -function lua_log_file:_dump(value, is_key) - if value == nil then - 
return "nil" - end - if value == true then - return "true" - end - if value == false then - return "false" - end - if value ~= value then - -- nan - return "0/0" - end - local _type = type(value) - if _type == "number" then - return ("%.17g"):format(value) - end - local reference = self.references[value] - if reference then - return "R[" .. reference .."]" - end - reference = self.reference_count + 1 - local key = "R[" .. reference .."]" - local function create_reference() - self.reference_count = reference - self.references[value] = reference - end - if _type == "string" then - local reference_strings = self.reference_strings - if is_key and ((not reference_strings) or value:len() <= key:len()) and value:match"^[%a_][%a%d_]*$" then - -- Short key - return value, true - end - local formatted = ("%q"):format(value) - if (not reference_strings) or formatted:len() <= key:len() then - -- Short string - return formatted - end - -- Use reference - create_reference() - self:log(key .. "=" .. formatted) - elseif _type == "table" then - -- Tables always need a reference before they are traversed to prevent infinite recursion - create_reference() - -- TODO traverse tables to determine whether this is actually needed - self:log(key .. "={}") - local tablelen = #value - for k, v in pairs(value) do - if type(k) ~= "number" or k % 1 ~= 0 or k < 1 or k > tablelen then - local dumped, short = self:_dump(k, true) - self:log(key .. (short and ("." .. dumped) or ("[" .. dumped .. "]")) .. "=" .. self:_dump(v)) - end - end - else - error("unsupported type: " .. _type) - end - return key -end - -function lua_log_file:set(table, key, value) - if not self.references[table] then - error"orphan table" - end - if table[key] == value then - -- No change - return - end - table[key] = value - table = self:_dump(table) - local key, short_key = self:_dump(key, true) - self:log(table .. (short_key and ("." .. key) or ("[" .. key .. "]")) .. "=" .. 
self:_dump(value)) -end - -function lua_log_file:set_root(key, value) - return self:set(self.root, key, value) -end - -function lua_log_file:_write() - set_references(self, {}) - self.reference_count = 0 - self:log"R={}" - self:_dump(self.root) -end - -function lua_log_file:_rewrite() - self.file = io.open(self.file_path, "w+") - self:_write() - self.file:close() -end - -function lua_log_file:rewrite() - if self.file then - self.file:close() - end - self:_rewrite() - self:open() -end - --- Export environment +end}) return _ENV \ No newline at end of file diff --git a/persistence/lua_log_file.lua b/persistence/lua_log_file.lua new file mode 100644 index 0000000..a6d387e --- /dev/null +++ b/persistence/lua_log_file.lua @@ -0,0 +1,198 @@ +-- Localize globals +local assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type + = assert, error, io, loadfile, math, minetest, modlib, pairs, setfenv, setmetatable, type + +-- Set environment +local _ENV = {} +setfenv(1, _ENV) + +-- Default value +reference_strings = true + +-- Note: keys may not be marked as weak references: garbage collected log files wouldn't close the file: +-- The `__gc` metamethod doesn't work for tables in Lua 5.1; a hack using `newproxy` would be needed +-- See https://stackoverflow.com/questions/27426704/lua-5-1-workaround-for-gc-metamethod-for-tables) +-- Therefore, :close() must be called on log files to remove them from the `files` table +local files = {} +local metatable = {__index = _ENV} +_ENV.metatable = metatable + +function new(file_path, root, reference_strings) + local self = setmetatable({ + file_path = assert(file_path), + root = root, + reference_strings = reference_strings + }, metatable) + if minetest then + files[self] = true + end + return self +end + +local function set_references(self, table) + -- Weak table keys to allow the collection of dead reference tables + -- TODO garbage collect strings in the references table + self.references = 
setmetatable(table, {__mode = "k"}) +end + +function load(self) + -- Bytecode is blocked by the engine + local read = assert(loadfile(self.file_path)) + -- math.huge is serialized to inf + local env = {inf = math.huge} + setfenv(read, env) + read() + env.R = env.R or {{}} + local reference_count = #env.R + for ref in pairs(env.R) do + if ref > reference_count then + -- Ensure reference count always has the value of the largest reference + -- in case of "holes" (nil values) in the reference list + reference_count = ref + end + end + self.reference_count = reference_count + self.root = env.R[1] + set_references(self, {}) +end + +function open(self) + self.file = io.open(self.file_path, "a+") +end + +function init(self) + if modlib.file.exists(self.file_path) then + self:load() + self:_rewrite() + self:open() + return + end + self:open() + self.root = {} + self:_write() +end + +function log(self, statement) + self.file:write(statement) + self.file:write"\n" +end + +function flush(self) + self.file:flush() +end + +function close(self) + self.file:close() + self.file = nil + files[self] = nil +end + +if minetest then + minetest.register_on_shutdown(function() + for self in pairs(files) do + self.file:close() + end + end) +end + +local function _dump(self, value, is_key) + if value == nil then + return "nil" + end + if value == true then + return "true" + end + if value == false then + return "false" + end + if value ~= value then + -- nan + return "0/0" + end + local _type = type(value) + if _type == "number" then + return ("%.17g"):format(value) + end + local reference = self.references[value] + if reference then + return "R[" .. reference .."]" + end + reference = self.reference_count + 1 + local key = "R[" .. 
reference .."]" + local function create_reference() + self.reference_count = reference + self.references[value] = reference + end + if _type == "string" then + local reference_strings = self.reference_strings + if is_key and ((not reference_strings) or value:len() <= key:len()) and value:match"^[%a_][%a%d_]*$" then + -- Short key + return value, true + end + local formatted = ("%q"):format(value) + if (not reference_strings) or formatted:len() <= key:len() then + -- Short string + return formatted + end + -- Use reference + create_reference() + self:log(key .. "=" .. formatted) + elseif _type == "table" then + -- Tables always need a reference before they are traversed to prevent infinite recursion + create_reference() + -- TODO traverse tables to determine whether this is actually needed + self:log(key .. "={}") + local tablelen = #value + for k, v in pairs(value) do + if type(k) ~= "number" or k % 1 ~= 0 or k < 1 or k > tablelen then + local dumped, short = _dump(self, k, true) + self:log(key .. (short and ("." .. dumped) or ("[" .. dumped .. "]")) .. "=" .. _dump(self, v)) + end + end + else + error("unsupported type: " .. _type) + end + return key +end + +function set(self, table, key, value) + if not self.references[table] then + error"orphan table" + end + if table[key] == value then + -- No change + return + end + table[key] = value + table = _dump(self, table) + local key, short_key = _dump(self, key, true) + self:log(table .. (short_key and ("." .. key) or ("[" .. key .. "]")) .. "=" .. 
_dump(self, value)) +end + +function set_root(self, key, value) + return self:set(self.root, key, value) +end + +function _write(self) + set_references(self, {}) + self.reference_count = 0 + self:log"R={}" + _dump(self, self.root) +end + +function _rewrite(self) + self.file = io.open(self.file_path, "w+") + self:_write() + self.file:close() +end + +function rewrite(self) + if self.file then + self.file:close() + end + self:_rewrite() + self:open() +end + +-- Export environment +return _ENV \ No newline at end of file diff --git a/persistence/sqlite3.lua b/persistence/sqlite3.lua new file mode 100644 index 0000000..49baef1 --- /dev/null +++ b/persistence/sqlite3.lua @@ -0,0 +1,316 @@ +local assert, error, math_huge, modlib, minetest, setmetatable, type, table_insert, table_sort, pairs, ipairs + = assert, error, math.huge, modlib, minetest, setmetatable, type, table.insert, table.sort, pairs, ipairs + +local sqlite3 = ... or require"lsqlite3" + +--! experimental + +--[[ + Currently uses reference counting to immediately delete tables which aren't reachable from the root table anymore, which has two issues: + 1. Deletion might trigger a large deletion chain + TODO defer deletion, clean up unused tables on startup, delete & iterate tables partially + 2. Reference counting is unable to handle cycles. `:collectgarbage()` implements a tracing "stop-the-world" garbage collector which handles cycles. 
+ TODO take advantage of Lua's garbage collection by keeping a bunch of "twin" objects in a weak structure using proxies (Lua 5.1) or the __gc metamethod (Lua 5.2) + See https://wiki.c2.com/?ReferenceCountingCanHandleCycles, https://www.memorymanagement.org/mmref/recycle.html#mmref-recycle and https://wiki.c2.com/?GenerationalGarbageCollectio + Weak tables are of no use here, as we need to be notified when a reference is dropped +]] + +local _ENV = {} +setfenv(1, _ENV) +local metatable = {__index = _ENV} +_ENV.metatable = metatable + +-- Note: keys may not be marked as weak references: wouldn't close the database: see persistence/lua_log_file.lua +local databases = {} + +local types = { + boolean = 1, + number = 2, + string = 3, + table = 4 +} + +local function increment_highest_table_id(self) + self.highest_table_id = self.highest_table_id + 1 + if self.highest_table_id > 2^50 then + -- IDs are approaching double precision limit (52 bits mantissa), defragment them + self:defragment_ids() + end + return self.highest_table_id +end + +function new(file_path, root) + return setmetatable({ + database = sqlite3.open(file_path), + root = root + }, metatable) +end + +function _ENV.setmetatable(self) + assert(self.database and self.root) + return setmetatable(self, metatable) +end + +local set + +local function add_table(self, table) + if type(table) ~= "table" then return end + if self.counts[table] then + self.counts[table] = self.counts[table] + 1 + return + end + self.table_ids[table] = increment_highest_table_id(self) + self.counts[table] = 1 + for k, v in pairs(table) do + set(self, table, k, v) + end +end + +local decrement_reference_count + +local function delete_table(self, table) + local id = assert(self.table_ids[table]) + self.table_ids[table] = nil + self.counts[table] = nil + for k, v in pairs(table) do + decrement_reference_count(self, k) + decrement_reference_count(self, v) + end + local statement = self._prepared.delete_table + statement:bind(1, id) + 
statement:step()
+	statement:reset()
+end
+
+-- Drops one reference to `table` (no-op for non-tables and untracked tables); deletes the table once unreferenced
+function decrement_reference_count(self, table)
+	if type(table) ~= "table" then return end
+	local count = self.counts[table]
+	if not count then return end
+	count = count - 1
+	if count == 0 then return delete_table(self, table) end
+	self.counts[table] = count
+end
+
+-- Writes `table[key] = value` to the database (a `nil` value deletes the entry) and updates the reference counts
+-- `table[key]` must still hold the stored value (see `_ENV.set`) or already equal `value` (see `add_table`)
+function set(self, table, key, value)
+	local deletion = value == nil
+	local previous_value = table[key]
+	if not deletion then
+		add_table(self, key)
+		add_table(self, value)
+	end
+	-- Release the replaced value only after the new references were added, so tables shared by both survive
+	if previous_value ~= value then decrement_reference_count(self, previous_value) end
+	if deletion then decrement_reference_count(self, key) end
+	local statement = self._prepared[deletion and "delete" or "insert"]
+	local function bind_type_and_content(n, value)
+		local type_ = type(value)
+		statement:bind(n, assert(types[type_]))
+		if type_ == "boolean" then
+			statement:bind(n + 1, value and 1 or 0)
+		elseif type_ == "number" then
+			if value ~= value then
+				statement:bind(n + 1, "nan")
+			elseif value == math_huge then
+				statement:bind(n + 1, "inf")
+			elseif value == -math_huge then
+				statement:bind(n + 1, "-inf")
+			else
+				statement:bind(n + 1, value)
+			end
+		elseif type_ == "string" then
+			-- Use bind_blob instead of bind as Lua strings are effectively byte strings
+			statement:bind_blob(n + 1, value)
+		elseif type_ == "table" then
+			statement:bind(n + 1, self.table_ids[value])
+		end
+	end
+	statement:bind(1, assert(self.table_ids[table]))
+	bind_type_and_content(2, key)
+	if not deletion then
+		bind_type_and_content(4, value)
+	end
+	statement:step()
+	statement:reset()
+end
+
+-- Executes raw SQL, raising the database's error message on failure
+local function exec(self, sql)
+	if self.database:exec(sql) ~= sqlite3.OK then
+		error(self.database:errmsg())
+	end
+end
+
+-- Creates the schema if necessary, prepares all statements and loads the stored tables;
+-- an empty database is populated from `self.root` instead. Reference counts are rebuilt from the stored references.
+function init(self)
+	local database = self.database
+	local function prepare(sql)
+		local stmt = database:prepare(sql)
+		if not stmt then error(database:errmsg()) end
+		return stmt
+	end
+	-- The table has to exist before statements referring to it can be prepared (fresh databases failed here)
+	exec(self, [[
+CREATE TABLE IF NOT EXISTS table_entries (
+	table_id INTEGER NOT NULL,
+	key_type INTEGER NOT NULL,
+	key BLOB NOT NULL,
+	value_type INTEGER NOT NULL,
+	value BLOB NOT NULL,
+	PRIMARY KEY (table_id, key_type, key)
+)]])
+	self._prepared = {
+		insert = prepare"INSERT OR REPLACE INTO table_entries(table_id, key_type, key, value_type, value) VALUES (?, ?, ?, ?, ?)",
+		delete = prepare"DELETE FROM table_entries WHERE table_id = ? AND key_type = ? AND key = ?",
+		delete_table = prepare"DELETE FROM table_entries WHERE table_id = ?",
+		update = {
+			id = prepare"UPDATE table_entries SET table_id = ? WHERE table_id = ?",
+			keys = prepare("UPDATE table_entries SET key = ? WHERE key_type = " .. types.table .. " AND key = ?"),
+			values = prepare("UPDATE table_entries SET value = ? WHERE value_type = " .. types.table .. " AND value = ?")
+		}
+	}
+	-- Default value
+	self.highest_table_id = 0
+	for id in self.database:urows"SELECT MAX(table_id) FROM table_entries" do
+		-- Gets a single value
+		self.highest_table_id = id
+	end
+	increment_highest_table_id(self)
+	local tables = {}
+	local counts = {}
+	self.counts = counts
+	local function get_value(type_, content)
+		if type_ == types.boolean then
+			if content == 0 then return false end
+			if content == 1 then return true end
+			error("invalid boolean value: " .. content)
+		end
+		if type_ == types.number then
+			if content == "nan" then return 0/0 end
+			if content == "inf" then return math_huge end
+			if content == "-inf" then return -math_huge end
+			assert(type(content) == "number")
+			return content
+		end
+		if type_ == types.string then
+			assert(type(content) == "string")
+			return content
+		end
+		if type_ == types.table then
+			-- Table reference: every stored occurrence counts as one reference to the (Lua) table
+			tables[content] = tables[content] or {}
+			counts[tables[content]] = (counts[tables[content]] or 0) + 1
+			return tables[content]
+		end
+		-- Null is unused
+		error("unsupported type: " .. type_)
+	end
+	-- Order by key to retrieve list parts in the correct order, making it easier for Lua
+	for table_id, key_type, key, value_type, value in self.database:urows"SELECT * FROM table_entries ORDER BY table_id, key_type, key" do
+		local table = tables[table_id] or {}
+		-- Register the table before decoding the row so that references to itself resolve to it
+		tables[table_id] = table
+		counts[table] = counts[table] or 0
+		table[get_value(key_type, key)] = get_value(value_type, value)
+	end
+	if tables[1] then
+		self.root = tables[1]
+		counts[self.root] = counts[self.root] + 1
+		self.table_ids = modlib.table.flip(tables)
+		self:collectgarbage()
+	else
+		self.highest_table_id = 0
+		self.table_ids = {}
+		add_table(self, self.root)
+	end
+	databases[self] = true
+end
+
+-- Clears the database and stores everything reachable from `self.root` again, assigning fresh IDs
+function rewrite(self)
+	exec(self, "DELETE FROM table_entries")
+	self.highest_table_id = 0
+	self.table_ids = {}
+	self.counts = {}
+	add_table(self, self.root)
+end
+
+-- Sets `table[key] = value` both in the database and in the Lua table; `table` must be a tracked table
+function _ENV.set(self, table, key, value)
+	local previous_value = table[key]
+	if previous_value == value then return end -- no change
+	set(self, table, key, value)
+	table[key] = value
+end
+
+-- Shorthand for setting an entry of the root table
+function set_root(self, key, value)
+	return _ENV.set(self, self.root, key, value)
+end
+
+-- Tracing garbage collection: deletes all tracked tables not reachable from the root (handles cycles)
+function collectgarbage(self)
+	local marked = {}
+	local function mark(table)
+		if type(table) ~= "table" or marked[table] then return end
+		marked[table] = true
+		for k, v in pairs(table) do
+			mark(k)
+			mark(v)
+		end
+	end
+	mark(self.root)
+	for table in pairs(self.table_ids) do
+		if not marked[table] then delete_table(self, table) end
+	end
+end
+
+-- Renumbers the table IDs to 1..n (keeping their order), both in the database and in `self.table_ids`
+function defragment_ids(self)
+	local ids = {}
+	for _, id in pairs(self.table_ids) do
+		table_insert(ids, id)
+	end
+	table_sort(ids)
+	local update = self._prepared.update
+	local tables = modlib.table.flip(self.table_ids)
+	for new_id, old_id in ipairs(ids) do
+		for _, stmt in pairs(update) do
+			stmt:bind_values(new_id, old_id)
+			stmt:step()
+			stmt:reset()
+		end
+		self.table_ids[tables[old_id]] = new_id
+	end
+	self.highest_table_id = #ids
+end
+
+local
function finalize_statements(table) + for _, stmt in pairs(table) do + if type(stmt) == "table" then + finalize_statements(stmt) + else + local errcode = stmt:finalize() + assert(errcode == sqlite3.OK, errcode) + end + end +end + +function close(self) + finalize_statements(self._prepared) + self.database:close() + databases[self] = nil +end + +if minetest then + minetest.register_on_shutdown(function() + for self in pairs(databases) do + self:close() + end + end) +end + +return _ENV \ No newline at end of file diff --git a/test.lua b/test.lua index 9b93834..11d4d1d 100644 --- a/test.lua +++ b/test.lua @@ -281,6 +281,7 @@ test_from_string("#694269", 0x694269FF) test_from_string("#11223344", 0x11223344) assert(colorspec.from_string"#694269":to_string() == "694269") +-- Persistence local function test_logfile(reference_strings) local logfile = persistence.lua_log_file.new(mod.get_resource"logfile.test.lua", {}, reference_strings) logfile:init() @@ -305,6 +306,51 @@ local function test_logfile(reference_strings) end test_logfile(true) test_logfile(false) +-- SQLite3 +do + local sqlite3 = persistence.sqlite3(require"lsqlite3") + local p = sqlite3.new("database.test.sqlite3", {}) + p:init() + p:rewrite() + p:set_root("key", "value") + assert(p.root.key == "value") + p:set_root("other key", "other value") + p:set_root("key", "other value") + p:set_root("key", nil) + local x = {x = 1, y = 2} + p:set_root("x1", x) + p:set_root("x2", x) + p:set_root("x2", nil) + p:set_root("x1", nil) + p:set_root("key", {a = 1, b = 2, c = {a = 1}}) + p:set_root("key", nil) + p:set_root("key", {a = 1, b = 2, c = 3}) + local cyclic = {} + cyclic.cycle = cyclic + p:set_root("cyclic", cyclic) + p:set_root("cyclic", nil) + p:collectgarbage() + p:defragment_ids() + local rows = {} + for row in p.database:rows"SELECT * FROM table_entries ORDER BY table_id, key_type, key" do + _G.table.insert(rows, row) + end + assert(modlib.table.equals(rows, { + {1, 3, "key", 4, 2}, + {1, 3, "other key", 3, "other 
value"}, + {2, 3, "a", 2, 1}, + {2, 3, "b", 2, 2}, + {2, 3, "c", 2, 3} + })) + p:close() + p = sqlite3.new("database.test.sqlite3", {}) + p:init() + assert(modlib.table.equals(p.root, { + key = {a = 1, b = 2, c = 3}, + ["other key"] = "other value" + })) + p:close() +end -- in-game tests & b3d testing local tests = {