Lua: Add initial support for UTF-8 string
This commit is contained in:
parent
9b24c80ccb
commit
55e638951b
@ -30,6 +30,7 @@ local clientpath = scriptdir .. "client" .. DIR_DELIM
|
||||
local commonpath = scriptdir .. "common" .. DIR_DELIM
|
||||
local asyncpath = scriptdir .. "async" .. DIR_DELIM
|
||||
local intlpath = scriptdir .. "intllib" .. DIR_DELIM
|
||||
local utf8path = scriptdir .. "utf8lib" .. DIR_DELIM
|
||||
|
||||
dofile(commonpath .. "strict.lua")
|
||||
dofile(commonpath .. "serialize.lua")
|
||||
@ -37,6 +38,7 @@ dofile(commonpath .. "misc_helpers.lua")
|
||||
|
||||
if INIT == "game" then
|
||||
dofile(intlpath .. "init.lua")
|
||||
dofile(utf8path .. "init.lua")
|
||||
dofile(gamepath .. "init.lua")
|
||||
elseif INIT == "mainmenu" then
|
||||
local mm_script = core.settings:get("main_menu_script")
|
||||
|
175
builtin/utf8lib/init.lua
Normal file
175
builtin/utf8lib/init.lua
Normal file
@ -0,0 +1,175 @@
|
||||
--[[
|
||||
$Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
|
||||
|
||||
Provides UTF-8 aware string functions implemented in pure lua:
|
||||
* string.upper(s)
|
||||
* string.lower(s)
|
||||
|
||||
All functions behave as their non UTF-8 aware counterparts with the exception
|
||||
that UTF-8 characters are used instead of bytes for all units.
|
||||
|
||||
Copyright (c) 2006-2007, Kyle Smith
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the author nor the names of its contributors may be
|
||||
used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Based on: https://github.com/Planimeter/grid-sdk/blob/master/public/utf8.lua
|
||||
Changed by: MultiCraft Development Team (2019)
|
||||
Note: Now used very minimal version, with the support of only lower and upper.
|
||||
Only latin and russian letters are supported.
|
||||
Support for additional characters will be added as the game localizes.
|
||||
]]
|
||||
|
||||
-- returns the number of bytes used by the UTF-8 character at byte i in s
|
||||
-- also doubles as a UTF-8 character validator
|
||||
function utf8charbytes(s, i)
|
||||
-- argument defaults
|
||||
i = i or 1
|
||||
|
||||
-- argument checking
|
||||
if type(s) ~= "string" then
|
||||
error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
|
||||
end
|
||||
if type(i) ~= "number" then
|
||||
error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
|
||||
end
|
||||
|
||||
local c = s:byte(i)
|
||||
|
||||
-- determine bytes needed for character, based on RFC 3629
|
||||
-- validate byte 1
|
||||
if c > 0 and c <= 127 then
|
||||
-- UTF8-1
|
||||
return 1
|
||||
elseif c >= 194 and c <= 223 then
|
||||
-- UTF8-2
|
||||
local c2 = s:byte(i + 1)
|
||||
|
||||
if not c2 then
|
||||
error("UTF-8 string terminated early")
|
||||
end
|
||||
|
||||
-- validate byte 2
|
||||
if c2 < 128 or c2 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
return 2
|
||||
elseif c >= 224 and c <= 239 then
|
||||
-- UTF8-3
|
||||
local c2 = s:byte(i + 1)
|
||||
local c3 = s:byte(i + 2)
|
||||
|
||||
if not c2 or not c3 then
|
||||
error("UTF-8 string terminated early")
|
||||
end
|
||||
|
||||
-- validate byte 2
|
||||
if c == 224 and (c2 < 160 or c2 > 191) then
|
||||
error("Invalid UTF-8 character")
|
||||
elseif c == 237 and (c2 < 128 or c2 > 159) then
|
||||
error("Invalid UTF-8 character")
|
||||
elseif c2 < 128 or c2 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
-- validate byte 3
|
||||
if c3 < 128 or c3 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
return 3
|
||||
elseif c >= 240 and c <= 244 then
|
||||
-- UTF8-4
|
||||
local c2 = s:byte(i + 1)
|
||||
local c3 = s:byte(i + 2)
|
||||
local c4 = s:byte(i + 3)
|
||||
|
||||
if not c2 or not c3 or not c4 then
|
||||
error("UTF-8 string terminated early")
|
||||
end
|
||||
|
||||
-- validate byte 2
|
||||
if c == 240 and (c2 < 144 or c2 > 191) then
|
||||
error("Invalid UTF-8 character")
|
||||
elseif c == 244 and (c2 < 128 or c2 > 143) then
|
||||
error("Invalid UTF-8 character")
|
||||
elseif c2 < 128 or c2 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
-- validate byte 3
|
||||
if c3 < 128 or c3 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
-- validate byte 4
|
||||
if c4 < 128 or c4 > 191 then
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
|
||||
return 4
|
||||
else
|
||||
error("Invalid UTF-8 character")
|
||||
end
|
||||
end
|
||||
|
||||
dofile(core.get_builtin_path() .. DIR_DELIM .. "utf8lib" .. DIR_DELIM .. "utf8data.lua")
|
||||
|
||||
-- replace UTF-8 characters based on a mapping table
|
||||
local function utf8replace(s, mapping)
|
||||
-- argument checking
|
||||
if type(s) ~= "string" then
|
||||
error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
|
||||
end
|
||||
if type(mapping) ~= "table" then
|
||||
error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
|
||||
end
|
||||
|
||||
local pos = 1
|
||||
local bytes = s:len()
|
||||
local charbytes
|
||||
local newstr = ""
|
||||
|
||||
while pos <= bytes do
|
||||
charbytes = utf8charbytes(s, pos)
|
||||
local c = s:sub(pos, pos + charbytes - 1)
|
||||
newstr = newstr .. (mapping[c] or c)
|
||||
pos = pos + charbytes
|
||||
end
|
||||
|
||||
return newstr
|
||||
end
|
||||
|
||||
-- identical to string.upper except it knows about unicode simple case conversions
|
||||
local origupper = string.upper
|
||||
function string.upper(s)
|
||||
return origupper(utf8replace(s, utf8_lc_uc))
|
||||
end
|
||||
|
||||
-- identical to string.lower except it knows about unicode simple case conversions
|
||||
local origlower = string.lower
|
||||
function string.lower(s)
|
||||
return origlower(utf8replace(s, utf8_uc_lc))
|
||||
end
|
7
builtin/utf8lib/utf8data.lua
Normal file
7
builtin/utf8lib/utf8data.lua
Normal file
@ -0,0 +1,7 @@
|
||||
utf8_uc_lc = {
|
||||
["А"]="а",["Б"]="б",["В"]="в",["Г"]="г",["Д"]="д",["Е"]="е",["Ё"]="ё",["Ж"]="ж",["З"]="з",["И"]="и",["Й"]="й",["К"]="к",["Л"]="л",["М"]="м",["Н"]="н",["О"]="о",["П"]="п",["Р"]="р",["С"]="с",["Т"]="т",["У"]="у",["Ф"]="ф",["Х"]="х",["Ц"]="ц",["Ч"]="ч",["Ш"]="ш",["Щ"]="щ",["Ъ"]="ъ",["Ы"]="ы",["Ь"]="ь",["Э"]="э",["Ю"]="ю",["Я"]="я"
|
||||
}
|
||||
|
||||
utf8_lc_uc = {
|
||||
["а"]="А",["б"]="Б",["в"]="В",["г"]="Г",["д"]="Д",["е"]="Е",["ё"]="Ё",["ж"]="Ж",["з"]="З",["и"]="И",["й"]="Й",["к"]="К",["л"]="Л",["м"]="М",["н"]="Н",["о"]="О",["п"]="П",["р"]="Р",["с"]="С",["т"]="Т",["у"]="У",["ф"]="Ф",["х"]="Х",["ц"]="Ц",["ч"]="Ч",["ш"]="Ш",["щ"]="Щ",["ъ"]="Ъ",["ы"]="Ы",["ь"]="Ь",["э"]="Э",["ю"]="Ю",["я"]="Я"
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user