Add UTF8 support for string.len
This commit is contained in:
parent
f884d0ace9
commit
232904ed1c
@ -2,6 +2,7 @@
|
|||||||
$Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
|
$Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
|
||||||
|
|
||||||
Provides UTF-8 aware string functions implemented in pure lua:
|
Provides UTF-8 aware string functions implemented in pure lua:
|
||||||
|
* string.len(s)
|
||||||
* string.upper(s)
|
* string.upper(s)
|
||||||
* string.lower(s)
|
* string.lower(s)
|
||||||
|
|
||||||
@ -35,6 +36,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
Based on: https://github.com/Planimeter/grid-sdk/blob/master/public/utf8.lua
|
Based on: https://github.com/Planimeter/grid-sdk/blob/master/public/utf8.lua
|
||||||
|
https://github.com/Stepets/utf8.lua
|
||||||
Changed by: MultiCraft Development Team (2019)
|
Changed by: MultiCraft Development Team (2019)
|
||||||
Note: Now used very minimal version, with the support of only lower and upper.
|
Note: This is now a very minimal version that supports only len, lower and upper.
|
||||||
Only latin and russian letters are supported.
|
Only latin and russian letters are supported.
|
||||||
@ -43,16 +45,16 @@ Note: Now used very minimal version, with the support of only lower and upper.
|
|||||||
|
|
||||||
-- returns the number of bytes used by the UTF-8 character at byte i in s
|
-- returns the number of bytes used by the UTF-8 character at byte i in s
|
||||||
-- also doubles as a UTF-8 character validator
|
-- also doubles as a UTF-8 character validator
|
||||||
function utf8charbytes(s, i)
|
local function utf8charbytes(s, i)
|
||||||
-- argument defaults
|
-- argument defaults
|
||||||
i = i or 1
|
i = i or 1
|
||||||
|
|
||||||
-- argument checking
|
-- argument checking
|
||||||
if type(s) ~= "string" then
|
if type(s) ~= "string" then
|
||||||
error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(s).. ")")
|
error("bad argument #1 to 'utf8charbytes' (string expected, got " .. type(s) .. ")")
|
||||||
end
|
end
|
||||||
if type(i) ~= "number" then
|
if type(i) ~= "number" then
|
||||||
error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(i).. ")")
|
error("bad argument #2 to 'utf8charbytes' (number expected, got " .. type(i) .. ")")
|
||||||
end
|
end
|
||||||
|
|
||||||
local c = s:byte(i)
|
local c = s:byte(i)
|
||||||
@ -135,20 +137,40 @@ function utf8charbytes(s, i)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- returns the number of characters in a UTF-8 string
|
||||||
|
local originlen = string.len
|
||||||
|
-- Counts the characters (not bytes) in the UTF-8 string s.
-- The string is walked from its first byte; utf8charbytes reports how many
-- bytes the character at the current position uses, and the position is
-- advanced by that amount until the end of the string is passed.
--   s: a string, or a number (converted with tostring first)
-- Returns the number of characters; 0 for the empty string.
-- Raises an error when s is neither a string nor a number.
local function utf8len(s)
	-- The stock string.len accepts numbers by converting them to strings.
	-- This function is installed as string.len further down in this file,
	-- so it has to keep accepting them or existing callers would break.
	if type(s) == "number" then
		s = tostring(s)
	end

	-- argument checking
	if type(s) ~= "string" then
		error("bad argument #1 to 'utf8len' (string expected, got " .. type(s) .. ")")
	end

	local pos = 1
	-- byte length from the saved original string.len, not the override
	local bytes = originlen(s)
	local len = 0

	while pos <= bytes do
		len = len + 1
		-- skip over every byte of the current character
		pos = pos + utf8charbytes(s, pos)
	end

	return len
end
|
||||||
|
|
||||||
dofile(core.get_builtin_path() .. "utf8lib" .. DIR_DELIM .. "utf8data.lua")
|
dofile(core.get_builtin_path() .. "utf8lib" .. DIR_DELIM .. "utf8data.lua")
|
||||||
|
|
||||||
-- replace UTF-8 characters based on a mapping table
|
-- replace UTF-8 characters based on a mapping table
|
||||||
local function utf8replace(s, mapping)
|
local function utf8replace(s, mapping)
|
||||||
-- argument checking
|
-- argument checking
|
||||||
if type(s) ~= "string" then
|
if type(s) ~= "string" then
|
||||||
error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
|
error("bad argument #1 to 'utf8replace' (string expected, got " .. type(s) .. ")")
|
||||||
end
|
end
|
||||||
if type(mapping) ~= "table" then
|
if type(mapping) ~= "table" then
|
||||||
error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
|
error("bad argument #2 to 'utf8replace' (table expected, got " .. type(mapping) .. ")")
|
||||||
end
|
end
|
||||||
|
|
||||||
local pos = 1
|
local pos = 1
|
||||||
local bytes = s:len()
|
local bytes = originlen(s)
|
||||||
local charbytes
|
local charbytes
|
||||||
local newstr = ""
|
local newstr = ""
|
||||||
|
|
||||||
@ -162,13 +184,18 @@ local function utf8replace(s, mapping)
|
|||||||
return newstr
|
return newstr
|
||||||
end
|
end
|
||||||
|
|
||||||
-- identical to string.upper except it knows about unicode simple case conversions
|
-- replaces string.len with a UTF-8 aware version that counts characters instead of bytes
|
||||||
|
-- Override string.len: the value returned is whatever utf8len computes for s,
-- i.e. a count of UTF-8 characters rather than a count of bytes.
string.len = function(s)
	local count = utf8len(s)
	return count
end
|
||||||
|
|
||||||
|
-- identical to string.upper with UTF-8 support
|
||||||
-- Keep a handle on the stock implementation before it is overridden below.
local origupper = string.upper

-- Override string.upper: s is first passed through utf8replace with the
-- utf8_lc_uc mapping table, and the result is then handed to the original
-- string.upper.
string.upper = function(s)
	return origupper(utf8replace(s, utf8_lc_uc))
end
|
||||||
|
|
||||||
-- identical to string.lower except it knows about unicode simple case conversions
|
-- identical to string.lower with UTF-8 support
|
||||||
local origlower = string.lower
|
local origlower = string.lower
|
||||||
function string.lower(s)
|
function string.lower(s)
|
||||||
return origlower(utf8replace(s, utf8_uc_lc))
|
return origlower(utf8replace(s, utf8_uc_lc))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user