--- Python-style extended string library.
--
-- See section 3.6.1 of the Python reference.
-- If you want to make these available as string methods, then say
-- `stringx.import()` to bring them into the standard `string` table.
--
-- See @{03-strings.md|the Guide}
--
-- Dependencies: `pl.utils`
-- @module pl.stringx
|
|
local utils = require 'pl.utils'
|
|
local string = string
|
|
local find = string.find
|
|
local type,setmetatable,getmetatable,ipairs,unpack = type,setmetatable,getmetatable,ipairs,unpack
|
|
local error,tostring = error,tostring
|
|
local gsub = string.gsub
|
|
local rep = string.rep
|
|
local sub = string.sub
|
|
local concat = table.concat
|
|
local escape = utils.escape
|
|
local ceil = math.ceil
|
|
local _G = _G
|
|
local assert_arg,usplit,list_MT = utils.assert_arg,utils.split,utils.stdmt.List
|
|
local lstrip
|
|
|
|
-- Check that argument number `n`, whose value is `s`, is a string.
-- The actual check (and any error raised) is delegated to `assert_arg`.
local function assert_string (n,s)
    local expected = 'string'
    assert_arg(n,s,expected)
end
|
|
|
|
-- Predicate: true when `s` has a length greater than zero.
local function non_empty(s)
    local length = #s
    return length > 0
end
|
|
|
|
-- Check that argument number `n`, whose value is `s`, is a string of
-- length > 0. `non_empty` is handed to `assert_arg` as the extra verifier,
-- together with the message to use when it fails.
local function assert_nonempty_string(n,s)
    local complaint = 'must be a non-empty string'
    assert_arg(n,s,'string',non_empty,complaint)
end
|
|
|
|
local stringx = {}
|
|
|
|
--- does the string consist solely of alphabetic characters?.
-- An empty string fails the test, since at least one character must match.
-- @param s a string
-- @return true if every character matches the `%a` class, false otherwise
function stringx.isalpha(s)
    assert_string(1,s)
    local start = find(s,'^%a+$')
    return start == 1
end
|
|
|
|
--- does the string consist solely of decimal digits?.
-- An empty string fails the test, since at least one character must match.
-- @param s a string
-- @return true if every character matches the `%d` class, false otherwise
function stringx.isdigit(s)
    assert_string(1,s)
    local start = find(s,'^%d+$')
    return start == 1
end
|
|
|
|
--- does the string consist solely of alphanumeric characters?.
-- An empty string fails the test, since at least one character must match.
-- @param s a string
-- @return true if every character matches the `%w` class, false otherwise
function stringx.isalnum(s)
    assert_string(1,s)
    local start = find(s,'^%w+$')
    return start == 1
end
|
|
|
|
--- does the string consist solely of whitespace?.
-- An empty string fails the test, since at least one character must match.
-- @param s a string
-- @return true if every character matches the `%s` class, false otherwise
function stringx.isspace(s)
    assert_string(1,s)
    local start = find(s,'^%s+$')
    return start == 1
end
|
|
|
|
--- does the string contain only lower case letters and whitespace?.
-- Whitespace is accepted alongside lower case letters (the pattern is
-- `[%l%s]`), so a string made only of spaces also passes. An empty string
-- fails.
-- @param s a string
-- @return true if every character is a lower case letter or whitespace
function stringx.islower(s)
    assert_string(1,s)
    local start = find(s,'^[%l%s]+$')
    return start == 1
end
|
|
|
|
--- does the string contain only upper case letters and whitespace?.
-- Whitespace is accepted alongside upper case letters (the pattern is
-- `[%u%s]`), so a string made only of spaces also passes. An empty string
-- fails.
-- @param s a string
-- @return true if every character is an upper case letter or whitespace
function stringx.isupper(s)
    assert_string(1,s)
    local start = find(s,'^[%u%s]+$')
    return start == 1
end
|
|
|
|
--- join the items of a sequence, using this string as the separator.
-- This is a thin wrapper over `table.concat`.
-- @param self the separator string
-- @param seq a table of strings or numbers
-- @return the joined string
-- @usage (' '):join {1,2,3} == '1 2 3'
function stringx.join (self,seq)
    assert_string(1,self)
    local separator = self
    return concat(seq,separator)
end
|
|
|
|
--- does the string begin with the given prefix?.
-- The comparison is literal (no pattern matching). An empty prefix always
-- matches.
-- @param self the string
-- @param s2 the prefix to look for
-- @return true if self starts with s2, false otherwise
function stringx.startswith(self,s2)
    assert_string(1,self)
    assert_string(2,s2)
    -- compare the leading slice of the same length as the prefix
    local head = sub(self,1,#s2)
    return head == s2
end
|
|
|
|
-- Scan `s` for non-overlapping, literal (plain) occurrences of `sub`,
-- starting the search at index `first`.
-- If `last` is given, scanning stops at the first occurrence that starts
-- after index `last`; that occurrence and any later ones are ignored.
-- Returns the start index of the last accepted occurrence (nil if none)
-- and the number of accepted occurrences.
-- An empty `sub` is special-cased and returns `#s+1, #s`.
local function _find_all(s,sub,first,last)
    if sub == '' then return #s+1,#s end
    local res
    local k = 0
    local i1,i2 = s:find(sub,first,true)
    while i1 do
        -- Test the bound before recording the match. Testing it after the
        -- next find (as was done previously) compared nil against `last`
        -- once the matches ran out, and never checked the very first match.
        if last and i1 > last then break end
        res = i1
        k = k + 1
        -- resume after the end of this match so occurrences never overlap
        i1,i2 = s:find(sub,i2+1,true)
    end
    return res,k
end
|
|
|
|
--- does the string finish with the given suffix?.
-- The suffix is matched literally. When a table is passed, its items are
-- tried in order and the first matching suffix wins.
-- @param s a string
-- @param send a suffix string, or a table (array) of suffixes
-- @return true if s ends with the suffix (or any of the suffixes)
function stringx.endswith(s,send)
    assert_string(1,s)
    local kind = type(send)
    if kind == 'string' then
        -- a suffix longer than the string can never match
        if #s < #send then return false end
        if s:find(send, #s-#send+1, true) then
            return true
        end
        return false
    end
    if kind == 'table' then
        for _,suffix in ipairs(send) do
            if stringx.endswith(s,suffix) then return true end
        end
        return false
    end
    error('argument #2: either a substring or a table of suffixes expected')
end
|
|
|
|
--- break a string into a list of lines.
-- The string is split with `utils.split` on the character class `[\r\n]`.
-- If that produces no pieces at all, a list holding a single empty string
-- is returned instead.
-- @param self the string
-- @param keepends (currently not used)
-- @return a List of lines
function stringx.splitlines (self,keepends)
    assert_string(1,self)
    local pieces = usplit(self,'[\r\n]')
    -- we are currently hacking around a problem with utils.split (see stringx.split)
    local lines = (#pieces == 0) and {''} or pieces
    return setmetatable(lines,list_MT)
end
|
|
|
|
-- Expand every tab in `self` to spaces, up to the next multiple of `n`
-- columns. The text before each tab is captured without crossing a
-- newline, so columns restart on every line and the whole (possibly
-- multi-line) string is handled in a single pass.
local function tab_expand (self,n)
    return (gsub(self,'([^\t\n]*)\t', function(s)
        -- a tab size of 0 simply removes the tab (and avoids `#s % 0`)
        if n == 0 then return s end
        return s..(' '):rep(n - #s % n)
    end))
end

--- replace all tabs in s with spaces, advancing to the next tab stop.
-- Tab stops are every n columns; if not specified, n defaults to 8.
-- A tab size of 0 removes the tabs.
-- With 0.9.5 this correctly expands to the next tab stop (if you really
-- want to just replace tabs, use :gsub('\t',' ') etc).
-- Newlines are left exactly as they are, including a trailing one.
-- @param self the string
-- @param n tab stop width, (default 8)
-- @return the string with tabs expanded
function stringx.expandtabs(self,n)
    assert_string(1,self)
    n = n or 8
    -- Previously multi-line input was split into lines and re-joined with
    -- '\n', which silently dropped a trailing newline.
    return tab_expand(self,n)
end
|
|
|
|
--- find the index of the first occurrence of a substring, searching from the left.
-- The substring is matched literally (no pattern matching).
-- @param self the string
-- @param sub substring
-- @param i1 start index
-- @return start index of the match, or nil if not found
function stringx.lfind(self,sub,i1)
    assert_string(1,self)
    assert_string(2,sub)
    -- parentheses keep only the start index from find's results
    return (find(self,sub,i1,true))
end
|
|
|
|
--- find the index of the last occurrence of a substring (searching from the right).
-- The substring is matched literally (no pattern matching).
-- @param self the string
-- @param sub substring
-- @param first first index
-- @param last last index
-- @return start index of the match, or nil if not found
function stringx.rfind(self,sub,first,last)
    assert_string(1,self)
    assert_string(2,sub)
    -- only the index is wanted; the occurrence count is dropped
    return (_find_all(self,sub,first,last))
end
|
|
|
|
--- replace up to n instances of old by new in the string s.
-- If n is not present, replace all instances. Both `old` and `new` are
-- treated literally: `old` is escaped before being used as a pattern and
-- any '%' in `new` is doubled so it is not read as a capture reference.
-- @param s the string
-- @param old the target substring
-- @param new the substitution
-- @param n optional maximum number of substitutions
-- @return result string (the substitution count is deliberately discarded)
function stringx.replace(s,old,new,n)
    assert_string(1,s)
    assert_string(2,old)   -- was reported as argument 1
    assert_string(3,new)
    -- the local assignment keeps only the string result of gsub
    local literal_new = gsub(new,'%%','%%%%')
    return (gsub(s,escape(old),literal_new,n))
end
|
|
|
|
--- split a string into a list of strings using a delimiter.
-- With no delimiter, leading whitespace is stripped first and the default
-- splitting of `utils.split` is used (non-plain match). With a delimiter,
-- the match is plain (literal), and a string that ends with the delimiter
-- yields a final empty string, unless `n` results have already been
-- produced.
-- @class function
-- @name split
-- @param self the string
-- @param re a delimiter (defaults to whitespace)
-- @param n maximum number of results
-- @return a List of the pieces
-- @usage #(('one two'):split()) == 2
-- @usage ('one,two,three'):split(',') == List{'one','two','three'}
-- @usage ('one,two,three'):split(',',2) == List{'one','two,three'}
function stringx.split(self,re,n)
    assert_string(1,self)
    local s = self
    local plain = true
    if not re then -- default spaces
        s = lstrip(s)
        plain = false
    end
    local res = usplit(s,re,plain,n)
    -- utils.split drops the empty field after a trailing delimiter; put it
    -- back, but never grow the list beyond the requested maximum.
    if re and re ~= '' and find(s,re,-#re,true)
       and (n or math.huge) > #res then
        res[#res+1] = ""
    end
    return setmetatable(res,list_MT)
end
|
|
|
|
--- split a string with a pattern and return the pieces as multiple values.
-- Note that at least one value will be returned! The work is delegated to
-- `utils.splitv`.
-- @param self the string
-- @param re a Lua string pattern (defaults to whitespace)
-- @return the parts of the string
-- @usage a,b = line:splitv('=')
function stringx.splitv (self,re)
    assert_string(1,self)
    local splitter = utils.splitv
    return splitter(self,re)
end
|
|
|
|
-- Return the value of `self` concatenated with the empty string.
local function copy(self)
    local duplicate = self..''
    return duplicate
end
|
|
|
|
--- count the non-overlapping occurrences of a substring in a string.
-- The substring is matched literally.
-- @param self the string
-- @param sub substring
-- @return the number of occurrences
function stringx.count(self,sub)
    assert_string(1,self)
    -- the first result (index of the last match) is not needed here
    local _,total = _find_all(self,sub,1)
    return total
end
|
|
|
|
-- Shared implementation of ljust/rjust/center.
-- Pads `s` with `ch` (default ' ') up to width `w`.
--   left and right both set : padding on both sides, the larger half first
--   only right set          : all padding goes in front of `s`
--   otherwise               : all padding goes after `s`
-- When `w` does not exceed the length of `s`, a copy of `s` is returned.
local function _just(s,w,ch,left,right)
    local len = #s
    if not (w > len) then
        return copy(s)
    end
    local fill = ch or ' '
    local gap = w - len
    if left and right then
        local before = ceil(gap/2)
        return rep(fill,before)..s..rep(fill,gap - before)
    elseif right then
        return rep(fill,gap)..s
    else
        return s..rep(fill,gap)
    end
end
|
|
|
|
--- left-justify a string in a field of width w (padding is added on the right).
-- @param self the string
-- @param w width of justification
-- @param ch padding character, default ' '
-- @return the padded string
function stringx.ljust(self,w,ch)
    assert_string(1,self)
    assert_arg(2,w,'number')
    local left,right = true,false
    return _just(self,w,ch,left,right)
end
|
|
|
|
--- right-justify a string in a field of width w (padding is added on the left).
-- @param s the string
-- @param w width of justification
-- @param ch padding character, default ' '
-- @return the padded string
function stringx.rjust(s,w,ch)
    assert_string(1,s)
    assert_arg(2,w,'number')
    local left,right = false,true
    return _just(s,w,ch,left,right)
end
|
|
|
|
--- centre a string in a field of width w (padding is added on both sides).
-- When the padding cannot be split evenly, the extra character goes on the left.
-- @param s the string
-- @param w width of justification
-- @param ch padding character, default ' '
-- @return the padded string
function stringx.center(s,w,ch)
    assert_string(1,s)
    assert_arg(2,w,'number')
    local left,right = true,true
    return _just(s,w,ch,left,right)
end
|
|
|
|
-- Shared implementation of lstrip/rstrip/strip.
-- Removes a run of characters from the left and/or right end of `s`.
-- `chrs` is the set of characters to remove; when absent, whitespace (%s)
-- is removed. An empty `chrs` removes nothing.
local function _strip(s,left,right,chrs)
    if not chrs then
        chrs = '%s'
    elseif chrs == '' then
        -- an empty set would build the malformed pattern '[]'
        return s
    else
        -- presumably utils.escape %-escapes pattern magic characters so the
        -- set is taken literally -- confirm against pl.utils
        chrs = '['..escape(chrs)..']'
    end
    if left then
        -- anchored, so this always matches; i2 < i1 means an empty run
        local i1,i2 = find(s,'^'..chrs..'*')
        if i2 >= i1 then
            s = sub(s,i2+1)
        end
    end
    if right then
        -- always matches (at worst the empty run at the very end)
        local i1,i2 = find(s,chrs..'*$')
        if i2 >= i1 then
            s = sub(s,1,i1-1)
        end
    end
    return s
end
|
|
|
|
--- remove leading whitespace (or leading characters from a given set).
-- @param self the string
-- @param chrs default space, can be a string of characters to be trimmed
-- @return the trimmed string
function stringx.lstrip(self,chrs)
    assert_string(1,self)
    local from_left,from_right = true,false
    return _strip(self,from_left,from_right,chrs)
end
-- fill in the forward-declared local used earlier in the file
lstrip = stringx.lstrip
|
|
|
|
--- remove trailing whitespace (or trailing characters from a given set).
-- @param s the string
-- @param chrs default space, can be a string of characters to be trimmed
-- @return the trimmed string
function stringx.rstrip(s,chrs)
    assert_string(1,s)
    local from_left,from_right = false,true
    return _strip(s,from_left,from_right,chrs)
end
|
|
|
|
--- remove whitespace (or characters from a given set) from both ends of a string.
-- @param self the string
-- @param chrs default space, can be a string of characters to be trimmed
-- @return the trimmed string
function stringx.strip(self,chrs)
    assert_string(1,self)
    local from_left,from_right = true,true
    return _strip(self,from_left,from_right,chrs)
end
|
|
|
|
-- The partition functions split a string using a delimiter into three parts:
-- the part before, the delimiter itself, and the part afterwards.
-- `fn(p,delim)` locates the delimiter and returns its start index, and
-- optionally its end index. If nothing is found (nil or -1), the result is
-- the whole string followed by two empty strings.
local function _partition(p,delim,fn)
    local i1,i2 = fn(p,delim)
    if not i1 or i1 == -1 then
        return p,'',''
    end
    -- A finder that returns only the start index leaves i2 nil; derive the
    -- end from the delimiter length. Using i1 here (as before) cut every
    -- multi-character delimiter down to its first character.
    if not i2 then i2 = i1 + #delim - 1 end
    return p:sub(1,i1-1),p:sub(i1,i2),p:sub(i2+1)
end
|
|
|
|
--- partition the string using the first occurrence of a delimiter.
-- If the delimiter is not found, the whole string is returned followed by
-- two empty strings.
-- @param self the string
-- @param ch delimiter (must be a non-empty string)
-- @return part before ch
-- @return ch
-- @return part after ch
function stringx.partition(self,ch)
    assert_string(1,self)
    assert_nonempty_string(2,ch)
    local finder = stringx.lfind
    return _partition(self,ch,finder)
end
|
|
|
|
--- partition the string using the last occurrence of a delimiter.
-- If the delimiter is not found, the whole string is returned followed by
-- two empty strings.
-- @param self the string
-- @param ch delimiter (must be a non-empty string)
-- @return part before ch
-- @return ch
-- @return part after ch
function stringx.rpartition(self,ch)
    assert_string(1,self)
    assert_nonempty_string(2,ch)
    local finder = stringx.rfind
    return _partition(self,ch,finder)
end
|
|
|
|
--- return the 'character' found at an index.
-- @param self the string
-- @param idx an index (can be negative)
-- @return a substring of length 1 if successful, empty string otherwise.
function stringx.at(self,idx)
    assert_string(1,self)
    assert_arg(2,idx,'number')
    local first,last = idx,idx
    return sub(self,first,last)
end
|
|
|
|
--- return an iterator over all lines in a string.
-- Lines are delimited by '\n'. A final line without a terminating newline
-- is still produced.
-- @param self the string
-- @return an iterator yielding each line without its newline
function stringx.lines (self)
    assert_string(1,self)
    local text = self
    -- make sure the last line is newline-terminated so the pattern sees it
    if text:sub(-1) ~= '\n' then
        text = text..'\n'
    end
    return text:gmatch('([^\n]*)\n')
end
|
|
|
|
--- initial word letters uppercase ('title case').
-- Here 'words' mean chunks of non-space characters. The first character of
-- each word is upper-cased and the remainder of the word is lower-cased.
-- @param self the string
-- @return a string with each word's first letter uppercase
function stringx.title(self)
    -- validate like every other function in this module
    assert_string(1,self)
    return (self:gsub('(%S)(%S*)',function(f,r)
        return f:upper()..r:lower()
    end))
end

-- `capitalize` is an alias of `title`
stringx.capitalize = stringx.title
|
|
|
|
-- marker inserted where text has been cut, and its length
local elipsis = '...'
local n_elipsis = #elipsis

--- return a shortened version of a string.
-- A string longer than sz is cut and marked with '...' so that the result
-- is exactly sz characters long. If sz is smaller than the marker itself,
-- only the first sz characters of the marker are returned (nothing at all
-- for sz <= 0). A string that already fits is returned unchanged.
-- @param self the string
-- @param sz the maximum size allowed
-- @param tail true if we want to show the end of the string (head otherwise)
-- @return the shortened string
function stringx.shorten(self,sz,tail)
    assert_string(1,self)
    assert_arg(2,sz,'number')
    if #self <= sz then
        return self
    end
    if sz < n_elipsis then
        -- clamp at 0: a negative end index would count from the end of the
        -- marker and return all of it
        return elipsis:sub(1,sz > 0 and sz or 0)
    end
    if tail then
        -- keep the last (sz - n_elipsis) characters
        local i = #self - sz + 1 + n_elipsis
        return elipsis .. self:sub(i)
    end
    return self:sub(1,sz-n_elipsis) .. elipsis
end
|
|
|
|
--- bring the stringx functions into the standard `string` table.
-- The copying is delegated to `utils.import`.
-- @param dont_overload accepted but not used by this function
function stringx.import(dont_overload)
    local target = string
    utils.import(stringx,target)
end
|
|
|
|
return stringx
|