lua

A copy of the Lua development repository
Log | Files | Refs | README

pm.lua (13841B)


      1 -- $Id: testes/pm.lua $
      2 -- See Copyright Notice in file all.lua
      3 
      4 -- UTF-8 file
      5 
      6 
      7 print('testing pattern matching')
      8 
      -- Calls 'f' with the remaining arguments in protected mode and asserts
      -- that the call fails with an error message matching the pattern 'msg'.
      local function checkerror (msg, f, ...)
        local ok, errmsg = pcall(f, ...)
        assert(not ok)
        -- extra parentheses: only the first result of 'find' goes to 'assert'
        assert((string.find(errmsg, msg)))
      end
     13 
     14 
     -- Returns the part of 's' matched by pattern 'p' (the whole match, not
     -- its captures); returns nothing when 'p' does not match.
     local function f (s, p)
       local first, last = string.find(s, p)
       if not first then return end
       return string.sub(s, first, last)
     end
     19 
     20 local a,b = string.find('', '')    -- empty patterns are tricky
     21 assert(a == 1 and b == 0);
     22 a,b = string.find('alo', '')
     23 assert(a == 1 and b == 0)
     24 a,b = string.find('a\0o a\0o a\0o', 'a', 1)   -- first position
     25 assert(a == 1 and b == 1)
     26 a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2)   -- starts in the middle
     27 assert(a == 5 and b == 7)
     28 a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9)   -- starts in the middle (init is where the last occurrence begins)
     29 assert(a == 9 and b == 11)
     30 a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2);  -- finds at the end
     31 assert(a == 9 and b == 11);
     32 a,b = string.find('a\0a\0a\0a\0\0ab', 'b')    -- last position
     33 assert(a == 11 and b == 11)
     34 assert(not string.find('a\0a\0a\0a\0\0ab', 'b\0'))   -- check ending
     35 assert(not string.find('', '\0'))   -- a pattern made of one zero byte does not match the empty string
     36 assert(string.find('alo123alo', '12') == 4)   -- only the first result (start index) is compared
     37 assert(not string.find('alo123alo', '^12'))   -- '^' anchors the match at the start of the subject
     38 
     39 assert(string.match("aaab", ".*b") == "aaab")   -- '*': zero or more repetitions
     40 assert(string.match("aaa", ".*a") == "aaa")
     41 assert(string.match("b", ".*b") == "b")   -- zero repetitions are enough
     42 
     43 assert(string.match("aaab", ".+b") == "aaab")   -- '+': one or more repetitions
     44 assert(string.match("aaa", ".+a") == "aaa")
     45 assert(not string.match("b", ".+b"))   -- needs at least one character before the 'b'
     46 
     47 assert(string.match("aaab", ".?b") == "ab")   -- '?': zero or one; first position that can match is 3
     48 assert(string.match("aaa", ".?a") == "aa")
     49 assert(string.match("b", ".?b") == "b")
     50 
     51 assert(f('aloALO', '%l*') == 'alo')   -- '%l': lowercase letters only
     52 assert(f('aLo_ALO', '%a*') == 'aLo')   -- '%a': letters; stops at '_'
     53 
     54 assert(f("  \n\r*&\n\r   xuxu  \n\n", "%g%g%g+") == "xuxu")   -- needs three or more '%g' in a row; "*&" has only two
     55 
     56 
     -- Adapt a pattern to UTF-8
     local function PU (p)
       local onechar = utf8.charpattern

       -- Makes each byte of 'char' optional by appending '?' to it.
       local function optbytes (char)
         local expanded = string.gsub(char, ".", "%0?")
         return expanded
       end

       -- A '?' after a multibyte character must apply to every byte of
       -- that character.  (For instance, "á?" becomes "\195?\161?".)
       local adapted = string.gsub(p, "(" .. onechar .. ")%?", optbytes)
       -- Each '.' becomes the pattern for one whole UTF-8 character.
       adapted = string.gsub(adapted, "%.", onechar)
       return adapted
     end
     68 
     69 
     70 assert(f('aaab', 'a*') == 'aaa');
     71 assert(f('aaa', '^.*$') == 'aaa');
     72 assert(f('aaa', 'b*') == '');
     73 assert(f('aaa', 'ab*a') == 'aa')
     74 assert(f('aba', 'ab*a') == 'aba')
     75 assert(f('aaab', 'a+') == 'aaa')
     76 assert(f('aaa', '^.+$') == 'aaa')
     77 assert(not f('aaa', 'b+'))
     78 assert(not f('aaa', 'ab+a'))
     79 assert(f('aba', 'ab+a') == 'aba')
     80 assert(f('a$a', '.$') == 'a')
     81 assert(f('a$a', '.%$') == 'a$')
     82 assert(f('a$a', '.$.') == 'a$a')
     83 assert(not f('a$a', '$$'))
     84 assert(not f('a$b', 'a$'))
     85 assert(f('a$a', '$') == '')
     86 assert(f('', 'b*') == '')
     87 assert(not f('aaa', 'bb*'))
     88 assert(f('aaab', 'a-') == '')
     89 assert(f('aaa', '^.-$') == 'aaa')
     90 assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
     91 assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
     92 assert(f('alo xo', '.o$') == 'xo')
     93 assert(f(' \n isto é assim', '%S%S*') == 'isto')
     94 assert(f(' \n isto é assim', '%S*$') == 'assim')
     95 assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
     96 assert(f('um caracter ? extra', '[^%sa-z]') == '?')
     97 assert(f('', 'a?') == '')
     98 assert(f('á', PU'á?') == 'á')
     99 assert(f('ábl', PU'á?b?l?') == 'ábl')
    100 assert(f('  ábl', PU'á?b?l?') == '')
    101 assert(f('aa', '^aa?a?a') == 'aa')
    102 assert(f(']]]áb', '[^]]+') == 'áb')
    103 assert(f("0alo alo", "%x*") == "0a")
    104 assert(f("alo alo", "%C+") == "alo alo")
    105 print('+')
    106 
    107 
    -- Returns the part of 's' matched by pattern 'p', like 'f', but finds
    -- the limits of the match through position captures: a "()" is added
    -- at the start and at the end of 'p' (after a leading '^' and before a
    -- trailing '$', when present).  Raises an error if 'p' does not match.
    local function f1 (s, p)
      -- The added leading "()" becomes capture 1, so every back-reference
      -- '%n' in 'p' must be renumbered to '%(n+1)'.  Escapes are consumed
      -- in pairs ('%' plus the character after it), so that an escaped
      -- percent followed by a digit (as in "%%1") is left alone instead of
      -- being mistaken for a back-reference.
      p = string.gsub(p, "%%(.)", function (c)
            if string.find(c, "[0-9]") then
              local n = tonumber(c) + 1
              -- back-references have one digit: "%10" would mean '%1' then '0'
              assert(n <= 9, "f1: cannot shift back-reference %9")
              return "%" .. n
            end
            -- other escapes: no return value, so gsub keeps the original text
          end)
      p = string.gsub(p, "^(^?)", "%1()", 1)
      p = string.gsub(p, "($?)$", "()%1", 1)
      local t = {string.match(s, p)}
      -- first and last captures are the added positions (start, end + 1)
      return string.sub(s, t[1], t[#t] - 1)
    end
    117 
    118 assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o")
    119 assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3')
    120 assert(f1('=======', '^(=*)=%1$') == '=======')
    121 assert(not string.match('==========', '^([=]*)=%1$'))
    122 
    -- Returns the values i, i+1, ..., j as multiple results; returns no
    -- results when 'i <= j' does not hold.
    local function range (i, j)
      if not (i <= j) then return end
      return i, range(i + 1, j)
    end
    128 
    129 local abc = string.char(range(0, 127)) .. string.char(range(128, 255));
    130 
    131 assert(string.len(abc) == 256)
    132 
    -- Returns the concatenation, in order, of the value gsub hands to its
    -- callback for every match of pattern 'p' in the string 'abc'.
    local function strset (p)
      local found = {}
      string.gsub(abc, p, function (c) found[#found + 1] = c end)
      return table.concat(found)
    end;
    138 
    139 assert(string.len(strset('[\200-\210]')) == 11)
    140 
    141 assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz")
    142 assert(strset('[a-z%d]') == strset('[%da-uu-z]'))
    143 assert(strset('[a-]') == "-a")
    144 assert(strset('[^%W]') == strset('[%w]'))
    145 assert(strset('[]%%]') == '%]')
    146 assert(strset('[a%-z]') == '-az')
    147 assert(strset('[%^%[%-a%]%-b]') == '-[]^ab')
    148 assert(strset('%Z') == strset('[\1-\255]'))
    149 assert(strset('.') == strset('[\1-\255%z]'))
    150 print('+');
    151 
    152 assert(string.match("alo xyzK", "(%w+)K") == "xyz")
    153 assert(string.match("254 K", "(%d*)K") == "")
    154 assert(string.match("alo ", "(%w*)$") == "")
    155 assert(not string.match("alo ", "(%w+)$"))
    156 assert(string.find("(álo)", "%(á") == 1)
    157 local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$")
    158 assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
    159 a, b, c, d  = string.match('0123456789', '(.+(.?)())')
    160 assert(a == '0123456789' and b == '' and c == 11 and d == nil)
    161 print('+')
    162 
    163 assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
    164 assert(string.gsub('alo úlo  ', ' +$', '') == 'alo úlo')  -- trim
    165 assert(string.gsub('  alo alo  ', '^%s*(.-)%s*$', '%1') == 'alo alo')  -- double trim
    166 assert(string.gsub('alo  alo  \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
    167 local t = "abç d"
    168 a, b = string.gsub(t, PU'(.)', '%1@')
    169 assert(a == "a@b@ç@ @d@" and b == 5)
    170 a, b = string.gsub('abçd', PU'(.)', '%0@', 2)
    171 assert(a == 'a@b@çd' and b == 2)
    172 assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
    173 assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
    174               "xyz=abc-abc=xyz")
    175 assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
    176 assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
    177 assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
    178 assert(string.gsub('', '^', 'r') == 'r')
    179 assert(string.gsub('', '$', 'r') == 'r')
    180 print('+')
    181 
    182 
    do   -- new (5.3.3) semantics for empty matches
      assert(string.gsub("a b cd", " *", "-") == "-a-b-c-d-")

      -- Do the equivalent substitution by hand with gmatch: copy the text
      -- that lies before each match and then add one "-" for the match.
      local subject = "a  \nbc\t\td"
      local pieces = {}
      local copyfrom = 1
      for start, finish in string.gmatch(subject, "()%s*()") do
        pieces[#pieces + 1] = string.sub(subject, copyfrom, start - 1)
        pieces[#pieces + 1] = "-"
        copyfrom = finish
      end
      assert(table.concat(pieces) == "-a-b-c-d-")
    end
    195 
    196 
    197 assert(string.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
    198             "um (DOIS) tres (QUATRO)")
    199 
    do   -- a function used as replacement receives the captures as arguments
      -- Stores 'v' in the global table under the name 'n'; it returns
      -- nothing, so gsub keeps the matched text.
      local function setglobal (n, v)
        rawset(_G, n, v)
      end

      local subject, pattern = "a=roberto,roberto=a", "(%w+)=(%w%w*)"
      string.gsub(subject, pattern, setglobal)
      assert(_G.a == "roberto")
      assert(_G.roberto == "a")
      -- remove the globals created above
      _G.roberto = nil
      _G.a = nil
    end
    206 
    -- Applies replacement 'b' to every character of 'a'; returns both
    -- results of gsub (the new string and the number of substitutions).
    -- A local 'f' is already in scope in this file, so this statement
    -- redefines that local instead of creating a global.
    function f (a, b)
      local replaced, count = string.gsub(a, '.', b)
      return replaced, count
    end
    208 assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
    209             "trocar tudo em bbbbb é alalalalalal")
    210 
    -- Compiles the string 's' as a chunk (with an empty chunk name) and
    -- runs it; returns the chunk's first result, or "" when that result
    -- is nil or false.
    local function dostring (s)
      local result = load(s, "")()
      if result then return result end
      return ""
    end
    212 assert(string.gsub("alo $a='x'$ novamente $return a$",
    213                    "$([^$]*)%$",
    214                    dostring) == "alo  novamente x")
    215 
    216 local x = string.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
    217          "$([^$]*)%$", dostring)
    218 assert(x == ' assim vai para ALO')
    219 _G.a, _G.x = nil
    220 
    221 local t = {}
    222 local s = 'a alo jose  joao'
    223 local r = string.gsub(s, '()(%w+)()', function (a,w,b)
    224              assert(string.len(w) == b-a);
    225              t[a] = b-a;
    226            end)
    227 assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4)
    228 
    229 
    -- Returns true when no '(' or ')' is left in 's' after every balanced
    -- "(...)" group (a match of "%b()") has been removed; false otherwise.
    local function isbalanced (s)
      local stripped = string.gsub(s, "%b()", "")
      local leftover = string.find(stripped, "[()]")
      return not leftover
    end
    233 
    234 assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
    235 assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))
    236 assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo')
    237 
    238 
    239 local t = {"apple", "orange", "lime"; n=0}
    240 assert(string.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
    241         == "apple and orange and lime")
    242 
    243 t = {n=0}
    244 string.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
    245 assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3)
    246 
    247 t = {n=0}
    248 assert(string.gsub("first second word", "%w+",
    249          function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word")
    250 assert(t[1] == "first" and t[2] == "second" and t[3] == undef)
    251 
    252 checkerror("invalid replacement value %(a table%)",
    253             string.gsub, "alo", ".", {a = {}})
    254 checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2")
    255 checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a")
    256 checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a")
    257 checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x")
    258 
    259 
    if not _soft then   -- skipped when the global '_soft' is set
      print("big strings")
      local big = string.rep('a', 300000)
      assert(string.find(big, '^a*.?$'))
      assert(not string.find(big, '^a*.?b$'))
      assert(string.find(big, '^a-.?$'))

      -- bug in 5.1.2
      -- (gsub is called without its replacement argument, so the call
      -- must fail with an error)
      local mixed = string.rep('a', 10000) .. string.rep('b', 10000)
      assert(not pcall(string.gsub, mixed, 'b'))
    end
    271 
    272 -- recursive nest of gsubs
    -- Reverses 's' by moving its first character after the reversal of the
    -- remaining ones.  Returns both results of gsub: the resulting string
    -- and the number of substitutions.
    local function rev (s)
      local function swap (head, tail)
        return rev(tail) .. head
      end
      return string.gsub(s, "(.)(.+)", swap)
    end
    276 
    277 local x = "abcdef"
    278 assert(rev(rev(x)) == x)
    279 
    280 
    281 -- gsub with tables
    282 assert(string.gsub("alo alo", ".", {}) == "alo alo")
    283 assert(string.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo")
    284 assert(string.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo")
    285 assert(string.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo")
    286 
    287 assert(string.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo")
    288 
    289 t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
    290 assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")
    291 
    292 
    293 -- tests for gmatch
    294 local a = 0
    295 for i in string.gmatch('abcde', '()') do assert(i == a+1); a=i end
    296 assert(a==6)
    297 
    298 t = {n=0}
    299 for w in string.gmatch("first second word", "%w+") do
    300       t.n=t.n+1; t[t.n] = w
    301 end
    302 assert(t[1] == "first" and t[2] == "second" and t[3] == "word")
    303 
    304 t = {3, 6, 9}
    305 for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do
    306   assert(i == table.remove(t, 1))
    307 end
    308 assert(#t == 0)
    309 
    310 t = {}
    311 for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do
    312   t[tonumber(i)] = tonumber(j)
    313 end
    314 a = 0
    315 for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
    316 assert(a == 3)
    317 
    318 
    do   -- init parameter in gmatch
      -- Adds up the numerals matched by "%d+" in 'subject', with the
      -- search starting at position 'init'.
      local function total (subject, init)
        local acc = 0
        for numeral in string.gmatch(subject, "%d+", init) do
          acc = acc + tonumber(numeral)
        end
        return acc
      end

      -- Counts the matches of "%w*" in 'subject', with the search
      -- starting at position 'init'.
      local function nmatches (subject, init)
        local n = 0
        for _ in string.gmatch(subject, "%w*", init) do
          n = n + 1
        end
        return n
      end

      assert(total("10 20 30", 3) == 50)
      assert(total("11 21 31", -4) == 32)

      -- there is an empty string at the end of the subject
      assert(nmatches("11 21 31", 9) == 1)

      -- there are no empty strings after the end of the subject
      assert(nmatches("11 21 31", 10) == 0)
    end
    346 
    347 
    348 -- tests for `%f' (`frontiers')
    349 
    350 assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
    351 assert(string.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[")
    352 assert(string.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3")
    353 assert(string.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.")
    354 assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction")
    355 assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.")
    356 
    357 assert(string.find("a", "%f[a]") == 1)
    358 assert(string.find("a", "%f[^%z]") == 1)
    359 assert(string.find("a", "%f[^%l]") == 2)
    360 assert(string.find("aba", "%f[a%z]") == 3)
    361 assert(string.find("aba", "%f[%z]") == 4)
    362 assert(not string.find("aba", "%f[%l%z]"))
    363 assert(not string.find("aba", "%f[^%l%z]"))
    364 
    365 local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
    366 assert(i == 2 and e == 5)
    367 local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
    368 assert(k == 'alo ')
    369 
    370 local a = {1, 5, 9, 14, 17,}
    371 for k in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
    372   assert(table.remove(a, 1) == k)
    373 end
    374 assert(#a == 0)
    375 
    376 
    377 -- malformed patterns
    -- Asserts that using 'p' as a pattern in string.find raises an error
    -- whose message matches the pattern 'm' ("malformed" when 'm' is absent).
    local function malform (p, m)
      if not m then m = "malformed" end
      local ok, errmsg = pcall(string.find, "a", p)
      assert(not ok)
      -- extra parentheses: only the first result of 'find' goes to 'assert'
      assert((string.find(errmsg, m)))
    end
    383 
    384 malform("(.", "unfinished capture")   -- '(' without its ')'
    385 malform(".)", "invalid pattern capture")   -- ')' without an open capture
    386 malform("[a")   -- set without the closing ']'
    387 malform("[]")   -- a ']' right after '[' does not close the set
    388 malform("[^]")   -- same, after the complement mark '^'
    389 malform("[a%]")   -- '%]' is an escaped ']', so the set is still open
    390 malform("[a%")   -- pattern ends inside the set
    391 malform("%b")   -- '%b' without its two delimiter characters
    392 malform("%ba")   -- '%b' with only one delimiter character
    393 malform("%")   -- nothing after the escape character
    394 malform("%f", "missing")   -- '%f' without a set after it
    395 
    396 -- \0 in patterns (a zero byte must behave as an ordinary character)
    397 assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2")   -- as the lower end of a range
    398 assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0")   -- as both ends of a range
    399 assert(string.find("b$a", "$\0?") == 2)   -- '$' is not last in the pattern, so it is literal; '\0?' matches empty
    400 assert(string.find("abc\0efg", "%\0") == 4)   -- escaped with '%'
    401 assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1")   -- '\0' and '\1' as the '%b' delimiters
    402 assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0")   -- followed by a repetition
    403 assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
    404 
    405 -- magic char after \0 (the pattern must not be cut at the zero byte)
    406 assert(string.find("abc\0\0","\0.") == 4)
    407 assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
    408 
    409 
    do   -- test reuse of original string in gsub
      -- Text that the "%p" format produces for 'str'; the checks below
      -- compare these texts to tell whether two values are the same object.
      local function addr (str)
        return string.format("%p", str)
      end

      local s = string.rep("a", 100)
      local r = string.gsub(s, "b", "c")   -- no match
      assert(addr(s) == addr(r))

      r = string.gsub(s, ".", {x = "y"})   -- no substitutions
      assert(addr(s) == addr(r))

      local count = 0
      -- visits each character but asks for no substitution
      local function keep (x)
        assert(x == "a")
        count = count + 1
        return nil    -- no substitution
      end
      r = string.gsub(s, ".", keep)
      r = string.gsub(r, ".", {b = 'x'})   -- "a" is not a key; no subst.
      assert(count == 100)
      assert(addr(s) == addr(r))

      count = 0
      -- replaces each character with itself
      local function echo (x)
        assert(x == "a")
        count = count + 1
        return x    -- substitution...
      end
      r = string.gsub(s, ".", echo)
      assert(count == 100)
      -- no reuse in this case
      assert(r == s and addr(s) ~= addr(r))
    end
    438 
    439 print('OK')
    440