-- pm.lua (13841B)
-- $Id: testes/pm.lua $
-- See Copyright Notice in file all.lua

-- UTF-8 file

-- Tests for the pattern-matching functions of the string library
-- (string.find, string.match, string.gmatch and string.gsub).
-- The script is a flat sequence of assertions: it prints '+' between
-- groups of tests and 'OK' when every assertion has passed.


print('testing pattern matching')

-- Calls 'f' with the remaining arguments in protected mode and asserts
-- that the call fails with an error message matching the Lua pattern
-- 'msg'.
local function checkerror (msg, f, ...)
  local s, err = pcall(f, ...)
  assert(not s and string.find(err, msg))
end


-- Returns the substring of 's' matched by pattern 'p' (first match),
-- or nothing when the pattern does not match.
local function f (s, p)
  local i,e = string.find(s, p)
  if i then return string.sub(s, i, e) end
end

local a,b = string.find('', '')    -- empty patterns are tricky
assert(a == 1 and b == 0);
a,b = string.find('alo', '')
assert(a == 1 and b == 0)
a,b = string.find('a\0o a\0o a\0o', 'a', 1)   -- first position
assert(a == 1 and b == 1)
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2)   -- starts in the middle
assert(a == 5 and b == 7)
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9)   -- starts in the middle
assert(a == 9 and b == 11)
a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2);  -- finds at the end
assert(a == 9 and b == 11);
a,b = string.find('a\0a\0a\0a\0\0ab', 'b')    -- last position
assert(a == 11 and b == 11)
assert(not string.find('a\0a\0a\0a\0\0ab', 'b\0'))   -- check ending
assert(not string.find('', '\0'))
assert(string.find('alo123alo', '12') == 4)
assert(not string.find('alo123alo', '^12'))

-- greedy repetitions ('*', '+') and the optional item ('?')
assert(string.match("aaab", ".*b") == "aaab")
assert(string.match("aaa", ".*a") == "aaa")
assert(string.match("b", ".*b") == "b")

assert(string.match("aaab", ".+b") == "aaab")
assert(string.match("aaa", ".+a") == "aaa")
assert(not string.match("b", ".+b"))

assert(string.match("aaab", ".?b") == "ab")
assert(string.match("aaa", ".?a") == "aa")
assert(string.match("b", ".?b") == "b")

assert(f('aloALO', '%l*') == 'alo')
assert(f('aLo_ALO', '%a*') == 'aLo')

assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu")


-- Adapt a pattern to UTF-8
local function PU (p)
  -- distribute '?' into each individual byte of a character.
  -- (For instance, "á?" becomes "\195?\161?".)
  p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c)
    return string.gsub(c, ".", "%0?")
  end)
  -- change '.' to utf-8 character patterns
  p = string.gsub(p, "%.", utf8.charpattern)
  return p
end


assert(f('aaab', 'a*') == 'aaa');
assert(f('aaa', '^.*$') == 'aaa');
assert(f('aaa', 'b*') == '');
assert(f('aaa', 'ab*a') == 'aa')
assert(f('aba', 'ab*a') == 'aba')
assert(f('aaab', 'a+') == 'aaa')
assert(f('aaa', '^.+$') == 'aaa')
assert(not f('aaa', 'b+'))
assert(not f('aaa', 'ab+a'))
assert(f('aba', 'ab+a') == 'aba')
-- '$' is an anchor only at the end of a pattern
assert(f('a$a', '.$') == 'a')
assert(f('a$a', '.%$') == 'a$')
assert(f('a$a', '.$.') == 'a$a')
assert(not f('a$a', '$$'))
assert(not f('a$b', 'a$'))
assert(f('a$a', '$') == '')
assert(f('', 'b*') == '')
assert(not f('aaa', 'bb*'))
assert(f('aaab', 'a-') == '')
assert(f('aaa', '^.-$') == 'aaa')
assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
assert(f('alo xo', '.o$') == 'xo')
assert(f(' \n isto é assim', '%S%S*') == 'isto')
assert(f(' \n isto é assim', '%S*$') == 'assim')
assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
assert(f('um caracter ? extra', '[^%sa-z]') == '?')
assert(f('', 'a?') == '')
assert(f('á', PU'á?') == 'á')
assert(f('ábl', PU'á?b?l?') == 'ábl')
assert(f(' ábl', PU'á?b?l?') == '')
assert(f('aa', '^aa?a?a') == 'aa')
assert(f(']]]áb', '[^]]+') == 'áb')
assert(f("0alo alo", "%x*") == "0a")
assert(f("alo alo", "%C+") == "alo alo")
print('+')


-- Like 'f', but finds the matched substring through position captures:
-- every back reference '%N' in 'p' is renumbered to '%(N+1)', a '()'
-- capture is inserted at the start of the pattern (after an optional
-- '^') and another at its end (before an optional '$'); the result is
-- the text of 's' between the first and the last captured positions.
local function f1 (s, p)
  p = string.gsub(p, "%%([0-9])", function (s)
        return "%" .. (tonumber(s)+1)
       end)
  p = string.gsub(p, "^(^?)", "%1()", 1)
  p = string.gsub(p, "($?)$", "()%1", 1)
  local t = {string.match(s, p)}
  return string.sub(s, t[1], t[#t] - 1)
end

-- back references
assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o")
assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3')
assert(f1('=======', '^(=*)=%1$') == '=======')
assert(not string.match('==========', '^([=]*)=%1$'))

-- Returns the integers i, i+1, ..., j as multiple results
-- (no results when i > j).
local function range (i, j)
  if i <= j then
    return i, range(i+1, j)
  end
end

-- string with every byte value from 0 to 255, in order
local abc = string.char(range(0, 127)) .. string.char(range(128, 255));

assert(string.len(abc) == 256)

-- Returns the concatenation, in order, of every substring of 'abc'
-- matched by pattern 'p'.  (The callback returns nothing, so gsub
-- performs no substitution; it is used only to visit the matches.)
local function strset (p)
  local res = {s=''}
  string.gsub(abc, p, function (c) res.s = res.s .. c end)
  return res.s
end;

assert(string.len(strset('[\200-\210]')) == 11)

assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz")
assert(strset('[a-z%d]') == strset('[%da-uu-z]'))
assert(strset('[a-]') == "-a")
assert(strset('[^%W]') == strset('[%w]'))
assert(strset('[]%%]') == '%]')
assert(strset('[a%-z]') == '-az')
assert(strset('[%^%[%-a%]%-b]') == '-[]^ab')
assert(strset('%Z') == strset('[\1-\255]'))
assert(strset('.') == strset('[\1-\255%z]'))
print('+');

-- captures
assert(string.match("alo xyzK", "(%w+)K") == "xyz")
assert(string.match("254 K", "(%d*)K") == "")
assert(string.match("alo ", "(%w*)$") == "")
assert(not string.match("alo ", "(%w+)$"))
assert(string.find("(álo)", "%(á") == 1)
local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$")
assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil)
a, b, c, d = string.match('0123456789', '(.+(.?)())')
assert(a == '0123456789' and b == '' and c == 11 and d == nil)
print('+')

-- gsub with string replacements
assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo')
assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo')  -- trim
assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo')  -- double trim
assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
local t = "abç d"
a, b = string.gsub(t, PU'(.)', '%1@')
assert(a == "a@b@ç@ @d@" and b == 5)
a, b = string.gsub('abçd', PU'(.)', '%0@', 2)
assert(a == 'a@b@çd' and b == 2)
assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
              "xyz=abc-abc=xyz")
assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú')
assert(string.gsub('', '^', 'r') == 'r')
assert(string.gsub('', '$', 'r') == 'r')
print('+')


do   -- new (5.3.3) semantics for empty matches
  assert(string.gsub("a b cd", " *", "-") == "-a-b-c-d-")

  -- same substitution done by hand with gmatch and position captures
  local res = ""
  local sub = "a \nbc\t\td"
  local i = 1
  for p, e in string.gmatch(sub, "()%s*()") do
    res = res .. string.sub(sub, i, p - 1) .. "-"
    i = e
  end
  assert(res == "-a-b-c-d-")
end


-- gsub with function replacements
assert(string.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
            "um (DOIS) tres (QUATRO)")

do
  local function setglobal (n,v) rawset(_G, n, v) end
  string.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", setglobal)
  assert(_G.a=="roberto" and _G.roberto=="a")
  _G.a = nil; _G.roberto = nil
end

-- NOTE: this assigns to the local 'f' declared near the top of the file
-- (it does not create a global); from here on 'f' replaces every
-- character of 'a' by 'b'.
function f(a,b) return string.gsub(a,'.',b) end
assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
            "trocar tudo em bbbbb é alalalalalal")

-- Compiles and runs chunk 's'; returns its first result, or the empty
-- string when that result is nil or false.
local function dostring (s) return load(s, "")() or "" end
-- The chunk "a='x'" returns nothing, so its whole "$...$" span is replaced
-- by "" and BOTH surrounding spaces remain: the expected result has two
-- spaces between "alo" and "novamente".
assert(string.gsub("alo $a='x'$ novamente $return a$",
                   "$([^$]*)%$",
                   dostring) == "alo  novamente x")

local x = string.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
         "$([^$]*)%$", dostring)
assert(x == ' assim vai para ALO')
_G.a, _G.x = nil    -- remove the globals created by the chunks above

local t = {}
local s = 'a alo jose joao'
local r = string.gsub(s, '()(%w+)()', function (a,w,b)
              assert(string.len(w) == b-a);
              t[a] = b-a;
            end)
-- the words of 's' start at byte positions 1, 3, 7 and 12
assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[12] == 4)


-- Returns true when no parenthesis is left in 's' after removing every
-- balanced "(...)" group matched by '%b()'.
local function isbalanced (s)
  return not string.find(string.gsub(s, "%b()", ""), "[()]")
end

assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))
assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo')


local t = {"apple", "orange", "lime"; n=0}
assert(string.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
        == "apple and orange and lime")

t = {n=0}
string.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3)

-- with a limit of 2 substitutions the callback must run only twice
t = {n=0}
assert(string.gsub("first second word", "%w+",
         function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word")
-- 'undef' is not defined in this file; presumably it is an unset global
-- (i.e. nil) -- TODO confirm against the test driver.
assert(t[1] == "first" and t[2] == "second" and t[3] == undef)

-- errors in replacement values and in capture indices
checkerror("invalid replacement value %(a table%)",
            string.gsub, "alo", ".", {a = {}})
checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2")
checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a")
checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a")
checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x")


-- skipped when the global '_soft' is set
if not _soft then
  print("big strings")
  local a = string.rep('a', 300000)
  assert(string.find(a, '^a*.?$'))
  assert(not string.find(a, '^a*.?b$'))
  assert(string.find(a, '^a-.?$'))

  -- bug in 5.1.2
  a = string.rep('a', 10000) .. string.rep('b', 10000)
  -- gsub is called without a replacement argument and must raise an error
  assert(not pcall(string.gsub, a, 'b'))
end

-- recursive nest of gsubs
-- (reverses 's': the first character is moved after the reversed rest;
-- strings shorter than two characters do not match and are unchanged)
local function rev (s)
  return string.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end)
end

local x = "abcdef"
assert(rev(rev(x)) == x)


-- gsub with tables
assert(string.gsub("alo alo", ".", {}) == "alo alo")
assert(string.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo")
assert(string.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo")
assert(string.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo")

assert(string.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo")

t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")


-- tests for gmatch
local a = 0
for i in string.gmatch('abcde', '()') do assert(i == a+1); a=i end
assert(a==6)

t = {n=0}
for w in string.gmatch("first second word", "%w+") do
  t.n=t.n+1; t[t.n] = w
end
assert(t[1] == "first" and t[2] == "second" and t[3] == "word")

t = {3, 6, 9}
for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do
  assert(i == table.remove(t, 1))
end
assert(#t == 0)

t = {}
for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do
  t[tonumber(i)] = tonumber(j)
end
a = 0
for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
assert(a == 3)


do   -- init parameter in gmatch
  local s = 0
  for k in string.gmatch("10 20 30", "%d+", 3) do
    s = s + tonumber(k)
  end
  assert(s == 50)

  s = 0
  for k in string.gmatch("11 21 31", "%d+", -4) do
    s = s + tonumber(k)
  end
  assert(s == 32)

  -- there is an empty string at the end of the subject
  s = 0
  for k in string.gmatch("11 21 31", "%w*", 9) do
    s = s + 1
  end
  assert(s == 1)

  -- there are no empty strings after the end of the subject
  s = 0
  for k in string.gmatch("11 21 31", "%w*", 10) do
    s = s + 1
  end
  assert(s == 0)
end


-- tests for `%f' (`frontiers')

assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
assert(string.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[")
assert(string.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3")
assert(string.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.")
assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction")
assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.")

assert(string.find("a", "%f[a]") == 1)
assert(string.find("a", "%f[^%z]") == 1)
assert(string.find("a", "%f[^%l]") == 2)
assert(string.find("aba", "%f[a%z]") == 3)
assert(string.find("aba", "%f[%z]") == 4)
assert(not string.find("aba", "%f[%l%z]"))
assert(not string.find("aba", "%f[^%l%z]"))

local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
assert(i == 2 and e == 5)
local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
assert(k == 'alo ')

-- expected start position of each word, consumed in order
local a = {1, 5, 9, 14, 17,}
for k in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
  assert(table.remove(a, 1) == k)
end
assert(#a == 0)


-- malformed patterns
-- Asserts that using 'p' as a pattern raises an error whose message
-- matches 'm' (default "malformed").
local function malform (p, m)
  m = m or "malformed"
  local r, msg = pcall(string.find, "a", p)
  assert(not r and string.find(msg, m))
end

malform("(.", "unfinished capture")
malform(".)", "invalid pattern capture")
malform("[a")
malform("[]")
malform("[^]")
malform("[a%]")
malform("[a%")
malform("%b")
malform("%ba")
malform("%")
malform("%f", "missing")

-- \0 in patterns
assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2")
assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0")
assert(string.find("b$a", "$\0?") == 2)
assert(string.find("abc\0efg", "%\0") == 4)
assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1")
assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0")
assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")

-- magic char after \0
assert(string.find("abc\0\0","\0.") == 4)
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)


do   -- test reuse of original string in gsub
  -- (the '%p' results of two strings are compared to tell whether
  -- gsub returned the original string or a new one)
  local s = string.rep("a", 100)
  local r = string.gsub(s, "b", "c")   -- no match
  assert(string.format("%p", s) == string.format("%p", r))

  r = string.gsub(s, ".", {x = "y"})   -- no substitutions
  assert(string.format("%p", s) == string.format("%p", r))

  local count = 0
  r = string.gsub(s, ".", function (x)
                            assert(x == "a")
                            count = count + 1
                            return nil    -- no substitution
                          end)
  r = string.gsub(r, ".", {b = 'x'})   -- "a" is not a key; no subst.
  assert(count == 100)
  assert(string.format("%p", s) == string.format("%p", r))

  count = 0
  r = string.gsub(s, ".", function (x)
                            assert(x == "a")
                            count = count + 1
                            return x    -- substitution...
                          end)
  assert(count == 100)
  -- no reuse in this case
  assert(r == s and string.format("%p", s) ~= string.format("%p", r))
end

print('OK')