commit b0810c51c3f075cc8a309bfb3c1714ac42b0f020
parent a93e0144479f1eb0ac19b8c31862f4cbc2fbe1c4
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Thu, 11 Apr 2019 11:28:49 -0300
Small optimizations in 'string.gsub'
Avoid creating extra strings when possible:
- avoid creating new resulting string when subject was not modified
(instead, return the subject itself);
- avoid creating strings representing the captured substrings when
handling replacements like '%1' (instead, add the substring directly
to the buffer).
Diffstat:
3 files changed, 115 insertions(+), 47 deletions(-)
diff --git a/lstrlib.c b/lstrlib.c
@@ -660,25 +660,46 @@ static const char *lmemfind (const char *s1, size_t l1,
}
-static void push_onecapture (MatchState *ms, int i, const char *s,
- const char *e) {
+/*
+** Get information about the i-th capture (0-based). If 'i' is at or
+** beyond 'ms->level', it must be 0 and the result is the whole match
+** (the range 's'..'e'); any other such index raises an error. For a
+** string capture, return its length and store its start in '*cap'. For
+** a position capture, push the position on the stack, return CAP_POSITION.
+*/
+static size_t get_onecapture (MatchState *ms, int i, const char *s,
+ const char *e, const char **cap) {
if (i >= ms->level) {
- if (i == 0) /* ms->level == 0, too */
- lua_pushlstring(ms->L, s, e - s); /* add whole match */
- else
+ if (i != 0) /* only index 0 may stand for the whole match */
luaL_error(ms->L, "invalid capture index %%%d", i + 1);
+ *cap = s; /* whole match starts at 's' */
+ return e - s; /* length of the whole match */
}
else {
- ptrdiff_t l = ms->capture[i].len;
- if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
- if (l == CAP_POSITION)
+ ptrdiff_t capl = ms->capture[i].len; /* a length or a CAP_* marker */
+ *cap = ms->capture[i].init; /* start of the capture in the subject */
+ if (capl == CAP_UNFINISHED) /* capture was opened but never closed */
+ luaL_error(ms->L, "unfinished capture");
+ else if (capl == CAP_POSITION) /* push 1-based offset from 'src_init' */
lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1);
- else
- lua_pushlstring(ms->L, ms->capture[i].init, l);
+ return capl; /* NOTE(review): returned as size_t, but callers store it in ptrdiff_t to compare with CAP_POSITION - confirm */
}
}
+/*
+** Push the i-th capture on the stack: a string for a string capture or the whole match 's'..'e', an integer for a position.
+*/
+static void push_onecapture (MatchState *ms, int i, const char *s,
+ const char *e) {
+ const char *cap; /* receives the start of the capture text */
+ ptrdiff_t l = get_onecapture(ms, i, s, e, &cap); /* length or CAP_POSITION */
+ if (l != CAP_POSITION) /* string capture (or whole match) */
+ lua_pushlstring(ms->L, cap, l);
+ /* else position was already pushed by 'get_onecapture' */
+}
+
+
static int push_captures (MatchState *ms, const char *s, const char *e) {
int i;
int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
@@ -817,60 +838,72 @@ static int gmatch (lua_State *L) {
static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
const char *e) {
- size_t l, i;
+ size_t l; /* bytes of the replacement string not yet processed */
lua_State *L = ms->L;
const char *news = lua_tolstring(L, 3, &l);
- for (i = 0; i < l; i++) {
- if (news[i] != L_ESC)
- luaL_addchar(b, news[i]);
- else {
- i++; /* skip ESC */
- if (!isdigit(uchar(news[i]))) {
- if (news[i] != L_ESC)
- luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
- luaL_addchar(b, news[i]);
- }
- else if (news[i] == '0')
- luaL_addlstring(b, s, e - s);
- else {
- push_onecapture(ms, news[i] - '1', s, e);
- luaL_tolstring(L, -1, NULL); /* if number, convert it to string */
- lua_remove(L, -2); /* remove original value */
- luaL_addvalue(b); /* add capture to accumulated result */
- }
+ const char *p; /* next ESC found in the rest of the replacement string */
+ while ((p = (char *)memchr(news, L_ESC, l)) != NULL) { /* expand each ESC sequence into 'b' */
+ luaL_addlstring(b, news, p - news); /* copy the literal text before the ESC in one go */
+ p++; /* skip ESC */
+ if (*p == L_ESC) /* '%%': a literal ESC character */
+ luaL_addchar(b, *p);
+ else if (*p == '0') /* '%0': the whole match 's'..'e' */
+ luaL_addlstring(b, s, e - s);
+ else if (isdigit(uchar(*p))) { /* '%n': the n-th capture */
+ const char *cap;
+ ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap); /* '%1' is capture index 0 */
+ if (resl == CAP_POSITION)
+ luaL_addvalue(b); /* add position to accumulated result */
+ else
+ luaL_addlstring(b, cap, resl); /* copy capture text directly; no intermediate Lua string */
}
+ else /* any other char after ESC; presumably also a trailing ESC followed by the final '\0' - confirm */
+ luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
+ l -= p + 1 - news; /* discount everything up to and including the escaped char */
+ news = p + 1; /* continue right after the escape sequence */
}
+ luaL_addlstring(b, news, l); /* no more escapes: copy the remaining text */
}
-static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
- const char *e, int tr) {
+/*
+** Add the replacement for the match 's'..'e' to the string buffer 'b'.
+** Return 1 if a replacement value was added (even one equal to the
+** match); 0 if a function/table gave nil or false and the match was kept.
+*/
+static int add_value (MatchState *ms, luaL_Buffer *b, const char *s,
+ const char *e, int tr) {
lua_State *L = ms->L;
switch (tr) {
- case LUA_TFUNCTION: {
+ case LUA_TFUNCTION: { /* call the function */
int n;
- lua_pushvalue(L, 3);
- n = push_captures(ms, s, e);
- lua_call(L, n, 1);
+ lua_pushvalue(L, 3); /* push the function */
+ n = push_captures(ms, s, e); /* all captures as arguments */
+ lua_call(L, n, 1); /* call it, keeping exactly one result */
break;
}
- case LUA_TTABLE: {
- push_onecapture(ms, 0, s, e);
+ case LUA_TTABLE: { /* index the table */
+ push_onecapture(ms, 0, s, e); /* first capture (or whole match) is the key */
lua_gettable(L, 3);
break;
}
default: { /* LUA_TNUMBER or LUA_TSTRING */
- add_s(ms, b, s, e);
- return;
+ add_s(ms, b, s, e); /* add expanded replacement string to the buffer */
+ return 1; /* something changed */
}
}
if (!lua_toboolean(L, -1)) { /* nil or false? */
- lua_pop(L, 1);
- lua_pushlstring(L, s, e - s); /* keep original text */
+ lua_pop(L, 1); /* remove value */
+ luaL_addlstring(b, s, e - s); /* keep original text, copied straight into the buffer */
+ return 0; /* no changes */
}
else if (!lua_isstring(L, -1))
- luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
- luaL_addvalue(b); /* add result to accumulator */
+ return luaL_error(L, "invalid replacement value (a %s)",
+ luaL_typename(L, -1)); /* result rejected by 'lua_isstring' */
+ else {
+ luaL_addvalue(b); /* add result to accumulator */
+ return 1; /* something changed (even if the result equals the match) */
+ }
}
@@ -883,6 +916,7 @@ static int str_gsub (lua_State *L) {
lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */
int anchor = (*p == '^');
lua_Integer n = 0; /* replacement count */
+ int changed = 0; /* change flag */
MatchState ms;
luaL_Buffer b;
luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
@@ -898,7 +932,7 @@ static int str_gsub (lua_State *L) {
reprepstate(&ms); /* (re)prepare state for new match */
if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */
n++;
- add_value(&ms, &b, src, e, tr); /* add replacement to buffer */
+ changed = add_value(&ms, &b, src, e, tr) | changed;
src = lastmatch = e;
}
else if (src < ms.src_end) /* otherwise, skip one character */
@@ -906,8 +940,12 @@ static int str_gsub (lua_State *L) {
else break; /* end of subject */
if (anchor) break;
}
- luaL_addlstring(&b, src, ms.src_end-src);
- luaL_pushresult(&b);
+ if (!changed) /* no changes? */
+ lua_pushvalue(L, 1); /* return original string */
+ else { /* something changed */
+ luaL_addlstring(&b, src, ms.src_end-src);
+ luaL_pushresult(&b); /* create and return new string */
+ }
lua_pushinteger(L, n); /* number of substitutions */
return 2;
}
diff --git a/testes/gc.lua b/testes/gc.lua
@@ -113,7 +113,7 @@ do
contCreate = 0
while contCreate <= limit do
a = contCreate .. "b";
- a = string.gsub(a, '(%d%d*)', string.upper)
+ a = string.gsub(a, '(%d%d*)', "%1 %1")
a = "a"
contCreate = contCreate+1
end
diff --git a/testes/pm.lua b/testes/pm.lua
@@ -387,5 +387,35 @@ assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
assert(string.find("abc\0\0","\0.") == 4)
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
+
+do -- test reuse of original string in gsub (same object returned when nothing is replaced)
+ local s = string.rep("a", 100) -- presumably long enough that equal content alone does not imply the same object; confirm
+ local r = string.gsub(s, "b", "c") -- no match
+ assert(string.format("%p", s) == string.format("%p", r)) -- same address: 'r' is 's' itself
+
+ r = string.gsub(s, ".", {x = "y"}) -- every char matches, but "a" is not a key: no substitutions
+ assert(string.format("%p", s) == string.format("%p", r))
+
+ local count = 0
+ r = string.gsub(s, ".", function (x)
+ assert(x == "a")
+ count = count + 1
+ return nil -- no substitution
+ end)
+ r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst.
+ assert(count == 100) -- the function was still called once per character
+ assert(string.format("%p", s) == string.format("%p", r)) -- still the original after both calls
+
+ count = 0
+ r = string.gsub(s, ".", function (x)
+ assert(x == "a")
+ count = count + 1
+ return x -- substitution, even though the value equals the match
+ end)
+ assert(count == 100)
+ -- no reuse in this case: equal content, but a newly created string
+ assert(r == s and string.format("%p", s) ~= string.format("%p", r))
+end
+
print('OK')