commit d70a0c91ad42275af1f6f1b6e37c604442b3f0d1
parent 3e6818ca87b8d7aa007e6992295956a92bb89de4
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Thu, 15 Dec 2022 16:43:55 -0300
Dump/undump reuse strings
A repeated string in a dump is represented as an index to its first
occurrence, instead of another copy of the string.
Diffstat:
5 files changed, 77 insertions(+), 12 deletions(-)
diff --git a/lapi.c b/lapi.c
@@ -1107,16 +1107,37 @@ LUA_API int lua_load (lua_State *L, lua_Reader reader, void *data,
}
+/*
+** Dump a function, calling 'writer' to write its parts. Because the
+** writer can use the stack in unknown ways, this function should not
+** push things on the stack, but it must anchor an auxiliary table
+** used by 'luaU_dump'. To do so, it creates the table, anchors the
+** function that is on the stack in the table, and substitutes the
+** table for the function in the stack.
+*/
+
LUA_API int lua_dump (lua_State *L, lua_Writer writer, void *data, int strip) {
int status;
+ StkId fstk; /* pointer to function */
TValue *o;
lua_lock(L);
api_checknelems(L, 1);
- o = s2v(L->top.p - 1);
- if (isLfunction(o))
- status = luaU_dump(L, getproto(o), writer, data, strip);
- else
+ fstk = L->top.p - 1; /* stack slot of the topmost value (the candidate function) */
+ o = s2v(fstk);
+ if (!isLfunction(o)) /* any other value is not dumped; report failure */
status = 1;
+ else {
+ LClosure *f = clLvalue(o);
+ ptrdiff_t fidx = savestack(L, fstk); /* function index, kept in saved form so the slot can be located again with 'restorestack' after the dump */
+ Table *h = luaH_new(L); /* auxiliary table used by 'luaU_dump' */
+ sethvalue2s(L, L->top.p, h); /* anchor it (luaH_set may call GC) */
+ L->top.p++; /* (assume extra slot) */
+ luaH_set(L, h, o, o); /* anchor function into table (function is both key and value) */
+ setobjs2s(L, fstk, L->top.p - 1); /* move table over function */
+ L->top.p--; /* stack back to initial size */
+ status = luaU_dump(L, f->p, writer, data, strip, h); /* 'h' is handed to the dumper, which records in it the strings already written */
+ setclLvalue2s(L, restorestack(L, fidx), f); /* put function back */
+ }
lua_unlock(L);
return status;
}
diff --git a/ldump.c b/ldump.c
@@ -14,8 +14,10 @@
#include "lua.h"
+#include "lgc.h"
#include "lobject.h"
#include "lstate.h"
+#include "ltable.h"
#include "lundump.h"
@@ -25,6 +27,8 @@ typedef struct {
void *data;
int strip;
int status;
+ Table *h; /* table to track saved strings */
+ lua_Integer nstr; /* counter to number saved strings */
} DumpState;
@@ -85,14 +89,33 @@ static void dumpInteger (DumpState *D, lua_Integer x) {
}
-static void dumpString (DumpState *D, const TString *s) {
+/*
+** Dump a String. First dump its "size": size==0 means NULL;
+** size==1 is followed by an index and means "reuse saved string with
+** that index"; size>=2 is followed by the string contents with real
+** size==size-2 and means that string, which will be saved with
+** the next available index.
+*/
+static void dumpString (DumpState *D, TString *s) {
if (s == NULL)
dumpSize(D, 0);
else {
- size_t size = tsslen(s);
- const char *str = getstr(s);
- dumpSize(D, size + 1);
- dumpVector(D, str, size);
+ const TValue *idx = luaH_getstr(D->h, s); /* look 's' up in the table of saved strings */
+ if (ttisinteger(idx)) { /* string already saved? */
+ dumpSize(D, 1); /* reuse a saved string */
+ dumpInt(D, ivalue(idx)); /* index of saved string; NOTE(review): the index originates from the lua_Integer counter 'nstr' -- confirm 'dumpInt' cannot truncate it */
+ }
+ else { /* must write and save the string */
+ TValue key, value; /* to save the string in the hash */
+ size_t size = tsslen(s);
+ dumpSize(D, size + 2); /* sizes 0 and 1 are reserved markers, so the real size is written offset by 2 */
+ dumpVector(D, getstr(s), size);
+ D->nstr++; /* one more saved string */
+ setsvalue(D->L, &key, s); /* the string is the key */
+ setivalue(&value, D->nstr); /* its index is the value */
+ luaH_finishset(D->L, D->h, &key, idx, &value); /* h[s] = nstr ('idx' is the result of the failed lookup above) */
+ /* integer value does not need barrier */
+ }
}
}
@@ -211,13 +234,15 @@ static void dumpHeader (DumpState *D) {
** dump Lua function as precompiled chunk
*/
int luaU_dump(lua_State *L, const Proto *f, lua_Writer w, void *data,
- int strip) {
+ int strip, Table *h) {
DumpState D;
D.L = L;
D.writer = w;
D.data = data;
D.strip = strip;
D.status = 0;
+ D.h = h;
+ D.nstr = 0;
dumpHeader(&D);
dumpByte(&D, f->sizeupvalues);
dumpFunction(&D, f, NULL);
diff --git a/lstrlib.c b/lstrlib.c
@@ -239,6 +239,7 @@ static int str_dump (lua_State *L) {
if (l_unlikely(lua_dump(L, writer, &state, strip) != 0))
return luaL_error(L, "unable to dump given function");
luaL_pushresult(&state.B);
+ lua_assert(lua_isfunction(L, 1)); /* lua_dump kept that value */
return 1;
}
diff --git a/lundump.c b/lundump.c
@@ -21,6 +21,7 @@
#include "lmem.h"
#include "lobject.h"
#include "lstring.h"
+#include "ltable.h"
#include "lundump.h"
#include "lzio.h"
@@ -34,6 +35,8 @@ typedef struct {
lua_State *L;
ZIO *Z;
const char *name;
+ Table *h; /* list for string reuse */
+ lua_Integer nstr; /* number of strings in the list */
} LoadState;
@@ -110,10 +113,16 @@ static lua_Integer loadInteger (LoadState *S) {
static TString *loadStringN (LoadState *S, Proto *p) {
lua_State *L = S->L;
TString *ts;
+ TValue sv;
size_t size = loadSize(S);
if (size == 0) /* no string? */
return NULL;
- else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */
+ else if (size == 1) { /* previously saved string? */
+ int idx = loadInt(S); /* get its index */
+ const TValue *stv = luaH_getint(S->h, idx);
+ return tsvalue(stv);
+ }
+ else if (size -= 2, size <= LUAI_MAXSHORTLEN) { /* short string? */
char buff[LUAI_MAXSHORTLEN];
loadVector(S, buff, size); /* load string into buffer */
ts = luaS_newlstr(L, buff, size); /* create string */
@@ -126,6 +135,10 @@ static TString *loadStringN (LoadState *S, Proto *p) {
L->top.p--; /* pop string */
}
luaC_objbarrier(L, p, ts);
+ S->nstr++; /* add string to list of saved strings */
+ setsvalue(L, &sv, ts);
+ luaH_setint(L, S->h, S->nstr, &sv);
+ luaC_objbarrierback(L, obj2gco(S->h), ts);
return ts;
}
@@ -323,11 +336,16 @@ LClosure *luaU_undump(lua_State *L, ZIO *Z, const char *name) {
cl = luaF_newLclosure(L, loadByte(&S));
setclLvalue2s(L, L->top.p, cl);
luaD_inctop(L);
+ S.h = luaH_new(L); /* create list of saved strings */
+ S.nstr = 0;
+ sethvalue2s(L, L->top.p, S.h); /* anchor it */
+ luaD_inctop(L);
cl->p = luaF_newproto(L);
luaC_objbarrier(L, cl, cl->p);
loadFunction(&S, cl->p, NULL);
lua_assert(cl->nupvalues == cl->p->sizeupvalues);
luai_verifycode(L, cl->p);
+ L->top.p--; /* pop table */
return cl;
}
diff --git a/lundump.h b/lundump.h
@@ -31,6 +31,6 @@ LUAI_FUNC LClosure* luaU_undump (lua_State* L, ZIO* Z, const char* name);
/* dump one chunk; from ldump.c */
LUAI_FUNC int luaU_dump (lua_State* L, const Proto* f, lua_Writer w,
- void* data, int strip);
+ void* data, int strip, Table *h);
#endif