commit a4b96ce9a3305ae3585c0bb143fa7342c140f20b
parent 291f564485d8968fc7b0d043dda5ff91a7ce604b
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Wed, 25 Jan 2012 19:05:16 -0200
first implementation of long strings
Diffstat:
9 files changed, 144 insertions(+), 47 deletions(-)
diff --git a/lgc.c b/lgc.c
@@ -65,7 +65,11 @@
#define white2gray(x) resetbits(gch(x)->marked, WHITEBITS)
#define black2gray(x) resetbit(gch(x)->marked, BLACKBIT)
-#define stringmark(s) ((void)((s) && resetbits((s)->tsv.marked, WHITEBITS)))
+/*
+** dirty trick: we know that 'reallymarkobject' does not use 'g' when
+** object is a string
+*/
+#define stringmark(s) markobject(NULL, s)
#define isfinalized(x) testbit(gch(x)->marked, FINALIZEDBIT)
@@ -240,18 +244,18 @@ GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list,
/*
-** mark an object. Userdata and closed upvalues are visited and turned
-** black here. Strings remain gray (it is the same as making them
-** black). Other objects are marked gray and added to appropriate list
-** to be visited (and turned black) later. (Open upvalues are already
-** linked in 'headuv' list.)
+** mark an object. Userdata, strings, and closed upvalues are visited
+** and turned black here. Other objects are marked gray and added
+** to appropriate list to be visited (and turned black) later. (Open
+** upvalues are already linked in 'headuv' list.)
*/
static void reallymarkobject (global_State *g, GCObject *o) {
- lua_assert(iswhite(o) && !isdead(g, o));
white2gray(o);
switch (gch(o)->tt) {
- case LUA_TSTRING: {
- return; /* for strings, gray is as good as black */
+ case LUA_TSHRSTR:
+ case LUA_TLNGSTR: {
+ gray2black(o);
+ return; /* nothing else to mark */
}
case LUA_TUSERDATA: {
Table *mt = gco2u(o)->metatable;
@@ -663,8 +667,10 @@ static void freeobj (lua_State *L, GCObject *o) {
case LUA_TTABLE: luaH_free(L, gco2t(o)); break;
case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
- case LUA_TSTRING: {
+ case LUA_TSHRSTR:
G(L)->strt.nuse--;
+ /* go through */
+ case LUA_TLNGSTR: {
luaM_freemem(L, o, sizestring(gco2ts(o)));
break;
}
diff --git a/llimits.h b/llimits.h
@@ -1,5 +1,5 @@
/*
-** $Id: llimits.h,v 1.94 2011/11/29 15:39:48 roberto Exp roberto $
+** $Id: llimits.h,v 1.95 2011/12/06 16:58:36 roberto Exp roberto $
** Limits, basic types, and some other `installation-dependent' definitions
** See Copyright Notice in lua.h
*/
@@ -125,6 +125,15 @@ typedef LUAI_UACNUMBER l_uacNumber;
/*
+** maximum length for short strings, that is, strings that are
+** internalized. (Cannot be smaller than reserved words or tags
+** for metamethods; #"function" = 8, #"__newindex" = 10; should
+** not be larger than 255, to allow future changes)
+*/
+#define LUA_MAXSHORTLEN (8 * sizeof(void*))
+
+
+/*
** type for virtual-machine instructions
** must be an unsigned with (at least) 4 bytes (see details in lopcodes.h)
*/
diff --git a/lobject.h b/lobject.h
@@ -52,6 +52,12 @@
#define LUA_TCCL (LUA_TFUNCTION | (2 << 4)) /* C closure */
+/*
+** LUA_TSTRING variants */
+#define LUA_TSHRSTR (LUA_TSTRING | (0 << 4)) /* short strings */
+#define LUA_TLNGSTR (LUA_TSTRING | (1 << 4)) /* long strings */
+
+
/* Bit mark for collectable types */
#define BIT_ISCOLLECTABLE (1 << 6)
@@ -129,7 +135,9 @@ typedef struct lua_TValue TValue;
#define ttisnil(o) checktag((o), LUA_TNIL)
#define ttisboolean(o) checktag((o), LUA_TBOOLEAN)
#define ttislightuserdata(o) checktag((o), LUA_TLIGHTUSERDATA)
-#define ttisstring(o) checktag((o), ctb(LUA_TSTRING))
+#define ttisstring(o) checktype((o), LUA_TSTRING)
+#define ttisshrstring(o) checktag((o), ctb(LUA_TSHRSTR))
+#define ttislngstring(o) checktag((o), ctb(LUA_TLNGSTR))
#define ttistable(o) checktag((o), ctb(LUA_TTABLE))
#define ttisfunction(o) checktype(o, LUA_TFUNCTION)
#define ttisclosure(o) ((rttype(o) & 0x1F) == LUA_TFUNCTION)
@@ -199,7 +207,8 @@ typedef struct lua_TValue TValue;
#define setsvalue(L,obj,x) \
{ TValue *io=(obj); \
- val_(io).gc=cast(GCObject *, (x)); settt_(io, ctb(LUA_TSTRING)); \
+ TString *x_ = (x); \
+ val_(io).gc=cast(GCObject *, x_); settt_(io, ctb(x_->tsv.tt)); \
checkliveness(G(L),io); }
#define setuvalue(L,obj,x) \
@@ -409,7 +418,7 @@ typedef union TString {
L_Umaxalign dummy; /* ensures maximum alignment for strings */
struct {
CommonHeader;
- lu_byte extra; /* reserved words for strings */
+ lu_byte extra; /* reserved words for short strings; "has hash" for longs */
unsigned int hash;
size_t len; /* number of characters in string */
} tsv;
diff --git a/lstate.h b/lstate.h
@@ -1,5 +1,5 @@
/*
-** $Id: lstate.h,v 2.74 2011/09/30 12:45:07 roberto Exp roberto $
+** $Id: lstate.h,v 2.75 2012/01/20 22:05:50 roberto Exp roberto $
** Global State
** See Copyright Notice in lua.h
*/
@@ -193,7 +193,8 @@ union GCObject {
#define gch(o) (&(o)->gch)
/* macros to convert a GCObject into a specific value */
-#define rawgco2ts(o) check_exp((o)->gch.tt == LUA_TSTRING, &((o)->ts))
+#define rawgco2ts(o) \
+ check_exp(novariant((o)->gch.tt) == LUA_TSTRING, &((o)->ts))
#define gco2ts(o) (&rawgco2ts(o)->tsv)
#define rawgco2u(o) check_exp((o)->gch.tt == LUA_TUSERDATA, &((o)->u))
#define gco2u(o) (&rawgco2u(o)->uv)
diff --git a/lstring.c b/lstring.c
@@ -18,7 +18,37 @@
#include "lstring.h"
+/*
+** equality for long strings
+*/
+int luaS_eqlngstr (TString *a, TString *b) {
+ size_t len = a->tsv.len;
+ lua_assert(a->tsv.tt == LUA_TLNGSTR && b->tsv.tt == LUA_TLNGSTR);
+ return (len == b->tsv.len) && (memcmp(getstr(a), getstr(b), len) == 0);
+}
+
+
+/*
+** equality for strings
+*/
+int luaS_eqstr (TString *a, TString *b) {
+ return (a->tsv.tt == b->tsv.tt) &&
+ (a->tsv.tt == LUA_TSHRSTR ? eqshrstr(a, b) : luaS_eqlngstr(a, b));
+}
+
+
+unsigned int luaS_hash (const char *str, size_t l) {
+ unsigned int h = cast(unsigned int, l); /* seed */
+ size_t l1;
+ for (l1 = 0; l1 < l; l1++)
+ h = h ^ ((h<<5) + (h>>2) + cast_byte(str[l1]));
+ return h;
+}
+
+/*
+** resizes the string table
+*/
void luaS_resize (lua_State *L, int newsize) {
int i;
stringtable *tb = &G(L)->strt;
@@ -50,36 +80,47 @@ void luaS_resize (lua_State *L, int newsize) {
}
-static TString *newlstr (lua_State *L, const char *str, size_t l,
- unsigned int h) {
- size_t totalsize; /* total size of TString object */
- GCObject **list; /* (pointer to) list where it will be inserted */
+/*
+** creates a new string object
+*/
+static TString *createstrobj (lua_State *L, const char *str, size_t l,
+ int tag, unsigned int h, GCObject **list) {
TString *ts;
- stringtable *tb = &G(L)->strt;
- if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
- luaM_toobig(L);
- if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
- luaS_resize(L, tb->size*2); /* too crowded */
+ size_t totalsize; /* total size of TString object */
totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
- list = &tb->hash[lmod(h, tb->size)];
- ts = &luaC_newobj(L, LUA_TSTRING, totalsize, list, 0)->ts;
+ ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts;
ts->tsv.len = l;
ts->tsv.hash = h;
ts->tsv.extra = 0;
memcpy(ts+1, str, l*sizeof(char));
((char *)(ts+1))[l] = '\0'; /* ending 0 */
- tb->nuse++;
return ts;
}
-TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
+/*
+** creates a new short string, inserting it into string table
+*/
+static TString *newshrstr (lua_State *L, const char *str, size_t l,
+ unsigned int h) {
+ GCObject **list; /* (pointer to) list where it will be inserted */
+ stringtable *tb = &G(L)->strt;
+ TString *s;
+ if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
+ luaS_resize(L, tb->size*2); /* too crowded */
+ list = &tb->hash[lmod(h, tb->size)];
+ s = createstrobj(L, str, l, LUA_TSHRSTR, h, list);
+ tb->nuse++;
+ return s;
+}
+
+
+/*
+** checks whether short string exists and reuses it or creates a new one
+*/
+static TString *internshrstr (lua_State *L, const char *str, size_t l) {
GCObject *o;
- unsigned int h = cast(unsigned int, l); /* seed */
- size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
- size_t l1;
- for (l1=l; l1>=step; l1-=step) /* compute hash */
- h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
+ unsigned int h = luaS_hash(str, l);
for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
o != NULL;
o = gch(o)->next) {
@@ -92,10 +133,27 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
return ts;
}
}
- return newlstr(L, str, l, h); /* not found; create a new string */
+ return newshrstr(L, str, l, h); /* not found; create a new string */
}
+/*
+** new string (with explicit length)
+*/
+TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
+ if (l <= LUA_MAXSHORTLEN) /* short string? */
+ return internshrstr(L, str, l);
+ else {
+ if (l + 1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
+ luaM_toobig(L);
+ return createstrobj(L, str, l, LUA_TLNGSTR, 0, NULL);
+ }
+}
+
+
+/*
+** new zero-terminated string
+*/
TString *luaS_new (lua_State *L, const char *str) {
return luaS_newlstr(L, str, strlen(str));
}
diff --git a/lstring.h b/lstring.h
@@ -25,15 +25,18 @@
/*
** test whether a string is a reserved word
*/
-#define isreserved(s) ((s)->tsv.extra > 0)
+#define isreserved(s) ((s)->tsv.tt == LUA_TSHRSTR && (s)->tsv.extra > 0)
/*
-** equality for strings, which are always internalized
+** equality for short strings, which are always internalized
*/
-#define luaS_eqstr(a,b) ((a) == (b))
+#define eqshrstr(a,b) check_exp((a)->tsv.tt == LUA_TSHRSTR, (a) == (b))
+LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l);
+LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
+LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
diff --git a/ltable.c b/ltable.c
@@ -50,7 +50,7 @@
#define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t))))
-#define hashstr(t,str) hashpow2(t, (str)->tsv.hash)
+#define hashstr(t,str) hashpow2(t, (str)->tsv.hash)
#define hashboolean(t,p) hashpow2(t, p)
@@ -98,7 +98,15 @@ static Node *mainposition (const Table *t, const TValue *key) {
switch (ttype(key)) {
case LUA_TNUMBER:
return hashnum(t, nvalue(key));
- case LUA_TSTRING:
+ case LUA_TLNGSTR: {
+ TString *s = rawtsvalue(key);
+ if (s->tsv.extra == 0) { /* no hash? */
+ s->tsv.hash = luaS_hash(getstr(s), s->tsv.len);
+ s->tsv.extra = 1; /* now it has its hash */
+ }
+ return hashstr(t, rawtsvalue(key));
+ }
+ case LUA_TSHRSTR:
return hashstr(t, rawtsvalue(key));
case LUA_TBOOLEAN:
return hashboolean(t, bvalue(key));
@@ -453,12 +461,13 @@ const TValue *luaH_getint (Table *t, int key) {
/*
-** search function for strings
+** search function for short strings
*/
const TValue *luaH_getstr (Table *t, TString *key) {
Node *n = hashstr(t, key);
+ lua_assert(key->tsv.tt == LUA_TSHRSTR);
do { /* check whether `key' is somewhere in the chain */
- if (ttisstring(gkey(n)) && luaS_eqstr(rawtsvalue(gkey(n)), key))
+ if (ttisshrstring(gkey(n)) && eqshrstr(rawtsvalue(gkey(n)), key))
return gval(n); /* that's it */
else n = gnext(n);
} while (n);
@@ -470,9 +479,9 @@ const TValue *luaH_getstr (Table *t, TString *key) {
** main search function
*/
const TValue *luaH_get (Table *t, const TValue *key) {
- switch (ttypenv(key)) {
+ switch (ttype(key)) {
case LUA_TNIL: return luaO_nilobject;
- case LUA_TSTRING: return luaH_getstr(t, rawtsvalue(key));
+ case LUA_TSHRSTR: return luaH_getstr(t, rawtsvalue(key));
case LUA_TNUMBER: {
int k;
lua_Number n = nvalue(key);
diff --git a/ltests.c b/ltests.c
@@ -1,5 +1,5 @@
/*
-** $Id: ltests.c,v 2.124 2011/11/09 19:08:07 roberto Exp roberto $
+** $Id: ltests.c,v 2.125 2012/01/20 22:05:50 roberto Exp roberto $
** Internal Module for Debugging of the Lua Implementation
** See Copyright Notice in lua.h
*/
@@ -360,7 +360,8 @@ static void checkobject (global_State *g, GCObject *o) {
checkproto(g, gco2p(o));
break;
}
- case LUA_TSTRING: break;
+ case LUA_TSHRSTR:
+ case LUA_TLNGSTR: break;
default: lua_assert(0);
}
}
diff --git a/lvm.c b/lvm.c
@@ -258,7 +258,8 @@ int luaV_equalobj_ (lua_State *L, const TValue *t1, const TValue *t2) {
case LUA_TBOOLEAN: return bvalue(t1) == bvalue(t2); /* true must be 1 !! */
case LUA_TLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
case LUA_TLCF: return fvalue(t1) == fvalue(t2);
- case LUA_TSTRING: return luaS_eqstr(rawtsvalue(t1), rawtsvalue(t2));
+ case LUA_TSHRSTR: return eqshrstr(rawtsvalue(t1), rawtsvalue(t2));
+ case LUA_TLNGSTR: return luaS_eqlngstr(rawtsvalue(t1), rawtsvalue(t2));
case LUA_TUSERDATA: {
if (uvalue(t1) == uvalue(t2)) return 1;
else if (L == NULL) return 0;