commit 7288528a1e081d101a1bc19346a974088b6b8315
parent 513559cc4760392b6fa33754c516683ef49dba22
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Wed, 1 Apr 2020 10:52:14 -0300
Short strings always use all bytes in the hash
Collisions in short strings occur just by their existence, when
internalizing them. (Collisions in long strings are caused/controlled
by the program, when adding them as keys to the same table.)
Diffstat:
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/lstate.c b/lstate.c
@@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) {
addbuff(buff, p, &h); /* local variable */
addbuff(buff, p, &lua_newstate); /* public function */
lua_assert(p == sizeof(buff));
- return luaS_hash(buff, p, h);
+ return luaS_hash(buff, p, h, 1);
}
#endif
diff --git a/lstring.c b/lstring.c
@@ -23,7 +23,7 @@
/*
-** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
+** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to
** compute its hash
*/
#if !defined(LUAI_HASHLIMIT)
@@ -50,9 +50,9 @@ int luaS_eqlngstr (TString *a, TString *b) {
}
-unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
+unsigned int luaS_hash (const char *str, size_t l, unsigned int seed,
+ size_t step) {
unsigned int h = seed ^ cast_uint(l);
- size_t step = (l >> LUAI_HASHLIMIT) + 1;
for (; l >= step; l -= step)
h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
return h;
@@ -62,7 +62,9 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
unsigned int luaS_hashlongstr (TString *ts) {
lua_assert(ts->tt == LUA_VLNGSTR);
if (ts->extra == 0) { /* no hash? */
- ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash);
+ size_t len = ts->u.lnglen;
+ size_t step = (len >> LUAI_HASHLIMIT) + 1;
+ ts->hash = luaS_hash(getstr(ts), len, ts->hash, step);
ts->extra = 1; /* now it has its hash */
}
return ts->hash;
@@ -199,7 +201,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString *ts;
global_State *g = G(L);
stringtable *tb = &g->strt;
- unsigned int h = luaS_hash(str, l, g->seed);
+ unsigned int h = luaS_hash(str, l, g->seed, 1);
TString **list = &tb->hash[lmod(h, tb->size)];
lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
for (ts = *list; ts != NULL; ts = ts->u.hnext) {
diff --git a/lstring.h b/lstring.h
@@ -37,7 +37,8 @@
#define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b))
-LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed);
+LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l,
+ unsigned int seed, size_t step);
LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts);
LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);