commit 9a91fe1640ddbe5b55e7454541059372b971f400
parent 2491b87c10db530eac2f3d81cd39f95875d16cd5
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Thu, 14 Nov 2024 11:47:58 -0300
Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances. See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (MartÃnez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.
Diffstat:
2 files changed, 82 insertions(+), 6 deletions(-)
diff --git a/ltable.c b/ltable.c
@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) {
** Structure to count the keys in a table.
** 'total' is the total number of keys in the table.
** 'na' is the number of *array indices* in the table (see 'arrayindex').
+** 'deleted' is true if there are deleted nodes in the hash part.
** 'nums' is a "count array" where 'nums[i]' is the number of integer
** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
** of 'nums'.
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) {
typedef struct {
unsigned total;
unsigned na;
+ int deleted;
unsigned nums[MAXABITS + 1];
} Counters;
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) {
/*
-** Count keys in hash part of table 't'.
+** Count keys in hash part of table 't'. As this only happens during
+** a rehash, all nodes have been used. A node can have a nil value only
+** if it was deleted after being created.
*/
static void numusehash (const Table *t, Counters *ct) {
unsigned i = sizenode(t);
unsigned total = 0;
while (i--) {
Node *n = &t->node[i];
- if (!isempty(gval(n))) {
+ if (isempty(gval(n))) {
+ /* entry was deleted; key cannot be nil */
+ lua_assert(isdummy(t) || !keyisnil(n));
+ ct->deleted = 1;
+ }
+ else {
total++;
if (keyisinteger(n))
countint(keyival(n), ct);
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
unsigned asize; /* optimal size for array part */
Counters ct;
unsigned i;
+ unsigned nsize; /* size for the hash part */
setlimittosize(t);
/* reset counts */
for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
ct.na = 0;
+ ct.deleted = 0;
ct.total = 1; /* count extra key */
if (ttisinteger(ek))
countint(ivalue(ek), &ct); /* extra key may go to array */
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
numusearray(t, &ct); /* count keys in array part */
asize = computesizes(&ct); /* compute new size for array part */
}
+ /* all keys not in the array part go to the hash part */
+ nsize = ct.total - ct.na;
+ if (ct.deleted) { /* table has deleted entries? */
+ /* insertion-deletion-insertion: give hash some extra size to
+ avoid constant resizings */
+ nsize += nsize >> 2;
+ }
/* resize the table to new computed sizes */
- luaH_resize(L, t, asize, ct.total - ct.na);
+ luaH_resize(L, t, asize, nsize);
}
-
-
/*
** }=============================================================
*/
diff --git a/testes/nextvar.lua b/testes/nextvar.lua
@@ -23,6 +23,12 @@ local function printTable (t)
end
end
----------------------------------------------------------------
+local function countentries (t)
+ local e = 0
+ for _ in pairs(t) do e = e + 1 end
+ return e
+end
+----------------------------------------------------------------
local function check (t, na, nh)
@@ -115,6 +121,24 @@ do -- overflow (must wrap-around)
assert(k == nil)
end
+
+do
+ -- alternate insertions and deletions in an almost full hash.
+ -- In versions pre-5.5, that causes constant rehashings and
+ -- takes a long time to complete.
+ local a = {}
+ for i = 1, 2^11 - 1 do
+ a[i .. ""] = true
+ end
+
+ for i = 1, 1e5 do
+ local key = i .. "."
+ a[key] = true
+ a[key] = nil
+ end
+ assert(countentries(a) == 2^11 - 1)
+end
+
if not T then
(Message or print)
('\n >>> testC not active: skipping tests for table sizes <<<\n')
@@ -202,6 +226,23 @@ for i = 1,lim do
check(a, 0, mp2(i))
end
+
+-- insert and delete elements until a rehash occurr. Caller must ensure
+-- that a rehash will change the shape of the table. Must repeat because
+-- the insertion may collide with the deleted element, and then there is
+-- no rehash.
+local function forcerehash (t)
+ local na, nh = T.querytab(t)
+ local i = 10000
+ repeat
+ i = i + 1
+ t[i] = true
+ t[i] = undef
+ local nna, nnh = T.querytab(t)
+ until nna ~= na or nnh ~= nh
+end
+
+
do
local a = {}
for i=1,16 do a[i] = i end
@@ -212,7 +253,7 @@ do
a[30] = undef
check(a, 0, 8) -- 5 elements in the hash part: [12]-[16]
a[10] = 1
- for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash
+ forcerehash(a)
check(a, 16, 1)
for i=1,14 do a[i] = true; a[i] = undef end
check(a, 16, 1) -- no rehash...
@@ -242,6 +283,25 @@ do -- "almost sparse" arrays
end
+do
+ -- alternate insertions and deletions should give some extra
+ -- space for the hash part. Otherwise, a mix of insertions/deletions
+ -- could cause too many rehashes. (See the other test for "alternate
+ -- insertions and deletions" in this file.)
+ local a = {}
+ for i = 1, 256 do
+ a[i .. ""] = true
+ end
+ check(a, 0, 256) -- hash part is full
+ a["256"] = nil -- delete a key
+ forcerehash(a)
+ -- table has only 255 elements, but it got some extra space;
+ -- otherwise, almost each delete-insert would rehash the table again.
+ assert(countentries(a) == 255)
+ check(a, 0, 512)
+end
+
+
-- size tests for vararg
lim = 35
local function foo (n, ...)