lua

A copy of the Lua development repository
Log | Files | Refs | README

commit 9a91fe1640ddbe5b55e7454541059372b971f400
parent 2491b87c10db530eac2f3d81cd39f95875d16cd5
Author: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date:   Thu, 14 Nov 2024 11:47:58 -0300

Add extra size when resizing tables with deleted keys

Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances.  See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (Martínez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.

Diffstat:
Mltable.c | 26+++++++++++++++++++++-----
Mtestes/nextvar.lua | 62+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/ltable.c b/ltable.c @@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) { ** Structure to count the keys in a table. ** 'total' is the total number of keys in the table. ** 'na' is the number of *array indices* in the table (see 'arrayindex'). +** 'deleted' is true if there are deleted nodes in the hash part. ** 'nums' is a "count array" where 'nums[i]' is the number of integer ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation ** of 'nums'. @@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) { typedef struct { unsigned total; unsigned na; + int deleted; unsigned nums[MAXABITS + 1]; } Counters; @@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) { /* -** Count keys in hash part of table 't'. +** Count keys in hash part of table 't'. As this only happens during +** a rehash, all nodes have been used. A node can have a nil value only +** if it was deleted after being created. */ static void numusehash (const Table *t, Counters *ct) { unsigned i = sizenode(t); unsigned total = 0; while (i--) { Node *n = &t->node[i]; - if (!isempty(gval(n))) { + if (isempty(gval(n))) { + /* entry was deleted; key cannot be nil */ + lua_assert(isdummy(t) || !keyisnil(n)); + ct->deleted = 1; + } + else { total++; if (keyisinteger(n)) countint(keyival(n), ct); @@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { unsigned asize; /* optimal size for array part */ Counters ct; unsigned i; + unsigned nsize; /* size for the hash part */ setlimittosize(t); /* reset counts */ for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; ct.na = 0; + ct.deleted = 0; ct.total = 1; /* count extra key */ if (ttisinteger(ek)) countint(ivalue(ek), &ct); /* extra key may go to array */ @@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { numusearray(t, &ct); /* count keys in array part */ asize = computesizes(&ct); /* compute new size for array part */ } + /* all keys not in the array part go to the hash part */ + nsize = ct.total - ct.na; + if (ct.deleted) { /* table has deleted entries? */ + /* insertion-deletion-insertion: give hash some extra size to + avoid constant resizings */ + nsize += nsize >> 2; + } /* resize the table to new computed sizes */ - luaH_resize(L, t, asize, ct.total - ct.na); + luaH_resize(L, t, asize, nsize); } - - /* ** }============================================================= */ diff --git a/testes/nextvar.lua b/testes/nextvar.lua @@ -23,6 +23,12 @@ local function printTable (t) end end ---------------------------------------------------------------- +local function countentries (t) + local e = 0 + for _ in pairs(t) do e = e + 1 end + return e +end +---------------------------------------------------------------- local function check (t, na, nh) @@ -115,6 +121,24 @@ do -- overflow (must wrap-around) assert(k == nil) end + +do + -- alternate insertions and deletions in an almost full hash. + -- In versions pre-5.5, that causes constant rehashings and + -- takes a long time to complete. + local a = {} + for i = 1, 2^11 - 1 do + a[i .. ""] = true + end + + for i = 1, 1e5 do + local key = i .. "." + a[key] = true + a[key] = nil + end + assert(countentries(a) == 2^11 - 1) +end + if not T then (Message or print) ('\n >>> testC not active: skipping tests for table sizes <<<\n') @@ -202,6 +226,23 @@ for i = 1,lim do check(a, 0, mp2(i)) end + +-- insert and delete elements until a rehash occurr. Caller must ensure +-- that a rehash will change the shape of the table. Must repeat because +-- the insertion may collide with the deleted element, and then there is +-- no rehash. +local function forcerehash (t) + local na, nh = T.querytab(t) + local i = 10000 + repeat + i = i + 1 + t[i] = true + t[i] = undef + local nna, nnh = T.querytab(t) + until nna ~= na or nnh ~= nh +end + + do local a = {} for i=1,16 do a[i] = i end @@ -212,7 +253,7 @@ do a[30] = undef check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] a[10] = 1 - for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash + forcerehash(a) check(a, 16, 1) for i=1,14 do a[i] = true; a[i] = undef end check(a, 16, 1) -- no rehash... @@ -242,6 +283,25 @@ do -- "almost sparse" arrays end +do + -- alternate insertions and deletions should give some extra + -- space for the hash part. Otherwise, a mix of insertions/deletions + -- could cause too many rehashes. (See the other test for "alternate + -- insertions and deletions" in this file.) + local a = {} + for i = 1, 256 do + a[i .. ""] = true + end + check(a, 0, 256) -- hash part is full + a["256"] = nil -- delete a key + forcerehash(a) + -- table has only 255 elements, but it got some extra space; + -- otherwise, almost each delete-insert would rehash the table again. + assert(countentries(a) == 255) + check(a, 0, 512) +end + + -- size tests for vararg lim = 35 local function foo (n, ...)