ltable.c revision 344220
1/*
2** $Id: ltable.c,v 2.118.1.4 2018/06/08 16:22:51 roberto Exp $
3** Lua tables (hash)
4** See Copyright Notice in lua.h
5*/
6
7#define ltable_c
8#define LUA_CORE
9
10#include "lprefix.h"
11
12
13/*
14** Implementation of tables (aka arrays, objects, or hash tables).
15** Tables keep its elements in two parts: an array part and a hash part.
16** Non-negative integer keys are all candidates to be kept in the array
17** part. The actual size of the array is the largest 'n' such that
18** more than half the slots between 1 and n are in use.
19** Hash uses a mix of chained scatter table with Brent's variation.
20** A main invariant of these tables is that, if an element is not
21** in its main position (i.e. the 'original' position that its hash gives
22** to it), then the colliding element is in its own main position.
23** Hence even when the load factor reaches 100%, performance remains good.
24*/
25
26#include <math.h>
27#include <limits.h>
28
29#include "lua.h"
30
31#include "ldebug.h"
32#include "ldo.h"
33#include "lgc.h"
34#include "lmem.h"
35#include "lobject.h"
36#include "lstate.h"
37#include "lstring.h"
38#include "ltable.h"
39#include "lvm.h"
40
41
42/*
43** Maximum size of array part (MAXASIZE) is 2^MAXABITS. MAXABITS is
44** the largest integer such that MAXASIZE fits in an unsigned int.
45*/
46#define MAXABITS	cast_int(sizeof(int) * CHAR_BIT - 1)
47#define MAXASIZE	(1u << MAXABITS)
48
49/*
50** Maximum size of hash part is 2^MAXHBITS. MAXHBITS is the largest
51** integer such that 2^MAXHBITS fits in a signed int. (Note that the
52** maximum number of elements in a table, 2^MAXABITS + 2^MAXHBITS, still
53** fits comfortably in an unsigned int.)
54*/
55#define MAXHBITS	(MAXABITS - 1)
56
57
58#define hashpow2(t,n)		(gnode(t, lmod((n), sizenode(t))))
59
60#define hashstr(t,str)		hashpow2(t, (str)->hash)
61#define hashboolean(t,p)	hashpow2(t, p)
62#define hashint(t,i)		hashpow2(t, i)
63
64
65/*
66** for some types, it is better to avoid modulus by power of 2, as
67** they tend to have many 2 factors.
68*/
69#define hashmod(t,n)	(gnode(t, ((n) % ((sizenode(t)-1)|1))))
70
71
72#define hashpointer(t,p)	hashmod(t, point2uint(p))
73
74
75#define dummynode		(&dummynode_)
76
77static const Node dummynode_ = {
78  {NILCONSTANT},  /* value */
79  {{NILCONSTANT, 0}}  /* key */
80};
81
82
83/*
84** Hash for floating-point numbers.
85** The main computation should be just
86**     n = frexp(n, &i); return (n * INT_MAX) + i
87** but there are some numerical subtleties.
88** In a two-complement representation, INT_MAX does not has an exact
89** representation as a float, but INT_MIN does; because the absolute
90** value of 'frexp' is smaller than 1 (unless 'n' is inf/NaN), the
91** absolute value of the product 'frexp * -INT_MIN' is smaller or equal
92** to INT_MAX. Next, the use of 'unsigned int' avoids overflows when
93** adding 'i'; the use of '~u' (instead of '-u') avoids problems with
94** INT_MIN.
95*/
96#if !defined(l_hashfloat)
97static int l_hashfloat (lua_Number n) {
98  int i;
99  lua_Integer ni;
100  n = l_mathop(frexp)(n, &i) * -cast_num(INT_MIN);
101  if (!lua_numbertointeger(n, &ni)) {  /* is 'n' inf/-inf/NaN? */
102    lua_assert(luai_numisnan(n) || l_mathop(fabs)(n) == cast_num(HUGE_VAL));
103    return 0;
104  }
105  else {  /* normal case */
106    unsigned int u = cast(unsigned int, i) + cast(unsigned int, ni);
107    return cast_int(u <= cast(unsigned int, INT_MAX) ? u : ~u);
108  }
109}
110#endif
111
112
113/*
114** returns the 'main' position of an element in a table (that is, the index
115** of its hash value)
116*/
117static Node *mainposition (const Table *t, const TValue *key) {
118  switch (ttype(key)) {
119    case LUA_TNUMINT:
120      return hashint(t, ivalue(key));
121    case LUA_TNUMFLT:
122      return hashmod(t, l_hashfloat(fltvalue(key)));
123    case LUA_TSHRSTR:
124      return hashstr(t, tsvalue(key));
125    case LUA_TLNGSTR:
126      return hashpow2(t, luaS_hashlongstr(tsvalue(key)));
127    case LUA_TBOOLEAN:
128      return hashboolean(t, bvalue(key));
129    case LUA_TLIGHTUSERDATA:
130      return hashpointer(t, pvalue(key));
131    case LUA_TLCF:
132      return hashpointer(t, fvalue(key));
133    default:
134      lua_assert(!ttisdeadkey(key));
135      return hashpointer(t, gcvalue(key));
136  }
137}
138
139
140/*
141** returns the index for 'key' if 'key' is an appropriate key to live in
142** the array part of the table, 0 otherwise.
143*/
144static unsigned int arrayindex (const TValue *key) {
145  if (ttisinteger(key)) {
146    lua_Integer k = ivalue(key);
147    if (0 < k && (lua_Unsigned)k <= MAXASIZE)
148      return cast(unsigned int, k);  /* 'key' is an appropriate array index */
149  }
150  return 0;  /* 'key' did not match some condition */
151}
152
153
154/*
155** returns the index of a 'key' for table traversals. First goes all
156** elements in the array part, then elements in the hash part. The
157** beginning of a traversal is signaled by 0.
158*/
159static unsigned int findindex (lua_State *L, Table *t, StkId key) {
160  unsigned int i;
161  if (ttisnil(key)) return 0;  /* first iteration */
162  i = arrayindex(key);
163  if (i != 0 && i <= t->sizearray)  /* is 'key' inside array part? */
164    return i;  /* yes; that's the index */
165  else {
166    int nx;
167    Node *n = mainposition(t, key);
168    for (;;) {  /* check whether 'key' is somewhere in the chain */
169      /* key may be dead already, but it is ok to use it in 'next' */
170      if (luaV_rawequalobj(gkey(n), key) ||
171            (ttisdeadkey(gkey(n)) && iscollectable(key) &&
172             deadvalue(gkey(n)) == gcvalue(key))) {
173        i = cast_int(n - gnode(t, 0));  /* key index in hash table */
174        /* hash elements are numbered after array ones */
175        return (i + 1) + t->sizearray;
176      }
177      nx = gnext(n);
178      if (nx == 0)
179        luaG_runerror(L, "invalid key to 'next'");  /* key not found */
180      else n += nx;
181    }
182  }
183}
184
185
186int luaH_next (lua_State *L, Table *t, StkId key) {
187  unsigned int i = findindex(L, t, key);  /* find original element */
188  for (; i < t->sizearray; i++) {  /* try first array part */
189    if (!ttisnil(&t->array[i])) {  /* a non-nil value? */
190      setivalue(key, i + 1);
191      setobj2s(L, key+1, &t->array[i]);
192      return 1;
193    }
194  }
195  for (i -= t->sizearray; cast_int(i) < sizenode(t); i++) {  /* hash part */
196    if (!ttisnil(gval(gnode(t, i)))) {  /* a non-nil value? */
197      setobj2s(L, key, gkey(gnode(t, i)));
198      setobj2s(L, key+1, gval(gnode(t, i)));
199      return 1;
200    }
201  }
202  return 0;  /* no more elements */
203}
204
205
206/*
207** {=============================================================
208** Rehash
209** ==============================================================
210*/
211
212/*
213** Compute the optimal size for the array part of table 't'. 'nums' is a
214** "count array" where 'nums[i]' is the number of integers in the table
215** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of
216** integer keys in the table and leaves with the number of keys that
217** will go to the array part; return the optimal size.
218*/
219static unsigned int computesizes (unsigned int nums[], unsigned int *pna) {
220  int i;
221  unsigned int twotoi;  /* 2^i (candidate for optimal size) */
222  unsigned int a = 0;  /* number of elements smaller than 2^i */
223  unsigned int na = 0;  /* number of elements to go to array part */
224  unsigned int optimal = 0;  /* optimal size for array part */
225  /* loop while keys can fill more than half of total size */
226  for (i = 0, twotoi = 1;
227       twotoi > 0 && *pna > twotoi / 2;
228       i++, twotoi *= 2) {
229    if (nums[i] > 0) {
230      a += nums[i];
231      if (a > twotoi/2) {  /* more than half elements present? */
232        optimal = twotoi;  /* optimal size (till now) */
233        na = a;  /* all elements up to 'optimal' will go to array part */
234      }
235    }
236  }
237  lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
238  *pna = na;
239  return optimal;
240}
241
242
243static int countint (const TValue *key, unsigned int *nums) {
244  unsigned int k = arrayindex(key);
245  if (k != 0) {  /* is 'key' an appropriate array index? */
246    nums[luaO_ceillog2(k)]++;  /* count as such */
247    return 1;
248  }
249  else
250    return 0;
251}
252
253
254/*
255** Count keys in array part of table 't': Fill 'nums[i]' with
256** number of keys that will go into corresponding slice and return
257** total number of non-nil keys.
258*/
259static unsigned int numusearray (const Table *t, unsigned int *nums) {
260  int lg;
261  unsigned int ttlg;  /* 2^lg */
262  unsigned int ause = 0;  /* summation of 'nums' */
263  unsigned int i = 1;  /* count to traverse all array keys */
264  /* traverse each slice */
265  for (lg = 0, ttlg = 1; lg <= MAXABITS; lg++, ttlg *= 2) {
266    unsigned int lc = 0;  /* counter */
267    unsigned int lim = ttlg;
268    if (lim > t->sizearray) {
269      lim = t->sizearray;  /* adjust upper limit */
270      if (i > lim)
271        break;  /* no more elements to count */
272    }
273    /* count elements in range (2^(lg - 1), 2^lg] */
274    for (; i <= lim; i++) {
275      if (!ttisnil(&t->array[i-1]))
276        lc++;
277    }
278    nums[lg] += lc;
279    ause += lc;
280  }
281  return ause;
282}
283
284
285static int numusehash (const Table *t, unsigned int *nums, unsigned int *pna) {
286  int totaluse = 0;  /* total number of elements */
287  int ause = 0;  /* elements added to 'nums' (can go to array part) */
288  int i = sizenode(t);
289  while (i--) {
290    Node *n = &t->node[i];
291    if (!ttisnil(gval(n))) {
292      ause += countint(gkey(n), nums);
293      totaluse++;
294    }
295  }
296  *pna += ause;
297  return totaluse;
298}
299
300
301static void setarrayvector (lua_State *L, Table *t, unsigned int size) {
302  unsigned int i;
303  luaM_reallocvector(L, t->array, t->sizearray, size, TValue);
304  for (i=t->sizearray; i<size; i++)
305     setnilvalue(&t->array[i]);
306  t->sizearray = size;
307}
308
309
310static void setnodevector (lua_State *L, Table *t, unsigned int size) {
311  if (size == 0) {  /* no elements to hash part? */
312    t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
313    t->lsizenode = 0;
314    t->lastfree = NULL;  /* signal that it is using dummy node */
315  }
316  else {
317    int i;
318    int lsize = luaO_ceillog2(size);
319    if (lsize > MAXHBITS)
320      luaG_runerror(L, "table overflow");
321    size = twoto(lsize);
322    t->node = luaM_newvector(L, size, Node);
323    for (i = 0; i < (int)size; i++) {
324      Node *n = gnode(t, i);
325      gnext(n) = 0;
326      setnilvalue(wgkey(n));
327      setnilvalue(gval(n));
328    }
329    t->lsizenode = cast_byte(lsize);
330    t->lastfree = gnode(t, size);  /* all positions are free */
331  }
332}
333
334
335typedef struct {
336  Table *t;
337  unsigned int nhsize;
338} AuxsetnodeT;
339
340
341static void auxsetnode (lua_State *L, void *ud) {
342  AuxsetnodeT *asn = cast(AuxsetnodeT *, ud);
343  setnodevector(L, asn->t, asn->nhsize);
344}
345
346
347void luaH_resize (lua_State *L, Table *t, unsigned int nasize,
348                                          unsigned int nhsize) {
349  unsigned int i;
350  int j;
351  AuxsetnodeT asn;
352  unsigned int oldasize = t->sizearray;
353  int oldhsize = allocsizenode(t);
354  Node *nold = t->node;  /* save old hash ... */
355  if (nasize > oldasize)  /* array part must grow? */
356    setarrayvector(L, t, nasize);
357  /* create new hash part with appropriate size */
358  asn.t = t; asn.nhsize = nhsize;
359  if (luaD_rawrunprotected(L, auxsetnode, &asn) != LUA_OK) {  /* mem. error? */
360    setarrayvector(L, t, oldasize);  /* array back to its original size */
361    luaD_throw(L, LUA_ERRMEM);  /* rethrow memory error */
362  }
363  if (nasize < oldasize) {  /* array part must shrink? */
364    t->sizearray = nasize;
365    /* re-insert elements from vanishing slice */
366    for (i=nasize; i<oldasize; i++) {
367      if (!ttisnil(&t->array[i]))
368        luaH_setint(L, t, i + 1, &t->array[i]);
369    }
370    /* shrink array */
371    luaM_reallocvector(L, t->array, oldasize, nasize, TValue);
372  }
373  /* re-insert elements from hash part */
374  for (j = oldhsize - 1; j >= 0; j--) {
375    Node *old = nold + j;
376    if (!ttisnil(gval(old))) {
377      /* doesn't need barrier/invalidate cache, as entry was
378         already present in the table */
379      setobjt2t(L, luaH_set(L, t, gkey(old)), gval(old));
380    }
381  }
382  if (oldhsize > 0)  /* not the dummy node? */
383    luaM_freearray(L, nold, cast(size_t, oldhsize)); /* free old hash */
384}
385
386
387void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
388  int nsize = allocsizenode(t);
389  luaH_resize(L, t, nasize, nsize);
390}
391
392/*
393** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
394*/
395static void rehash (lua_State *L, Table *t, const TValue *ek) {
396  unsigned int asize;  /* optimal size for array part */
397  unsigned int na;  /* number of keys in the array part */
398  unsigned int nums[MAXABITS + 1];
399  int i;
400  int totaluse;
401  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
402  na = numusearray(t, nums);  /* count keys in array part */
403  totaluse = na;  /* all those keys are integer keys */
404  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
405  /* count extra key */
406  na += countint(ek, nums);
407  totaluse++;
408  /* compute new size for array part */
409  asize = computesizes(nums, &na);
410  /* resize the table to new computed sizes */
411  luaH_resize(L, t, asize, totaluse - na);
412}
413
414
415
416/*
417** }=============================================================
418*/
419
420
421Table *luaH_new (lua_State *L) {
422  GCObject *o = luaC_newobj(L, LUA_TTABLE, sizeof(Table));
423  Table *t = gco2t(o);
424  t->metatable = NULL;
425  t->flags = cast_byte(~0);
426  t->array = NULL;
427  t->sizearray = 0;
428  setnodevector(L, t, 0);
429  return t;
430}
431
432
433void luaH_free (lua_State *L, Table *t) {
434  if (!isdummy(t))
435    luaM_freearray(L, t->node, cast(size_t, sizenode(t)));
436  luaM_freearray(L, t->array, t->sizearray);
437  luaM_free(L, t);
438}
439
440
441static Node *getfreepos (Table *t) {
442  if (!isdummy(t)) {
443    while (t->lastfree > t->node) {
444      t->lastfree--;
445      if (ttisnil(gkey(t->lastfree)))
446        return t->lastfree;
447    }
448  }
449  return NULL;  /* could not find a free place */
450}
451
452
453
454/*
455** inserts a new key into a hash table; first, check whether key's main
456** position is free. If not, check whether colliding node is in its main
457** position or not: if it is not, move colliding node to an empty place and
458** put new key in its main position; otherwise (colliding node is in its main
459** position), new key goes to an empty position.
460*/
461TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
462  Node *mp;
463  TValue aux;
464  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
465  else if (ttisfloat(key)) {
466    lua_Integer k;
467    if (luaV_tointeger(key, &k, 0)) {  /* does index fit in an integer? */
468      setivalue(&aux, k);
469      key = &aux;  /* insert it as an integer */
470    }
471    else if (luai_numisnan(fltvalue(key)))
472      luaG_runerror(L, "table index is NaN");
473  }
474  mp = mainposition(t, key);
475  if (!ttisnil(gval(mp)) || isdummy(t)) {  /* main position is taken? */
476    Node *othern;
477    Node *f = getfreepos(t);  /* get a free place */
478    if (f == NULL) {  /* cannot find a free place? */
479      rehash(L, t, key);  /* grow table */
480      /* whatever called 'newkey' takes care of TM cache */
481      return luaH_set(L, t, key);  /* insert key into grown table */
482    }
483    lua_assert(!isdummy(t));
484    othern = mainposition(t, gkey(mp));
485    if (othern != mp) {  /* is colliding node out of its main position? */
486      /* yes; move colliding node into free position */
487      while (othern + gnext(othern) != mp)  /* find previous */
488        othern += gnext(othern);
489      gnext(othern) = cast_int(f - othern);  /* rechain to point to 'f' */
490      *f = *mp;  /* copy colliding node into free pos. (mp->next also goes) */
491      if (gnext(mp) != 0) {
492        gnext(f) += cast_int(mp - f);  /* correct 'next' */
493        gnext(mp) = 0;  /* now 'mp' is free */
494      }
495      setnilvalue(gval(mp));
496    }
497    else {  /* colliding node is in its own main position */
498      /* new node will go into free position */
499      if (gnext(mp) != 0)
500        gnext(f) = cast_int((mp + gnext(mp)) - f);  /* chain new position */
501      else lua_assert(gnext(f) == 0);
502      gnext(mp) = cast_int(f - mp);
503      mp = f;
504    }
505  }
506  setnodekey(L, &mp->i_key, key);
507  luaC_barrierback(L, t, key);
508  lua_assert(ttisnil(gval(mp)));
509  return gval(mp);
510}
511
512
513/*
514** search function for integers
515*/
516const TValue *luaH_getint (Table *t, lua_Integer key) {
517  /* (1 <= key && key <= t->sizearray) */
518  if (l_castS2U(key) - 1 < t->sizearray)
519    return &t->array[key - 1];
520  else {
521    Node *n = hashint(t, key);
522    for (;;) {  /* check whether 'key' is somewhere in the chain */
523      if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
524        return gval(n);  /* that's it */
525      else {
526        int nx = gnext(n);
527        if (nx == 0) break;
528        n += nx;
529      }
530    }
531    return luaO_nilobject;
532  }
533}
534
535
536/*
537** search function for short strings
538*/
539const TValue *luaH_getshortstr (Table *t, TString *key) {
540  Node *n = hashstr(t, key);
541  lua_assert(key->tt == LUA_TSHRSTR);
542  for (;;) {  /* check whether 'key' is somewhere in the chain */
543    const TValue *k = gkey(n);
544    if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
545      return gval(n);  /* that's it */
546    else {
547      int nx = gnext(n);
548      if (nx == 0)
549        return luaO_nilobject;  /* not found */
550      n += nx;
551    }
552  }
553}
554
555
556/*
557** "Generic" get version. (Not that generic: not valid for integers,
558** which may be in array part, nor for floats with integral values.)
559*/
560static const TValue *getgeneric (Table *t, const TValue *key) {
561  Node *n = mainposition(t, key);
562  for (;;) {  /* check whether 'key' is somewhere in the chain */
563    if (luaV_rawequalobj(gkey(n), key))
564      return gval(n);  /* that's it */
565    else {
566      int nx = gnext(n);
567      if (nx == 0)
568        return luaO_nilobject;  /* not found */
569      n += nx;
570    }
571  }
572}
573
574
575const TValue *luaH_getstr (Table *t, TString *key) {
576  if (key->tt == LUA_TSHRSTR)
577    return luaH_getshortstr(t, key);
578  else {  /* for long strings, use generic case */
579    TValue ko;
580    setsvalue(cast(lua_State *, NULL), &ko, key);
581    return getgeneric(t, &ko);
582  }
583}
584
585
586/*
587** main search function
588*/
589const TValue *luaH_get (Table *t, const TValue *key) {
590  switch (ttype(key)) {
591    case LUA_TSHRSTR: return luaH_getshortstr(t, tsvalue(key));
592    case LUA_TNUMINT: return luaH_getint(t, ivalue(key));
593    case LUA_TNIL: return luaO_nilobject;
594    case LUA_TNUMFLT: {
595      lua_Integer k;
596      if (luaV_tointeger(key, &k, 0)) /* index is int? */
597        return luaH_getint(t, k);  /* use specialized version */
598      /* else... */
599    }  /* FALLTHROUGH */
600    default:
601      return getgeneric(t, key);
602  }
603}
604
605
606/*
607** beware: when using this function you probably need to check a GC
608** barrier and invalidate the TM cache.
609*/
610TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
611  const TValue *p = luaH_get(t, key);
612  if (p != luaO_nilobject)
613    return cast(TValue *, p);
614  else return luaH_newkey(L, t, key);
615}
616
617
618void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
619  const TValue *p = luaH_getint(t, key);
620  TValue *cell;
621  if (p != luaO_nilobject)
622    cell = cast(TValue *, p);
623  else {
624    TValue k;
625    setivalue(&k, key);
626    cell = luaH_newkey(L, t, &k);
627  }
628  setobj2t(L, cell, value);
629}
630
631
632static lua_Unsigned unbound_search (Table *t, lua_Unsigned j) {
633  lua_Unsigned i = j;  /* i is zero or a present index */
634  j++;
635  /* find 'i' and 'j' such that i is present and j is not */
636  while (!ttisnil(luaH_getint(t, j))) {
637    i = j;
638    if (j > l_castS2U(LUA_MAXINTEGER) / 2) {  /* overflow? */
639      /* table was built with bad purposes: resort to linear search */
640      i = 1;
641      while (!ttisnil(luaH_getint(t, i))) i++;
642      return i - 1;
643    }
644    j *= 2;
645  }
646  /* now do a binary search between them */
647  while (j - i > 1) {
648    lua_Unsigned m = (i+j)/2;
649    if (ttisnil(luaH_getint(t, m))) j = m;
650    else i = m;
651  }
652  return i;
653}
654
655
656/*
657** Try to find a boundary in table 't'. A 'boundary' is an integer index
658** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
659*/
660lua_Unsigned luaH_getn (Table *t) {
661  unsigned int j = t->sizearray;
662  if (j > 0 && ttisnil(&t->array[j - 1])) {
663    /* there is a boundary in the array part: (binary) search for it */
664    unsigned int i = 0;
665    while (j - i > 1) {
666      unsigned int m = (i+j)/2;
667      if (ttisnil(&t->array[m - 1])) j = m;
668      else i = m;
669    }
670    return i;
671  }
672  /* else must find a boundary in hash part */
673  else if (isdummy(t))  /* hash part is empty? */
674    return j;  /* that is easy... */
675  else return unbound_search(t, j);
676}
677
678
679
680#if defined(LUA_DEBUG)
681
682Node *luaH_mainposition (const Table *t, const TValue *key) {
683  return mainposition(t, key);
684}
685
686int luaH_isdummy (const Table *t) { return isdummy(t); }
687
688#endif
689