1/*    hv.c
2 *
3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 *    You may distribute under the terms of either the GNU General Public
7 *    License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 *      I sit beside the fire and think
13 *          of all that I have seen.
14 *                         --Bilbo
15 *
16 *     [p.278 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
17 */
18
19/*
20=head1 HV Handling
21A HV structure represents a Perl hash.  It consists mainly of an array
22of pointers, each of which points to a linked list of HE structures.  The
23array is indexed by the hash function of the key, so each linked list
24represents all the hash entries with the same hash value.  Each HE contains
25a pointer to the actual value, plus a pointer to a HEK structure which
26holds the key and hash value.
27
28=cut
29
30*/
31
32#include "EXTERN.h"
33#define PERL_IN_HV_C
34#define PERL_HASH_INTERNAL_ACCESS
35#include "perl.h"
36
37/* we split when we collide and we have a load factor over 0.667.
38 * NOTE if you change this formula so we split earlier than previously
39 * you MUST change the logic in hv_ksplit()
40 */
41
42/*  MAX_BUCKET_MAX is the maximum max bucket index, at which point we stop growing the
43 *  number of buckets,
44 */
45#define MAX_BUCKET_MAX ((1<<26)-1)
46#define DO_HSPLIT(xhv) ( ( ((xhv)->xhv_keys + ((xhv)->xhv_keys >> 1)) > (xhv)->xhv_max ) && \
47                           ((xhv)->xhv_max < MAX_BUCKET_MAX) )
48
49static const char S_strtab_error[]
50    = "Cannot modify shared string table in hv_%s";
51
52#define DEBUG_HASH_RAND_BITS (DEBUG_h_TEST)
53
54/* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
55 * See also https://en.wikipedia.org/wiki/Xorshift
56 */
57#if IVSIZE == 8
58/* 64 bit version */
59#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT64_A(x)
60#else
61/* 32 bit version */
62#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT32_A(x)
63#endif
64
65#define UPDATE_HASH_RAND_BITS_KEY(key,klen)                             \
66STMT_START {                                                            \
67    XORSHIFT_RAND_BITS(PL_hash_rand_bits);                              \
68    if (DEBUG_HASH_RAND_BITS) {                                         \
69        PerlIO_printf( Perl_debug_log,                                  \
70            "PL_hash_rand_bits=%016" UVxf" @ %s:%-4d",                   \
71            (UV)PL_hash_rand_bits, __FILE__, __LINE__                   \
72        );                                                              \
73        if (DEBUG_v_TEST && key) {                                      \
74            PerlIO_printf( Perl_debug_log, " key:'%.*s' %" UVuf"\n",     \
75                    (int)klen,                                          \
76                    key ? key : "", /* silence warning */               \
77                    (UV)klen                                            \
78            );                                                          \
79        } else {                                                        \
80            PerlIO_printf( Perl_debug_log, "\n");                       \
81        }                                                               \
82    }                                                                   \
83} STMT_END
84
85#define MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen)                       \
86STMT_START {                                                            \
87    if (PL_HASH_RAND_BITS_ENABLED)                                      \
88        UPDATE_HASH_RAND_BITS_KEY(key,klen);                            \
89} STMT_END
90
91
92#define UPDATE_HASH_RAND_BITS()                                         \
93    UPDATE_HASH_RAND_BITS_KEY(NULL,0)
94
95#define MAYBE_UPDATE_HASH_RAND_BITS()                                   \
96    MAYBE_UPDATE_HASH_RAND_BITS_KEY(NULL,0)
97
98/* HeKFLAGS(entry) is a single U8, so only provides 8 flags bits.
99   We currently use 3. All 3 we have behave differently, so if we find a use for
100   more flags it's hard to predict which they group with.
101
102   Hash keys are stored as flat octet sequences, not SVs. Hence we need a flag
103   bit to say whether those octet sequences represent ISO-8859-1 or UTF-8 -
104   HVhek_UTF8. The value of this flag bit matters for (regular) hash key
105   lookups.
106
107   To speed up comparisons, keys are normalised to octets. But we (also)
108   preserve whether the key was supplied, so we need another flag bit to say
109   whether to reverse the normalisation when iterating the keys (converting them
110   back to SVs) - HVhek_WASUTF8. The value of this flag bit must be ignored for
111   (regular) hash key lookups.
112
113   But for the shared string table (the private "hash" that manages shared hash
114   keys and their reference counts), we need to be able to store both variants
115   (HVhek_WASUTF8 set and clear), so the code performing lookups in this hash
116   must be different and consider both keys.
117
118   However, regular hashes (now) can have a mix of shared and unshared keys.
119   (This avoids the need to reallocate all the keys into unshared storage at
120   the point where hash passes the "large" hash threshold, and no longer uses
121   the shared string table - existing keys remain shared, to avoid makework.)
122
123   Meaning that HVhek_NOTSHARED *may* be set in regular hashes (but should be
124   ignored for hash lookups) but must always be clear in the keys in the shared
125   string table (because the pointers to these keys are directly copied into
126   regular hashes - this is how shared keys work.)
127
128   Hence all 3 are different, and it's hard to predict the best way to future
129   proof what is needed next.
130
131   We also have HVhek_ENABLEHVKFLAGS, which is used as a mask within the code
132   below to determine whether to set HvHASKFLAGS() true on the hash as a whole.
133   This is a public "optimisation" flag provided to serealisers, to indicate
134   (up front) that a hash contains non-8-bit keys, if they want to use different
135   storage formats for hashes where all keys are simple octet sequences
136   (avoiding needing to store an extra byte per hash key), and they need to know
137   that this holds *before* iterating the hash keys. Only Storable seems to use
138   this. (For this use case, HVhek_NOTSHARED doesn't matter)
139
140   For now, we assume that any future flag bits will need to be distinguished
141   in the shared string table, hence we create this mask for the shared string
142   table code. It happens to be the same as HVhek_ENABLEHVKFLAGS, but that might
143   change if we add a flag bit that matters to the shared string table but not
144   to Storable (or similar). */
145
146#define HVhek_STORAGE_MASK (0xFF & ~HVhek_NOTSHARED)
147
148#ifdef PURIFY
149
150#define new_HE() (HE*)safemalloc(sizeof(HE))
151#define del_HE(p) safefree((char*)p)
152
153#else
154
155STATIC HE*
156S_new_he(pTHX)
157{
158    HE* he;
159    void ** const root = &PL_body_roots[HE_ARENA_ROOT_IX];
160
161    if (!*root)
162        Perl_more_bodies(aTHX_ HE_ARENA_ROOT_IX, sizeof(HE), PERL_ARENA_SIZE);
163    he = (HE*) *root;
164    assert(he);
165    *root = HeNEXT(he);
166    return he;
167}
168
169#define new_HE() new_he()
170#define del_HE(p) \
171    STMT_START { \
172        HeNEXT(p) = (HE*)(PL_body_roots[HE_ARENA_ROOT_IX]);	\
173        PL_body_roots[HE_ARENA_ROOT_IX] = p; \
174    } STMT_END
175
176
177
178#endif
179
180STATIC HEK *
181S_save_hek_flags(const char *str, I32 len, U32 hash, int flags)
182{
183    char *k;
184    HEK *hek;
185
186    PERL_ARGS_ASSERT_SAVE_HEK_FLAGS;
187
188    Newx(k, HEK_BASESIZE + len + 2, char);
189    hek = (HEK*)k;
190    Copy(str, HEK_KEY(hek), len, char);
191    HEK_KEY(hek)[len] = 0;
192    HEK_LEN(hek) = len;
193    HEK_HASH(hek) = hash;
194    HEK_FLAGS(hek) = HVhek_NOTSHARED | (flags & HVhek_STORAGE_MASK);
195
196    if (flags & HVhek_FREEKEY)
197        Safefree(str);
198    return hek;
199}
200
201/* free the pool of temporary HE/HEK pairs returned by hv_fetch_ent
202 * for tied hashes */
203
204void
205Perl_free_tied_hv_pool(pTHX)
206{
207    HE *he = PL_hv_fetch_ent_mh;
208    while (he) {
209        HE * const ohe = he;
210        Safefree(HeKEY_hek(he));
211        he = HeNEXT(he);
212        del_HE(ohe);
213    }
214    PL_hv_fetch_ent_mh = NULL;
215}
216
217#if defined(USE_ITHREADS)
218HEK *
219Perl_hek_dup(pTHX_ HEK *source, CLONE_PARAMS* param)
220{
221    HEK *shared;
222
223    PERL_ARGS_ASSERT_HEK_DUP;
224    PERL_UNUSED_ARG(param);
225
226    if (!source)
227        return NULL;
228
229    shared = (HEK*)ptr_table_fetch(PL_ptr_table, source);
230    if (shared) {
231        /* We already shared this hash key.  */
232        (void)share_hek_hek(shared);
233    }
234    else {
235        shared
236            = share_hek_flags(HEK_KEY(source), HEK_LEN(source),
237                              HEK_HASH(source), HEK_FLAGS(source));
238        ptr_table_store(PL_ptr_table, source, shared);
239    }
240    return shared;
241}
242
243HE *
244Perl_he_dup(pTHX_ const HE *e, bool shared, CLONE_PARAMS* param)
245{
246    HE *ret;
247
248    PERL_ARGS_ASSERT_HE_DUP;
249
250    /* All the *_dup functions are deemed to be API, despite most being deeply
251       tied to the internals. Hence we can't simply remove the parameter
252       "shared" from this function. */
253    /* sv_dup and sv_dup_inc seem to be the only two that are used by XS code.
254       Probably the others should be dropped from the API. See #19409 */
255    PERL_UNUSED_ARG(shared);
256
257    if (!e)
258        return NULL;
259    /* look for it in the table first */
260    ret = (HE*)ptr_table_fetch(PL_ptr_table, e);
261    if (ret)
262        return ret;
263
264    /* create anew and remember what it is */
265    ret = new_HE();
266    ptr_table_store(PL_ptr_table, e, ret);
267
268    if (HeKLEN(e) == HEf_SVKEY) {
269        char *k;
270        Newx(k, HEK_BASESIZE + sizeof(const SV *), char);
271        HeKEY_hek(ret) = (HEK*)k;
272        HeKEY_sv(ret) = sv_dup_inc(HeKEY_sv(e), param);
273    }
274    else if (!(HeKFLAGS(e) & HVhek_NOTSHARED)) {
275        /* This is hek_dup inlined, which seems to be important for speed
276           reasons.  */
277        HEK * const source = HeKEY_hek(e);
278        HEK *shared = (HEK*)ptr_table_fetch(PL_ptr_table, source);
279
280        if (shared) {
281            /* We already shared this hash key.  */
282            (void)share_hek_hek(shared);
283        }
284        else {
285            shared
286                = share_hek_flags(HEK_KEY(source), HEK_LEN(source),
287                                  HEK_HASH(source), HEK_FLAGS(source));
288            ptr_table_store(PL_ptr_table, source, shared);
289        }
290        HeKEY_hek(ret) = shared;
291    }
292    else
293        HeKEY_hek(ret) = save_hek_flags(HeKEY(e), HeKLEN(e), HeHASH(e),
294                                        HeKFLAGS(e));
295    HeVAL(ret) = sv_dup_inc(HeVAL(e), param);
296
297    HeNEXT(ret) = he_dup(HeNEXT(e), FALSE, param);
298    return ret;
299}
300#endif	/* USE_ITHREADS */
301
302static void
303S_hv_notallowed(pTHX_ int flags, const char *key, I32 klen,
304                const char *msg)
305{
306   /* Straight to SVt_PVN here, as needed by sv_setpvn_fresh and
307    * sv_usepvn would otherwise call it */
308    SV * const sv = newSV_type_mortal(SVt_PV);
309
310    PERL_ARGS_ASSERT_HV_NOTALLOWED;
311
312    if (!(flags & HVhek_FREEKEY)) {
313        sv_setpvn_fresh(sv, key, klen);
314    }
315    else {
316        /* Need to free saved eventually assign to mortal SV */
317        /* XXX is this line an error ???:  SV *sv = sv_newmortal(); */
318        sv_usepvn(sv, (char *) key, klen);
319    }
320    if (flags & HVhek_UTF8) {
321        SvUTF8_on(sv);
322    }
323    Perl_croak(aTHX_ msg, SVfARG(sv));
324}
325
326/* (klen == HEf_SVKEY) is special for MAGICAL hv entries, meaning key slot
327 * contains an SV* */
328
329/*
330=for apidoc      hv_store
331=for apidoc_item hv_stores
332
333These each store SV C<val> with the specified key in hash C<hv>, returning NULL
334if the operation failed or if the value did not need to be actually stored
335within the hash (as in the case of tied hashes).  Otherwise it can be
336dereferenced to get the original C<SV*>.
337
338They differ only in how the hash key is specified.
339
340In C<hv_stores>, the key is a C language string literal, enclosed in double
341quotes.  It is never treated as being in UTF-8.
342
343In C<hv_store>, C<key> is either NULL or points to the first byte of the string
344specifying the key, and its length in bytes is given by the absolute value of
345an additional parameter, C<klen>.  A NULL key indicates the key is to be
346treated as C<undef>, and C<klen> is ignored; otherwise the key string may
347contain embedded-NUL bytes.  If C<klen> is negative, the string is treated as
348being encoded in UTF-8; otherwise not.
349
350C<hv_store> has another extra parameter, C<hash>, a precomputed hash of the key
351string, or zero if it has not been precomputed.  This parameter is omitted from
352C<hv_stores>, as it is computed automatically at compile time.
353
354If <hv> is NULL, NULL is returned and no action is taken.
355
356If C<val> is NULL, it is treated as being C<undef>; otherwise the caller is
357responsible for suitably incrementing the reference count of C<val> before
358the call, and decrementing it if the function returned C<NULL>.  Effectively
359a successful C<hv_store> takes ownership of one reference to C<val>.  This is
360usually what you want; a newly created SV has a reference count of one, so
361if all your code does is create SVs then store them in a hash, C<hv_store>
362will own the only reference to the new SV, and your code doesn't need to do
363anything further to tidy up.
364
365C<hv_store> is not implemented as a call to L</C<hv_store_ent>>, and does not
366create a temporary SV for the key, so if your key data is not already in SV
367form then use C<hv_store> in preference to C<hv_store_ent>.
368
369See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
370information on how to use this function on tied hashes.
371
372=for apidoc hv_store_ent
373
374Stores C<val> in a hash.  The hash key is specified as C<key>.  The C<hash>
375parameter is the precomputed hash value; if it is zero then Perl will
376compute it.  The return value is the new hash entry so created.  It will be
377C<NULL> if the operation failed or if the value did not need to be actually
378stored within the hash (as in the case of tied hashes).  Otherwise the
379contents of the return value can be accessed using the C<He?> macros
380described here.  Note that the caller is responsible for suitably
381incrementing the reference count of C<val> before the call, and
382decrementing it if the function returned NULL.  Effectively a successful
383C<hv_store_ent> takes ownership of one reference to C<val>.  This is
384usually what you want; a newly created SV has a reference count of one, so
385if all your code does is create SVs then store them in a hash, C<hv_store>
386will own the only reference to the new SV, and your code doesn't need to do
387anything further to tidy up.  Note that C<hv_store_ent> only reads the C<key>;
388unlike C<val> it does not take ownership of it, so maintaining the correct
389reference count on C<key> is entirely the caller's responsibility.  The reason
390it does not take ownership, is that C<key> is not used after this function
391returns, and so can be freed immediately.  C<hv_store>
392is not implemented as a call to C<hv_store_ent>, and does not create a temporary
393SV for the key, so if your key data is not already in SV form then use
394C<hv_store> in preference to C<hv_store_ent>.
395
396See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
397information on how to use this function on tied hashes.
398
399=for apidoc hv_exists
400
401Returns a boolean indicating whether the specified hash key exists.  The
402absolute value of C<klen> is the length of the key.  If C<klen> is
403negative the key is assumed to be in UTF-8-encoded Unicode.
404
405=for apidoc hv_fetch
406
407Returns the SV which corresponds to the specified key in the hash.
408The absolute value of C<klen> is the length of the key.  If C<klen> is
409negative the key is assumed to be in UTF-8-encoded Unicode.  If
410C<lval> is set then the fetch will be part of a store.  This means that if
411there is no value in the hash associated with the given key, then one is
412created and a pointer to it is returned.  The C<SV*> it points to can be
413assigned to.  But always check that the
414return value is non-null before dereferencing it to an C<SV*>.
415
416See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
417information on how to use this function on tied hashes.
418
419=for apidoc hv_exists_ent
420
421Returns a boolean indicating whether
422the specified hash key exists.  C<hash>
423can be a valid precomputed hash value, or 0 to ask for it to be
424computed.
425
426=cut
427*/
428
429/* returns an HE * structure with the all fields set */
430/* note that hent_val will be a mortal sv for MAGICAL hashes */
431/*
432=for apidoc hv_fetch_ent
433
434Returns the hash entry which corresponds to the specified key in the hash.
435C<hash> must be a valid precomputed hash number for the given C<key>, or 0
436if you want the function to compute it.  IF C<lval> is set then the fetch
437will be part of a store.  Make sure the return value is non-null before
438accessing it.  The return value when C<hv> is a tied hash is a pointer to a
439static location, so be sure to make a copy of the structure if you need to
440store it somewhere.
441
442See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
443information on how to use this function on tied hashes.
444
445=cut
446*/
447
448/* Common code for hv_delete()/hv_exists()/hv_fetch()/hv_store()  */
449void *
450Perl_hv_common_key_len(pTHX_ HV *hv, const char *key, I32 klen_i32,
451                       const int action, SV *val, const U32 hash)
452{
453    STRLEN klen;
454    int flags;
455
456    PERL_ARGS_ASSERT_HV_COMMON_KEY_LEN;
457
458    if (klen_i32 < 0) {
459        klen = -klen_i32;
460        flags = HVhek_UTF8;
461    } else {
462        klen = klen_i32;
463        flags = 0;
464    }
465    return hv_common(hv, NULL, key, klen, flags, action, val, hash);
466}
467
468void *
469Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
470               int flags, int action, SV *val, U32 hash)
471{
472    XPVHV* xhv;
473    HE *entry;
474    HE **oentry;
475    SV *sv;
476    bool is_utf8;
477    bool in_collision;
478    const int return_svp = action & HV_FETCH_JUST_SV;
479    HEK *keysv_hek = NULL;
480
481    if (!hv)
482        return NULL;
483    if (SvIS_FREED(hv))
484        return NULL;
485
486    assert(SvTYPE(hv) == SVt_PVHV);
487
488    if (SvSMAGICAL(hv) && SvGMAGICAL(hv) && !(action & HV_DISABLE_UVAR_XKEY)) {
489        MAGIC* mg;
490        if ((mg = mg_find((const SV *)hv, PERL_MAGIC_uvar))) {
491            struct ufuncs * const uf = (struct ufuncs *)mg->mg_ptr;
492            if (uf->uf_set == NULL) {
493                SV* obj = mg->mg_obj;
494
495                if (!keysv) {
496                    keysv = newSVpvn_flags(key, klen, SVs_TEMP |
497                                           ((flags & HVhek_UTF8)
498                                            ? SVf_UTF8 : 0));
499                }
500
501                mg->mg_obj = keysv;         /* pass key */
502                uf->uf_index = action;      /* pass action */
503                magic_getuvar(MUTABLE_SV(hv), mg);
504                keysv = mg->mg_obj;         /* may have changed */
505                mg->mg_obj = obj;
506
507                /* If the key may have changed, then we need to invalidate
508                   any passed-in computed hash value.  */
509                hash = 0;
510            }
511        }
512    }
513
514    /* flags might have HVhek_NOTSHARED set. If so, we need to ignore that.
515       Some callers to hv_common() pass the flags value from an existing HEK,
516       and if that HEK is not shared, then it has the relevant flag bit set,
517       which must not be passed into share_hek_flags().
518
519       It would be "purer" to insist that all callers clear it, but we'll end up
520       with subtle bugs if we leave it to them, or runtime assertion failures if
521       we try to enforce our documentation with landmines.
522
523       If keysv is true, all code paths assign a new value to flags with that
524       bit clear, so we're always "good". Hence we only need to explicitly clear
525       this bit in the else block. */
526    if (keysv) {
527        if (flags & HVhek_FREEKEY)
528            Safefree(key);
529        key = SvPV_const(keysv, klen);
530        is_utf8 = (SvUTF8(keysv) != 0);
531        if (SvIsCOW_shared_hash(keysv)) {
532            flags = HVhek_KEYCANONICAL | (is_utf8 ? HVhek_UTF8 : 0);
533        } else {
534            flags = 0;
535        }
536    } else {
537        is_utf8 = cBOOL(flags & HVhek_UTF8);
538        flags &= ~HVhek_NOTSHARED;
539    }
540
541    if (action & HV_DELETE) {
542        return (void *) hv_delete_common(hv, keysv, key, klen,
543                                         flags | (is_utf8 ? HVhek_UTF8 : 0),
544                                         action, hash);
545    }
546
547    xhv = (XPVHV*)SvANY(hv);
548    if (SvMAGICAL(hv)) {
549        if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) {
550            if (mg_find((const SV *)hv, PERL_MAGIC_tied)
551                || SvGMAGICAL((const SV *)hv))
552            {
553                /* FIXME should be able to skimp on the HE/HEK here when
554                   HV_FETCH_JUST_SV is true.  */
555                if (!keysv) {
556                    keysv = newSVpvn_utf8(key, klen, is_utf8);
557                } else {
558                    keysv = newSVsv(keysv);
559                }
560                sv = sv_newmortal();
561                mg_copy(MUTABLE_SV(hv), sv, (char *)keysv, HEf_SVKEY);
562
563                /* grab a fake HE/HEK pair from the pool or make a new one */
564                entry = PL_hv_fetch_ent_mh;
565                if (entry)
566                    PL_hv_fetch_ent_mh = HeNEXT(entry);
567                else {
568                    char *k;
569                    entry = new_HE();
570                    Newx(k, HEK_BASESIZE + sizeof(const SV *), char);
571                    HeKEY_hek(entry) = (HEK*)k;
572                }
573                HeNEXT(entry) = NULL;
574                HeSVKEY_set(entry, keysv);
575                HeVAL(entry) = sv;
576                sv_upgrade(sv, SVt_PVLV);
577                LvTYPE(sv) = 'T';
578                 /* so we can free entry when freeing sv */
579                LvTARG(sv) = MUTABLE_SV(entry);
580
581                /* XXX remove at some point? */
582                if (flags & HVhek_FREEKEY)
583                    Safefree(key);
584
585                if (return_svp) {
586                    return entry ? (void *) &HeVAL(entry) : NULL;
587                }
588                return (void *) entry;
589            }
590#ifdef ENV_IS_CASELESS
591            else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
592                U32 i;
593                for (i = 0; i < klen; ++i)
594                    if (isLOWER(key[i])) {
595                        /* Would be nice if we had a routine to do the
596                           copy and uppercase in a single pass through.  */
597                        const char * const nkey = strupr(savepvn(key,klen));
598                        /* Note that this fetch is for nkey (the uppercased
599                           key) whereas the store is for key (the original)  */
600                        void *result = hv_common(hv, NULL, nkey, klen,
601                                                 HVhek_FREEKEY, /* free nkey */
602                                                 0 /* non-LVAL fetch */
603                                                 | HV_DISABLE_UVAR_XKEY
604                                                 | return_svp,
605                                                 NULL /* no value */,
606                                                 0 /* compute hash */);
607                        if (!result && (action & HV_FETCH_LVALUE)) {
608                            /* This call will free key if necessary.
609                               Do it this way to encourage compiler to tail
610                               call optimise.  */
611                            result = hv_common(hv, keysv, key, klen, flags,
612                                               HV_FETCH_ISSTORE
613                                               | HV_DISABLE_UVAR_XKEY
614                                               | return_svp,
615                                               newSV_type(SVt_NULL), hash);
616                        } else {
617                            if (flags & HVhek_FREEKEY)
618                                Safefree(key);
619                        }
620                        return result;
621                    }
622            }
623#endif
624        } /* ISFETCH */
625        else if (SvRMAGICAL(hv) && (action & HV_FETCH_ISEXISTS)) {
626            if (mg_find((const SV *)hv, PERL_MAGIC_tied)
627                || SvGMAGICAL((const SV *)hv)) {
628                /* I don't understand why hv_exists_ent has svret and sv,
629                   whereas hv_exists only had one.  */
630                SV * const svret = sv_newmortal();
631                sv = sv_newmortal();
632
633                if (keysv || is_utf8) {
634                    if (!keysv) {
635                        keysv = newSVpvn_utf8(key, klen, TRUE);
636                    } else {
637                        keysv = newSVsv(keysv);
638                    }
639                    mg_copy(MUTABLE_SV(hv), sv, (char *)sv_2mortal(keysv), HEf_SVKEY);
640                } else {
641                    mg_copy(MUTABLE_SV(hv), sv, key, klen);
642                }
643                if (flags & HVhek_FREEKEY)
644                    Safefree(key);
645                {
646                  MAGIC * const mg = mg_find(sv, PERL_MAGIC_tiedelem);
647                  if (mg)
648                    magic_existspack(svret, mg);
649                }
650                /* This cast somewhat evil, but I'm merely using NULL/
651                   not NULL to return the boolean exists.
652                   And I know hv is not NULL.  */
653                return SvTRUE_NN(svret) ? (void *)hv : NULL;
654                }
655#ifdef ENV_IS_CASELESS
656            else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
657                /* XXX This code isn't UTF8 clean.  */
658                char * const keysave = (char * const)key;
659                /* Will need to free this, so set FREEKEY flag.  */
660                key = savepvn(key,klen);
661                key = (const char*)strupr((char*)key);
662                is_utf8 = FALSE;
663                hash = 0;
664                keysv = 0;
665
666                if (flags & HVhek_FREEKEY) {
667                    Safefree(keysave);
668                }
669                flags |= HVhek_FREEKEY;
670            }
671#endif
672        } /* ISEXISTS */
673        else if (action & HV_FETCH_ISSTORE) {
674            bool needs_copy;
675            bool needs_store;
676            hv_magic_check (hv, &needs_copy, &needs_store);
677            if (needs_copy) {
678                const bool save_taint = TAINT_get;
679                if (keysv || is_utf8) {
680                    if (!keysv) {
681                        keysv = newSVpvn_utf8(key, klen, TRUE);
682                    }
683                    if (TAINTING_get)
684                        TAINT_set(SvTAINTED(keysv));
685                    keysv = sv_2mortal(newSVsv(keysv));
686                    mg_copy(MUTABLE_SV(hv), val, (char*)keysv, HEf_SVKEY);
687                } else {
688                    mg_copy(MUTABLE_SV(hv), val, key, klen);
689                }
690
691                TAINT_IF(save_taint);
692#ifdef NO_TAINT_SUPPORT
693                PERL_UNUSED_VAR(save_taint);
694#endif
695                if (!needs_store) {
696                    if (flags & HVhek_FREEKEY)
697                        Safefree(key);
698                    return NULL;
699                }
700#ifdef ENV_IS_CASELESS
701                else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
702                    /* XXX This code isn't UTF8 clean.  */
703                    const char *keysave = key;
704                    /* Will need to free this, so set FREEKEY flag.  */
705                    key = savepvn(key,klen);
706                    key = (const char*)strupr((char*)key);
707                    is_utf8 = FALSE;
708                    hash = 0;
709                    keysv = 0;
710
711                    if (flags & HVhek_FREEKEY) {
712                        Safefree(keysave);
713                    }
714                    flags |= HVhek_FREEKEY;
715                }
716#endif
717            }
718        } /* ISSTORE */
719    } /* SvMAGICAL */
720
721    if (!HvARRAY(hv)) {
722        if ((action & (HV_FETCH_LVALUE | HV_FETCH_ISSTORE))
723#ifdef DYNAMIC_ENV_FETCH  /* if it's an %ENV lookup, we may get it on the fly */
724                 || (SvRMAGICAL((const SV *)hv)
725                     && mg_find((const SV *)hv, PERL_MAGIC_env))
726#endif
727                                                                  ) {
728            char *array;
729            Newxz(array,
730                 PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */),
731                 char);
732            HvARRAY(hv) = (HE**)array;
733        }
734#ifdef DYNAMIC_ENV_FETCH
735        else if (action & HV_FETCH_ISEXISTS) {
736            /* for an %ENV exists, if we do an insert it's by a recursive
737               store call, so avoid creating HvARRAY(hv) right now.  */
738        }
739#endif
740        else {
741            /* XXX remove at some point? */
742            if (flags & HVhek_FREEKEY)
743                Safefree(key);
744
745            return NULL;
746        }
747    }
748
749    if (is_utf8 && !(flags & HVhek_KEYCANONICAL)) {
750        char * const keysave = (char *)key;
751        key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
752        if (is_utf8)
753            flags |= HVhek_UTF8;
754        else
755            flags &= ~HVhek_UTF8;
756        if (key != keysave) {
757            if (flags & HVhek_FREEKEY)
758                Safefree(keysave);
759            flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
760            /* If the caller calculated a hash, it was on the sequence of
761               octets that are the UTF-8 form. We've now changed the sequence
762               of octets stored to that of the equivalent byte representation,
763               so the hash we need is different.  */
764            hash = 0;
765        }
766    }
767
768    if (keysv && (SvIsCOW_shared_hash(keysv))) {
769        if (HvSHAREKEYS(hv))
770            keysv_hek  = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
771        hash = SvSHARED_HASH(keysv);
772    }
773    else if (!hash)
774        PERL_HASH(hash, key, klen);
775
776#ifdef DYNAMIC_ENV_FETCH
777    if (!HvARRAY(hv)) entry = NULL;
778    else
779#endif
780    {
781        entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)];
782    }
783
784    if (!entry)
785        goto not_found;
786
787    if (keysv_hek) {
788        /* keysv is actually a HEK in disguise, so we can match just by
789         * comparing the HEK pointers in the HE chain. There is a slight
790         * caveat: on something like "\x80", which has both plain and utf8
791         * representations, perl's hashes do encoding-insensitive lookups,
792         * but preserve the encoding of the stored key. Thus a particular
793         * key could map to two different HEKs in PL_strtab. We only
794         * conclude 'not found' if all the flags are the same; otherwise
795         * we fall back to a full search (this should only happen in rare
796         * cases).
797         */
798        int keysv_flags = HEK_FLAGS(keysv_hek);
799        HE  *orig_entry = entry;
800
801        for (; entry; entry = HeNEXT(entry)) {
802            HEK *hek = HeKEY_hek(entry);
803            if (hek == keysv_hek)
804                goto found;
805            if (HEK_FLAGS(hek) != keysv_flags)
806                break; /* need to do full match */
807        }
808        if (!entry)
809            goto not_found;
810        /* failed on shortcut - do full search loop */
811        entry = orig_entry;
812    }
813
814    for (; entry; entry = HeNEXT(entry)) {
815        if (HeHASH(entry) != hash)		/* strings can't be equal */
816            continue;
817        if (HeKLEN(entry) != (I32)klen)
818            continue;
819        if (memNE(HeKEY(entry),key,klen))	/* is this it? */
820            continue;
821        if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
822            continue;
823
824      found:
825        if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) {
826            if ((HeKFLAGS(entry) ^ flags) & HVhek_WASUTF8) {
827                /* We match if HVhek_UTF8 bit in our flags and hash key's
828                   match.  But if entry was set previously with HVhek_WASUTF8
829                   and key now doesn't (or vice versa) then we should change
830                   the key's flag, as this is assignment.  */
831                if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) {
832                    /* Need to swap the key we have for a key with the flags we
833                       need. As keys are shared we can't just write to the
834                       flag, so we share the new one, unshare the old one.  */
835                    HEK * const new_hek
836                        = share_hek_flags(key, klen, hash, flags & ~HVhek_FREEKEY);
837                    unshare_hek (HeKEY_hek(entry));
838                    HeKEY_hek(entry) = new_hek;
839                }
840                else if (hv == PL_strtab) {
841                    /* PL_strtab is usually the only hash without HvSHAREKEYS,
842                       so putting this test here is cheap  */
843                    if (flags & HVhek_FREEKEY)
844                        Safefree(key);
845                    Perl_croak(aTHX_ S_strtab_error,
846                               action & HV_FETCH_LVALUE ? "fetch" : "store");
847                }
848                else {
849                    /* Effectively this is save_hek_flags() for a new version
850                       of the HEK and Safefree() of the old rolled together. */
851                    HeKFLAGS(entry) ^= HVhek_WASUTF8;
852                }
853                if (flags & HVhek_ENABLEHVKFLAGS)
854                    HvHASKFLAGS_on(hv);
855            }
856            if (HeVAL(entry) == &PL_sv_placeholder) {
857                /* yes, can store into placeholder slot */
858                if (action & HV_FETCH_LVALUE) {
859                    if (SvMAGICAL(hv)) {
860                        /* This preserves behaviour with the old hv_fetch
861                           implementation which at this point would bail out
862                           with a break; (at "if we find a placeholder, we
863                           pretend we haven't found anything")
864
865                           That break mean that if a placeholder were found, it
866                           caused a call into hv_store, which in turn would
867                           check magic, and if there is no magic end up pretty
868                           much back at this point (in hv_store's code).  */
869                        break;
870                    }
871                    /* LVAL fetch which actually needs a store.  */
872                    val = newSV_type(SVt_NULL);
873                    HvPLACEHOLDERS(hv)--;
874                } else {
875                    /* store */
876                    if (val != &PL_sv_placeholder)
877                        HvPLACEHOLDERS(hv)--;
878                }
879                HeVAL(entry) = val;
880            } else if (action & HV_FETCH_ISSTORE) {
881                SvREFCNT_dec(HeVAL(entry));
882                HeVAL(entry) = val;
883            }
884        } else if (HeVAL(entry) == &PL_sv_placeholder) {
885            /* if we find a placeholder, we pretend we haven't found
886               anything */
887            break;
888        }
889        if (flags & HVhek_FREEKEY)
890            Safefree(key);
891        if (return_svp) {
892            return (void *) &HeVAL(entry);
893        }
894        return entry;
895    }
896
897  not_found:
898#ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
899    if (!(action & HV_FETCH_ISSTORE)
900        && SvRMAGICAL((const SV *)hv)
901        && mg_find((const SV *)hv, PERL_MAGIC_env)) {
902        unsigned long len;
903        const char * const env = PerlEnv_ENVgetenv_len(key,&len);
904        if (env) {
905            sv = newSVpvn(env,len);
906            SvTAINTED_on(sv);
907            return hv_common(hv, keysv, key, klen, flags,
908                             HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
909                             sv, hash);
910        }
911    }
912#endif
913
914    if (!entry && SvREADONLY(hv) && !(action & HV_FETCH_ISEXISTS)) {
915        hv_notallowed(flags, key, klen,
916                        "Attempt to access disallowed key '%" SVf "' in"
917                        " a restricted hash");
918    }
919    if (!(action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE))) {
920        /* Not doing some form of store, so return failure.  */
921        if (flags & HVhek_FREEKEY)
922            Safefree(key);
923        return NULL;
924    }
925    if (action & HV_FETCH_LVALUE) {
926        val = action & HV_FETCH_EMPTY_HE ? NULL : newSV_type(SVt_NULL);
927        if (SvMAGICAL(hv)) {
928            /* At this point the old hv_fetch code would call to hv_store,
929               which in turn might do some tied magic. So we need to make that
930               magic check happen.  */
931            /* gonna assign to this, so it better be there */
932            /* If a fetch-as-store fails on the fetch, then the action is to
933               recurse once into "hv_store". If we didn't do this, then that
934               recursive call would call the key conversion routine again.
935               However, as we replace the original key with the converted
936               key, this would result in a double conversion, which would show
937               up as a bug if the conversion routine is not idempotent.
938               Hence the use of HV_DISABLE_UVAR_XKEY.  */
939            return hv_common(hv, keysv, key, klen, flags,
940                             HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
941                             val, hash);
942            /* XXX Surely that could leak if the fetch-was-store fails?
943               Just like the hv_fetch.  */
944        }
945    }
946
947    /* Welcome to hv_store...  */
948
949    if (!HvARRAY(hv)) {
950        /* Not sure if we can get here.  I think the only case of oentry being
951           NULL is for %ENV with dynamic env fetch.  But that should disappear
952           with magic in the previous code.  */
953        char *array;
954        Newxz(array,
955             PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */),
956             char);
957        HvARRAY(hv) = (HE**)array;
958    }
959
960    oentry = &(HvARRAY(hv))[hash & (I32) xhv->xhv_max];
961
962    /* share_hek_flags will do the free for us.  This might be considered
963       bad API design.  */
964    if (LIKELY(HvSHAREKEYS(hv))) {
965        entry = new_HE();
966        HeKEY_hek(entry) = share_hek_flags(key, klen, hash, flags);
967    }
968    else if (UNLIKELY(hv == PL_strtab)) {
969        /* PL_strtab is usually the only hash without HvSHAREKEYS, so putting
970           this test here is cheap  */
971        if (flags & HVhek_FREEKEY)
972            Safefree(key);
973        Perl_croak(aTHX_ S_strtab_error,
974                   action & HV_FETCH_LVALUE ? "fetch" : "store");
975    }
976    else {
977        /* gotta do the real thing */
978        entry = new_HE();
979        HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
980    }
981    HeVAL(entry) = val;
982    in_collision = cBOOL(*oentry != NULL);
983
984
985#ifdef PERL_HASH_RANDOMIZE_KEYS
986    /* This logic semi-randomizes the insert order in a bucket.
987     * Either we insert into the top, or the slot below the top,
988     * making it harder to see if there is a collision. We also
989     * reset the iterator randomizer if there is one.
990     */
991
992
993    if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
994        UPDATE_HASH_RAND_BITS_KEY(key,klen);
995        if ( PL_hash_rand_bits & 1 ) {
996            HeNEXT(entry) = HeNEXT(*oentry);
997            HeNEXT(*oentry) = entry;
998        } else {
999            HeNEXT(entry) = *oentry;
1000            *oentry = entry;
1001        }
1002    } else
1003#endif
1004    {
1005        HeNEXT(entry) = *oentry;
1006        *oentry = entry;
1007    }
1008#ifdef PERL_HASH_RANDOMIZE_KEYS
1009    if (HvHasAUX(hv)) {
1010        /* Currently this makes various tests warn in annoying ways.
1011         * So Silenced for now. - Yves | bogus end of comment =>* /
1012        if (HvAUX(hv)->xhv_riter != -1) {
1013            Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
1014                             "[TESTING] Inserting into a hash during each() traversal results in undefined behavior"
1015                             pTHX__FORMAT
1016                             pTHX__VALUE);
1017        }
1018        */
1019        MAYBE_UPDATE_HASH_RAND_BITS_KEY(key,klen);
1020        HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
1021    }
1022#endif
1023
1024    if (val == &PL_sv_placeholder)
1025        HvPLACEHOLDERS(hv)++;
1026    if (flags & HVhek_ENABLEHVKFLAGS)
1027        HvHASKFLAGS_on(hv);
1028
1029    xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
1030    if ( in_collision && DO_HSPLIT(xhv) ) {
1031        const STRLEN oldsize = xhv->xhv_max + 1;
1032        const U32 items = (U32)HvPLACEHOLDERS_get(hv);
1033
1034        if (items /* hash has placeholders  */
1035            && !SvREADONLY(hv) /* but is not a restricted hash */) {
1036            /* If this hash previously was a "restricted hash" and had
1037               placeholders, but the "restricted" flag has been turned off,
1038               then the placeholders no longer serve any useful purpose.
1039               However, they have the downsides of taking up RAM, and adding
1040               extra steps when finding used values. It's safe to clear them
1041               at this point, even though Storable rebuilds restricted hashes by
1042               putting in all the placeholders (first) before turning on the
1043               readonly flag, because Storable always pre-splits the hash.
1044               If we're lucky, then we may clear sufficient placeholders to
1045               avoid needing to split the hash at all.  */
1046            clear_placeholders(hv, items);
1047            if (DO_HSPLIT(xhv))
1048                hsplit(hv, oldsize, oldsize * 2);
1049        } else
1050            hsplit(hv, oldsize, oldsize * 2);
1051    }
1052
1053    if (return_svp) {
1054        return entry ? (void *) &HeVAL(entry) : NULL;
1055    }
1056    return (void *) entry;
1057}
1058
1059STATIC void
1060S_hv_magic_check(HV *hv, bool *needs_copy, bool *needs_store)
1061{
1062    const MAGIC *mg = SvMAGIC(hv);
1063
1064    PERL_ARGS_ASSERT_HV_MAGIC_CHECK;
1065
1066    *needs_copy = FALSE;
1067    *needs_store = TRUE;
1068    while (mg) {
1069        if (isUPPER(mg->mg_type)) {
1070            *needs_copy = TRUE;
1071            if (mg->mg_type == PERL_MAGIC_tied) {
1072                *needs_store = FALSE;
1073                return; /* We've set all there is to set. */
1074            }
1075        }
1076        mg = mg->mg_moremagic;
1077    }
1078}
1079
1080/*
1081=for apidoc hv_scalar
1082
1083Evaluates the hash in scalar context and returns the result.
1084
1085When the hash is tied dispatches through to the SCALAR method,
1086otherwise returns a mortal SV containing the number of keys
1087in the hash.
1088
1089Note, prior to 5.25 this function returned what is now
1090returned by the hv_bucket_ratio() function.
1091
1092=cut
1093*/
1094
1095SV *
1096Perl_hv_scalar(pTHX_ HV *hv)
1097{
1098    SV *sv;
1099    UV u;
1100
1101    PERL_ARGS_ASSERT_HV_SCALAR;
1102
1103    if (SvRMAGICAL(hv)) {
1104        MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied);
1105        if (mg)
1106            return magic_scalarpack(hv, mg);
1107    }
1108
1109    sv = newSV_type_mortal(SVt_IV);
1110
1111    /* Inlined sv_setuv(sv, HvUSEDKEYS(hv)) follows:*/
1112    u = HvUSEDKEYS(hv);
1113
1114    if (u <= (UV)IV_MAX) {
1115        SvIV_set(sv, (IV)u);
1116        (void)SvIOK_only(sv);
1117        SvTAINT(sv);
1118    } else {
1119        SvIV_set(sv, 0);
1120        SvUV_set(sv, u);
1121        (void)SvIOK_only_UV(sv);
1122        SvTAINT(sv);
1123    }
1124
1125    return sv;
1126}
1127
1128
1129/*
1130hv_pushkv(): push all the keys and/or values of a hash onto the stack.
1131The rough Perl equivalents:
1132    () = %hash;
1133    () = keys %hash;
1134    () = values %hash;
1135
1136Resets the hash's iterator.
1137
1138flags : 1   = push keys
1139        2   = push values
1140        1|2 = push keys and values
1141        XXX use symbolic flag constants at some point?
1142I might unroll the non-tied hv_iternext() in here at some point - DAPM
1143*/
1144
1145void
1146Perl_hv_pushkv(pTHX_ HV *hv, U32 flags)
1147{
1148    HE *entry;
1149    bool tied = SvRMAGICAL(hv) && (mg_find(MUTABLE_SV(hv), PERL_MAGIC_tied)
1150#ifdef DYNAMIC_ENV_FETCH  /* might not know number of keys yet */
1151                                   || mg_find(MUTABLE_SV(hv), PERL_MAGIC_env)
1152#endif
1153                                  );
1154    dSP;
1155
1156    PERL_ARGS_ASSERT_HV_PUSHKV;
1157    assert(flags); /* must be pushing at least one of keys and values */
1158
1159    (void)hv_iterinit(hv);
1160
1161    if (tied) {
1162        SSize_t ext = (flags == 3) ? 2 : 1;
1163        while ((entry = hv_iternext(hv))) {
1164            EXTEND(SP, ext);
1165            if (flags & 1)
1166                PUSHs(hv_iterkeysv(entry));
1167            if (flags & 2)
1168                PUSHs(hv_iterval(hv, entry));
1169        }
1170    }
1171    else {
1172        Size_t nkeys = HvUSEDKEYS(hv);
1173        SSize_t ext;
1174
1175        if (!nkeys)
1176            return;
1177
1178        /* 2*nkeys() should never be big enough to truncate or wrap */
1179        assert(nkeys <= (SSize_t_MAX >> 1));
1180        ext = nkeys * ((flags == 3) ? 2 : 1);
1181
1182        EXTEND_MORTAL(nkeys);
1183        EXTEND(SP, ext);
1184
1185        while ((entry = hv_iternext(hv))) {
1186            if (flags & 1) {
1187                SV *keysv = newSVhek(HeKEY_hek(entry));
1188                SvTEMP_on(keysv);
1189                PL_tmps_stack[++PL_tmps_ix] = keysv;
1190                PUSHs(keysv);
1191            }
1192            if (flags & 2)
1193                PUSHs(HeVAL(entry));
1194        }
1195    }
1196
1197    PUTBACK;
1198}
1199
1200
1201/*
1202=for apidoc hv_bucket_ratio
1203
1204If the hash is tied dispatches through to the SCALAR tied method,
1205otherwise if the hash contains no keys returns 0, otherwise returns
1206a mortal sv containing a string specifying the number of used buckets,
1207followed by a slash, followed by the number of available buckets.
1208
1209This function is expensive, it must scan all of the buckets
1210to determine which are used, and the count is NOT cached.
1211In a large hash this could be a lot of buckets.
1212
1213=cut
1214*/
1215
1216SV *
1217Perl_hv_bucket_ratio(pTHX_ HV *hv)
1218{
1219    SV *sv;
1220
1221    PERL_ARGS_ASSERT_HV_BUCKET_RATIO;
1222
1223    if (SvRMAGICAL(hv)) {
1224        MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied);
1225        if (mg)
1226            return magic_scalarpack(hv, mg);
1227    }
1228
1229    if (HvUSEDKEYS((HV *)hv)) {
1230        sv = sv_newmortal();
1231        Perl_sv_setpvf(aTHX_ sv, "%ld/%ld",
1232                (long)HvFILL(hv), (long)HvMAX(hv) + 1);
1233    }
1234    else
1235        sv = &PL_sv_zero;
1236
1237    return sv;
1238}
1239
1240/*
1241=for apidoc hv_delete
1242
1243Deletes a key/value pair in the hash.  The value's SV is removed from
1244the hash, made mortal, and returned to the caller.  The absolute
1245value of C<klen> is the length of the key.  If C<klen> is negative the
1246key is assumed to be in UTF-8-encoded Unicode.  The C<flags> value
1247will normally be zero; if set to C<G_DISCARD> then C<NULL> will be returned.
1248C<NULL> will also be returned if the key is not found.
1249
1250=for apidoc hv_delete_ent
1251
1252Deletes a key/value pair in the hash.  The value SV is removed from the hash,
1253made mortal, and returned to the caller.  The C<flags> value will normally be
1254zero; if set to C<G_DISCARD> then C<NULL> will be returned.  C<NULL> will also
1255be returned if the key is not found.  C<hash> can be a valid precomputed hash
1256value, or 0 to ask for it to be computed.
1257
1258=cut
1259*/
1260
1261STATIC SV *
1262S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
1263                   int k_flags, I32 d_flags, U32 hash)
1264{
1265    XPVHV* xhv;
1266    HE *entry;
1267    HE **oentry;
1268    HE **first_entry;
1269    bool is_utf8 = cBOOL(k_flags & HVhek_UTF8);
1270    HEK *keysv_hek = NULL;
1271    U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
1272    SV *sv;
1273    GV *gv = NULL;
1274    HV *stash = NULL;
1275
1276    if (SvMAGICAL(hv)) {
1277        bool needs_copy;
1278        bool needs_store;
1279        hv_magic_check (hv, &needs_copy, &needs_store);
1280
1281        if (needs_copy) {
1282            SV *sv;
1283            entry = (HE *) hv_common(hv, keysv, key, klen,
1284                                     k_flags & ~HVhek_FREEKEY,
1285                                     HV_FETCH_LVALUE|HV_DISABLE_UVAR_XKEY,
1286                                     NULL, hash);
1287            sv = entry ? HeVAL(entry) : NULL;
1288            if (sv) {
1289                if (SvMAGICAL(sv)) {
1290                    mg_clear(sv);
1291                }
1292                if (!needs_store) {
1293                    if (mg_find(sv, PERL_MAGIC_tiedelem)) {
1294                        /* No longer an element */
1295                        sv_unmagic(sv, PERL_MAGIC_tiedelem);
1296                        return sv;
1297                    }
1298                    return NULL;		/* element cannot be deleted */
1299                }
1300#ifdef ENV_IS_CASELESS
1301                else if (mg_find((const SV *)hv, PERL_MAGIC_env)) {
1302                    /* XXX This code isn't UTF8 clean.  */
1303                    keysv = newSVpvn_flags(key, klen, SVs_TEMP);
1304                    if (k_flags & HVhek_FREEKEY) {
1305                        Safefree(key);
1306                    }
1307                    key = strupr(SvPVX(keysv));
1308                    is_utf8 = 0;
1309                    k_flags = 0;
1310                    hash = 0;
1311                }
1312#endif
1313            }
1314        }
1315    }
1316    xhv = (XPVHV*)SvANY(hv);
1317    if (!HvTOTALKEYS(hv))
1318        return NULL;
1319
1320    if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) {
1321        const char * const keysave = key;
1322        key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
1323
1324        if (is_utf8)
1325            k_flags |= HVhek_UTF8;
1326        else
1327            k_flags &= ~HVhek_UTF8;
1328        if (key != keysave) {
1329            if (k_flags & HVhek_FREEKEY) {
1330                /* This shouldn't happen if our caller does what we expect,
1331                   but strictly the API allows it.  */
1332                Safefree(keysave);
1333            }
1334            k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
1335        }
1336    }
1337
1338    if (keysv && (SvIsCOW_shared_hash(keysv))) {
1339        if (HvSHAREKEYS(hv))
1340            keysv_hek  = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
1341        hash = SvSHARED_HASH(keysv);
1342    }
1343    else if (!hash)
1344        PERL_HASH(hash, key, klen);
1345
1346    first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
1347    entry = *oentry;
1348
1349    if (!entry)
1350        goto not_found;
1351
1352    if (keysv_hek) {
1353        /* keysv is actually a HEK in disguise, so we can match just by
1354         * comparing the HEK pointers in the HE chain. There is a slight
1355         * caveat: on something like "\x80", which has both plain and utf8
1356         * representations, perl's hashes do encoding-insensitive lookups,
1357         * but preserve the encoding of the stored key. Thus a particular
1358         * key could map to two different HEKs in PL_strtab. We only
1359         * conclude 'not found' if all the flags are the same; otherwise
1360         * we fall back to a full search (this should only happen in rare
1361         * cases).
1362         */
1363        int keysv_flags = HEK_FLAGS(keysv_hek);
1364
1365        for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
1366            HEK *hek = HeKEY_hek(entry);
1367            if (hek == keysv_hek)
1368                goto found;
1369            if (HEK_FLAGS(hek) != keysv_flags)
1370                break; /* need to do full match */
1371        }
1372        if (!entry)
1373            goto not_found;
1374        /* failed on shortcut - do full search loop */
1375        oentry = first_entry;
1376        entry = *oentry;
1377    }
1378
1379    for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
1380        if (HeHASH(entry) != hash)		/* strings can't be equal */
1381            continue;
1382        if (HeKLEN(entry) != (I32)klen)
1383            continue;
1384        if (memNE(HeKEY(entry),key,klen))	/* is this it? */
1385            continue;
1386        if ((HeKFLAGS(entry) ^ k_flags) & HVhek_UTF8)
1387            continue;
1388
1389      found:
1390        if (hv == PL_strtab) {
1391            if (k_flags & HVhek_FREEKEY)
1392                Safefree(key);
1393            Perl_croak(aTHX_ S_strtab_error, "delete");
1394        }
1395
1396        sv = HeVAL(entry);
1397
1398        /* if placeholder is here, it's already been deleted.... */
1399        if (sv == &PL_sv_placeholder) {
1400            if (k_flags & HVhek_FREEKEY)
1401                Safefree(key);
1402            return NULL;
1403        }
1404        if (SvREADONLY(hv) && sv && SvREADONLY(sv)) {
1405            hv_notallowed(k_flags, key, klen,
1406                            "Attempt to delete readonly key '%" SVf "' from"
1407                            " a restricted hash");
1408        }
1409
1410        /*
1411         * If a restricted hash, rather than really deleting the entry, put
1412         * a placeholder there. This marks the key as being "approved", so
1413         * we can still access via not-really-existing key without raising
1414         * an error.
1415         */
1416        if (SvREADONLY(hv)) {
1417            /* We'll be saving this slot, so the number of allocated keys
1418             * doesn't go down, but the number placeholders goes up */
1419            HeVAL(entry) = &PL_sv_placeholder;
1420            HvPLACEHOLDERS(hv)++;
1421        }
1422        else {
1423            HeVAL(entry) = NULL;
1424            *oentry = HeNEXT(entry);
1425            if (HvHasAUX(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) {
1426                HvLAZYDEL_on(hv);
1427            }
1428            else {
1429                if (HvHasAUX(hv) && HvLAZYDEL(hv) &&
1430                    entry == HeNEXT(HvAUX(hv)->xhv_eiter))
1431                    HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry);
1432                hv_free_ent(NULL, entry);
1433            }
1434            xhv->xhv_keys--; /* HvTOTALKEYS(hv)-- */
1435            if (xhv->xhv_keys == 0)
1436                HvHASKFLAGS_off(hv);
1437        }
1438
1439        /* If this is a stash and the key ends with ::, then someone is
1440         * deleting a package.
1441         */
1442        if (sv && SvTYPE(sv) == SVt_PVGV && HvHasENAME(hv)) {
1443                gv = (GV *)sv;
1444                if ((
1445                     (klen > 1 && key[klen-2] == ':' && key[klen-1] == ':')
1446                      ||
1447                     (klen == 1 && key[0] == ':')
1448                    )
1449                 && (klen != 6 || hv!=PL_defstash || memNE(key,"main::",6))
1450                 && (stash = GvHV((GV *)gv))
1451                 && HvHasENAME(stash)) {
1452                        /* A previous version of this code checked that the
1453                         * GV was still in the symbol table by fetching the
1454                         * GV with its name. That is not necessary (and
1455                         * sometimes incorrect), as HvENAME cannot be set
1456                         * on hv if it is not in the symtab. */
1457                        mro_changes = 2;
1458                        /* Hang on to it for a bit. */
1459                        SvREFCNT_inc_simple_void_NN(
1460                         sv_2mortal((SV *)gv)
1461                        );
1462                }
1463                else if (memEQs(key, klen, "ISA") && GvAV(gv)) {
1464                    AV *isa = GvAV(gv);
1465                    MAGIC *mg = mg_find((SV*)isa, PERL_MAGIC_isa);
1466
1467                    mro_changes = 1;
1468                    if (mg) {
1469                        if (mg->mg_obj == (SV*)gv) {
1470                            /* This is the only stash this ISA was used for.
1471                             * The isaelem magic asserts if there's no
1472                             * isa magic on the array, so explicitly
1473                             * remove the magic on both the array and its
1474                             * elements.  @ISA shouldn't be /too/ large.
1475                             */
1476                            SV **svp, **end;
1477                        strip_magic:
1478                            svp = AvARRAY(isa);
1479                            if (svp) {
1480                                end = svp + (AvFILLp(isa)+1);
1481                                while (svp < end) {
1482                                    if (*svp)
1483                                        mg_free_type(*svp, PERL_MAGIC_isaelem);
1484                                    ++svp;
1485                                }
1486                            }
1487                            mg_free_type((SV*)GvAV(gv), PERL_MAGIC_isa);
1488                        }
1489                        else {
1490                            /* mg_obj is an array of stashes
1491                               Note that the array doesn't keep a reference
1492                               count on the stashes.
1493                             */
1494                            AV *av = (AV*)mg->mg_obj;
1495                            SV **svp, **arrayp;
1496                            SSize_t index;
1497                            SSize_t items;
1498
1499                            assert(SvTYPE(mg->mg_obj) == SVt_PVAV);
1500
1501                            /* remove the stash from the magic array */
1502                            arrayp = svp = AvARRAY(av);
1503                            items = AvFILLp(av) + 1;
1504                            if (items == 1) {
1505                                assert(*arrayp == (SV *)gv);
1506                                mg->mg_obj = NULL;
1507                                /* avoid a double free on the last stash */
1508                                AvFILLp(av) = -1;
1509                                /* The magic isn't MGf_REFCOUNTED, so release
1510                                 * the array manually.
1511                                 */
1512                                SvREFCNT_dec_NN(av);
1513                                goto strip_magic;
1514                            }
1515                            else {
1516                                while (items--) {
1517                                    if (*svp == (SV*)gv)
1518                                        break;
1519                                    ++svp;
1520                                }
1521                                index = svp - arrayp;
1522                                assert(index >= 0 && index <= AvFILLp(av));
1523                                if (index < AvFILLp(av)) {
1524                                    arrayp[index] = arrayp[AvFILLp(av)];
1525                                }
1526                                arrayp[AvFILLp(av)] = NULL;
1527                                --AvFILLp(av);
1528                            }
1529                        }
1530                    }
1531                }
1532        }
1533
1534        if (k_flags & HVhek_FREEKEY)
1535            Safefree(key);
1536
1537        if (sv) {
1538            /* deletion of method from stash */
1539            if (isGV(sv) && isGV_with_GP(sv) && GvCVu(sv)
1540             && HvHasENAME(hv))
1541                mro_method_changed_in(hv);
1542
1543            if (d_flags & G_DISCARD) {
1544                SvREFCNT_dec(sv);
1545                sv = NULL;
1546            }
1547            else {
1548                sv_2mortal(sv);
1549            }
1550        }
1551
1552        if (mro_changes == 1) mro_isa_changed_in(hv);
1553        else if (mro_changes == 2)
1554            mro_package_moved(NULL, stash, gv, 1);
1555
1556        return sv;
1557    }
1558
1559  not_found:
1560    if (SvREADONLY(hv)) {
1561        hv_notallowed(k_flags, key, klen,
1562                        "Attempt to delete disallowed key '%" SVf "' from"
1563                        " a restricted hash");
1564    }
1565
1566    if (k_flags & HVhek_FREEKEY)
1567        Safefree(key);
1568    return NULL;
1569}
1570
1571/* HVs are used for (at least) three things
1572   1) objects
1573   2) symbol tables
1574   3) associative arrays
1575
1576   shared hash keys benefit the first two greatly, because keys are likely
1577   to be re-used between objects, or for constants in the optree
1578
1579   However, for large associative arrays (lookup tables, "seen" hashes) keys are
1580   unlikely to be re-used. Hence having those keys in the shared string table as
1581   well as the hash is a memory hit, if they are never actually shared with a
1582   second hash. Hence we turn off shared hash keys if a (regular) hash gets
1583   large.
1584
1585   This is a heuristic. There might be a better answer than 42, but for now
1586   we'll use it.
1587
1588   NOTE: Configure with -Accflags='-DPERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES'
1589   to enable this new functionality.
1590*/
1591
1592#ifdef PERL_USE_UNSHARED_KEYS_IN_LARGE_HASHES
1593static bool
1594S_large_hash_heuristic(pTHX_ HV *hv, STRLEN size) {
1595    if (size > 42
1596        && !SvOBJECT(hv)
1597        && !(HvHasAUX(hv) && HvENAME_get(hv))) {
1598        /* This hash appears to be growing quite large.
1599           We gamble that it is not sharing keys with other hashes. */
1600        return TRUE;
1601    }
1602    return FALSE;
1603}
1604#endif
1605
1606STATIC void
1607S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
1608{
1609    STRLEN i = 0;
1610    char *a = (char*) HvARRAY(hv);
1611    HE **aep;
1612
1613    PERL_ARGS_ASSERT_HSPLIT;
1614    if (newsize > MAX_BUCKET_MAX+1)
1615            return;
1616
1617    PL_nomemok = TRUE;
1618    Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
1619    PL_nomemok = FALSE;
1620    if (!a) {
1621      return;
1622    }
1623
1624#ifdef PERL_HASH_RANDOMIZE_KEYS
1625    /* the idea of this is that we create a "random" value by hashing the address of
1626     * the array, we then use the low bit to decide if we insert at the top, or insert
1627     * second from top. After each such insert we rotate the hashed value. So we can
1628     * use the same hashed value over and over, and in normal build environments use
1629     * very few ops to do so. ROTL32() should produce a single machine operation. */
1630    MAYBE_UPDATE_HASH_RAND_BITS();
1631#endif
1632    HvARRAY(hv) = (HE**) a;
1633    HvMAX(hv) = newsize - 1;
1634    /* now we can safely clear the second half */
1635    Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char);	/* zero 2nd half*/
1636
1637    if (!HvTOTALKEYS(hv))       /* skip rest if no entries */
1638        return;
1639
1640    /* don't share keys in large simple hashes */
1641    if (LARGE_HASH_HEURISTIC(hv, HvTOTALKEYS(hv)))
1642        HvSHAREKEYS_off(hv);
1643
1644
1645    newsize--;
1646    aep = (HE**)a;
1647    do {
1648        HE **oentry = aep + i;
1649        HE *entry = aep[i];
1650
1651        if (!entry)				/* non-existent */
1652            continue;
1653        do {
1654            U32 j = (HeHASH(entry) & newsize);
1655            if (j != (U32)i) {
1656                *oentry = HeNEXT(entry);
1657#ifdef PERL_HASH_RANDOMIZE_KEYS
1658                /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false
1659                 * insert to top, otherwise rotate the bucket rand 1 bit,
1660                 * and use the new low bit to decide if we insert at top,
1661                 * or next from top. IOW, we only rotate on a collision.*/
1662                if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
1663                    UPDATE_HASH_RAND_BITS();
1664                    if (PL_hash_rand_bits & 1) {
1665                        HeNEXT(entry)= HeNEXT(aep[j]);
1666                        HeNEXT(aep[j])= entry;
1667                    } else {
1668                        /* Note, this is structured in such a way as the optimizer
1669                        * should eliminate the duplicated code here and below without
1670                        * us needing to explicitly use a goto. */
1671                        HeNEXT(entry) = aep[j];
1672                        aep[j] = entry;
1673                    }
1674                } else
1675#endif
1676                {
1677                    /* see comment above about duplicated code */
1678                    HeNEXT(entry) = aep[j];
1679                    aep[j] = entry;
1680                }
1681            }
1682            else {
1683                oentry = &HeNEXT(entry);
1684            }
1685            entry = *oentry;
1686        } while (entry);
1687    } while (i++ < oldsize);
1688}
1689
1690/*
1691=for apidoc hv_ksplit
1692
1693Attempt to grow the hash C<hv> so it has at least C<newmax> buckets available.
1694Perl chooses the actual number for its convenience.
1695
1696This is the same as doing the following in Perl code:
1697
1698 keys %hv = newmax;
1699
1700=cut
1701*/
1702
1703void
1704Perl_hv_ksplit(pTHX_ HV *hv, IV newmax)
1705{
1706    XPVHV* xhv = (XPVHV*)SvANY(hv);
1707    const I32 oldsize = (I32) xhv->xhv_max+1;       /* HvMAX(hv)+1 */
1708    I32 newsize;
1709    I32 wantsize;
1710    I32 trysize;
1711    char *a;
1712
1713    PERL_ARGS_ASSERT_HV_KSPLIT;
1714
1715    wantsize = (I32) newmax;                            /* possible truncation here */
1716    if (wantsize != newmax)
1717        return;
1718
1719    wantsize= wantsize + (wantsize >> 1);           /* wantsize *= 1.5 */
1720    if (wantsize < newmax)                          /* overflow detection */
1721        return;
1722
1723    newsize = oldsize;
1724    while (wantsize > newsize) {
1725        trysize = newsize << 1;
1726        if (trysize > newsize) {
1727            newsize = trysize;
1728        } else {
1729            /* we overflowed */
1730            return;
1731        }
1732    }
1733
1734    if (newsize <= oldsize)
1735        return;                                            /* overflow detection */
1736
1737    a = (char *) HvARRAY(hv);
1738    if (a) {
1739#ifdef PERL_HASH_RANDOMIZE_KEYS
1740        U32 was_ook = HvHasAUX(hv);
1741#endif
1742        hsplit(hv, oldsize, newsize);
1743#ifdef PERL_HASH_RANDOMIZE_KEYS
1744        if (was_ook && HvHasAUX(hv) && HvTOTALKEYS(hv)) {
1745            MAYBE_UPDATE_HASH_RAND_BITS();
1746            HvAUX(hv)->xhv_rand = (U32)PL_hash_rand_bits;
1747        }
1748#endif
1749    } else {
1750        if (LARGE_HASH_HEURISTIC(hv, newmax))
1751            HvSHAREKEYS_off(hv);
1752        Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
1753        xhv->xhv_max = newsize - 1;
1754        HvARRAY(hv) = (HE **) a;
1755    }
1756}
1757
1758/* IMO this should also handle cases where hv_max is smaller than hv_keys
1759 * as tied hashes could play silly buggers and mess us around. We will
1760 * do the right thing during hv_store() afterwards, but still - Yves */
1761#define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\
1762    /* Can we use fewer buckets? (hv_max is always 2^n-1) */        \
1763    if (hv_max < PERL_HASH_DEFAULT_HvMAX) {                         \
1764        hv_max = PERL_HASH_DEFAULT_HvMAX;                           \
1765    } else {                                                        \
1766        while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \
1767            hv_max = hv_max / 2;                                    \
1768    }                                                               \
1769    HvMAX(hv) = hv_max;                                             \
1770} STMT_END
1771
1772
1773/*
1774=for apidoc newHVhv
1775
1776The content of C<ohv> is copied to a new hash.  A pointer to the new hash is
1777returned.
1778
1779=cut
1780*/
1781
1782HV *
1783Perl_newHVhv(pTHX_ HV *ohv)
1784{
1785    HV * const hv = newHV();
1786    STRLEN hv_max;
1787
1788    if (!ohv || (!HvTOTALKEYS(ohv) && !SvMAGICAL((const SV *)ohv)))
1789        return hv;
1790    hv_max = HvMAX(ohv);
1791
1792    if (!SvMAGICAL((const SV *)ohv)) {
1793        /* It's an ordinary hash, so copy it fast. AMS 20010804 */
1794        STRLEN i;
1795        HE **ents, ** const oents = (HE **)HvARRAY(ohv);
1796        char *a;
1797        Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(hv_max+1), char);
1798        ents = (HE**)a;
1799
1800        if (HvSHAREKEYS(ohv)) {
1801#ifdef NODEFAULT_SHAREKEYS
1802            HvSHAREKEYS_on(hv);
1803#else
1804            /* Shared is the default - it should have been set by newHV(). */
1805            assert(HvSHAREKEYS(hv));
1806#endif
1807        }
1808        else {
1809            HvSHAREKEYS_off(hv);
1810        }
1811
1812        /* In each bucket... */
1813        for (i = 0; i <= hv_max; i++) {
1814            HE *prev = NULL;
1815            HE *oent = oents[i];
1816
1817            if (!oent) {
1818                ents[i] = NULL;
1819                continue;
1820            }
1821
1822            /* Copy the linked list of entries. */
1823            for (; oent; oent = HeNEXT(oent)) {
1824                HE * const ent   = new_HE();
1825                SV *const val    = HeVAL(oent);
1826                const int flags  = HeKFLAGS(oent);
1827
1828                HeVAL(ent) = SvIMMORTAL(val) ? val : newSVsv(val);
1829                if ((flags & HVhek_NOTSHARED) == 0) {
1830                    HeKEY_hek(ent) = share_hek_hek(HeKEY_hek(oent));
1831                }
1832                else {
1833                    const U32 hash   = HeHASH(oent);
1834                    const char * const key = HeKEY(oent);
1835                    const STRLEN len = HeKLEN(oent);
1836                    HeKEY_hek(ent) = save_hek_flags(key, len, hash, flags);
1837                }
1838                if (prev)
1839                    HeNEXT(prev) = ent;
1840                else
1841                    ents[i] = ent;
1842                prev = ent;
1843                HeNEXT(ent) = NULL;
1844            }
1845        }
1846
1847        HvMAX(hv)   = hv_max;
1848        HvTOTALKEYS(hv)  = HvTOTALKEYS(ohv);
1849        HvARRAY(hv) = ents;
1850    } /* not magical */
1851    else {
1852        /* Iterate over ohv, copying keys and values one at a time. */
1853        HE *entry;
1854        const I32 riter = HvRITER_get(ohv);
1855        HE * const eiter = HvEITER_get(ohv);
1856        STRLEN hv_keys = HvTOTALKEYS(ohv);
1857
1858        HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
1859
1860        hv_iterinit(ohv);
1861        while ((entry = hv_iternext_flags(ohv, 0))) {
1862            SV *val = hv_iterval(ohv,entry);
1863            SV * const keysv = HeSVKEY(entry);
1864            val = SvIMMORTAL(val) ? val : newSVsv(val);
1865            if (keysv)
1866                (void)hv_store_ent(hv, keysv, val, 0);
1867            else
1868                (void)hv_store_flags(hv, HeKEY(entry), HeKLEN(entry), val,
1869                                 HeHASH(entry), HeKFLAGS(entry));
1870        }
1871        HvRITER_set(ohv, riter);
1872        HvEITER_set(ohv, eiter);
1873    }
1874
1875    return hv;
1876}
1877
1878/*
1879=for apidoc hv_copy_hints_hv
1880
1881A specialised version of L</newHVhv> for copying C<%^H>.  C<ohv> must be
1882a pointer to a hash (which may have C<%^H> magic, but should be generally
1883non-magical), or C<NULL> (interpreted as an empty hash).  The content
1884of C<ohv> is copied to a new hash, which has the C<%^H>-specific magic
1885added to it.  A pointer to the new hash is returned.
1886
1887=cut
1888*/
1889
1890HV *
1891Perl_hv_copy_hints_hv(pTHX_ HV *const ohv)
1892{
1893    HV * const hv = newHV();
1894
1895    if (ohv) {
1896        STRLEN hv_max = HvMAX(ohv);
1897        STRLEN hv_keys = HvTOTALKEYS(ohv);
1898        HE *entry;
1899        const I32 riter = HvRITER_get(ohv);
1900        HE * const eiter = HvEITER_get(ohv);
1901
1902        ENTER;
1903        SAVEFREESV(hv);
1904
1905        HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
1906
1907        hv_iterinit(ohv);
1908        while ((entry = hv_iternext_flags(ohv, 0))) {
1909            SV *const sv = newSVsv(hv_iterval(ohv,entry));
1910            SV *heksv = HeSVKEY(entry);
1911            if (!heksv && sv) heksv = newSVhek(HeKEY_hek(entry));
1912            if (sv) sv_magic(sv, NULL, PERL_MAGIC_hintselem,
1913                     (char *)heksv, HEf_SVKEY);
1914            if (heksv == HeSVKEY(entry))
1915                (void)hv_store_ent(hv, heksv, sv, 0);
1916            else {
1917                (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry),
1918                                 HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry));
1919                SvREFCNT_dec_NN(heksv);
1920            }
1921        }
1922        HvRITER_set(ohv, riter);
1923        HvEITER_set(ohv, eiter);
1924
1925        SvREFCNT_inc_simple_void_NN(hv);
1926        LEAVE;
1927    }
1928    hv_magic(hv, NULL, PERL_MAGIC_hints);
1929    return hv;
1930}
1931#undef HV_SET_MAX_ADJUSTED_FOR_KEYS
1932
1933/* like hv_free_ent, but returns the SV rather than freeing it */
1934STATIC SV*
1935S_hv_free_ent_ret(pTHX_ HE *entry)
1936{
1937    PERL_ARGS_ASSERT_HV_FREE_ENT_RET;
1938
1939    SV *val = HeVAL(entry);
1940    if (HeKLEN(entry) == HEf_SVKEY) {
1941        SvREFCNT_dec(HeKEY_sv(entry));
1942        Safefree(HeKEY_hek(entry));
1943    }
1944    else if ((HeKFLAGS(entry) & HVhek_NOTSHARED) == 0) {
1945        unshare_hek(HeKEY_hek(entry));
1946    }
1947    else {
1948        Safefree(HeKEY_hek(entry));
1949    }
1950    del_HE(entry);
1951    return val;
1952}
1953
1954
1955void
1956Perl_hv_free_ent(pTHX_ HV *notused, HE *entry)
1957{
1958    PERL_UNUSED_ARG(notused);
1959
1960    if (!entry)
1961        return;
1962
1963    SV *val = hv_free_ent_ret(entry);
1964    SvREFCNT_dec(val);
1965}
1966
1967
1968void
1969Perl_hv_delayfree_ent(pTHX_ HV *notused, HE *entry)
1970{
1971    PERL_UNUSED_ARG(notused);
1972
1973    if (!entry)
1974        return;
1975    /* SvREFCNT_inc to counter the SvREFCNT_dec in hv_free_ent  */
1976    sv_2mortal(SvREFCNT_inc(HeVAL(entry)));	/* free between statements */
1977    if (HeKLEN(entry) == HEf_SVKEY) {
1978        sv_2mortal(SvREFCNT_inc(HeKEY_sv(entry)));
1979    }
1980    hv_free_ent(NULL, entry);
1981}
1982
1983/*
1984=for apidoc hv_clear
1985
1986Frees all the elements of a hash, leaving it empty.
1987The XS equivalent of C<%hash = ()>.  See also L</hv_undef>.
1988
1989See L</av_clear> for a note about the hash possibly being invalid on
1990return.
1991
1992=cut
1993*/
1994
1995void
1996Perl_hv_clear(pTHX_ HV *hv)
1997{
1998    SSize_t orig_ix;
1999
2000    if (!hv)
2001        return;
2002
2003    DEBUG_A(Perl_hv_assert(aTHX_ hv));
2004
2005    /* avoid hv being freed when calling destructors below */
2006    EXTEND_MORTAL(1);
2007    PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv);
2008    orig_ix = PL_tmps_ix;
2009    if (SvREADONLY(hv) && HvTOTALKEYS(hv)) {
2010        /* restricted hash: convert all keys to placeholders */
2011        STRLEN max = HvMAX(hv);
2012        STRLEN i;
2013        for (i = 0; i <= max; i++) {
2014            HE *entry = (HvARRAY(hv))[i];
2015            for (; entry; entry = HeNEXT(entry)) {
2016                /* not already placeholder */
2017                if (HeVAL(entry) != &PL_sv_placeholder) {
2018                    if (HeVAL(entry)) {
2019                        if (SvREADONLY(HeVAL(entry))) {
2020                            SV* const keysv = hv_iterkeysv(entry);
2021                            Perl_croak_nocontext(
2022                                "Attempt to delete readonly key '%" SVf "' from a restricted hash",
2023                                (void*)keysv);
2024                        }
2025                        SvREFCNT_dec_NN(HeVAL(entry));
2026                    }
2027                    HeVAL(entry) = &PL_sv_placeholder;
2028                    HvPLACEHOLDERS(hv)++;
2029                }
2030            }
2031        }
2032    }
2033    else {
2034        hv_free_entries(hv);
2035        HvPLACEHOLDERS_set(hv, 0);
2036
2037        if (SvRMAGICAL(hv))
2038            mg_clear(MUTABLE_SV(hv));
2039
2040        HvHASKFLAGS_off(hv);
2041    }
2042    if (HvHasAUX(hv)) {
2043        if(HvENAME_get(hv))
2044            mro_isa_changed_in(hv);
2045        HvEITER_set(hv, NULL);
2046    }
2047    /* disarm hv's premature free guard */
2048    if (LIKELY(PL_tmps_ix == orig_ix))
2049        PL_tmps_ix--;
2050    else
2051        PL_tmps_stack[orig_ix] = &PL_sv_undef;
2052    SvREFCNT_dec_NN(hv);
2053}
2054
2055/*
2056=for apidoc hv_clear_placeholders
2057
2058Clears any placeholders from a hash.  If a restricted hash has any of its keys
2059marked as readonly and the key is subsequently deleted, the key is not actually
2060deleted but is marked by assigning it a value of C<&PL_sv_placeholder>.  This tags
2061it so it will be ignored by future operations such as iterating over the hash,
2062but will still allow the hash to have a value reassigned to the key at some
2063future point.  This function clears any such placeholder keys from the hash.
2064See C<L<Hash::Util::lock_keys()|Hash::Util/lock_keys>> for an example of its
2065use.
2066
2067=cut
2068*/
2069
2070void
2071Perl_hv_clear_placeholders(pTHX_ HV *hv)
2072{
2073    const U32 items = (U32)HvPLACEHOLDERS_get(hv);
2074
2075    PERL_ARGS_ASSERT_HV_CLEAR_PLACEHOLDERS;
2076
2077    if (items)
2078        clear_placeholders(hv, items);
2079}
2080
2081static void
2082S_clear_placeholders(pTHX_ HV *hv, const U32 placeholders)
2083{
2084    I32 i;
2085    U32 to_find = placeholders;
2086
2087    PERL_ARGS_ASSERT_CLEAR_PLACEHOLDERS;
2088
2089    assert(to_find);
2090
2091    i = HvMAX(hv);
2092    do {
2093        /* Loop down the linked list heads  */
2094        HE **oentry = &(HvARRAY(hv))[i];
2095        HE *entry;
2096
2097        while ((entry = *oentry)) {
2098            if (HeVAL(entry) == &PL_sv_placeholder) {
2099                *oentry = HeNEXT(entry);
2100                if (entry == HvEITER_get(hv))
2101                    HvLAZYDEL_on(hv);
2102                else {
2103                    if (HvHasAUX(hv) && HvLAZYDEL(hv) &&
2104                        entry == HeNEXT(HvAUX(hv)->xhv_eiter))
2105                        HeNEXT(HvAUX(hv)->xhv_eiter) = HeNEXT(entry);
2106                    hv_free_ent(NULL, entry);
2107                }
2108
2109                if (--to_find == 0) {
2110                    /* Finished.  */
2111                    HvTOTALKEYS(hv) -= (IV)placeholders;
2112                    if (HvTOTALKEYS(hv) == 0)
2113                        HvHASKFLAGS_off(hv);
2114                    HvPLACEHOLDERS_set(hv, 0);
2115                    return;
2116                }
2117            } else {
2118                oentry = &HeNEXT(entry);
2119            }
2120        }
2121    } while (--i >= 0);
2122    /* You can't get here, hence assertion should always fail.  */
2123    assert (to_find == 0);
2124    NOT_REACHED; /* NOTREACHED */
2125}
2126
2127STATIC void
2128S_hv_free_entries(pTHX_ HV *hv)
2129{
2130    STRLEN index = 0;
2131    SV *sv;
2132
2133    PERL_ARGS_ASSERT_HV_FREE_ENTRIES;
2134
2135    while ((sv = Perl_hfree_next_entry(aTHX_ hv, &index)) || HvTOTALKEYS(hv)) {
2136        SvREFCNT_dec(sv);
2137    }
2138}
2139
2140
2141/* hfree_next_entry()
2142 * For use only by S_hv_free_entries() and sv_clear().
2143 * Delete the next available HE from hv and return the associated SV.
2144 * Returns null on empty hash. Nevertheless null is not a reliable
2145 * indicator that the hash is empty, as the deleted entry may have a
2146 * null value.
2147 * indexp is a pointer to the current index into HvARRAY. The index should
2148 * initially be set to 0. hfree_next_entry() may update it.  */
2149
2150SV*
2151Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
2152{
2153    struct xpvhv_aux *iter;
2154    HE *entry;
2155    HE ** array;
2156#ifdef DEBUGGING
2157    STRLEN orig_index = *indexp;
2158#endif
2159
2160    PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY;
2161
2162    if (HvHasAUX(hv) && ((iter = HvAUX(hv)))) {
2163        if ((entry = iter->xhv_eiter)) {
2164            /* the iterator may get resurrected after each
2165             * destructor call, so check each time */
2166            if (entry && HvLAZYDEL(hv)) {	/* was deleted earlier? */
2167                HvLAZYDEL_off(hv);
2168                hv_free_ent(NULL, entry);
2169                /* warning: at this point HvARRAY may have been
2170                 * re-allocated, HvMAX changed etc */
2171            }
2172            iter->xhv_riter = -1; 	/* HvRITER(hv) = -1 */
2173            iter->xhv_eiter = NULL;	/* HvEITER(hv) = NULL */
2174#ifdef PERL_HASH_RANDOMIZE_KEYS
2175            iter->xhv_last_rand = iter->xhv_rand;
2176#endif
2177        }
2178    }
2179
2180    if (!((XPVHV*)SvANY(hv))->xhv_keys)
2181        return NULL;
2182
2183    array = HvARRAY(hv);
2184    assert(array);
2185    while ( ! ((entry = array[*indexp])) ) {
2186        if ((*indexp)++ >= HvMAX(hv))
2187            *indexp = 0;
2188        assert(*indexp != orig_index);
2189    }
2190    array[*indexp] = HeNEXT(entry);
2191    ((XPVHV*) SvANY(hv))->xhv_keys--;
2192
2193    if (   PL_phase != PERL_PHASE_DESTRUCT && HvHasENAME(hv)
2194        && HeVAL(entry) && isGV(HeVAL(entry))
2195        && GvHV(HeVAL(entry)) && HvHasENAME(GvHV(HeVAL(entry)))
2196    ) {
2197        STRLEN klen;
2198        const char * const key = HePV(entry,klen);
2199        if ((klen > 1 && key[klen-1]==':' && key[klen-2]==':')
2200         || (klen == 1 && key[0] == ':')) {
2201            mro_package_moved(
2202             NULL, GvHV(HeVAL(entry)),
2203             (GV *)HeVAL(entry), 0
2204            );
2205        }
2206    }
2207    return hv_free_ent_ret(entry);
2208}
2209
2210
2211/*
2212=for apidoc hv_undef
2213
2214Undefines the hash.  The XS equivalent of C<undef(%hash)>.
2215
2216As well as freeing all the elements of the hash (like C<hv_clear()>), this
2217also frees any auxiliary data and storage associated with the hash.
2218
2219See L</av_clear> for a note about the hash possibly being invalid on
2220return.
2221
2222=cut
2223*/
2224
2225void
2226Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
2227{
2228    bool save;
2229    SSize_t orig_ix = PL_tmps_ix; /* silence compiler warning about uninitialized vars */
2230
2231    if (!hv)
2232        return;
2233    save = cBOOL(SvREFCNT(hv));
2234    DEBUG_A(Perl_hv_assert(aTHX_ hv));
2235
2236    /* The name must be deleted before the call to hv_free_entries so that
2237       CVs are anonymised properly. But the effective name must be pre-
2238       served until after that call (and only deleted afterwards if the
2239       call originated from sv_clear). For stashes with one name that is
2240       both the canonical name and the effective name, hv_name_set has to
2241       allocate an array for storing the effective name. We can skip that
2242       during global destruction, as it does not matter where the CVs point
2243       if they will be freed anyway. */
2244    /* note that the code following prior to hv_free_entries is duplicated
2245     * in sv_clear(), and changes here should be done there too */
2246    if (PL_phase != PERL_PHASE_DESTRUCT && HvHasNAME(hv)) {
2247        if (PL_stashcache) {
2248            DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for '%"
2249                             HEKf "'\n", HEKfARG(HvNAME_HEK(hv))));
2250            (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD);
2251        }
2252        hv_name_set(hv, NULL, 0, 0);
2253    }
2254    if (save) {
2255        /* avoid hv being freed when calling destructors below */
2256        EXTEND_MORTAL(1);
2257        PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple_NN(hv);
2258        orig_ix = PL_tmps_ix;
2259    }
2260
2261    /* As well as any/all HE*s in HvARRAY(), this call also ensures that
2262       xhv_eiter is NULL, including handling the case of a tied hash partway
2263       through iteration where HvLAZYDEL() is true and xhv_eiter points to an
2264       HE* that needs to be explicitly freed. */
2265    hv_free_entries(hv);
2266
2267    /* HvHasAUX() is true for a hash if it has struct xpvhv_aux allocated. That
2268       structure has several other pieces of allocated memory - hence those must
2269       be freed before the structure itself can be freed. Some can be freed when
2270       a hash is "undefined" (this function), but some must persist until it is
2271       destroyed (which might be this function's immediate caller).
2272
2273       Hence the code in this block frees what it is logical to free (and NULLs
2274       out anything freed) so that the structure is left in a logically
2275       consistent state - pointers are NULL or point to valid memory, and
2276       non-pointer values are correct for an empty hash. The structure state
2277       must remain consistent, because this code can no longer clear SVf_OOK,
2278       meaning that this structure might be read again at any point in the
2279       future without further checks or reinitialisation. */
2280    if (HvHasAUX(hv)) {
2281      struct xpvhv_aux *aux = HvAUX(hv);
2282      struct mro_meta *meta;
2283      const char *name;
2284
2285      if (HvHasENAME(hv)) {
2286        if (PL_phase != PERL_PHASE_DESTRUCT)
2287            mro_isa_changed_in(hv);
2288        if (PL_stashcache) {
2289            DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for effective name '%"
2290                             HEKf "'\n", HEKfARG(HvENAME_HEK_NN(hv))));
2291            (void)hv_deletehek(PL_stashcache, HvENAME_HEK_NN(hv), G_DISCARD);
2292        }
2293      }
2294
2295      /* If this call originated from sv_clear, then we must check for
2296       * effective names that need freeing, as well as the usual name. */
2297      name = HvNAME(hv);
2298      if (flags & HV_NAME_SETALL
2299          ? cBOOL(aux->xhv_name_u.xhvnameu_name)
2300          : cBOOL(name))
2301      {
2302        if (name && PL_stashcache) {
2303            DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for name '%"
2304                             HEKf "'\n", HEKfARG(HvNAME_HEK_NN(hv))));
2305            (void)hv_deletehek(PL_stashcache, HvNAME_HEK_NN(hv), G_DISCARD);
2306        }
2307        hv_name_set(hv, NULL, 0, flags);
2308      }
2309      if((meta = aux->xhv_mro_meta)) {
2310        if (meta->mro_linear_all) {
2311            SvREFCNT_dec_NN(meta->mro_linear_all);
2312            /* mro_linear_current is just acting as a shortcut pointer,
2313               hence the else.  */
2314        }
2315        else
2316            /* Only the current MRO is stored, so this owns the data.
2317             */
2318            SvREFCNT_dec(meta->mro_linear_current);
2319        SvREFCNT_dec(meta->mro_nextmethod);
2320        SvREFCNT_dec(meta->isa);
2321        SvREFCNT_dec(meta->super);
2322        Safefree(meta);
2323        aux->xhv_mro_meta = NULL;
2324      }
2325
2326      if(HvSTASH_IS_CLASS(hv)) {
2327          SvREFCNT_dec(aux->xhv_class_superclass);
2328          SvREFCNT_dec(aux->xhv_class_initfields_cv);
2329          SvREFCNT_dec(aux->xhv_class_adjust_blocks);
2330          if(aux->xhv_class_fields)
2331            PadnamelistREFCNT_dec(aux->xhv_class_fields);
2332          SvREFCNT_dec(aux->xhv_class_param_map);
2333          Safefree(aux->xhv_class_suspended_initfields_compcv);
2334          aux->xhv_class_suspended_initfields_compcv = NULL;
2335
2336          aux->xhv_aux_flags &= ~HvAUXf_IS_CLASS;
2337      }
2338    }
2339
2340    Safefree(HvARRAY(hv));
2341    HvMAX(hv) = PERL_HASH_DEFAULT_HvMAX;        /* 7 (it's a normal hash) */
2342    HvARRAY(hv) = 0;
2343
2344    /* if we're freeing the HV, the SvMAGIC field has been reused for
2345     * other purposes, and so there can't be any placeholder magic */
2346    if (SvREFCNT(hv))
2347        HvPLACEHOLDERS_set(hv, 0);
2348
2349    if (SvRMAGICAL(hv))
2350        mg_clear(MUTABLE_SV(hv));
2351
2352    if (save) {
2353        /* disarm hv's premature free guard */
2354        if (LIKELY(PL_tmps_ix == orig_ix))
2355            PL_tmps_ix--;
2356        else
2357            PL_tmps_stack[orig_ix] = &PL_sv_undef;
2358        SvREFCNT_dec_NN(hv);
2359    }
2360}
2361
2362/*
2363=for apidoc hv_fill
2364
2365Returns the number of hash buckets that happen to be in use.
2366
2367This function implements the L<C<HvFILL> macro|perlapi/HvFILL> which you should
2368use instead.
2369
2370As of perl 5.25 this function is used only for debugging
2371purposes, and the number of used hash buckets is not
2372in any way cached, thus this function can be costly
2373to execute as it must iterate over all the buckets in the
2374hash.
2375
2376=cut
2377*/
2378
2379STRLEN
2380Perl_hv_fill(pTHX_ HV *const hv)
2381{
2382    STRLEN count = 0;
2383    HE **ents = HvARRAY(hv);
2384
2385    PERL_UNUSED_CONTEXT;
2386    PERL_ARGS_ASSERT_HV_FILL;
2387
2388    /* No keys implies no buckets used.
2389       One key can only possibly mean one bucket used.  */
2390    if (HvTOTALKEYS(hv) < 2)
2391        return HvTOTALKEYS(hv);
2392
2393    if (ents) {
2394        /* I wonder why we count down here...
2395         * Is it some micro-optimisation?
2396         * I would have thought counting up was better.
2397         * - Yves
2398         */
2399        HE *const *const last = ents + HvMAX(hv);
2400        count = last + 1 - ents;
2401
2402        do {
2403            if (!*ents)
2404                --count;
2405        } while (++ents <= last);
2406    }
2407    return count;
2408}
2409
2410static struct xpvhv_aux*
2411S_hv_auxinit(pTHX_ HV *hv) {
2412    struct xpvhv_aux *iter;
2413
2414    PERL_ARGS_ASSERT_HV_AUXINIT;
2415
2416    if (!HvHasAUX(hv)) {
2417        char *array = (char *) HvARRAY(hv);
2418        if (!array) {
2419            Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char);
2420            HvARRAY(hv) = (HE**)array;
2421        }
2422        iter = Perl_hv_auxalloc(aTHX_ hv);
2423#ifdef PERL_HASH_RANDOMIZE_KEYS
2424        MAYBE_UPDATE_HASH_RAND_BITS();
2425        iter->xhv_rand = (U32)PL_hash_rand_bits;
2426#endif
2427    } else {
2428        iter = HvAUX(hv);
2429    }
2430
2431    iter->xhv_riter = -1;       /* HvRITER(hv) = -1 */
2432    iter->xhv_eiter = NULL;     /* HvEITER(hv) = NULL */
2433#ifdef PERL_HASH_RANDOMIZE_KEYS
2434    iter->xhv_last_rand = iter->xhv_rand;
2435#endif
2436    iter->xhv_name_u.xhvnameu_name = 0;
2437    iter->xhv_name_count = 0;
2438    iter->xhv_backreferences = 0;
2439    iter->xhv_mro_meta = NULL;
2440    iter->xhv_aux_flags = 0;
2441    return iter;
2442}
2443
2444/*
2445=for apidoc hv_iterinit
2446
2447Prepares a starting point to traverse a hash table.  Returns the number of
2448keys in the hash, including placeholders (i.e. the same as C<HvTOTALKEYS(hv)>).
2449The return value is currently only meaningful for hashes without tie magic.
2450
2451NOTE: Before version 5.004_65, C<hv_iterinit> used to return the number of
2452hash buckets that happen to be in use.  If you still need that esoteric
2453value, you can get it through the macro C<HvFILL(hv)>.
2454
2455
2456=cut
2457*/
2458
2459I32
2460Perl_hv_iterinit(pTHX_ HV *hv)
2461{
2462    PERL_ARGS_ASSERT_HV_ITERINIT;
2463
2464    if (HvHasAUX(hv)) {
2465        struct xpvhv_aux * iter = HvAUX(hv);
2466        HE * const entry = iter->xhv_eiter; /* HvEITER(hv) */
2467        if (entry && HvLAZYDEL(hv)) {	/* was deleted earlier? */
2468            HvLAZYDEL_off(hv);
2469            hv_free_ent(NULL, entry);
2470        }
2471        iter->xhv_riter = -1; 	/* HvRITER(hv) = -1 */
2472        iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
2473#ifdef PERL_HASH_RANDOMIZE_KEYS
2474        iter->xhv_last_rand = iter->xhv_rand;
2475#endif
2476    } else {
2477        hv_auxinit(hv);
2478    }
2479
2480    /* note this includes placeholders! */
2481    return HvTOTALKEYS(hv);
2482}
2483
2484/*
2485=for apidoc hv_riter_p
2486
2487Implements C<HvRITER> which you should use instead.
2488
2489=cut
2490*/
2491
2492I32 *
2493Perl_hv_riter_p(pTHX_ HV *hv) {
2494    struct xpvhv_aux *iter;
2495
2496    PERL_ARGS_ASSERT_HV_RITER_P;
2497
2498    iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2499    return &(iter->xhv_riter);
2500}
2501
2502/*
2503=for apidoc hv_eiter_p
2504
2505Implements C<HvEITER> which you should use instead.
2506
2507=cut
2508*/
2509
2510HE **
2511Perl_hv_eiter_p(pTHX_ HV *hv) {
2512    struct xpvhv_aux *iter;
2513
2514    PERL_ARGS_ASSERT_HV_EITER_P;
2515
2516    iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2517    return &(iter->xhv_eiter);
2518}
2519
2520/*
2521=for apidoc hv_riter_set
2522
2523Implements C<HvRITER_set> which you should use instead.
2524
2525=cut
2526*/
2527
2528void
2529Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) {
2530    struct xpvhv_aux *iter;
2531
2532    PERL_ARGS_ASSERT_HV_RITER_SET;
2533
2534    if (HvHasAUX(hv)) {
2535        iter = HvAUX(hv);
2536    } else {
2537        if (riter == -1)
2538            return;
2539
2540        iter = hv_auxinit(hv);
2541    }
2542    iter->xhv_riter = riter;
2543}
2544
2545void
2546Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) {
2547    struct xpvhv_aux *iter;
2548
2549    PERL_ARGS_ASSERT_HV_RAND_SET;
2550
2551#ifdef PERL_HASH_RANDOMIZE_KEYS
2552    if (HvHasAUX(hv)) {
2553        iter = HvAUX(hv);
2554    } else {
2555        iter = hv_auxinit(hv);
2556    }
2557    iter->xhv_rand = new_xhv_rand;
2558#else
2559    Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set().");
2560#endif
2561}
2562
2563/*
2564=for apidoc hv_eiter_set
2565
2566Implements C<HvEITER_set> which you should use instead.
2567
2568=cut
2569*/
2570
2571void
2572Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) {
2573    struct xpvhv_aux *iter;
2574
2575    PERL_ARGS_ASSERT_HV_EITER_SET;
2576
2577    if (HvHasAUX(hv)) {
2578        iter = HvAUX(hv);
2579    } else {
2580        /* 0 is the default so don't go malloc()ing a new structure just to
2581           hold 0.  */
2582        if (!eiter)
2583            return;
2584
2585        iter = hv_auxinit(hv);
2586    }
2587    iter->xhv_eiter = eiter;
2588}
2589
2590/*
2591=for apidoc        hv_name_set
2592=for apidoc_item ||hv_name_sets|HV *hv|"name"|U32 flags
2593
2594These each set the name of stash C<hv> to the specified name.
2595
2596They differ only in how the name is specified.
2597
2598In C<hv_name_sets>, the name is a literal C string, enclosed in double quotes.
2599
2600In C<hv_name_set>, C<name> points to the first byte of the name, and an
2601additional parameter, C<len>, specifies its length in bytes.  Hence, the name
2602may contain embedded-NUL characters.
2603
2604If C<SVf_UTF8> is set in C<flags>, the name is treated as being in UTF-8;
2605otherwise not.
2606
2607If C<HV_NAME_SETALL> is set in C<flags>, both the name and the effective name
2608are set.
2609
2610=for apidoc Amnh||HV_NAME_SETALL
2611
2612=cut
2613*/
2614
2615void
2616Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2617{
2618    struct xpvhv_aux *iter;
2619    U32 hash;
2620    HEK **spot;
2621
2622    PERL_ARGS_ASSERT_HV_NAME_SET;
2623
2624    if (len > I32_MAX)
2625        Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2626
2627    if (HvHasAUX(hv)) {
2628        iter = HvAUX(hv);
2629        if (iter->xhv_name_u.xhvnameu_name) {
2630            if(iter->xhv_name_count) {
2631              if(flags & HV_NAME_SETALL) {
2632                HEK ** const this_name = HvAUX(hv)->xhv_name_u.xhvnameu_names;
2633                HEK **hekp = this_name + (
2634                    iter->xhv_name_count < 0
2635                     ? -iter->xhv_name_count
2636                     :  iter->xhv_name_count
2637                   );
2638                while(hekp-- > this_name+1)
2639                    unshare_hek_or_pvn(*hekp, 0, 0, 0);
2640                /* The first elem may be null. */
2641                if(*this_name) unshare_hek_or_pvn(*this_name, 0, 0, 0);
2642                Safefree(this_name);
2643                spot = &iter->xhv_name_u.xhvnameu_name;
2644                iter->xhv_name_count = 0;
2645              }
2646              else {
2647                if(iter->xhv_name_count > 0) {
2648                    /* shift some things over */
2649                    Renew(
2650                     iter->xhv_name_u.xhvnameu_names, iter->xhv_name_count + 1, HEK *
2651                    );
2652                    spot = iter->xhv_name_u.xhvnameu_names;
2653                    spot[iter->xhv_name_count] = spot[1];
2654                    spot[1] = spot[0];
2655                    iter->xhv_name_count = -(iter->xhv_name_count + 1);
2656                }
2657                else if(*(spot = iter->xhv_name_u.xhvnameu_names)) {
2658                    unshare_hek_or_pvn(*spot, 0, 0, 0);
2659                }
2660              }
2661            }
2662            else if (flags & HV_NAME_SETALL) {
2663                unshare_hek_or_pvn(iter->xhv_name_u.xhvnameu_name, 0, 0, 0);
2664                spot = &iter->xhv_name_u.xhvnameu_name;
2665            }
2666            else {
2667                HEK * const existing_name = iter->xhv_name_u.xhvnameu_name;
2668                Newx(iter->xhv_name_u.xhvnameu_names, 2, HEK *);
2669                iter->xhv_name_count = -2;
2670                spot = iter->xhv_name_u.xhvnameu_names;
2671                spot[1] = existing_name;
2672            }
2673        }
2674        else { spot = &iter->xhv_name_u.xhvnameu_name; iter->xhv_name_count = 0; }
2675    } else {
2676        if (name == 0)
2677            return;
2678
2679        iter = hv_auxinit(hv);
2680        spot = &iter->xhv_name_u.xhvnameu_name;
2681    }
2682    PERL_HASH(hash, name, len);
2683    *spot = name ? share_hek(name, flags & SVf_UTF8 ? -(I32)len : (I32)len, hash) : NULL;
2684}
2685
2686/*
2687This is basically sv_eq_flags() in sv.c, but we avoid the magic
2688and bytes checking.
2689*/
2690
2691STATIC I32
2692hek_eq_pvn_flags(pTHX_ const HEK *hek, const char* pv, const I32 pvlen, const U32 flags) {
2693    if ( (HEK_UTF8(hek) ? 1 : 0) != (flags & SVf_UTF8 ? 1 : 0) ) {
2694        if (flags & SVf_UTF8)
2695            return (bytes_cmp_utf8(
2696                        (const U8*)HEK_KEY(hek), HEK_LEN(hek),
2697                        (const U8*)pv, pvlen) == 0);
2698        else
2699            return (bytes_cmp_utf8(
2700                        (const U8*)pv, pvlen,
2701                        (const U8*)HEK_KEY(hek), HEK_LEN(hek)) == 0);
2702    }
2703    else
2704        return HEK_LEN(hek) == pvlen && ((HEK_KEY(hek) == pv)
2705                    || memEQ(HEK_KEY(hek), pv, pvlen));
2706}
2707
2708/*
2709=for apidoc hv_ename_add
2710
2711Adds a name to a stash's internal list of effective names.  See
2712C<L</hv_ename_delete>>.
2713
2714This is called when a stash is assigned to a new location in the symbol
2715table.
2716
2717=cut
2718*/
2719
2720void
2721Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2722{
2723    struct xpvhv_aux *aux = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2724    U32 hash;
2725
2726    PERL_ARGS_ASSERT_HV_ENAME_ADD;
2727
2728    if (len > I32_MAX)
2729        Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2730
2731    PERL_HASH(hash, name, len);
2732
2733    if (aux->xhv_name_count) {
2734        I32 count = aux->xhv_name_count;
2735        HEK ** const xhv_name = aux->xhv_name_u.xhvnameu_names + (count<0);
2736        HEK **hekp = xhv_name + (count < 0 ? -count - 1 : count);
2737        while (hekp-- > xhv_name)
2738        {
2739            assert(*hekp);
2740            if (
2741                 (HEK_UTF8(*hekp) || (flags & SVf_UTF8))
2742                    ? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags)
2743                    : (HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len))
2744               ) {
2745                if (hekp == xhv_name && count < 0)
2746                    aux->xhv_name_count = -count;
2747                return;
2748            }
2749        }
2750        if (count < 0) aux->xhv_name_count--, count = -count;
2751        else aux->xhv_name_count++;
2752        Renew(aux->xhv_name_u.xhvnameu_names, count + 1, HEK *);
2753        (aux->xhv_name_u.xhvnameu_names)[count] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash);
2754    }
2755    else {
2756        HEK *existing_name = aux->xhv_name_u.xhvnameu_name;
2757        if (
2758            existing_name && (
2759             (HEK_UTF8(existing_name) || (flags & SVf_UTF8))
2760                ? hek_eq_pvn_flags(aTHX_ existing_name, name, (I32)len, flags)
2761                : (HEK_LEN(existing_name) == (I32)len && memEQ(HEK_KEY(existing_name), name, len))
2762            )
2763        ) return;
2764        Newx(aux->xhv_name_u.xhvnameu_names, 2, HEK *);
2765        aux->xhv_name_count = existing_name ? 2 : -2;
2766        *aux->xhv_name_u.xhvnameu_names = existing_name;
2767        (aux->xhv_name_u.xhvnameu_names)[1] = share_hek(name, (flags & SVf_UTF8 ? -(I32)len : (I32)len), hash);
2768    }
2769}
2770
2771/*
2772=for apidoc hv_ename_delete
2773
2774Removes a name from a stash's internal list of effective names.  If this is
2775the name returned by C<HvENAME>, then another name in the list will take
2776its place (C<HvENAME> will use it).
2777
2778This is called when a stash is deleted from the symbol table.
2779
2780=cut
2781*/
2782
2783void
2784Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
2785{
2786    struct xpvhv_aux *aux;
2787
2788    PERL_ARGS_ASSERT_HV_ENAME_DELETE;
2789
2790    if (len > I32_MAX)
2791        Perl_croak(aTHX_ "panic: hv name too long (%" UVuf ")", (UV) len);
2792
2793    if (!HvHasAUX(hv)) return;
2794
2795    aux = HvAUX(hv);
2796    if (!aux->xhv_name_u.xhvnameu_name) return;
2797
2798    if (aux->xhv_name_count) {
2799        HEK ** const namep = aux->xhv_name_u.xhvnameu_names;
2800        I32 const count = aux->xhv_name_count;
2801        HEK **victim = namep + (count < 0 ? -count : count);
2802        while (victim-- > namep + 1)
2803            if (
2804             (HEK_UTF8(*victim) || (flags & SVf_UTF8))
2805                ? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags)
2806                : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len))
2807            ) {
2808                unshare_hek_or_pvn(*victim, 0, 0, 0);
2809                if (count < 0) ++aux->xhv_name_count;
2810                else --aux->xhv_name_count;
2811                if (
2812                    (aux->xhv_name_count == 1 || aux->xhv_name_count == -1)
2813                 && !*namep
2814                ) {  /* if there are none left */
2815                    Safefree(namep);
2816                    aux->xhv_name_u.xhvnameu_names = NULL;
2817                    aux->xhv_name_count = 0;
2818                }
2819                else {
2820                    /* Move the last one back to fill the empty slot. It
2821                       does not matter what order they are in. */
2822                    *victim = *(namep + (count < 0 ? -count : count) - 1);
2823                }
2824                return;
2825            }
2826        if (
2827            count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8))
2828                ? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags)
2829                : (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len))
2830            )
2831        ) {
2832            aux->xhv_name_count = -count;
2833        }
2834    }
2835    else if(
2836        (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8))
2837                ? hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags)
2838                : (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len &&
2839                            memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len))
2840    ) {
2841        HEK * const namehek = aux->xhv_name_u.xhvnameu_name;
2842        Newx(aux->xhv_name_u.xhvnameu_names, 1, HEK *);
2843        *aux->xhv_name_u.xhvnameu_names = namehek;
2844        aux->xhv_name_count = -1;
2845    }
2846}
2847
2848AV **
2849Perl_hv_backreferences_p(pTHX_ HV *hv) {
2850    PERL_ARGS_ASSERT_HV_BACKREFERENCES_P;
2851    /* See also Perl_sv_get_backrefs in sv.c where this logic is unrolled */
2852    {
2853        struct xpvhv_aux * const iter = HvHasAUX(hv) ? HvAUX(hv) : hv_auxinit(hv);
2854        return &(iter->xhv_backreferences);
2855    }
2856}
2857
2858void
2859Perl_hv_kill_backrefs(pTHX_ HV *hv) {
2860    AV *av;
2861
2862    PERL_ARGS_ASSERT_HV_KILL_BACKREFS;
2863
2864    if (!HvHasAUX(hv))
2865        return;
2866
2867    av = HvAUX(hv)->xhv_backreferences;
2868
2869    if (av) {
2870        HvAUX(hv)->xhv_backreferences = 0;
2871        Perl_sv_kill_backrefs(aTHX_ MUTABLE_SV(hv), av);
2872        if (SvTYPE(av) == SVt_PVAV)
2873            SvREFCNT_dec_NN(av);
2874    }
2875}
2876
2877/*
2878hv_iternext is implemented as a macro in hv.h
2879
2880=for apidoc hv_iternext
2881
2882Returns entries from a hash iterator.  See C<L</hv_iterinit>>.
2883
2884You may call C<hv_delete> or C<hv_delete_ent> on the hash entry that the
2885iterator currently points to, without losing your place or invalidating your
2886iterator.  Note that in this case the current entry is deleted from the hash
2887with your iterator holding the last reference to it.  Your iterator is flagged
2888to free the entry on the next call to C<hv_iternext>, so you must not discard
2889your iterator immediately else the entry will leak - call C<hv_iternext> to
2890trigger the resource deallocation.
2891
2892=for apidoc hv_iternext_flags
2893
2894Returns entries from a hash iterator.  See C<L</hv_iterinit>> and
2895C<L</hv_iternext>>.
2896The C<flags> value will normally be zero; if C<HV_ITERNEXT_WANTPLACEHOLDERS> is
2897set the placeholders keys (for restricted hashes) will be returned in addition
2898to normal keys.  By default placeholders are automatically skipped over.
2899Currently a placeholder is implemented with a value that is
2900C<&PL_sv_placeholder>.  Note that the implementation of placeholders and
2901restricted hashes may change, and the implementation currently is
2902insufficiently abstracted for any change to be tidy.
2903
2904=for apidoc Amnh||HV_ITERNEXT_WANTPLACEHOLDERS
2905
2906=cut
2907*/
2908
2909HE *
2910Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
2911{
2912    HE *entry;
2913    HE *oldentry;
2914    MAGIC* mg;
2915    struct xpvhv_aux *iter;
2916
2917    PERL_ARGS_ASSERT_HV_ITERNEXT_FLAGS;
2918
2919    if (!HvHasAUX(hv)) {
2920        /* Too many things (well, pp_each at least) merrily assume that you can
2921           call hv_iternext without calling hv_iterinit, so we'll have to deal
2922           with it.  */
2923        hv_iterinit(hv);
2924    }
2925    else if (!HvARRAY(hv)) {
2926        /* Since 5.002 calling hv_iternext() has ensured that HvARRAY() is
2927           non-NULL. There was explicit code for this added as part of commit
2928           4633a7c4bad06b47, without any explicit comment as to why, but from
2929           code inspection it seems to be a fix to ensure that the later line
2930               entry = ((HE**)xhv->xhv_array)[xhv->xhv_riter];
2931           was accessing a valid address, because that lookup in the loop was
2932           always reached even if the hash had no keys.
2933
2934           That explicit code was removed in 2005 as part of b79f7545f218479c:
2935               Store the xhv_aux structure after the main array.
2936               This reduces the size of HV bodies from 24 to 20 bytes on a 32 bit
2937               build. It has the side effect of defined %symbol_table:: now always
2938               being true. defined %hash is already deprecated.
2939
2940           with a comment and assertion added to note that after the call to
2941           hv_iterinit() HvARRAY() will now always be non-NULL.
2942
2943           In turn, that potential NULL-pointer access within the loop was made
2944           unreachable in 2009 by commit 9eb4ebd1619c0362
2945               In Perl_hv_iternext_flags(), clarify and generalise the empty hash bailout code.
2946
2947           which skipped the entire while loop if the hash had no keys.
2948           (If the hash has any keys, HvARRAY() cannot be NULL.)
2949           Hence the code in hv_iternext_flags() has long been able to handle
2950           HvARRAY() being NULL because no keys are allocated.
2951
2952           Now that we have decoupled the aux structure from HvARRAY(),
2953           HvARRAY() can now be NULL even when SVf_OOK is true (and the aux
2954           struct is allocated and correction initialised).
2955
2956           Is this actually a guarantee that we need to make? We should check
2957           whether anything is actually relying on this, or if we are simply
2958           making work for ourselves.
2959
2960           For now, keep the behaviour as-was - after calling hv_iternext_flags
2961           ensure that HvARRAY() is non-NULL. Many (other) things are changing -
2962           no need to add risk by changing this too. But in the future we should
2963           consider changing hv_iternext_flags() to avoid allocating HvARRAY()
2964           here, and potentially also we avoid allocating HvARRAY()
2965           automatically in hv_auxinit() */
2966
2967        char *array;
2968        Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1), char);
2969        HvARRAY(hv) = (HE**)array;
2970    }
2971
2972    iter = HvAUX(hv);
2973
2974    oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */
2975    if (SvMAGICAL(hv) && SvRMAGICAL(hv)) {
2976        if ( ( mg = mg_find((const SV *)hv, PERL_MAGIC_tied) ) ) {
2977            SV * const key = sv_newmortal();
2978            if (entry) {
2979                sv_setsv(key, HeSVKEY_force(entry));
2980                SvREFCNT_dec(HeSVKEY(entry));       /* get rid of previous key */
2981                HeSVKEY_set(entry, NULL);
2982            }
2983            else {
2984                char *k;
2985                HEK *hek;
2986
2987                /* one HE per MAGICAL hash */
2988                iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */
2989                HvLAZYDEL_on(hv); /* make sure entry gets freed */
2990                Zero(entry, 1, HE);
2991                Newxz(k, HEK_BASESIZE + sizeof(const SV *), char);
2992                hek = (HEK*)k;
2993                HeKEY_hek(entry) = hek;
2994                HeKLEN(entry) = HEf_SVKEY;
2995            }
2996            magic_nextpack(MUTABLE_SV(hv),mg,key);
2997            if (SvOK(key)) {
2998                /* force key to stay around until next time */
2999                HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key));
3000                return entry;               /* beware, hent_val is not set */
3001            }
3002            SvREFCNT_dec(HeVAL(entry));
3003            Safefree(HeKEY_hek(entry));
3004            del_HE(entry);
3005            iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
3006            HvLAZYDEL_off(hv);
3007            return NULL;
3008        }
3009    }
3010#if defined(DYNAMIC_ENV_FETCH) && defined(VMS)  /* set up %ENV for iteration */
3011    if (!entry && SvRMAGICAL((const SV *)hv)
3012        && mg_find((const SV *)hv, PERL_MAGIC_env)) {
3013        prime_env_iter();
3014    }
3015#endif
3016
3017    /* hv_iterinit now ensures this.  */
3018    assert (HvARRAY(hv));
3019
3020    /* At start of hash, entry is NULL.  */
3021    if (entry)
3022    {
3023        entry = HeNEXT(entry);
3024        if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
3025            /*
3026             * Skip past any placeholders -- don't want to include them in
3027             * any iteration.
3028             */
3029            while (entry && HeVAL(entry) == &PL_sv_placeholder) {
3030                entry = HeNEXT(entry);
3031            }
3032        }
3033    }
3034
3035#ifdef PERL_HASH_RANDOMIZE_KEYS
3036    if (iter->xhv_last_rand != iter->xhv_rand) {
3037        if (iter->xhv_riter != -1) {
3038            Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
3039                             "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior"
3040                             pTHX__FORMAT
3041                             pTHX__VALUE);
3042        }
3043        iter->xhv_last_rand = iter->xhv_rand;
3044    }
3045#endif
3046
3047    /* Skip the entire loop if the hash is empty.   */
3048    if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS)
3049        ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) {
3050        STRLEN max = HvMAX(hv);
3051        while (!entry) {
3052            /* OK. Come to the end of the current list.  Grab the next one.  */
3053
3054            iter->xhv_riter++; /* HvRITER(hv)++ */
3055            if (iter->xhv_riter > (I32)max /* HvRITER(hv) > HvMAX(hv) */) {
3056                /* There is no next one.  End of the hash.  */
3057                iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
3058#ifdef PERL_HASH_RANDOMIZE_KEYS
3059                iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */
3060#endif
3061                break;
3062            }
3063            entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & max ];
3064
3065            if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
3066                /* If we have an entry, but it's a placeholder, don't count it.
3067                   Try the next.  */
3068                while (entry && HeVAL(entry) == &PL_sv_placeholder)
3069                    entry = HeNEXT(entry);
3070            }
3071            /* Will loop again if this linked list starts NULL
3072               (for HV_ITERNEXT_WANTPLACEHOLDERS)
3073               or if we run through it and find only placeholders.  */
3074        }
3075    }
3076    else {
3077        iter->xhv_riter = -1;
3078#ifdef PERL_HASH_RANDOMIZE_KEYS
3079        iter->xhv_last_rand = iter->xhv_rand;
3080#endif
3081    }
3082
3083    if (oldentry && HvLAZYDEL(hv)) {		/* was deleted earlier? */
3084        HvLAZYDEL_off(hv);
3085        hv_free_ent(NULL, oldentry);
3086    }
3087
3088    iter->xhv_eiter = entry; /* HvEITER(hv) = entry */
3089    return entry;
3090}
3091
3092/*
3093=for apidoc hv_iterkey
3094
3095Returns the key from the current position of the hash iterator.  See
3096C<L</hv_iterinit>>.
3097
3098=cut
3099*/
3100
3101char *
3102Perl_hv_iterkey(pTHX_ HE *entry, I32 *retlen)
3103{
3104    PERL_ARGS_ASSERT_HV_ITERKEY;
3105
3106    if (HeKLEN(entry) == HEf_SVKEY) {
3107        STRLEN len;
3108        char * const p = SvPV(HeKEY_sv(entry), len);
3109        *retlen = len;
3110        return p;
3111    }
3112    else {
3113        *retlen = HeKLEN(entry);
3114        return HeKEY(entry);
3115    }
3116}
3117
3118/* unlike hv_iterval(), this always returns a mortal copy of the key */
3119/*
3120=for apidoc hv_iterkeysv
3121
3122Returns the key as an C<SV*> from the current position of the hash
3123iterator.  The return value will always be a mortal copy of the key.  Also
3124see C<L</hv_iterinit>>.
3125
3126=cut
3127*/
3128
3129SV *
3130Perl_hv_iterkeysv(pTHX_ HE *entry)
3131{
3132    PERL_ARGS_ASSERT_HV_ITERKEYSV;
3133
3134    return newSVhek_mortal(HeKEY_hek(entry));
3135}
3136
3137/*
3138=for apidoc hv_iterval
3139
3140Returns the value from the current position of the hash iterator.  See
3141C<L</hv_iterkey>>.
3142
3143=cut
3144*/
3145
3146SV *
3147Perl_hv_iterval(pTHX_ HV *hv, HE *entry)
3148{
3149    PERL_ARGS_ASSERT_HV_ITERVAL;
3150
3151    if (SvRMAGICAL(hv)) {
3152        if (mg_find((const SV *)hv, PERL_MAGIC_tied)) {
3153            SV* const sv = sv_newmortal();
3154            if (HeKLEN(entry) == HEf_SVKEY)
3155                mg_copy(MUTABLE_SV(hv), sv, (char*)HeKEY_sv(entry), HEf_SVKEY);
3156            else
3157                mg_copy(MUTABLE_SV(hv), sv, HeKEY(entry), HeKLEN(entry));
3158            return sv;
3159        }
3160    }
3161    return HeVAL(entry);
3162}
3163
3164/*
3165=for apidoc hv_iternextsv
3166
3167Performs an C<hv_iternext>, C<hv_iterkey>, and C<hv_iterval> in one
3168operation.
3169
3170=cut
3171*/
3172
3173SV *
3174Perl_hv_iternextsv(pTHX_ HV *hv, char **key, I32 *retlen)
3175{
3176    HE * const he = hv_iternext_flags(hv, 0);
3177
3178    PERL_ARGS_ASSERT_HV_ITERNEXTSV;
3179
3180    if (!he)
3181        return NULL;
3182    *key = hv_iterkey(he, retlen);
3183    return hv_iterval(hv, he);
3184}
3185
3186/*
3187
3188Now a macro in hv.h
3189
3190=for apidoc hv_magic
3191
3192Adds magic to a hash.  See C<L</sv_magic>>.
3193
3194=for apidoc unsharepvn
3195
3196If no one has access to shared string C<str> with length C<len>, free it.
3197
3198C<len> and C<hash> must both be valid for C<str>.
3199
3200=cut
3201*/
3202
3203void
3204Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
3205{
3206    unshare_hek_or_pvn (NULL, str, len, hash);
3207}
3208
3209
3210void
3211Perl_unshare_hek(pTHX_ HEK *hek)
3212{
3213    assert(hek);
3214    unshare_hek_or_pvn(hek, NULL, 0, 0);
3215}
3216
3217/* possibly free a shared string if no one has access to it
3218   hek if non-NULL takes priority over the other 3, else str, len and hash
3219   are used.  If so, len and hash must both be valid for str.
3220 */
3221STATIC void
3222S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
3223{
3224    HE *entry;
3225    HE **oentry;
3226    bool is_utf8 = FALSE;
3227    int k_flags = 0;
3228    const char * const save = str;
3229    struct shared_he *he = NULL;
3230
3231    if (hek) {
3232        assert((HEK_FLAGS(hek) & HVhek_NOTSHARED) == 0);
3233        /* Find the shared he which is just before us in memory.  */
3234        he = (struct shared_he *)(((char *)hek)
3235                                  - STRUCT_OFFSET(struct shared_he,
3236                                                  shared_he_hek));
3237
3238        /* Assert that the caller passed us a genuine (or at least consistent)
3239           shared hek  */
3240        assert (he->shared_he_he.hent_hek == hek);
3241
3242        if (he->shared_he_he.he_valu.hent_refcount - 1) {
3243            --he->shared_he_he.he_valu.hent_refcount;
3244            return;
3245        }
3246
3247        hash = HEK_HASH(hek);
3248    } else if (len < 0) {
3249        STRLEN tmplen = -len;
3250        is_utf8 = TRUE;
3251        /* See the note in hv_fetch(). --jhi */
3252        str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
3253        len = tmplen;
3254        if (is_utf8)
3255            k_flags = HVhek_UTF8;
3256        if (str != save)
3257            k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
3258    }
3259
3260    /* what follows was the moral equivalent of:
3261    if ((Svp = hv_fetch(PL_strtab, tmpsv, FALSE, hash))) {
3262        if (--*Svp == NULL)
3263            hv_delete(PL_strtab, str, len, G_DISCARD, hash);
3264    } */
3265
3266    /* assert(xhv_array != 0) */
3267    oentry = &(HvARRAY(PL_strtab))[hash & (I32) HvMAX(PL_strtab)];
3268    if (he) {
3269        const HE *const he_he = &(he->shared_he_he);
3270        for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) {
3271            if (entry == he_he)
3272                break;
3273        }
3274    } else {
3275        const U8 flags_masked = k_flags & HVhek_STORAGE_MASK;
3276        for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) {
3277            if (HeHASH(entry) != hash)		/* strings can't be equal */
3278                continue;
3279            if (HeKLEN(entry) != len)
3280                continue;
3281            if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len))	/* is this it? */
3282                continue;
3283            if (HeKFLAGS(entry) != flags_masked)
3284                continue;
3285            break;
3286        }
3287    }
3288
3289    if (entry) {
3290        if (--entry->he_valu.hent_refcount == 0) {
3291            *oentry = HeNEXT(entry);
3292            Safefree(entry);
3293            HvTOTALKEYS(PL_strtab)--;
3294        }
3295    }
3296
3297    if (!entry)
3298        Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
3299                         "Attempt to free nonexistent shared string '%s'%s"
3300                         pTHX__FORMAT,
3301                         hek ? HEK_KEY(hek) : str,
3302                         ((k_flags & HVhek_UTF8) ? " (utf8)" : "") pTHX__VALUE);
3303    if (k_flags & HVhek_FREEKEY)
3304        Safefree(str);
3305}
3306
3307/* get a (constant) string ptr from the global string table
3308 * string will get added if it is not already there.
3309 * len and hash must both be valid for str.
3310 */
3311HEK *
3312Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash)
3313{
3314    bool is_utf8 = FALSE;
3315    int flags = 0;
3316    const char * const save = str;
3317
3318    PERL_ARGS_ASSERT_SHARE_HEK;
3319
3320    if (len < 0) {
3321      STRLEN tmplen = -len;
3322      is_utf8 = TRUE;
3323      /* See the note in hv_fetch(). --jhi */
3324      str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
3325      len = tmplen;
3326      /* If we were able to downgrade here, then than means that we were passed
3327         in a key which only had chars 0-255, but was utf8 encoded.  */
3328      if (is_utf8)
3329          flags = HVhek_UTF8;
3330      /* If we found we were able to downgrade the string to bytes, then
3331         we should flag that it needs upgrading on keys or each.  Also flag
3332         that we need share_hek_flags to free the string.  */
3333      if (str != save) {
3334          PERL_HASH(hash, str, len);
3335          flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
3336      }
3337    }
3338
3339    return share_hek_flags (str, len, hash, flags);
3340}
3341
3342STATIC HEK *
3343S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
3344{
3345    HE *entry;
3346    const U8 flags_masked = flags & HVhek_STORAGE_MASK;
3347    const U32 hindex = hash & (I32) HvMAX(PL_strtab);
3348
3349    PERL_ARGS_ASSERT_SHARE_HEK_FLAGS;
3350    assert(!(flags & HVhek_NOTSHARED));
3351
3352    if (UNLIKELY(len > (STRLEN) I32_MAX)) {
3353        Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes");
3354    }
3355
3356    /* what follows is the moral equivalent of:
3357
3358    if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
3359        hv_store(PL_strtab, str, len, NULL, hash);
3360
3361        Can't rehash the shared string table, so not sure if it's worth
3362        counting the number of entries in the linked list
3363    */
3364
3365    /* assert(xhv_array != 0) */
3366    entry = (HvARRAY(PL_strtab))[hindex];
3367    for (;entry; entry = HeNEXT(entry)) {
3368        if (HeHASH(entry) != hash)		/* strings can't be equal */
3369            continue;
3370        if (HeKLEN(entry) != (SSize_t) len)
3371            continue;
3372        if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len))	/* is this it? */
3373            continue;
3374        if (HeKFLAGS(entry) != flags_masked)
3375            continue;
3376        break;
3377    }
3378
3379    if (!entry) {
3380        /* What used to be head of the list.
3381           If this is NULL, then we're the first entry for this slot, which
3382           means we need to increase fill.  */
3383        struct shared_he *new_entry;
3384        HEK *hek;
3385        char *k;
3386        HE **const head = &HvARRAY(PL_strtab)[hindex];
3387        HE *const next = *head;
3388        XPVHV * const xhv = (XPVHV*)SvANY(PL_strtab);
3389
3390        /* We don't actually store a HE from the arena and a regular HEK.
3391           Instead we allocate one chunk of memory big enough for both,
3392           and put the HEK straight after the HE. This way we can find the
3393           HE directly from the HEK.
3394        */
3395
3396        Newx(k, STRUCT_OFFSET(struct shared_he,
3397                                shared_he_hek.hek_key[0]) + len + 2, char);
3398        new_entry = (struct shared_he *)k;
3399        entry = &(new_entry->shared_he_he);
3400        hek = &(new_entry->shared_he_hek);
3401
3402        Copy(str, HEK_KEY(hek), len, char);
3403        HEK_KEY(hek)[len] = 0;
3404        HEK_LEN(hek) = len;
3405        HEK_HASH(hek) = hash;
3406        HEK_FLAGS(hek) = (unsigned char)flags_masked;
3407
3408        /* Still "point" to the HEK, so that other code need not know what
3409           we're up to.  */
3410        HeKEY_hek(entry) = hek;
3411        entry->he_valu.hent_refcount = 0;
3412        HeNEXT(entry) = next;
3413        *head = entry;
3414
3415        xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
3416        if (!next) {			/* initial entry? */
3417        } else if ( DO_HSPLIT(xhv) ) {
3418            const STRLEN oldsize = xhv->xhv_max + 1;
3419            hsplit(PL_strtab, oldsize, oldsize * 2);
3420        }
3421    }
3422
3423    ++entry->he_valu.hent_refcount;
3424
3425    if (flags & HVhek_FREEKEY)
3426        Safefree(str);
3427
3428    return HeKEY_hek(entry);
3429}
3430
3431SSize_t *
3432Perl_hv_placeholders_p(pTHX_ HV *hv)
3433{
3434    MAGIC *mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3435
3436    PERL_ARGS_ASSERT_HV_PLACEHOLDERS_P;
3437
3438    if (!mg) {
3439        mg = sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, 0);
3440
3441        if (!mg) {
3442            Perl_die(aTHX_ "panic: hv_placeholders_p");
3443        }
3444    }
3445    return &(mg->mg_len);
3446}
3447
3448/*
3449=for apidoc hv_placeholders_get
3450
3451Implements C<HvPLACEHOLDERS_get>, which you should use instead.
3452
3453=cut
3454*/
3455
3456I32
3457Perl_hv_placeholders_get(pTHX_ const HV *hv)
3458{
3459    MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3460
3461    PERL_ARGS_ASSERT_HV_PLACEHOLDERS_GET;
3462    PERL_UNUSED_CONTEXT;
3463
3464    return mg ? mg->mg_len : 0;
3465}
3466
3467/*
3468=for apidoc hv_placeholders_set
3469
3470Implements C<HvPLACEHOLDERS_set>, which you should use instead.
3471
3472=cut
3473*/
3474
3475void
3476Perl_hv_placeholders_set(pTHX_ HV *hv, I32 ph)
3477{
3478    MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash);
3479
3480    PERL_ARGS_ASSERT_HV_PLACEHOLDERS_SET;
3481
3482    if (mg) {
3483        mg->mg_len = ph;
3484    } else if (ph) {
3485        if (!sv_magicext(MUTABLE_SV(hv), 0, PERL_MAGIC_rhash, 0, 0, ph))
3486            Perl_die(aTHX_ "panic: hv_placeholders_set");
3487    }
3488    /* else we don't need to add magic to record 0 placeholders.  */
3489}
3490
3491STATIC SV *
3492S_refcounted_he_value(pTHX_ const struct refcounted_he *he)
3493{
3494    SV *value;
3495
3496    PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE;
3497
3498    switch(he->refcounted_he_data[0] & HVrhek_typemask) {
3499    case HVrhek_undef:
3500        value = newSV_type(SVt_NULL);
3501        break;
3502    case HVrhek_delete:
3503        value = &PL_sv_placeholder;
3504        break;
3505    case HVrhek_IV:
3506        value = newSViv(he->refcounted_he_val.refcounted_he_u_iv);
3507        break;
3508    case HVrhek_UV:
3509        value = newSVuv(he->refcounted_he_val.refcounted_he_u_uv);
3510        break;
3511    case HVrhek_PV:
3512    case HVrhek_PV_UTF8:
3513        /* Create a string SV that directly points to the bytes in our
3514           structure.  */
3515        value = newSV_type(SVt_PV);
3516        SvPV_set(value, (char *) he->refcounted_he_data + 1);
3517        SvCUR_set(value, he->refcounted_he_val.refcounted_he_u_len);
3518        /* This stops anything trying to free it  */
3519        SvLEN_set(value, 0);
3520        SvPOK_on(value);
3521        SvREADONLY_on(value);
3522        if ((he->refcounted_he_data[0] & HVrhek_typemask) == HVrhek_PV_UTF8)
3523            SvUTF8_on(value);
3524        break;
3525    default:
3526        Perl_croak(aTHX_ "panic: refcounted_he_value bad flags %" UVxf,
3527                   (UV)he->refcounted_he_data[0]);
3528    }
3529    return value;
3530}
3531
3532/*
3533=for apidoc refcounted_he_chain_2hv
3534
3535Generates and returns a C<HV *> representing the content of a
3536C<refcounted_he> chain.
3537C<flags> is currently unused and must be zero.
3538
3539=cut
3540*/
3541HV *
3542Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags)
3543{
3544    HV *hv;
3545    U32 placeholders, max;
3546
3547    if (flags)
3548        Perl_croak(aTHX_ "panic: refcounted_he_chain_2hv bad flags %" UVxf,
3549            (UV)flags);
3550
3551    /* We could chase the chain once to get an idea of the number of keys,
3552       and call ksplit.  But for now we'll make a potentially inefficient
3553       hash with only 8 entries in its array.  */
3554    hv = newHV();
3555#ifdef NODEFAULT_SHAREKEYS
3556    /* We share keys in the COP, so it's much easier to keep sharing keys in
3557       the hash we build from it. */
3558    HvSHAREKEYS_on(hv);
3559#endif
3560    max = HvMAX(hv);
3561    if (!HvARRAY(hv)) {
3562        char *array;
3563        Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(max + 1), char);
3564        HvARRAY(hv) = (HE**)array;
3565    }
3566
3567    placeholders = 0;
3568    while (chain) {
3569#ifdef USE_ITHREADS
3570        U32 hash = chain->refcounted_he_hash;
3571#else
3572        U32 hash = HEK_HASH(chain->refcounted_he_hek);
3573#endif
3574        HE **oentry = &((HvARRAY(hv))[hash & max]);
3575        HE *entry = *oentry;
3576        SV *value;
3577
3578        for (; entry; entry = HeNEXT(entry)) {
3579            if (HeHASH(entry) == hash) {
3580                /* We might have a duplicate key here.  If so, entry is older
3581                   than the key we've already put in the hash, so if they are
3582                   the same, skip adding entry.  */
3583#ifdef USE_ITHREADS
3584                const STRLEN klen = HeKLEN(entry);
3585                const char *const key = HeKEY(entry);
3586                if (klen == chain->refcounted_he_keylen
3587                    && (cBOOL(HeKUTF8(entry))
3588                        == cBOOL((chain->refcounted_he_data[0] & HVhek_UTF8)))
3589                    && memEQ(key, REF_HE_KEY(chain), klen))
3590                    goto next_please;
3591#else
3592                if (HeKEY_hek(entry) == chain->refcounted_he_hek)
3593                    goto next_please;
3594                if (HeKLEN(entry) == HEK_LEN(chain->refcounted_he_hek)
3595                    && HeKUTF8(entry) == HEK_UTF8(chain->refcounted_he_hek)
3596                    && memEQ(HeKEY(entry), HEK_KEY(chain->refcounted_he_hek),
3597                             HeKLEN(entry)))
3598                    goto next_please;
3599#endif
3600            }
3601        }
3602        assert (!entry);
3603        entry = new_HE();
3604
3605#ifdef USE_ITHREADS
3606        HeKEY_hek(entry)
3607            = share_hek_flags(REF_HE_KEY(chain),
3608                              chain->refcounted_he_keylen,
3609                              chain->refcounted_he_hash,
3610                              (chain->refcounted_he_data[0]
3611                               & (HVhek_UTF8|HVhek_WASUTF8)));
3612#else
3613        HeKEY_hek(entry) = share_hek_hek(chain->refcounted_he_hek);
3614#endif
3615        value = refcounted_he_value(chain);
3616        if (value == &PL_sv_placeholder)
3617            placeholders++;
3618        HeVAL(entry) = value;
3619
3620        /* Link it into the chain.  */
3621        HeNEXT(entry) = *oentry;
3622        *oentry = entry;
3623
3624        HvTOTALKEYS(hv)++;
3625
3626    next_please:
3627        chain = chain->refcounted_he_next;
3628    }
3629
3630    if (placeholders) {
3631        clear_placeholders(hv, placeholders);
3632    }
3633
3634    /* We could check in the loop to see if we encounter any keys with key
3635       flags, but it's probably not worth it, as this per-hash flag is only
3636       really meant as an optimisation for things like Storable.  */
3637    HvHASKFLAGS_on(hv);
3638    DEBUG_A(Perl_hv_assert(aTHX_ hv));
3639
3640    return hv;
3641}
3642
3643/*
3644=for apidoc refcounted_he_fetch_pvn
3645
3646Search along a C<refcounted_he> chain for an entry with the key specified
3647by C<keypv> and C<keylen>.  If C<flags> has the C<REFCOUNTED_HE_KEY_UTF8>
3648bit set, the key octets are interpreted as UTF-8, otherwise they
3649are interpreted as Latin-1.  C<hash> is a precomputed hash of the key
3650string, or zero if it has not been precomputed.  Returns a mortal scalar
3651representing the value associated with the key, or C<&PL_sv_placeholder>
3652if there is no value associated with the key.
3653
3654=cut
3655*/
3656
3657SV *
3658Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
3659                         const char *keypv, STRLEN keylen, U32 hash, U32 flags)
3660{
3661    U8 utf8_flag;
3662    PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;
3663
3664    if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
3665        Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
3666            (UV)flags);
3667    if (!chain)
3668        goto ret;
3669    if (flags & REFCOUNTED_HE_KEY_UTF8) {
3670        /* For searching purposes, canonicalise to Latin-1 where possible. */
3671        const char *keyend = keypv + keylen, *p;
3672        STRLEN nonascii_count = 0;
3673        for (p = keypv; p != keyend; p++) {
3674            if (! UTF8_IS_INVARIANT(*p)) {
3675                if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3676                    goto canonicalised_key;
3677                }
3678                nonascii_count++;
3679                p++;
3680            }
3681        }
3682        if (nonascii_count) {
3683            char *q;
3684            const char *p = keypv, *keyend = keypv + keylen;
3685            keylen -= nonascii_count;
3686            Newx(q, keylen, char);
3687            SAVEFREEPV(q);
3688            keypv = q;
3689            for (; p != keyend; p++, q++) {
3690                U8 c = (U8)*p;
3691                if (UTF8_IS_INVARIANT(c)) {
3692                    *q = (char) c;
3693                }
3694                else {
3695                    p++;
3696                    *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3697                }
3698            }
3699        }
3700        flags &= ~REFCOUNTED_HE_KEY_UTF8;
3701        canonicalised_key: ;
3702    }
3703    utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0;
3704    if (!hash)
3705        PERL_HASH(hash, keypv, keylen);
3706
3707    for (; chain; chain = chain->refcounted_he_next) {
3708        if (
3709#ifdef USE_ITHREADS
3710            hash == chain->refcounted_he_hash &&
3711            keylen == chain->refcounted_he_keylen &&
3712            memEQ(REF_HE_KEY(chain), keypv, keylen) &&
3713            utf8_flag == (chain->refcounted_he_data[0] & HVhek_UTF8)
3714#else
3715            hash == HEK_HASH(chain->refcounted_he_hek) &&
3716            keylen == (STRLEN)HEK_LEN(chain->refcounted_he_hek) &&
3717            memEQ(HEK_KEY(chain->refcounted_he_hek), keypv, keylen) &&
3718            utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8)
3719#endif
3720        ) {
3721            if (flags & REFCOUNTED_HE_EXISTS)
3722                return (chain->refcounted_he_data[0] & HVrhek_typemask)
3723                    == HVrhek_delete
3724                    ? NULL : &PL_sv_yes;
3725            return sv_2mortal(refcounted_he_value(chain));
3726        }
3727    }
3728  ret:
3729    return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder;
3730}
3731
3732/*
3733=for apidoc refcounted_he_fetch_pv
3734
3735Like L</refcounted_he_fetch_pvn>, but takes a nul-terminated string
3736instead of a string/length pair.
3737
3738=cut
3739*/
3740
3741SV *
3742Perl_refcounted_he_fetch_pv(pTHX_ const struct refcounted_he *chain,
3743                         const char *key, U32 hash, U32 flags)
3744{
3745    PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PV;
3746    return refcounted_he_fetch_pvn(chain, key, strlen(key), hash, flags);
3747}
3748
3749/*
3750=for apidoc refcounted_he_fetch_sv
3751
3752Like L</refcounted_he_fetch_pvn>, but takes a Perl scalar instead of a
3753string/length pair.
3754
3755=cut
3756*/
3757
3758SV *
3759Perl_refcounted_he_fetch_sv(pTHX_ const struct refcounted_he *chain,
3760                         SV *key, U32 hash, U32 flags)
3761{
3762    const char *keypv;
3763    STRLEN keylen;
3764    PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_SV;
3765    if (flags & REFCOUNTED_HE_KEY_UTF8)
3766        Perl_croak(aTHX_ "panic: refcounted_he_fetch_sv bad flags %" UVxf,
3767            (UV)flags);
3768    keypv = SvPV_const(key, keylen);
3769    if (SvUTF8(key))
3770        flags |= REFCOUNTED_HE_KEY_UTF8;
3771    if (!hash && SvIsCOW_shared_hash(key))
3772        hash = SvSHARED_HASH(key);
3773    return refcounted_he_fetch_pvn(chain, keypv, keylen, hash, flags);
3774}
3775
3776/*
3777=for apidoc refcounted_he_new_pvn
3778
3779Creates a new C<refcounted_he>.  This consists of a single key/value
3780pair and a reference to an existing C<refcounted_he> chain (which may
3781be empty), and thus forms a longer chain.  When using the longer chain,
3782the new key/value pair takes precedence over any entry for the same key
3783further along the chain.
3784
3785The new key is specified by C<keypv> and C<keylen>.  If C<flags> has
3786the C<REFCOUNTED_HE_KEY_UTF8> bit set, the key octets are interpreted
3787as UTF-8, otherwise they are interpreted as Latin-1.  C<hash> is
3788a precomputed hash of the key string, or zero if it has not been
3789precomputed.
3790
3791C<value> is the scalar value to store for this key.  C<value> is copied
3792by this function, which thus does not take ownership of any reference
3793to it, and later changes to the scalar will not be reflected in the
3794value visible in the C<refcounted_he>.  Complex types of scalar will not
3795be stored with referential integrity, but will be coerced to strings.
3796C<value> may be either null or C<&PL_sv_placeholder> to indicate that no
3797value is to be associated with the key; this, as with any non-null value,
3798takes precedence over the existence of a value for the key further along
3799the chain.
3800
3801C<parent> points to the rest of the C<refcounted_he> chain to be
3802attached to the new C<refcounted_he>.  This function takes ownership
3803of one reference to C<parent>, and returns one reference to the new
3804C<refcounted_he>.
3805
3806=cut
3807*/
3808
3809struct refcounted_he *
3810Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
3811        const char *keypv, STRLEN keylen, U32 hash, SV *value, U32 flags)
3812{
3813    STRLEN value_len = 0;
3814    const char *value_p = NULL;
3815    bool is_pv;
3816    char value_type;
3817    char hekflags;
3818    STRLEN key_offset = 1;
3819    struct refcounted_he *he;
3820    PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;
3821
3822    if (!value || value == &PL_sv_placeholder) {
3823        value_type = HVrhek_delete;
3824    } else if (SvPOK(value)) {
3825        value_type = HVrhek_PV;
3826    } else if (SvIOK(value)) {
3827        value_type = SvUOK((const SV *)value) ? HVrhek_UV : HVrhek_IV;
3828    } else if (!SvOK(value)) {
3829        value_type = HVrhek_undef;
3830    } else {
3831        value_type = HVrhek_PV;
3832    }
3833    is_pv = value_type == HVrhek_PV;
3834    if (is_pv) {
3835        /* Do it this way so that the SvUTF8() test is after the SvPV, in case
3836           the value is overloaded, and doesn't yet have the UTF-8flag set.  */
3837        value_p = SvPV_const(value, value_len);
3838        if (SvUTF8(value))
3839            value_type = HVrhek_PV_UTF8;
3840        key_offset = value_len + 2;
3841    }
3842    hekflags = value_type;
3843
3844    if (flags & REFCOUNTED_HE_KEY_UTF8) {
3845        /* Canonicalise to Latin-1 where possible. */
3846        const char *keyend = keypv + keylen, *p;
3847        STRLEN nonascii_count = 0;
3848        for (p = keypv; p != keyend; p++) {
3849            if (! UTF8_IS_INVARIANT(*p)) {
3850                if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3851                    goto canonicalised_key;
3852                }
3853                nonascii_count++;
3854                p++;
3855            }
3856        }
3857        if (nonascii_count) {
3858            char *q;
3859            const char *p = keypv, *keyend = keypv + keylen;
3860            keylen -= nonascii_count;
3861            Newx(q, keylen, char);
3862            SAVEFREEPV(q);
3863            keypv = q;
3864            for (; p != keyend; p++, q++) {
3865                U8 c = (U8)*p;
3866                if (UTF8_IS_INVARIANT(c)) {
3867                    *q = (char) c;
3868                }
3869                else {
3870                    p++;
3871                    *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3872                }
3873            }
3874        }
3875        flags &= ~REFCOUNTED_HE_KEY_UTF8;
3876        canonicalised_key: ;
3877    }
3878    if (flags & REFCOUNTED_HE_KEY_UTF8)
3879        hekflags |= HVhek_UTF8;
3880    if (!hash)
3881        PERL_HASH(hash, keypv, keylen);
3882
3883#ifdef USE_ITHREADS
3884    he = (struct refcounted_he*)
3885        PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
3886                             + keylen
3887                             + key_offset);
3888#else
3889    he = (struct refcounted_he*)
3890        PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
3891                             + key_offset);
3892#endif
3893
3894    he->refcounted_he_next = parent;
3895
3896    if (is_pv) {
3897        Copy(value_p, he->refcounted_he_data + 1, value_len + 1, char);
3898        he->refcounted_he_val.refcounted_he_u_len = value_len;
3899    } else if (value_type == HVrhek_IV) {
3900        he->refcounted_he_val.refcounted_he_u_iv = SvIVX(value);
3901    } else if (value_type == HVrhek_UV) {
3902        he->refcounted_he_val.refcounted_he_u_uv = SvUVX(value);
3903    }
3904
3905#ifdef USE_ITHREADS
3906    he->refcounted_he_hash = hash;
3907    he->refcounted_he_keylen = keylen;
3908    Copy(keypv, he->refcounted_he_data + key_offset, keylen, char);
3909#else
3910    he->refcounted_he_hek = share_hek_flags(keypv, keylen, hash, hekflags);
3911#endif
3912
3913    he->refcounted_he_data[0] = hekflags;
3914    he->refcounted_he_refcnt = 1;
3915
3916    return he;
3917}
3918
3919/*
3920=for apidoc refcounted_he_new_pv
3921
3922Like L</refcounted_he_new_pvn>, but takes a nul-terminated string instead
3923of a string/length pair.
3924
3925=cut
3926*/
3927
3928struct refcounted_he *
3929Perl_refcounted_he_new_pv(pTHX_ struct refcounted_he *parent,
3930        const char *key, U32 hash, SV *value, U32 flags)
3931{
3932    PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PV;
3933    return refcounted_he_new_pvn(parent, key, strlen(key), hash, value, flags);
3934}
3935
3936/*
3937=for apidoc refcounted_he_new_sv
3938
3939Like L</refcounted_he_new_pvn>, but takes a Perl scalar instead of a
3940string/length pair.
3941
3942=cut
3943*/
3944
3945struct refcounted_he *
3946Perl_refcounted_he_new_sv(pTHX_ struct refcounted_he *parent,
3947        SV *key, U32 hash, SV *value, U32 flags)
3948{
3949    const char *keypv;
3950    STRLEN keylen;
3951    PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_SV;
3952    if (flags & REFCOUNTED_HE_KEY_UTF8)
3953        Perl_croak(aTHX_ "panic: refcounted_he_new_sv bad flags %" UVxf,
3954            (UV)flags);
3955    keypv = SvPV_const(key, keylen);
3956    if (SvUTF8(key))
3957        flags |= REFCOUNTED_HE_KEY_UTF8;
3958    if (!hash && SvIsCOW_shared_hash(key))
3959        hash = SvSHARED_HASH(key);
3960    return refcounted_he_new_pvn(parent, keypv, keylen, hash, value, flags);
3961}
3962
3963/*
3964=for apidoc refcounted_he_free
3965
3966Decrements the reference count of a C<refcounted_he> by one.  If the
3967reference count reaches zero the structure's memory is freed, which
3968(recursively) causes a reduction of its parent C<refcounted_he>'s
3969reference count.  It is safe to pass a null pointer to this function:
3970no action occurs in this case.
3971
3972=cut
3973*/
3974
3975void
3976Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) {
3977    PERL_UNUSED_CONTEXT;
3978
3979    while (he) {
3980        struct refcounted_he *copy;
3981        U32 new_count;
3982
3983        HINTS_REFCNT_LOCK;
3984        new_count = --he->refcounted_he_refcnt;
3985        HINTS_REFCNT_UNLOCK;
3986
3987        if (new_count) {
3988            return;
3989        }
3990
3991#ifndef USE_ITHREADS
3992        unshare_hek_or_pvn (he->refcounted_he_hek, 0, 0, 0);
3993#endif
3994        copy = he;
3995        he = he->refcounted_he_next;
3996        PerlMemShared_free(copy);
3997    }
3998}
3999
4000/*
4001=for apidoc refcounted_he_inc
4002
4003Increment the reference count of a C<refcounted_he>.  The pointer to the
4004C<refcounted_he> is also returned.  It is safe to pass a null pointer
4005to this function: no action occurs and a null pointer is returned.
4006
4007=cut
4008*/
4009
4010struct refcounted_he *
4011Perl_refcounted_he_inc(pTHX_ struct refcounted_he *he)
4012{
4013    PERL_UNUSED_CONTEXT;
4014    if (he) {
4015        HINTS_REFCNT_LOCK;
4016        he->refcounted_he_refcnt++;
4017        HINTS_REFCNT_UNLOCK;
4018    }
4019    return he;
4020}
4021
4022/*
4023=for apidoc_section $COP
4024=for apidoc cop_fetch_label
4025
4026Returns the label attached to a cop, and stores its length in bytes into
4027C<*len>.
4028Upon return, C<*flags> will be set to either C<SVf_UTF8> or 0.
4029
4030Alternatively, use the macro C<L</CopLABEL_len_flags>>;
4031or if you don't need to know if the label is UTF-8 or not, the macro
4032C<L</CopLABEL_len>>;
4033or if you additionally don't need to know the length, C<L</CopLABEL>>.
4034
4035=cut
4036*/
4037
4038/* pp_entereval is aware that labels are stored with a key ':' at the top of
4039   the linked list.  */
4040const char *
4041Perl_cop_fetch_label(pTHX_ COP *const cop, STRLEN *len, U32 *flags) {
4042    struct refcounted_he *const chain = cop->cop_hints_hash;
4043
4044    PERL_ARGS_ASSERT_COP_FETCH_LABEL;
4045    PERL_UNUSED_CONTEXT;
4046
4047    if (!chain)
4048        return NULL;
4049#ifdef USE_ITHREADS
4050    if (chain->refcounted_he_keylen != 1)
4051        return NULL;
4052    if (*REF_HE_KEY(chain) != ':')
4053        return NULL;
4054#else
4055    if ((STRLEN)HEK_LEN(chain->refcounted_he_hek) != 1)
4056        return NULL;
4057    if (*HEK_KEY(chain->refcounted_he_hek) != ':')
4058        return NULL;
4059#endif
4060    /* Stop anyone trying to really mess us up by adding their own value for
4061       ':' into %^H  */
4062    if ((chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV
4063        && (chain->refcounted_he_data[0] & HVrhek_typemask) != HVrhek_PV_UTF8)
4064        return NULL;
4065
4066    if (len)
4067        *len = chain->refcounted_he_val.refcounted_he_u_len;
4068    if (flags) {
4069        *flags = ((chain->refcounted_he_data[0] & HVrhek_typemask)
4070                  == HVrhek_PV_UTF8) ? SVf_UTF8 : 0;
4071    }
4072    return chain->refcounted_he_data + 1;
4073}
4074
4075/*
4076=for apidoc cop_store_label
4077
4078Save a label into a C<cop_hints_hash>.
4079You need to set flags to C<SVf_UTF8>
4080for a UTF-8 label.  Any other flag is ignored.
4081
4082=cut
4083*/
4084
4085void
4086Perl_cop_store_label(pTHX_ COP *const cop, const char *label, STRLEN len,
4087                     U32 flags)
4088{
4089    SV *labelsv;
4090    PERL_ARGS_ASSERT_COP_STORE_LABEL;
4091
4092    if (flags & ~(SVf_UTF8))
4093        Perl_croak(aTHX_ "panic: cop_store_label illegal flag bits 0x%" UVxf,
4094                   (UV)flags);
4095    labelsv = newSVpvn_flags(label, len, SVs_TEMP);
4096    if (flags & SVf_UTF8)
4097        SvUTF8_on(labelsv);
4098    cop->cop_hints_hash
4099        = refcounted_he_new_pvs(cop->cop_hints_hash, ":", labelsv, 0);
4100}
4101
4102/*
4103=for apidoc_section $HV
4104=for apidoc hv_assert
4105
4106Check that a hash is in an internally consistent state.
4107
4108=cut
4109*/
4110
4111#ifdef DEBUGGING
4112
4113void
4114Perl_hv_assert(pTHX_ HV *hv)
4115{
4116    HE* entry;
4117    int withflags = 0;
4118    int placeholders = 0;
4119    int real = 0;
4120    int bad = 0;
4121    const I32 riter = HvRITER_get(hv);
4122    HE *eiter = HvEITER_get(hv);
4123
4124    PERL_ARGS_ASSERT_HV_ASSERT;
4125
4126    (void)hv_iterinit(hv);
4127
4128    while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) {
4129        /* sanity check the values */
4130        if (HeVAL(entry) == &PL_sv_placeholder)
4131            placeholders++;
4132        else
4133            real++;
4134        /* sanity check the keys */
4135        if (HeSVKEY(entry)) {
4136            NOOP;   /* Don't know what to check on SV keys.  */
4137        } else if (HeKUTF8(entry)) {
4138            withflags++;
4139            if (HeKWASUTF8(entry)) {
4140                PerlIO_printf(Perl_debug_log,
4141                            "hash key has both WASUTF8 and UTF8: '%.*s'\n",
4142                            (int) HeKLEN(entry),  HeKEY(entry));
4143                bad = 1;
4144            }
4145        } else if (HeKWASUTF8(entry))
4146            withflags++;
4147    }
4148    if (!SvTIED_mg((const SV *)hv, PERL_MAGIC_tied)) {
4149        static const char bad_count[] = "Count %d %s(s), but hash reports %d\n";
4150        const int nhashkeys = HvUSEDKEYS(hv);
4151        const int nhashplaceholders = HvPLACEHOLDERS_get(hv);
4152
4153        if (nhashkeys != real) {
4154            PerlIO_printf(Perl_debug_log, bad_count, real, "keys", nhashkeys );
4155            bad = 1;
4156        }
4157        if (nhashplaceholders != placeholders) {
4158            PerlIO_printf(Perl_debug_log, bad_count, placeholders, "placeholder", nhashplaceholders );
4159            bad = 1;
4160        }
4161    }
4162    if (withflags && ! HvHASKFLAGS(hv)) {
4163        PerlIO_printf(Perl_debug_log,
4164                    "Hash has HASKFLAGS off but I count %d key(s) with flags\n",
4165                    withflags);
4166        bad = 1;
4167    }
4168    if (bad) {
4169        sv_dump(MUTABLE_SV(hv));
4170    }
4171    HvRITER_set(hv, riter);		/* Restore hash iterator state */
4172    HvEITER_set(hv, eiter);
4173}
4174
4175#endif
4176
4177/*
4178 * ex: set ts=8 sts=4 sw=4 et:
4179 */
4180