#ifndef JEMALLOC_INTERNAL_RTREE_H
#define JEMALLOC_INTERNAL_RTREE_H

#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/tsd.h"

/*
 * This radix tree implementation is tailored to the singular purpose of
 * associating metadata with extents that are currently owned by jemalloc.
 *
 *******************************************************************************
 */

/* Number of high insignificant bits. */
#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR)
/* Number of low insignificant bits. */
#define RTREE_NLIB LG_PAGE
/* Number of significant bits. */
#define RTREE_NSB (LG_VADDR - RTREE_NLIB)
/* Number of levels in radix tree. */
#if RTREE_NSB <= 10
#  define RTREE_HEIGHT 1
#elif RTREE_NSB <= 36
#  define RTREE_HEIGHT 2
#elif RTREE_NSB <= 52
#  define RTREE_HEIGHT 3
#else
#  error Unsupported number of significant virtual address bits
#endif
/* Use compact leaf representation if virtual address encoding allows. */
#if RTREE_NHIB >= LG_CEIL_NSIZES
#  define RTREE_LEAF_COMPACT
#endif
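
/*
 * Worked example of the macros above (illustrative; assumes a 64-bit build
 * with LG_VADDR == 48 and LG_PAGE == 12, as on typical x86_64 Linux):
 *
 *	RTREE_NHIB = 64 - 48 = 16	(bits above the usable address range)
 *	RTREE_NLIB = 12			(bits within a page)
 *	RTREE_NSB  = 48 - 12 = 36	(page-number bits the tree must cover)
 *	RTREE_HEIGHT = 2		(36 <= 36)
 *
 * RTREE_LEAF_COMPACT is defined whenever a size class index fits in the
 * RTREE_NHIB spare bits; with LG_CEIL_NSIZES commonly 8, 16 >= 8 holds and
 * the packed single-word leaf representation below is used.
 */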

/* Needed for initialization only. */
#define RTREE_LEAFKEY_INVALID ((uintptr_t)1)

typedef struct rtree_node_elm_s rtree_node_elm_t;
struct rtree_node_elm_s {
	atomic_p_t	child; /* (rtree_{node,leaf}_elm_t *) */
};

struct rtree_leaf_elm_s {
#ifdef RTREE_LEAF_COMPACT
	/*
	 * Single pointer-width field containing all three leaf element fields.
	 * For example, on a 64-bit x64 system with 48 significant virtual
	 * memory address bits, the index, extent, and slab fields are packed
	 * as follows:
	 *
	 * x: index
	 * e: extent
	 * b: slab
	 *
	 *   00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b
	 */
	atomic_p_t	le_bits;
#else
	atomic_p_t	le_extent; /* (extent_t *) */
	atomic_u_t	le_szind; /* (szind_t) */
	atomic_b_t	le_slab; /* (bool) */
#endif
};
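
/*
 * Packing sketch for the compact representation (mirrors the writers further
 * below; illustrative, assuming a 64-bit build with LG_VADDR == 48):
 *
 *	bits = ((uintptr_t)szind << LG_VADDR)
 *	     | ((uintptr_t)extent & (((uintptr_t)1 << LG_VADDR) - 1))
 *	     | (uintptr_t)slab;
 *
 * This relies on extent_t pointers being aligned enough that bit 0 is free
 * for the slab flag, and on the high RTREE_NHIB address bits being
 * recoverable on read (sign extension on most architectures, zero extension
 * on aarch64; see rtree_leaf_elm_bits_extent_get()).
 */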

typedef struct rtree_level_s rtree_level_t;
struct rtree_level_s {
	/* Number of key bits distinguished by this level. */
	unsigned		bits;
	/*
	 * Cumulative number of key bits distinguished by traversing to
	 * corresponding tree level.
	 */
	unsigned		cumbits;
};

typedef struct rtree_s rtree_t;
struct rtree_s {
	malloc_mutex_t		init_lock;
	/* Number of elements based on rtree_levels[0].bits. */
#if RTREE_HEIGHT > 1
	rtree_node_elm_t	root[1U << (RTREE_NSB/RTREE_HEIGHT)];
#else
	rtree_leaf_elm_t	root[1U << (RTREE_NSB/RTREE_HEIGHT)];
#endif
};

/*
 * Split the bits into one to three partitions depending on number of
 * significant bits.  If the number of bits does not divide evenly into the
 * number of levels, place one remainder bit per level starting at the leaf
 * level.
 */
static const rtree_level_t rtree_levels[] = {
#if RTREE_HEIGHT == 1
	{RTREE_NSB, RTREE_NHIB + RTREE_NSB}
#elif RTREE_HEIGHT == 2
	{RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2},
	{RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB}
#elif RTREE_HEIGHT == 3
	{RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3},
	{RTREE_NSB/3 + RTREE_NSB%3/2,
	    RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2},
	{RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB}
#else
#  error Unsupported rtree height
#endif
};
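
/*
 * Worked example (illustrative; same assumptions as above: RTREE_NSB == 36,
 * RTREE_NHIB == 16, RTREE_HEIGHT == 2):
 *
 *	rtree_levels[0] = {18, 34}	root level, key bits [47..30]
 *	rtree_levels[1] = {18, 52}	leaf level, key bits [29..12]
 *
 * so the embedded root array has 1U << 18 entries, and each leaf covers
 * 2^18 pages (1 GiB of address space with 4 KiB pages).
 */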

bool rtree_new(rtree_t *rtree, bool zeroed);

typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t);
extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc;

typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t);
extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc;

typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *);
extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc;

typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *);
extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc;
#ifdef JEMALLOC_JET
void rtree_delete(tsdn_t *tsdn, rtree_t *rtree);
#endif
rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
    rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing);

JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leafkey(uintptr_t key) {
	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
	unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
	    rtree_levels[RTREE_HEIGHT-1].bits);
	unsigned maskbits = ptrbits - cumbits;
	uintptr_t mask = ~((ZU(1) << maskbits) - 1);
	return (key & mask);
}

JEMALLOC_ALWAYS_INLINE size_t
rtree_cache_direct_map(uintptr_t key) {
	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
	unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
	    rtree_levels[RTREE_HEIGHT-1].bits);
	unsigned maskbits = ptrbits - cumbits;
	return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1));
}

JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_subkey(uintptr_t key, unsigned level) {
	unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
	unsigned cumbits = rtree_levels[level].cumbits;
	unsigned shiftbits = ptrbits - cumbits;
	unsigned maskbits = rtree_levels[level].bits;
	uintptr_t mask = (ZU(1) << maskbits) - 1;
	return ((key >> shiftbits) & mask);
}
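
/*
 * Worked example for the three helpers above (illustrative; same 64-bit,
 * LG_VADDR == 48, LG_PAGE == 12, RTREE_HEIGHT == 2 assumptions):
 *
 *	rtree_leafkey(key)	    == key & ~(((uintptr_t)1 << 30) - 1)
 *				       (keys in the same 1 GiB region share a
 *				       leaf, and hence a cache entry)
 *	rtree_cache_direct_map(key) == (key >> 30) & (RTREE_CTX_NCACHE - 1)
 *	rtree_subkey(key, 1)	    == (key >> 12) & (((uintptr_t)1 << 18) - 1)
 *				       (the page index within that leaf)
 */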

/*
 * Atomic getters.
 *
 * dependent: Reading a value on behalf of a pointer to a valid allocation
 *            is guaranteed to be a clean read even without synchronization,
 *            because the rtree update became visible in memory before the
 *            pointer came into existence.
 * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be
 *             dependent on a previous rtree write, which means a stale read
 *             could result if synchronization were omitted here.
 */
#  ifdef RTREE_LEAF_COMPACT
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
    bool dependent) {
	return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
}

JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
#    ifdef __aarch64__
	/*
	 * aarch64 doesn't sign extend the highest virtual address bit to set
	 * the higher ones.  Instead, the high bits get zeroed.
	 */
	uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
	/* Mask off the slab bit. */
	uintptr_t low_bit_mask = ~(uintptr_t)1;
	uintptr_t mask = high_bit_mask & low_bit_mask;
	return (extent_t *)(bits & mask);
#    else
	/* Restore sign-extended high bits, mask slab bit. */
	return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
	    RTREE_NHIB) & ~((uintptr_t)0x1));
#    endif
}

JEMALLOC_ALWAYS_INLINE szind_t
rtree_leaf_elm_bits_szind_get(uintptr_t bits) {
	return (szind_t)(bits >> LG_VADDR);
}

JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
	return (bool)(bits & (uintptr_t)0x1);
}

#  endif

JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
	return rtree_leaf_elm_bits_extent_get(bits);
#else
	extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent
	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
	return extent;
#endif
}

JEMALLOC_ALWAYS_INLINE szind_t
rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
	return rtree_leaf_elm_bits_szind_get(bits);
#else
	return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED
	    : ATOMIC_ACQUIRE);
#endif
}

JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
	return rtree_leaf_elm_bits_slab_get(bits);
#else
	return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED :
	    ATOMIC_ACQUIRE);
#endif
}

static inline void
rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, extent_t *extent) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
	uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
	    LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1))
	    | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
	atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE);
#endif
}

static inline void
rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, szind_t szind) {
	assert(szind <= NSIZES);

#ifdef RTREE_LEAF_COMPACT
	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
	    true);
	uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
	    ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
	    (((uintptr_t)0x1 << LG_VADDR) - 1)) |
	    ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
	atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE);
#endif
}

static inline void
rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
    rtree_leaf_elm_t *elm, bool slab) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
	    true);
	uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
	    LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
	    (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab);
	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
	atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE);
#endif
}

static inline void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
    extent_t *extent, szind_t szind, bool slab) {
#ifdef RTREE_LEAF_COMPACT
	uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
	    ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
	    ((uintptr_t)slab);
	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
	rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
	rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
	/*
	 * Write extent last, since the element is atomically considered valid
	 * as soon as the extent field is non-NULL.
	 */
	rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent);
#endif
}

static inline void
rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
    rtree_leaf_elm_t *elm, szind_t szind, bool slab) {
	assert(!slab || szind < NBINS);

	/*
	 * The caller implicitly assures that it is the only writer to the szind
	 * and slab fields, and that the extent field cannot currently change.
	 */
	rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
	rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
}

JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent, bool init_missing) {
	assert(key != 0);
	assert(!dependent || !init_missing);

	size_t slot = rtree_cache_direct_map(key);
	uintptr_t leafkey = rtree_leafkey(key);
	assert(leafkey != RTREE_LEAFKEY_INVALID);

	/* Fast path: L1 direct mapped cache. */
	if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
		rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
		assert(leaf != NULL);
		uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
		return &leaf[subkey];
	}
	/*
	 * Search the L2 LRU cache.  On hit, swap the matching element into the
	 * slot in L1 cache, and move the position in L2 up by 1.
	 */
#define RTREE_CACHE_CHECK_L2(i) do {					\
	if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) {	\
		rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf;	\
		assert(leaf != NULL);					\
		if (i > 0) {						\
			/* Bubble up by one. */				\
			rtree_ctx->l2_cache[i].leafkey =		\
				rtree_ctx->l2_cache[i - 1].leafkey;	\
			rtree_ctx->l2_cache[i].leaf =			\
				rtree_ctx->l2_cache[i - 1].leaf;	\
			rtree_ctx->l2_cache[i - 1].leafkey =		\
			    rtree_ctx->cache[slot].leafkey;		\
			rtree_ctx->l2_cache[i - 1].leaf =		\
			    rtree_ctx->cache[slot].leaf;		\
		} else {						\
			rtree_ctx->l2_cache[0].leafkey =		\
			    rtree_ctx->cache[slot].leafkey;		\
			rtree_ctx->l2_cache[0].leaf =			\
			    rtree_ctx->cache[slot].leaf;		\
		}							\
		rtree_ctx->cache[slot].leafkey = leafkey;		\
		rtree_ctx->cache[slot].leaf = leaf;			\
		uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);	\
		return &leaf[subkey];					\
	}								\
} while (0)
	/* Check the first cache entry. */
	RTREE_CACHE_CHECK_L2(0);
	/* Search the remaining cache elements. */
	for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) {
		RTREE_CACHE_CHECK_L2(i);
	}
#undef RTREE_CACHE_CHECK_L2

	return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key,
	    dependent, init_missing);
}

static inline bool
rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
    extent_t *extent, szind_t szind, bool slab) {
	/* Use rtree_clear() to set the extent to NULL. */
	assert(extent != NULL);

	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
	    key, false, true);
	if (elm == NULL) {
		return true;
	}

	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL);
	rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab);

	return false;
}
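
/*
 * Write-side usage sketch (illustrative only; extents_rtree and
 * extent_base_get() are defined elsewhere in the tree):
 *
 *	if (rtree_write(tsdn, &extents_rtree, rtree_ctx,
 *	    (uintptr_t)extent_base_get(extent), extent, szind, slab)) {
 *		... leaf allocation failed; the caller must back out ...
 *	}
 */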

JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
    bool dependent) {
	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
	    key, dependent, false);
	if (!dependent && elm == NULL) {
		return NULL;
	}
	assert(elm != NULL);
	return elm;
}

JEMALLOC_ALWAYS_INLINE extent_t *
rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent) {
	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
	    dependent);
	if (!dependent && elm == NULL) {
		return NULL;
	}
	return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
}
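
/*
 * Read-side usage sketch (illustrative only; tsdn_rtree_ctx() and
 * extents_rtree are defined elsewhere in the tree).  dependent == true is
 * appropriate when ptr is known to refer to a live allocation:
 *
 *	rtree_ctx_t rtree_ctx_fallback;
 *	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 *	extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
 *	    (uintptr_t)ptr, true);
 */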

JEMALLOC_ALWAYS_INLINE szind_t
rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent) {
	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
	    dependent);
	if (!dependent && elm == NULL) {
		return NSIZES;
	}
	return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
}

/*
 * rtree_slab_read() is intentionally omitted because slab is always read in
 * conjunction with szind, which makes rtree_szind_slab_read() a better choice.
 */

JEMALLOC_ALWAYS_INLINE bool
rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) {
	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
	    dependent);
	if (!dependent && elm == NULL) {
		return true;
	}
	*r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
	return false;
}

JEMALLOC_ALWAYS_INLINE bool
rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) {
	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
	    dependent);
	if (!dependent && elm == NULL) {
		return true;
	}
#ifdef RTREE_LEAF_COMPACT
	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
	*r_szind = rtree_leaf_elm_bits_szind_get(bits);
	*r_slab = rtree_leaf_elm_bits_slab_get(bits);
#else
	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
	*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent);
#endif
	return false;
}

static inline void
rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, szind_t szind, bool slab) {
	assert(!slab || szind < NBINS);

	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
	rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab);
}

static inline void
rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key) {
	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
	    NULL);
	rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
}
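
/*
 * Summary of the write-side protocol implied by the assertions above:
 * rtree_write() installs a non-NULL extent into a currently empty element,
 * rtree_szind_slab_update() mutates szind/slab for a live element under the
 * caller's exclusion guarantees, and rtree_clear() resets a live element back
 * to (NULL, NSIZES, false).
 */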

#endif /* JEMALLOC_INTERNAL_RTREE_H */