1/*
2 * This radix tree implementation is tailored to the singular purpose of
3 * associating metadata with chunks that are currently owned by jemalloc.
4 *
5 *******************************************************************************
6 */
#ifdef JEMALLOC_H_TYPES

typedef struct rtree_node_elm_s rtree_node_elm_t;
typedef struct rtree_level_s rtree_level_t;
typedef struct rtree_s rtree_t;

/*
 * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
 * machine address width.
 */
#define	LG_RTREE_BITS_PER_LEVEL	4
#define	RTREE_BITS_PER_LEVEL	(1U << LG_RTREE_BITS_PER_LEVEL)
/* Maximum rtree height: address bits divided by bits consumed per level. */
#define	RTREE_HEIGHT_MAX						\
    ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)

/*
 * Used for two-stage lock-free node initialization: a subtree/child slot
 * holds this sentinel (distinct from NULL and from any real pointer) while
 * initialization of the node is in progress; see rtree_node_valid().
 */
#define	RTREE_NODE_INITIALIZING	((rtree_node_elm_t *)0x1)

/*
 * The node allocation callback function's argument is the number of contiguous
 * rtree_node_elm_t structures to allocate, and the resulting memory must be
 * zeroed.
 */
typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);

#endif /* JEMALLOC_H_TYPES */
35/******************************************************************************/
36#ifdef JEMALLOC_H_STRUCTS
37
/*
 * A tree node is a contiguous array of rtree_node_elm_t (indexed via
 * &node[subkey] in the traversal code below).  Each element is one
 * pointer-sized slot: interior nodes store a child pointer, leaf nodes store
 * a value pointer, and pun gives a type-neutral view for atomic access.
 */
struct rtree_node_elm_s {
	union {
		void			*pun;
		rtree_node_elm_t	*child;
		extent_node_t		*val;
	};
};
45
struct rtree_level_s {
	/*
	 * A non-NULL subtree points to a subtree rooted along the hypothetical
	 * path to the leaf node corresponding to key 0.  Depending on what keys
	 * have been used to store to the tree, an arbitrary combination of
	 * subtree pointers may remain NULL.
	 *
	 * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4.
	 * This results in a 3-level tree, and the leftmost leaf can be directly
	 * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
	 * 0x00000000) can be accessed via subtrees[1], and the remainder of the
	 * tree can be accessed via subtrees[0].
	 *
	 *   levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...]
	 *
	 *   levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
	 *
	 *   levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
	 *
	 * This has practical implications on x64, which currently uses only the
	 * lower 47 bits of virtual address space in userland, thus leaving
	 * subtrees[0] unused and avoiding a level of tree traversal.
	 */
	/* Root of this level's subtree; pun allows atomic type-neutral reads. */
	union {
		void			*subtree_pun;
		rtree_node_elm_t	*subtree;
	};
	/* Number of key bits distinguished by this level. */
	unsigned		bits;
	/*
	 * Cumulative number of key bits distinguished by traversing to
	 * corresponding tree level.
	 */
	unsigned		cumbits;
};
81
struct rtree_s {
	/* Callback used to allocate zeroed arrays of rtree_node_elm_t. */
	rtree_node_alloc_t	*alloc;
	/* Callback used to deallocate node arrays. */
	rtree_node_dalloc_t	*dalloc;
	/* Number of levels in this tree; bounded by RTREE_HEIGHT_MAX. */
	unsigned		height;
	/*
	 * Precomputed table used to convert from the number of leading 0 key
	 * bits to which subtree level to start at.
	 */
	unsigned		start_level[RTREE_HEIGHT_MAX];
	rtree_level_t		levels[RTREE_HEIGHT_MAX];
};
93
94#endif /* JEMALLOC_H_STRUCTS */
95/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

/*
 * Initialize rtree for keys of the given bit width.  NOTE(review): presumably
 * returns true on failure per jemalloc convention — confirm in rtree.c.
 */
bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
    rtree_node_dalloc_t *dalloc);
void	rtree_delete(rtree_t *rtree);
/* Slow paths that create missing subtree roots / child nodes on demand. */
rtree_node_elm_t	*rtree_subtree_read_hard(rtree_t *rtree,
    unsigned level);
rtree_node_elm_t	*rtree_child_read_hard(rtree_t *rtree,
    rtree_node_elm_t *elm, unsigned level);

#endif /* JEMALLOC_H_EXTERNS */
107/******************************************************************************/
108#ifdef JEMALLOC_H_INLINES
109
/*
 * Prototypes for the functions defined below.  When inlining is disabled the
 * definitions are emitted exactly once, in the translation unit that defines
 * JEMALLOC_RTREE_C_ (see the #if guard around the definitions).
 */
#ifndef JEMALLOC_ENABLE_INLINE
unsigned	rtree_start_level(rtree_t *rtree, uintptr_t key);
uintptr_t	rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);

bool	rtree_node_valid(rtree_node_elm_t *node);
rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm,
    bool dependent);
rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
    unsigned level, bool dependent);
extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
    bool dependent);
void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
    const extent_node_t *val);
rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level,
    bool dependent);
rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level,
    bool dependent);

extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
#endif
131
132#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
133JEMALLOC_ALWAYS_INLINE unsigned
134rtree_start_level(rtree_t *rtree, uintptr_t key)
135{
136	unsigned start_level;
137
138	if (unlikely(key == 0))
139		return (rtree->height - 1);
140
141	start_level = rtree->start_level[lg_floor(key) >>
142	    LG_RTREE_BITS_PER_LEVEL];
143	assert(start_level < rtree->height);
144	return (start_level);
145}
146
147JEMALLOC_ALWAYS_INLINE uintptr_t
148rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
149{
150
151	return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
152	    rtree->levels[level].cumbits)) & ((ZU(1) <<
153	    rtree->levels[level].bits) - 1));
154}
155
156JEMALLOC_ALWAYS_INLINE bool
157rtree_node_valid(rtree_node_elm_t *node)
158{
159
160	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
161}
162
/*
 * Read elm's child pointer without creating it.  When !dependent the result
 * may be invalid (NULL or RTREE_NODE_INITIALIZING); the caller must fall back
 * to rtree_child_read() / rtree_child_read_hard() in that case.
 */
JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
{
	rtree_node_elm_t *child;

	/* Double-checked read (first read may be stale). */
	child = elm->child;
	if (!dependent && !rtree_node_valid(child))
		child = atomic_read_p(&elm->pun);
	assert(!dependent || child != NULL);
	return (child);
}
175
176JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
177rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
178    bool dependent)
179{
180	rtree_node_elm_t *child;
181
182	child = rtree_child_tryread(elm, dependent);
183	if (!dependent && unlikely(!rtree_node_valid(child)))
184		child = rtree_child_read_hard(rtree, elm, level);
185	assert(!dependent || child != NULL);
186	return (child);
187}
188
189JEMALLOC_ALWAYS_INLINE extent_node_t *
190rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
191{
192
193	if (dependent) {
194		/*
195		 * Reading a val on behalf of a pointer to a valid allocation is
196		 * guaranteed to be a clean read even without synchronization,
197		 * because the rtree update became visible in memory before the
198		 * pointer came into existence.
199		 */
200		return (elm->val);
201	} else {
202		/*
203		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
204		 * dependent on a previous rtree write, which means a stale read
205		 * could result if synchronization were omitted here.
206		 */
207		return (atomic_read_p(&elm->pun));
208	}
209}
210
/*
 * Atomically store val into leaf element elm.  The atomic write pairs with the
 * atomic_read_p() in rtree_val_read() for non-dependent lookups.
 */
JEMALLOC_INLINE void
rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
{

	atomic_write_p(&elm->pun, val);
}
217
/*
 * Read the root of the level'th subtree without creating it.  When !dependent
 * the result may be invalid (NULL or RTREE_NODE_INITIALIZING); the caller must
 * fall back to rtree_subtree_read() / rtree_subtree_read_hard() in that case.
 */
JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
{
	rtree_node_elm_t *subtree;

	/* Double-checked read (first read may be stale). */
	subtree = rtree->levels[level].subtree;
	if (!dependent && unlikely(!rtree_node_valid(subtree)))
		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
	assert(!dependent || subtree != NULL);
	return (subtree);
}
230
231JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
232rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
233{
234	rtree_node_elm_t *subtree;
235
236	subtree = rtree_subtree_tryread(rtree, level, dependent);
237	if (!dependent && unlikely(!rtree_node_valid(subtree)))
238		subtree = rtree_subtree_read_hard(rtree, level);
239	assert(!dependent || subtree != NULL);
240	return (subtree);
241}
242
/*
 * Look up the value associated with key.  Returns NULL only when !dependent
 * and some node along the path has not been created.  The switch below is a
 * manually unrolled tree walk: case labels are expressed in terms of
 * RTREE_HEIGHT_MAX and biased by RTREE_GET_BIAS so that a tree of any height
 * enters the switch at its start level and falls through toward the single
 * leaf case.
 */
JEMALLOC_ALWAYS_INLINE extent_node_t *
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
{
	uintptr_t subkey;
	unsigned start_level;
	rtree_node_elm_t *node;

	start_level = rtree_start_level(rtree, key);

	node = rtree_subtree_tryread(rtree, start_level, dependent);
/* Bias maps this tree's levels onto the fixed RTREE_HEIGHT_MAX case labels. */
#define	RTREE_GET_BIAS	(RTREE_HEIGHT_MAX - rtree->height)
	switch (start_level + RTREE_GET_BIAS) {
/* One interior-level step: validate node, then descend to its child. */
#define	RTREE_GET_SUBTREE(level)					\
	case level:							\
		assert(level < (RTREE_HEIGHT_MAX-1));			\
		if (!dependent && unlikely(!rtree_node_valid(node)))	\
			return (NULL);					\
		subkey = rtree_subkey(rtree, key, level -		\
		    RTREE_GET_BIAS);					\
		node = rtree_child_tryread(&node[subkey], dependent);	\
		/* Fall through. */
/* The terminal step: validate node, then read the value from the leaf. */
#define	RTREE_GET_LEAF(level)						\
	case level:							\
		assert(level == (RTREE_HEIGHT_MAX-1));			\
		if (!dependent && unlikely(!rtree_node_valid(node)))	\
			return (NULL);					\
		subkey = rtree_subkey(rtree, key, level -		\
		    RTREE_GET_BIAS);					\
		/*							\
		 * node is a leaf, so it contains values rather than	\
		 * child pointers.					\
		 */							\
		return (rtree_val_read(rtree, &node[subkey],		\
		    dependent));
/* Instantiate one case per possible interior level for this pointer width. */
#if RTREE_HEIGHT_MAX > 1
	RTREE_GET_SUBTREE(0)
#endif
#if RTREE_HEIGHT_MAX > 2
	RTREE_GET_SUBTREE(1)
#endif
#if RTREE_HEIGHT_MAX > 3
	RTREE_GET_SUBTREE(2)
#endif
#if RTREE_HEIGHT_MAX > 4
	RTREE_GET_SUBTREE(3)
#endif
#if RTREE_HEIGHT_MAX > 5
	RTREE_GET_SUBTREE(4)
#endif
#if RTREE_HEIGHT_MAX > 6
	RTREE_GET_SUBTREE(5)
#endif
#if RTREE_HEIGHT_MAX > 7
	RTREE_GET_SUBTREE(6)
#endif
#if RTREE_HEIGHT_MAX > 8
	RTREE_GET_SUBTREE(7)
#endif
#if RTREE_HEIGHT_MAX > 9
	RTREE_GET_SUBTREE(8)
#endif
#if RTREE_HEIGHT_MAX > 10
	RTREE_GET_SUBTREE(9)
#endif
#if RTREE_HEIGHT_MAX > 11
	RTREE_GET_SUBTREE(10)
#endif
#if RTREE_HEIGHT_MAX > 12
	RTREE_GET_SUBTREE(11)
#endif
#if RTREE_HEIGHT_MAX > 13
	RTREE_GET_SUBTREE(12)
#endif
#if RTREE_HEIGHT_MAX > 14
	RTREE_GET_SUBTREE(13)
#endif
#if RTREE_HEIGHT_MAX > 15
	RTREE_GET_SUBTREE(14)
#endif
#if RTREE_HEIGHT_MAX > 16
#  error Unsupported RTREE_HEIGHT_MAX
#endif
	RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
#undef RTREE_GET_SUBTREE
#undef RTREE_GET_LEAF
	default: not_reached();
	}
#undef RTREE_GET_BIAS
	not_reached();
}
333
334JEMALLOC_INLINE bool
335rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
336{
337	uintptr_t subkey;
338	unsigned i, start_level;
339	rtree_node_elm_t *node, *child;
340
341	start_level = rtree_start_level(rtree, key);
342
343	node = rtree_subtree_read(rtree, start_level, false);
344	if (node == NULL)
345		return (true);
346	for (i = start_level; /**/; i++, node = child) {
347		subkey = rtree_subkey(rtree, key, i);
348		if (i == rtree->height - 1) {
349			/*
350			 * node is a leaf, so it contains values rather than
351			 * child pointers.
352			 */
353			rtree_val_write(rtree, &node[subkey], val);
354			return (false);
355		}
356		assert(i + 1 < rtree->height);
357		child = rtree_child_read(rtree, &node[subkey], i, false);
358		if (child == NULL)
359			return (true);
360	}
361	not_reached();
362}
363#endif
364
365#endif /* JEMALLOC_H_INLINES */
366/******************************************************************************/
367