#define JEMALLOC_BASE_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

static void *
base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (extent_hooks == &extent_hooks_default) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
	} else {
		/* No arena context as we are creating new arenas. */
		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
		pre_reentrancy(tsd, NULL);
		addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
		    &zero, &commit, ind);
		post_reentrancy(tsd);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success.  This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc.  This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to be
	 * in some consistent-but-allocated state.
	 */
	if (extent_hooks == &extent_hooks_default) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked.  This should never happen. */
		not_reached();
	} else {
		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
		pre_reentrancy(tsd, NULL);
		if (extent_hooks->dalloc != NULL &&
		    !extent_hooks->dalloc(extent_hooks, addr, size, true,
		    ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->decommit != NULL &&
		    !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
		    ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->purge_forced != NULL &&
		    !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
		    size, ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->purge_lazy != NULL &&
		    !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
		    ind)) {
			goto label_post_reentrancy;
		}
		/* Nothing worked.  That's the application's problem. */
	label_post_reentrancy:
		post_reentrancy(tsd);
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

static void
base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	extent_binit(extent, addr, size, sn);
}

static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

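/*
 * With metadata_thp:auto, a base starts on regular pages and is switched to
 * huge pages only once it has grown to a threshold number of blocks; base 0
 * uses its own threshold (BASE_AUTO_THP_THRESHOLD_A0).  The switch is one-way
 * and retroactively covers the blocks mapped before it.
 */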
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    extent_bsize_get(&block->extent)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

static void *
base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
	    alignment) - (uintptr_t)extent_addr_get(extent);
	ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
	assert(extent_bsize_get(extent) >= *gap_size + size);
	extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
	    *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
	    extent_sn_get(extent));
	return ret;
}
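
/*
 * Illustrative example (hypothetical addresses): bumping an extent at 0x2030
 * with bsize 0x1000 for a size-0x100, alignment-0x40 request yields gap_size =
 * 0x2040 - 0x2030 = 0x10 and ret = 0x2040; the extent is then re-initialized
 * to start at 0x2140 with bsize 0x1000 - 0x10 - 0x100 = 0xef0, keeping its
 * serial number.
 */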

static void
base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
    void *addr, size_t size) {
	if (extent_bsize_get(extent) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(extent_bsize_get(extent) + 1) - 1;
		extent_heap_insert(&base->avail[index_floor], extent);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that is
		 * crossed by the new allocation. Adjust n_thp similarly when
		 * metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

static void *
base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
    unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges.  Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
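	/*
	 * Block sizes are always rounded up to a multiple of HUGEPAGE (and
	 * base_map() uses HUGEPAGE alignment), so a block can be covered
	 * exactly by huge pages when metadata_thp is in use.
	 */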
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
	    *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_extent_init(extent_sn_next, &block->extent,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static extent_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		      && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->extent;
}

base_t *
b0get(void) {
	return b0;
}

base_t *
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;
	base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
	    &gap_size, base_size, base_alignment);
	base->ind = ind;
	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, extent_hooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < NSIZES; i++) {
		extent_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
		    >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
	    base_size);

	return base;
}
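
/*
 * Illustrative usage sketch (names hypothetical): a base is typically created
 * per arena, metadata is carved out of it with base_alloc() /
 * base_alloc_extent(), and the memory is only returned by destroying the
 * whole base, e.g.:
 *
 *	base_t *b = base_new(tsdn, ind, hooks);
 *	void *md = base_alloc(tsdn, b, md_size, CACHELINE);
 *	...
 *	base_delete(tsdn, b);
 *
 * Individual base allocations are never freed back to the base.
 */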

void
base_delete(tsdn_t *tsdn, base_t *base) {
	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

extent_hooks_t *
base_extent_hooks_get(base_t *base) {
	return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
	    ATOMIC_ACQUIRE);
}

extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
	return old_extent_hooks;
}

static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;
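	/*
	 * asize adds worst-case padding for the alignment gap that
	 * base_extent_bump_alloc_helper() may have to leave, so that any
	 * extent found in the size classes searched below can satisfy the
	 * request.
	 */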

	extent_t *extent = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
		extent = extent_heap_remove_first(&base->avail[i]);
		if (extent != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (extent == NULL) {
		/* Try to allocate more space. */
		extent = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (extent == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, extent, usize, alignment);
	if (esn != NULL) {
		*esn = extent_sn_get(extent);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage.  Upon success a
 * pointer to at least size bytes with specified alignment is returned.  Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

extent_t *
base_alloc_extent(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
	    CACHELINE, &esn);
	if (extent == NULL) {
		return NULL;
	}
	extent_esn_set(extent, esn);
	return extent;
}

void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
	return (b0 == NULL);
}