arena.c revision 235238
#define	JEMALLOC_ARENA_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

ssize_t		opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
arena_bin_info_t	arena_bin_info[NBINS];

JEMALLOC_ALIGNED(CACHELINE)
const uint8_t	small_size2bin[] = {
#define	S2B_8(i)	i,
#define	S2B_16(i)	S2B_8(i) S2B_8(i)
#define	S2B_32(i)	S2B_16(i) S2B_16(i)
#define	S2B_64(i)	S2B_32(i) S2B_32(i)
#define	S2B_128(i)	S2B_64(i) S2B_64(i)
#define	S2B_256(i)	S2B_128(i) S2B_128(i)
#define	S2B_512(i)	S2B_256(i) S2B_256(i)
#define	S2B_1024(i)	S2B_512(i) S2B_512(i)
#define	S2B_2048(i)	S2B_1024(i) S2B_1024(i)
#define	S2B_4096(i)	S2B_2048(i) S2B_2048(i)
#define	S2B_8192(i)	S2B_4096(i) S2B_4096(i)
#define	SIZE_CLASS(bin, delta, size)					\
	S2B_##delta(bin)
	SIZE_CLASSES
#undef S2B_8
#undef S2B_16
#undef S2B_32
#undef S2B_64
#undef S2B_128
#undef S2B_256
#undef S2B_512
#undef S2B_1024
#undef S2B_2048
#undef S2B_4096
#undef S2B_8192
#undef SIZE_CLASS
};
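
/*
 * Illustrative note (editorial, not part of the original source): each
 * SIZE_CLASS(bin, delta, size) entry above expands to S2B_<delta>(bin),
 * i.e. delta/8 copies of the bin index, so the table holds one slot per
 * 8-byte step of the small size range.  Assuming LG_TINY_MIN == 3, a
 * lookup then amounts to
 *
 *	binind = small_size2bin[(size - 1) >> LG_TINY_MIN];
 *
 * so all requests that round up to the same small size class land on
 * slots holding the same bin index.
 */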

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
    bool large, size_t binind, bool zero);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
    size_t binind, bool zero);
static void	arena_purge(arena_t *arena, bool all);
static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize);
static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t	*arena_bin_runs_first(arena_bin_t *bin);
static void	arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run);
static void	arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run);
static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static void	arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
    arena_bin_t *bin);
static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, arena_bin_t *bin);
static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, arena_bin_t *bin);
static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t oldsize, size_t size);
static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t oldsize, size_t size, size_t extra, bool zero);
static bool	arena_ralloc_large(void *ptr, size_t oldsize, size_t size,
    size_t extra, bool zero);
static size_t	bin_info_run_size_calc(arena_bin_info_t *bin_info,
    size_t min_run_size);
static void	bin_info_init(void);

/******************************************************************************/

static inline int
arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
	uintptr_t a_mapelm = (uintptr_t)a;
	uintptr_t b_mapelm = (uintptr_t)b;

	assert(a != NULL);
	assert(b != NULL);

	return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
}

/* Generate red-black tree functions. */
rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
    u.rb_link, arena_run_comp)

static inline int
arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
	int ret;
	size_t a_size = a->bits & ~PAGE_MASK;
	size_t b_size = b->bits & ~PAGE_MASK;

	assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
	    CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));

	ret = (a_size > b_size) - (a_size < b_size);
	if (ret == 0) {
		uintptr_t a_mapelm, b_mapelm;

		if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
			a_mapelm = (uintptr_t)a;
		else {
			/*
			 * Treat keys as though they are lower than anything
			 * else.
			 */
			a_mapelm = 0;
		}
		b_mapelm = (uintptr_t)b;

		ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
	}

	return (ret);
}

/* Generate red-black tree functions. */
rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
    u.rb_link, arena_avail_comp)

static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{
	void *ret;
	unsigned regind;
	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
	    (uintptr_t)bin_info->bitmap_offset);

	assert(run->nfree > 0);
	assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);

	regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
	ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
	    (uintptr_t)(bin_info->reg_interval * regind));
	run->nfree--;
	if (regind == run->nextind)
		run->nextind++;
	assert(regind < run->nextind);
	return (ret);
}
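
/*
 * Illustrative note (editorial, not part of the original source): regions
 * within a run live at fixed intervals, so the address computed above is
 *
 *	reg = (uintptr_t)run + reg0_offset + regind * reg_interval
 *
 * e.g. assuming reg0_offset == 32 and reg_interval == 48, region 2 starts
 * 32 + 2*48 == 128 bytes into the run.  bitmap_sfu() hands out the lowest
 * free region index and marks it allocated; arena_run_reg_dalloc() below
 * clears that bit again on free.
 */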

static inline void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	size_t mapbits = arena_mapbits_get(chunk, pageind);
	size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
	arena_bin_info_t *bin_info = &arena_bin_info[binind];
	unsigned regind = arena_run_regind(run, bin_info, ptr);
	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
	    (uintptr_t)bin_info->bitmap_offset);

	assert(run->nfree < bin_info->nregs);
	/* Freeing an interior pointer can cause assertion failure. */
	assert(((uintptr_t)ptr - ((uintptr_t)run +
	    (uintptr_t)bin_info->reg0_offset)) %
	    (uintptr_t)bin_info->reg_interval == 0);
	assert((uintptr_t)ptr >= (uintptr_t)run +
	    (uintptr_t)bin_info->reg0_offset);
	/* Freeing an unallocated pointer can cause assertion failure. */
	assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));

	bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
	run->nfree++;
}

static inline void
arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
{
	size_t i;
	UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));

	for (i = 0; i < PAGE / sizeof(size_t); i++)
		assert(p[i] == 0);
}

static void
arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
    size_t binind, bool zero)
{
	arena_chunk_t *chunk;
	size_t run_ind, total_pages, need_pages, rem_pages, i;
	size_t flag_dirty;
	arena_avail_tree_t *runs_avail;

	assert((large && binind == BININD_INVALID) || (large == false && binind
	    != BININD_INVALID));

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
	flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
	runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
	    &arena->runs_avail_clean;
	total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
	    LG_PAGE;
	assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
	    flag_dirty);
	need_pages = (size >> LG_PAGE);
	assert(need_pages > 0);
	assert(need_pages <= total_pages);
	rem_pages = total_pages - need_pages;

	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind));
	if (config_stats) {
		/*
		 * Update stats_cactive if nactive is crossing a chunk
		 * multiple.
		 */
		size_t cactive_diff = CHUNK_CEILING((arena->nactive +
		    need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
		    LG_PAGE);
		if (cactive_diff != 0)
			stats_cactive_add(cactive_diff);
	}
	arena->nactive += need_pages;

	/* Keep track of trailing unused pages for later use. */
	if (rem_pages > 0) {
		if (flag_dirty != 0) {
			arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
			    (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY);
			arena_mapbits_unallocated_set(chunk,
			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
			    CHUNK_MAP_DIRTY);
		} else {
			arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
			    (rem_pages << LG_PAGE),
			    arena_mapbits_unzeroed_get(chunk,
			    run_ind+need_pages));
			arena_mapbits_unallocated_set(chunk,
			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
			    arena_mapbits_unzeroed_get(chunk,
			    run_ind+total_pages-1));
		}
		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
		    run_ind+need_pages));
	}

	/* Update dirty page accounting. */
	if (flag_dirty != 0) {
		chunk->ndirty -= need_pages;
		arena->ndirty -= need_pages;
	}

	/*
	 * Update the page map separately for large vs. small runs, since it is
	 * possible to avoid iteration for large mallocs.
	 */
	if (large) {
		if (zero) {
			if (flag_dirty == 0) {
				/*
				 * The run is clean, so some pages may be
				 * zeroed (i.e. never before touched).
				 */
				for (i = 0; i < need_pages; i++) {
					if (arena_mapbits_unzeroed_get(chunk,
					    run_ind+i) != 0) {
						VALGRIND_MAKE_MEM_UNDEFINED(
						    (void *)((uintptr_t)
						    chunk + ((run_ind+i) <<
						    LG_PAGE)), PAGE);
						memset((void *)((uintptr_t)
						    chunk + ((run_ind+i) <<
						    LG_PAGE)), 0, PAGE);
					} else if (config_debug) {
						VALGRIND_MAKE_MEM_DEFINED(
						    (void *)((uintptr_t)
						    chunk + ((run_ind+i) <<
						    LG_PAGE)), PAGE);
						arena_chunk_validate_zeroed(
						    chunk, run_ind+i);
					}
				}
			} else {
				/*
				 * The run is dirty, so all pages must be
				 * zeroed.
				 */
				VALGRIND_MAKE_MEM_UNDEFINED((void
				    *)((uintptr_t)chunk + (run_ind <<
				    LG_PAGE)), (need_pages << LG_PAGE));
				memset((void *)((uintptr_t)chunk + (run_ind <<
				    LG_PAGE)), 0, (need_pages << LG_PAGE));
			}
		}

		/*
		 * Set the last element first, in case the run only contains one
		 * page (i.e. both statements set the same element).
		 */
		arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0,
		    flag_dirty);
		arena_mapbits_large_set(chunk, run_ind, size, flag_dirty);
	} else {
		assert(zero == false);
		/*
		 * Propagate the dirty and unzeroed flags to the allocated
		 * small run, so that arena_dalloc_bin_run() has the ability to
		 * conditionally trim clean pages.
		 */
		arena_mapbits_small_set(chunk, run_ind, 0, binind,
		    arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty);
		/*
		 * The first page will always be dirtied during small run
		 * initialization, so a validation failure here would not
		 * actually cause an observable failure.
		 */
		if (config_debug && flag_dirty == 0 &&
		    arena_mapbits_unzeroed_get(chunk, run_ind) == 0)
			arena_chunk_validate_zeroed(chunk, run_ind);
		for (i = 1; i < need_pages - 1; i++) {
			arena_mapbits_small_set(chunk, run_ind+i, i,
			    binind, arena_mapbits_unzeroed_get(chunk,
			    run_ind+i));
			if (config_debug && flag_dirty == 0 &&
			    arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
				arena_chunk_validate_zeroed(chunk, run_ind+i);
		}
		arena_mapbits_small_set(chunk, run_ind+need_pages-1,
		    need_pages-1, binind, arena_mapbits_unzeroed_get(chunk,
		    run_ind+need_pages-1) | flag_dirty);
		if (config_debug && flag_dirty == 0 &&
		    arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) ==
		    0) {
			arena_chunk_validate_zeroed(chunk,
			    run_ind+need_pages-1);
		}
	}
}
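
/*
 * Illustrative note (editorial, not part of the original source): the
 * cactive_diff logic above only charges stats_cactive when the arena's
 * active page count crosses a chunk multiple.  For example, assuming
 * 4 KiB pages and 4 MiB chunks (1024 pages per chunk), growing nactive
 * from 1020 to 1030 pages adds CHUNK_CEILING(1030 pages) -
 * CHUNK_CEILING(1020 pages) = 8 MiB - 4 MiB = 4 MiB, whereas growing
 * from 10 to 20 pages stays within the first chunk multiple and adds
 * nothing.
 */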

static arena_chunk_t *
arena_chunk_alloc(arena_t *arena)
{
	arena_chunk_t *chunk;
	size_t i;

	if (arena->spare != NULL) {
		arena_avail_tree_t *runs_avail;

		chunk = arena->spare;
		arena->spare = NULL;

		/* Insert the run into the appropriate runs_avail_* tree. */
		if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
			runs_avail = &arena->runs_avail_clean;
		else
			runs_avail = &arena->runs_avail_dirty;
		assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
		    arena_maxclass);
		assert(arena_mapbits_unallocated_size_get(chunk,
		    chunk_npages-1) == arena_maxclass);
		assert(arena_mapbits_dirty_get(chunk, map_bias) ==
		    arena_mapbits_dirty_get(chunk, chunk_npages-1));
		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
		    map_bias));
	} else {
		bool zero;
		size_t unzeroed;

		zero = false;
		malloc_mutex_unlock(&arena->lock);
		chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize,
		    false, &zero);
		malloc_mutex_lock(&arena->lock);
		if (chunk == NULL)
			return (NULL);
		if (config_stats)
			arena->stats.mapped += chunksize;

		chunk->arena = arena;
		ql_elm_new(chunk, link_dirty);
		chunk->dirtied = false;

		/*
		 * Claim that no pages are in use, since the header is merely
		 * overhead.
		 */
		chunk->ndirty = 0;

		/*
		 * Initialize the map to contain one maximal free untouched run.
		 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
		 * chunk.
		 */
		unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
		arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass,
		    unzeroed);
		/*
		 * There is no need to initialize the internal page map entries
		 * unless the chunk is not zeroed.
		 */
		if (zero == false) {
			for (i = map_bias+1; i < chunk_npages-1; i++)
				arena_mapbits_unzeroed_set(chunk, i, unzeroed);
		} else if (config_debug) {
			for (i = map_bias+1; i < chunk_npages-1; i++) {
				assert(arena_mapbits_unzeroed_get(chunk, i) ==
				    unzeroed);
			}
		}
		arena_mapbits_unallocated_set(chunk, chunk_npages-1,
		    arena_maxclass, unzeroed);

		/* Insert the run into the runs_avail_clean tree. */
		arena_avail_tree_insert(&arena->runs_avail_clean,
		    arena_mapp_get(chunk, map_bias));
	}

	return (chunk);
}

static void
arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
{
	arena_avail_tree_t *runs_avail;

	/*
	 * Remove run from the appropriate runs_avail_* tree, so that the arena
	 * does not use it.
	 */
	if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
		runs_avail = &arena->runs_avail_clean;
	else
		runs_avail = &arena->runs_avail_dirty;
	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias));

	if (arena->spare != NULL) {
		arena_chunk_t *spare = arena->spare;

		arena->spare = chunk;
		if (spare->dirtied) {
			ql_remove(&chunk->arena->chunks_dirty, spare,
			    link_dirty);
			arena->ndirty -= spare->ndirty;
		}
		malloc_mutex_unlock(&arena->lock);
		chunk_dealloc((void *)spare, chunksize, true);
		malloc_mutex_lock(&arena->lock);
		if (config_stats)
			arena->stats.mapped -= chunksize;
	} else
		arena->spare = chunk;
}

static arena_run_t *
arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind,
    bool zero)
{
	arena_chunk_t *chunk;
	arena_run_t *run;
	arena_chunk_map_t *mapelm, key;

	assert(size <= arena_maxclass);
	assert((size & PAGE_MASK) == 0);
	assert((large && binind == BININD_INVALID) || (large == false && binind
	    != BININD_INVALID));

	/* Search the arena's chunks for the lowest best fit. */
	key.bits = size | CHUNK_MAP_KEY;
	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
	if (mapelm != NULL) {
		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
		size_t pageind = (((uintptr_t)mapelm -
		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
		    + map_bias;

		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
		    LG_PAGE));
		arena_run_split(arena, run, size, large, binind, zero);
		return (run);
	}
	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
	if (mapelm != NULL) {
		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
		size_t pageind = (((uintptr_t)mapelm -
		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
		    + map_bias;

		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
		    LG_PAGE));
		arena_run_split(arena, run, size, large, binind, zero);
		return (run);
	}

	/*
	 * No usable runs.  Create a new chunk from which to allocate the run.
	 */
	chunk = arena_chunk_alloc(arena);
	if (chunk != NULL) {
		run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
		arena_run_split(arena, run, size, large, binind, zero);
		return (run);
	}

	/*
	 * arena_chunk_alloc() failed, but another thread may have made
	 * sufficient memory available while this one dropped arena->lock in
	 * arena_chunk_alloc(), so search one more time.
	 */
	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
	if (mapelm != NULL) {
		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
		size_t pageind = (((uintptr_t)mapelm -
		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
		    + map_bias;

		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
		    LG_PAGE));
		arena_run_split(arena, run, size, large, binind, zero);
		return (run);
	}
	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
	if (mapelm != NULL) {
		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
		size_t pageind = (((uintptr_t)mapelm -
		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
		    + map_bias;

		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
		    LG_PAGE));
		arena_run_split(arena, run, size, large, binind, zero);
		return (run);
	}

	return (NULL);
}

static inline void
arena_maybe_purge(arena_t *arena)
{

	/* Enforce opt_lg_dirty_mult. */
	if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory &&
	    (arena->ndirty - arena->npurgatory) > chunk_npages &&
	    (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
	    arena->npurgatory))
		arena_purge(arena, false);
}
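
/*
 * Illustrative note (editorial, not part of the original source): the
 * condition above starts purging once the number of dirty pages not
 * already claimed for purging exceeds both chunk_npages and
 * nactive >> opt_lg_dirty_mult.  Assuming the default opt_lg_dirty_mult
 * of 3, an arena with nactive == 10000 and npurgatory == 0 begins purging
 * once ndirty exceeds max(chunk_npages, 10000/8 == 1250).
 */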

static inline void
arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
{
	ql_head(arena_chunk_map_t) mapelms;
	arena_chunk_map_t *mapelm;
	size_t pageind, flag_unzeroed;
	size_t ndirty;
	size_t nmadvise;

	ql_new(&mapelms);

	flag_unzeroed =
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
   /*
    * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
    * mappings, but not for file-backed mappings.
    */
	    0
#else
	    CHUNK_MAP_UNZEROED
#endif
	    ;

	/*
	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
	 * run is reinserted into runs_avail_dirty, and 2) so that it cannot be
	 * completely discarded by another thread while arena->lock is dropped
	 * by this thread.  Note that the arena_run_dalloc() call will
	 * implicitly deallocate the chunk, so no explicit action is required
	 * in this function to deallocate the chunk.
	 *
	 * Note that once a chunk contains dirty pages, it cannot again contain
	 * a single run unless 1) it is a dirty run, or 2) this function purges
	 * dirty pages and causes the transition to a single clean run.  Thus
	 * (chunk == arena->spare) is possible, but it is not possible for
	 * this function to be called on the spare unless it contains a dirty
	 * run.
	 */
	if (chunk == arena->spare) {
		assert(arena_mapbits_dirty_get(chunk, map_bias) != 0);
		arena_chunk_alloc(arena);
	}

	/* Temporarily allocate all free dirty runs within chunk. */
	for (pageind = map_bias; pageind < chunk_npages;) {
		mapelm = arena_mapp_get(chunk, pageind);
		if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
			size_t npages;

			npages = arena_mapbits_unallocated_size_get(chunk,
			    pageind) >> LG_PAGE;
			assert(pageind + npages <= chunk_npages);
			if (arena_mapbits_dirty_get(chunk, pageind)) {
				size_t i;

				arena_avail_tree_remove(
				    &arena->runs_avail_dirty, mapelm);

				arena_mapbits_large_set(chunk, pageind,
				    (npages << LG_PAGE), flag_unzeroed);
				/*
				 * Update internal elements in the page map, so
				 * that CHUNK_MAP_UNZEROED is properly set.
				 */
				for (i = 1; i < npages - 1; i++) {
					arena_mapbits_unzeroed_set(chunk,
					    pageind+i, flag_unzeroed);
				}
				if (npages > 1) {
					arena_mapbits_large_set(chunk,
					    pageind+npages-1, 0, flag_unzeroed);
				}

				if (config_stats) {
					/*
					 * Update stats_cactive if nactive is
					 * crossing a chunk multiple.
					 */
					size_t cactive_diff =
					    CHUNK_CEILING((arena->nactive +
					    npages) << LG_PAGE) -
					    CHUNK_CEILING(arena->nactive <<
					    LG_PAGE);
					if (cactive_diff != 0)
						stats_cactive_add(cactive_diff);
				}
				arena->nactive += npages;
				/* Append to list for later processing. */
				ql_elm_new(mapelm, u.ql_link);
				ql_tail_insert(&mapelms, mapelm, u.ql_link);
			}

			pageind += npages;
		} else {
			/* Skip allocated run. */
			if (arena_mapbits_large_get(chunk, pageind))
				pageind += arena_mapbits_large_size_get(chunk,
				    pageind) >> LG_PAGE;
			else {
				size_t binind;
				arena_bin_info_t *bin_info;
				arena_run_t *run = (arena_run_t *)((uintptr_t)
				    chunk + (uintptr_t)(pageind << LG_PAGE));

				assert(arena_mapbits_small_runind_get(chunk,
				    pageind) == 0);
				binind = arena_bin_index(arena, run->bin);
				bin_info = &arena_bin_info[binind];
				pageind += bin_info->run_size >> LG_PAGE;
			}
		}
	}
	assert(pageind == chunk_npages);

	if (config_debug)
		ndirty = chunk->ndirty;
	if (config_stats)
		arena->stats.purged += chunk->ndirty;
	arena->ndirty -= chunk->ndirty;
	chunk->ndirty = 0;
	ql_remove(&arena->chunks_dirty, chunk, link_dirty);
	chunk->dirtied = false;

	malloc_mutex_unlock(&arena->lock);
	if (config_stats)
		nmadvise = 0;
	ql_foreach(mapelm, &mapelms, u.ql_link) {
		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
		    sizeof(arena_chunk_map_t)) + map_bias;
		size_t npages = arena_mapbits_large_size_get(chunk, pageind) >>
		    LG_PAGE;

		assert(pageind + npages <= chunk_npages);
		assert(ndirty >= npages);
		if (config_debug)
			ndirty -= npages;

		pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)),
		    (npages << LG_PAGE));
		if (config_stats)
			nmadvise++;
	}
	assert(ndirty == 0);
	malloc_mutex_lock(&arena->lock);
	if (config_stats)
		arena->stats.nmadvise += nmadvise;

	/* Deallocate runs. */
	for (mapelm = ql_first(&mapelms); mapelm != NULL;
	    mapelm = ql_first(&mapelms)) {
		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
		    sizeof(arena_chunk_map_t)) + map_bias;
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)(pageind << LG_PAGE));

		ql_remove(&mapelms, mapelm, u.ql_link);
		arena_run_dalloc(arena, run, false);
	}
}

static void
arena_purge(arena_t *arena, bool all)
{
	arena_chunk_t *chunk;
	size_t npurgatory;
	if (config_debug) {
		size_t ndirty = 0;

		ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
		    assert(chunk->dirtied);
		    ndirty += chunk->ndirty;
		}
		assert(ndirty == arena->ndirty);
	}
	assert(arena->ndirty > arena->npurgatory || all);
	assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
	assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
	    arena->npurgatory) || all);

	if (config_stats)
		arena->stats.npurge++;

	/*
	 * Compute the minimum number of pages that this thread should try to
	 * purge, and add the result to arena->npurgatory.  This will keep
	 * multiple threads from racing to reduce ndirty below the threshold.
	 */
	npurgatory = arena->ndirty - arena->npurgatory;
	if (all == false) {
		assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
		npurgatory -= arena->nactive >> opt_lg_dirty_mult;
	}
	arena->npurgatory += npurgatory;

	while (npurgatory > 0) {
		/* Get next chunk with dirty pages. */
		chunk = ql_first(&arena->chunks_dirty);
		if (chunk == NULL) {
			/*
			 * This thread was unable to purge as many pages as
			 * originally intended, due to races with other threads
			 * that either did some of the purging work, or re-used
			 * dirty pages.
			 */
			arena->npurgatory -= npurgatory;
			return;
		}
		while (chunk->ndirty == 0) {
			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
			chunk->dirtied = false;
			chunk = ql_first(&arena->chunks_dirty);
			if (chunk == NULL) {
				/* Same logic as for above. */
				arena->npurgatory -= npurgatory;
				return;
			}
		}

		if (chunk->ndirty > npurgatory) {
			/*
			 * This thread will, at a minimum, purge all the dirty
			 * pages in chunk, so set npurgatory to reflect this
			 * thread's commitment to purge the pages.  This tends
			 * to reduce the chances of the following scenario:
			 *
			 * 1) This thread sets arena->npurgatory such that
			 *    (arena->ndirty - arena->npurgatory) is at the
			 *    threshold.
			 * 2) This thread drops arena->lock.
			 * 3) Another thread causes one or more pages to be
			 *    dirtied, and immediately determines that it must
			 *    purge dirty pages.
			 *
			 * If this scenario *does* play out, that's okay,
			 * because all of the purging work being done really
			 * needs to happen.
			 */
			arena->npurgatory += chunk->ndirty - npurgatory;
			npurgatory = chunk->ndirty;
		}

		arena->npurgatory -= chunk->ndirty;
		npurgatory -= chunk->ndirty;
		arena_chunk_purge(arena, chunk);
	}
}

void
arena_purge_all(arena_t *arena)
{

	malloc_mutex_lock(&arena->lock);
	arena_purge(arena, true);
	malloc_mutex_unlock(&arena->lock);
}

static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
{
	arena_chunk_t *chunk;
	size_t size, run_ind, run_pages, flag_dirty;
	arena_avail_tree_t *runs_avail;

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
	assert(run_ind >= map_bias);
	assert(run_ind < chunk_npages);
	if (arena_mapbits_large_get(chunk, run_ind) != 0) {
		size = arena_mapbits_large_size_get(chunk, run_ind);
		assert(size == PAGE ||
		    arena_mapbits_large_size_get(chunk,
		    run_ind+(size>>LG_PAGE)-1) == 0);
	} else {
		size_t binind = arena_bin_index(arena, run->bin);
		arena_bin_info_t *bin_info = &arena_bin_info[binind];
		size = bin_info->run_size;
	}
	run_pages = (size >> LG_PAGE);
	if (config_stats) {
		/*
		 * Update stats_cactive if nactive is crossing a chunk
		 * multiple.
		 */
		size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) -
		    CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE);
		if (cactive_diff != 0)
			stats_cactive_sub(cactive_diff);
	}
	arena->nactive -= run_pages;

	/*
	 * The run is dirty if the caller claims to have dirtied it, as well as
	 * if it was already dirty before being allocated.
	 */
	if (arena_mapbits_dirty_get(chunk, run_ind) != 0)
		dirty = true;
	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
	runs_avail = dirty ? &arena->runs_avail_dirty :
	    &arena->runs_avail_clean;

	/* Mark pages as unallocated in the chunk map. */
	if (dirty) {
		arena_mapbits_unallocated_set(chunk, run_ind, size,
		    CHUNK_MAP_DIRTY);
		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
		    CHUNK_MAP_DIRTY);

		chunk->ndirty += run_pages;
		arena->ndirty += run_pages;
	} else {
		arena_mapbits_unallocated_set(chunk, run_ind, size,
		    arena_mapbits_unzeroed_get(chunk, run_ind));
		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
		    arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1));
	}

	/* Try to coalesce forward. */
	if (run_ind + run_pages < chunk_npages &&
	    arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 &&
	    arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) {
		size_t nrun_size = arena_mapbits_unallocated_size_get(chunk,
		    run_ind+run_pages);
		size_t nrun_pages = nrun_size >> LG_PAGE;

		/*
		 * Remove successor from runs_avail; the coalesced run is
		 * inserted later.
		 */
		assert(arena_mapbits_unallocated_size_get(chunk,
		    run_ind+run_pages+nrun_pages-1) == nrun_size);
		assert(arena_mapbits_dirty_get(chunk,
		    run_ind+run_pages+nrun_pages-1) == flag_dirty);
		arena_avail_tree_remove(runs_avail,
		    arena_mapp_get(chunk, run_ind+run_pages));

		size += nrun_size;
		run_pages += nrun_pages;

		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
		    size);
	}

	/* Try to coalesce backward. */
	if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1)
	    == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) {
		size_t prun_size = arena_mapbits_unallocated_size_get(chunk,
		    run_ind-1);
		size_t prun_pages = prun_size >> LG_PAGE;

		run_ind -= prun_pages;

		/*
		 * Remove predecessor from runs_avail; the coalesced run is
		 * inserted later.
		 */
		assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
		    prun_size);
		assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
		arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk,
		    run_ind));

		size += prun_size;
		run_pages += prun_pages;

		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
		    size);
	}

	/* Insert into runs_avail, now that coalescing is complete. */
	assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
	    arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
	arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind));

	if (dirty) {
		/*
		 * Insert into chunks_dirty before potentially calling
		 * arena_chunk_dealloc(), so that chunks_dirty and
		 * arena->ndirty are consistent.
		 */
		if (chunk->dirtied == false) {
			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
			chunk->dirtied = true;
		}
	}

	/* Deallocate chunk if it is now completely unused. */
	if (size == arena_maxclass) {
		assert(run_ind == map_bias);
		assert(run_pages == (arena_maxclass >> LG_PAGE));
		assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
		assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
		    arena_maxclass);
		arena_chunk_dealloc(arena, chunk);
	}

	/*
	 * It is okay to do dirty page processing here even if the chunk was
	 * deallocated above, since in that case it is the spare.  Waiting
	 * until after possible chunk deallocation to do dirty processing
	 * allows for an old spare to be fully deallocated, thus decreasing the
	 * chances of spuriously crossing the dirty page purging threshold.
	 */
	if (dirty)
		arena_maybe_purge(arena);
}
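
/*
 * Illustrative note (editorial, not part of the original source):
 * arena_run_dalloc() above coalesces the freed run with unallocated
 * neighbors that share its dirty state.  For example, freeing a 2-page
 * run between a free 3-page successor and a free 1-page predecessor (all
 * with matching dirty flags) yields a single 6-page unallocated run whose
 * first and last map entries both record the 6-page size, which is
 * exactly what the final asserts verify.
 */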

static void
arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
    size_t oldsize, size_t newsize)
{
	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
	size_t head_npages = (oldsize - newsize) >> LG_PAGE;
	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);

	assert(oldsize > newsize);

	/*
	 * Update the chunk map so that arena_run_dalloc() can treat the
	 * leading run as separately allocated.  Set the last element of each
	 * run first, in case of single-page runs.
	 */
	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
	    arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1));
	arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty |
	    arena_mapbits_unzeroed_get(chunk, pageind));

	if (config_debug) {
		UNUSED size_t tail_npages = newsize >> LG_PAGE;
		assert(arena_mapbits_large_size_get(chunk,
		    pageind+head_npages+tail_npages-1) == 0);
		assert(arena_mapbits_dirty_get(chunk,
		    pageind+head_npages+tail_npages-1) == flag_dirty);
	}
	arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty
	    | arena_mapbits_unzeroed_get(chunk, pageind+head_npages));

	arena_run_dalloc(arena, run, false);
}

static void
arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
    size_t oldsize, size_t newsize, bool dirty)
{
	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
	size_t head_npages = newsize >> LG_PAGE;
	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);

	assert(oldsize > newsize);

	/*
	 * Update the chunk map so that arena_run_dalloc() can treat the
	 * trailing run as separately allocated.  Set the last element of each
	 * run first, in case of single-page runs.
	 */
	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
	    arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1));
	arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty |
	    arena_mapbits_unzeroed_get(chunk, pageind));

	if (config_debug) {
		UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
		assert(arena_mapbits_large_size_get(chunk,
		    pageind+head_npages+tail_npages-1) == 0);
		assert(arena_mapbits_dirty_get(chunk,
		    pageind+head_npages+tail_npages-1) == flag_dirty);
	}
	arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
	    flag_dirty | arena_mapbits_unzeroed_get(chunk,
	    pageind+head_npages));

	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
	    dirty);
}

static arena_run_t *
arena_bin_runs_first(arena_bin_t *bin)
{
	arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs);
	if (mapelm != NULL) {
		arena_chunk_t *chunk;
		size_t pageind;
		arena_run_t *run;

		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
		pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
		    sizeof(arena_chunk_map_t))) + map_bias;
		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
		    arena_mapbits_small_runind_get(chunk, pageind)) <<
		    LG_PAGE));
		return (run);
	}

	return (NULL);
}

static void
arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
{
	arena_chunk_t *chunk = CHUNK_ADDR2BASE(run);
	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
	arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);

	assert(arena_run_tree_search(&bin->runs, mapelm) == NULL);

	arena_run_tree_insert(&bin->runs, mapelm);
}

static void
arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
{
	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
	arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);

	assert(arena_run_tree_search(&bin->runs, mapelm) != NULL);

	arena_run_tree_remove(&bin->runs, mapelm);
}

static arena_run_t *
arena_bin_nonfull_run_tryget(arena_bin_t *bin)
{
	arena_run_t *run = arena_bin_runs_first(bin);
	if (run != NULL) {
		arena_bin_runs_remove(bin, run);
		if (config_stats)
			bin->stats.reruns++;
	}
	return (run);
}

static arena_run_t *
arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
	arena_run_t *run;
	size_t binind;
	arena_bin_info_t *bin_info;

	/* Look for a usable run. */
	run = arena_bin_nonfull_run_tryget(bin);
	if (run != NULL)
		return (run);
	/* No existing runs have any space available. */

	binind = arena_bin_index(arena, bin);
	bin_info = &arena_bin_info[binind];

	/* Allocate a new run. */
	malloc_mutex_unlock(&bin->lock);
	/******************************/
	malloc_mutex_lock(&arena->lock);
	run = arena_run_alloc(arena, bin_info->run_size, false, binind, false);
	if (run != NULL) {
		bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
		    (uintptr_t)bin_info->bitmap_offset);

		/* Initialize run internals. */
		VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset -
		    bin_info->redzone_size);
		run->bin = bin;
		run->nextind = 0;
		run->nfree = bin_info->nregs;
		bitmap_init(bitmap, &bin_info->bitmap_info);
	}
	malloc_mutex_unlock(&arena->lock);
	/********************************/
	malloc_mutex_lock(&bin->lock);
	if (run != NULL) {
		if (config_stats) {
			bin->stats.nruns++;
			bin->stats.curruns++;
		}
		return (run);
	}

	/*
	 * arena_run_alloc() failed, but another thread may have made
	 * sufficient memory available while this one dropped bin->lock above,
	 * so search one more time.
	 */
	run = arena_bin_nonfull_run_tryget(bin);
	if (run != NULL)
		return (run);

	return (NULL);
}

/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{
	void *ret;
	size_t binind;
	arena_bin_info_t *bin_info;
	arena_run_t *run;

	binind = arena_bin_index(arena, bin);
	bin_info = &arena_bin_info[binind];
	bin->runcur = NULL;
	run = arena_bin_nonfull_run_get(arena, bin);
	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
		/*
		 * Another thread updated runcur while this one ran without the
		 * bin lock in arena_bin_nonfull_run_get().
		 */
		assert(bin->runcur->nfree > 0);
		ret = arena_run_reg_alloc(bin->runcur, bin_info);
		if (run != NULL) {
			arena_chunk_t *chunk;

			/*
			 * arena_run_alloc() may have allocated run, or it may
			 * have pulled run from the bin's run tree.  Therefore
			 * it is unsafe to make any assumptions about how run
			 * has previously been used, and arena_bin_lower_run()
			 * must be called, as if a region were just deallocated
			 * from the run.
			 */
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
			if (run->nfree == bin_info->nregs)
				arena_dalloc_bin_run(arena, chunk, run, bin);
			else
				arena_bin_lower_run(arena, chunk, run, bin);
		}
		return (ret);
	}

	if (run == NULL)
		return (NULL);

	bin->runcur = run;

	assert(bin->runcur->nfree > 0);

	return (arena_run_reg_alloc(bin->runcur, bin_info));
}

void
arena_prof_accum(arena_t *arena, uint64_t accumbytes)
{

	cassert(config_prof);

	if (config_prof && prof_interval != 0) {
		arena->prof_accumbytes += accumbytes;
		if (arena->prof_accumbytes >= prof_interval) {
			prof_idump();
			arena->prof_accumbytes -= prof_interval;
		}
	}
}

void
arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
    uint64_t prof_accumbytes)
{
	unsigned i, nfill;
	arena_bin_t *bin;
	arena_run_t *run;
	void *ptr;

	assert(tbin->ncached == 0);

	if (config_prof) {
		malloc_mutex_lock(&arena->lock);
		arena_prof_accum(arena, prof_accumbytes);
		malloc_mutex_unlock(&arena->lock);
	}
	bin = &arena->bins[binind];
	malloc_mutex_lock(&bin->lock);
	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
	    tbin->lg_fill_div); i < nfill; i++) {
		if ((run = bin->runcur) != NULL && run->nfree > 0)
			ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
		else
			ptr = arena_bin_malloc_hard(arena, bin);
		if (ptr == NULL)
			break;
		if (config_fill && opt_junk) {
			arena_alloc_junk_small(ptr, &arena_bin_info[binind],
			    true);
		}
		/* Insert such that low regions get used first. */
		tbin->avail[nfill - 1 - i] = ptr;
	}
	if (config_stats) {
		bin->stats.allocated += i * arena_bin_info[binind].reg_size;
		bin->stats.nmalloc += i;
		bin->stats.nrequests += tbin->tstats.nrequests;
		bin->stats.nfills++;
		tbin->tstats.nrequests = 0;
	}
	malloc_mutex_unlock(&bin->lock);
	tbin->ncached = i;
}

void
arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
{

	if (zero) {
		size_t redzone_size = bin_info->redzone_size;
		memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
		    redzone_size);
		memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
		    redzone_size);
	} else {
		memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
		    bin_info->reg_interval);
	}
}

void
arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
{
	size_t size = bin_info->reg_size;
	size_t redzone_size = bin_info->redzone_size;
	size_t i;
	bool error = false;

	for (i = 1; i <= redzone_size; i++) {
		unsigned byte;
		if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) {
			error = true;
			malloc_printf("<jemalloc>: Corrupt redzone "
			    "%zu byte%s before %p (size %zu), byte=%#x\n", i,
			    (i == 1) ? "" : "s", ptr, size, byte);
		}
	}
	for (i = 0; i < redzone_size; i++) {
		unsigned byte;
		if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) {
			error = true;
			malloc_printf("<jemalloc>: Corrupt redzone "
			    "%zu byte%s after end of %p (size %zu), byte=%#x\n",
			    i, (i == 1) ? "" : "s", ptr, size, byte);
		}
	}
	if (opt_abort && error)
		abort();

	memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
	    bin_info->reg_interval);
}
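
/*
 * Illustrative note (editorial, not part of the original source): the two
 * junk routines above imply the small-region layout
 *
 *	[redzone_size][reg_size][redzone_size/padding up to reg_interval]
 *
 * with the user pointer at the start of reg_size.  Allocation fills the
 * redzones (or, when not zeroing, the whole interval) with 0xa5;
 * deallocation verifies the 0xa5 pattern in both redzones and then fills
 * the entire interval with 0x5a.
 */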

void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
	void *ret;
	arena_bin_t *bin;
	arena_run_t *run;
	size_t binind;

	binind = SMALL_SIZE2BIN(size);
	assert(binind < NBINS);
	bin = &arena->bins[binind];
	size = arena_bin_info[binind].reg_size;

	malloc_mutex_lock(&bin->lock);
	if ((run = bin->runcur) != NULL && run->nfree > 0)
		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
	else
		ret = arena_bin_malloc_hard(arena, bin);

	if (ret == NULL) {
		malloc_mutex_unlock(&bin->lock);
		return (NULL);
	}

	if (config_stats) {
		bin->stats.allocated += size;
		bin->stats.nmalloc++;
		bin->stats.nrequests++;
	}
	malloc_mutex_unlock(&bin->lock);
	if (config_prof && isthreaded == false) {
		malloc_mutex_lock(&arena->lock);
		arena_prof_accum(arena, size);
		malloc_mutex_unlock(&arena->lock);
	}

	if (zero == false) {
		if (config_fill) {
			if (opt_junk) {
				arena_alloc_junk_small(ret,
				    &arena_bin_info[binind], false);
			} else if (opt_zero)
				memset(ret, 0, size);
		}
	} else {
		if (config_fill && opt_junk) {
			arena_alloc_junk_small(ret, &arena_bin_info[binind],
			    true);
		}
		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
		memset(ret, 0, size);
	}

	return (ret);
}

void *
arena_malloc_large(arena_t *arena, size_t size, bool zero)
{
	void *ret;

	/* Large allocation. */
	size = PAGE_CEILING(size);
	malloc_mutex_lock(&arena->lock);
	ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero);
	if (ret == NULL) {
		malloc_mutex_unlock(&arena->lock);
		return (NULL);
	}
	if (config_stats) {
		arena->stats.nmalloc_large++;
		arena->stats.nrequests_large++;
		arena->stats.allocated_large += size;
		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
	}
	if (config_prof)
		arena_prof_accum(arena, size);
	malloc_mutex_unlock(&arena->lock);

	if (zero == false) {
		if (config_fill) {
			if (opt_junk)
				memset(ret, 0xa5, size);
			else if (opt_zero)
				memset(ret, 0, size);
		}
	}

	return (ret);
}

/* Only handles large allocations that require more than page alignment. */
void *
arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
{
	void *ret;
	size_t alloc_size, leadsize, trailsize;
	arena_run_t *run;
	arena_chunk_t *chunk;

	assert((size & PAGE_MASK) == 0);

	alignment = PAGE_CEILING(alignment);
	alloc_size = size + alignment - PAGE;

	malloc_mutex_lock(&arena->lock);
	run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero);
	if (run == NULL) {
		malloc_mutex_unlock(&arena->lock);
		return (NULL);
	}
	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);

	leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) -
	    (uintptr_t)run;
	assert(alloc_size >= leadsize + size);
	trailsize = alloc_size - leadsize - size;
	ret = (void *)((uintptr_t)run + leadsize);
	if (leadsize != 0) {
		arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size -
		    leadsize);
	}
	if (trailsize != 0) {
		arena_run_trim_tail(arena, chunk, ret, size + trailsize, size,
		    false);
	}

	if (config_stats) {
		arena->stats.nmalloc_large++;
		arena->stats.nrequests_large++;
		arena->stats.allocated_large += size;
		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
	}
	malloc_mutex_unlock(&arena->lock);

	if (config_fill && zero == false) {
		if (opt_junk)
			memset(ret, 0xa5, size);
		else if (opt_zero)
			memset(ret, 0, size);
	}
	return (ret);
}
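
/*
 * Illustrative note (editorial, not part of the original source):
 * arena_palloc() above over-allocates by (alignment - PAGE) and trims.
 * For example, assuming a 16 KiB request with 16 KiB alignment and 4 KiB
 * pages, it allocates a 28 KiB run, rounds the run address up to the next
 * 16 KiB boundary for ret, and returns the leading and trailing
 * remainders via arena_run_trim_head() and arena_run_trim_tail()
 * respectively.
 */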
1449
1450void
1451arena_prof_promoted(const void *ptr, size_t size)
1452{
1453	arena_chunk_t *chunk;
1454	size_t pageind, binind;
1455
1456	cassert(config_prof);
1457	assert(ptr != NULL);
1458	assert(CHUNK_ADDR2BASE(ptr) != ptr);
1459	assert(isalloc(ptr, false) == PAGE);
1460	assert(isalloc(ptr, true) == PAGE);
1461	assert(size <= SMALL_MAXCLASS);
1462
1463	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
1464	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1465	binind = SMALL_SIZE2BIN(size);
1466	assert(binind < NBINS);
1467	arena_mapbits_large_binind_set(chunk, pageind, binind);
1468
1469	assert(isalloc(ptr, false) == PAGE);
1470	assert(isalloc(ptr, true) == size);
1471}
1472
1473static void
1474arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
1475    arena_bin_t *bin)
1476{
1477
1478	/* Dissociate run from bin. */
1479	if (run == bin->runcur)
1480		bin->runcur = NULL;
1481	else {
1482		size_t binind = arena_bin_index(chunk->arena, bin);
1483		arena_bin_info_t *bin_info = &arena_bin_info[binind];
1484
1485		if (bin_info->nregs != 1) {
1486			/*
1487			 * This block's conditional is necessary because if the
1488			 * run only contains one region, then it never gets
1489			 * inserted into the non-full runs tree.
1490			 */
1491			arena_bin_runs_remove(bin, run);
1492		}
1493	}
1494}
1495
1496static void
1497arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1498    arena_bin_t *bin)
1499{
1500	size_t binind;
1501	arena_bin_info_t *bin_info;
1502	size_t npages, run_ind, past;
1503
1504	assert(run != bin->runcur);
1505	assert(arena_run_tree_search(&bin->runs,
1506	    arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE))
1507	    == NULL);
1508
1509	binind = arena_bin_index(chunk->arena, run->bin);
1510	bin_info = &arena_bin_info[binind];
1511
1512	malloc_mutex_unlock(&bin->lock);
1513	/******************************/
1514	npages = bin_info->run_size >> LG_PAGE;
1515	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
1516	past = (size_t)(PAGE_CEILING((uintptr_t)run +
1517	    (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
1518	    bin_info->reg_interval - bin_info->redzone_size) -
1519	    (uintptr_t)chunk) >> LG_PAGE);
1520	malloc_mutex_lock(&arena->lock);
1521
1522	/*
1523	 * If the run was originally clean, and some pages were never touched,
1524	 * trim the clean pages before deallocating the dirty portion of the
1525	 * run.
1526	 */
1527	if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind <
1528	    npages) {
1529		/*
1530		 * Trim clean pages.  Convert to large run beforehand.  Set the
1531		 * last map element first, in case this is a one-page run.
1532		 */
1533		arena_mapbits_large_set(chunk, run_ind+npages-1, 0,
1534		    arena_mapbits_unzeroed_get(chunk, run_ind+npages-1));
1535		arena_mapbits_large_set(chunk, run_ind, bin_info->run_size,
1536		    arena_mapbits_unzeroed_get(chunk, run_ind));
1537		arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE),
1538		    ((past - run_ind) << LG_PAGE), false);
1539		/* npages = past - run_ind; */
1540	}
1541	arena_run_dalloc(arena, run, true);
1542	malloc_mutex_unlock(&arena->lock);
1543	/****************************/
1544	malloc_mutex_lock(&bin->lock);
1545	if (config_stats)
1546		bin->stats.curruns--;
1547}
1548
1549static void
1550arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1551    arena_bin_t *bin)
1552{
1553
1554	/*
1555	 * Make sure that if bin->runcur is non-NULL, it refers to the lowest
1556	 * non-full run.  It is okay to NULL runcur out rather than proactively
1557	 * keeping it pointing at the lowest non-full run.
1558	 */
1559	if ((uintptr_t)run < (uintptr_t)bin->runcur) {
1560		/* Switch runcur. */
1561		if (bin->runcur->nfree > 0)
1562			arena_bin_runs_insert(bin, bin->runcur);
1563		bin->runcur = run;
1564		if (config_stats)
1565			bin->stats.reruns++;
1566	} else
1567		arena_bin_runs_insert(bin, run);
1568}
1569
1570void
1571arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1572    arena_chunk_map_t *mapelm)
1573{
1574	size_t pageind;
1575	arena_run_t *run;
1576	arena_bin_t *bin;
1577	arena_bin_info_t *bin_info;
1578	size_t size, binind;
1579
1580	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1581	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
1582	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
1583	bin = run->bin;
1584	binind = arena_ptr_small_binind_get(ptr, mapelm->bits);
1585	bin_info = &arena_bin_info[binind];
1586	if (config_fill || config_stats)
1587		size = bin_info->reg_size;
1588
1589	if (config_fill && opt_junk)
1590		arena_dalloc_junk_small(ptr, bin_info);
1591
1592	arena_run_reg_dalloc(run, ptr);
1593	if (run->nfree == bin_info->nregs) {
1594		arena_dissociate_bin_run(chunk, run, bin);
1595		arena_dalloc_bin_run(arena, chunk, run, bin);
1596	} else if (run->nfree == 1 && run != bin->runcur)
1597		arena_bin_lower_run(arena, chunk, run, bin);
1598
1599	if (config_stats) {
1600		bin->stats.allocated -= size;
1601		bin->stats.ndalloc++;
1602	}
1603}
1604
1605void
1606arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1607    size_t pageind, arena_chunk_map_t *mapelm)
1608{
1609	arena_run_t *run;
1610	arena_bin_t *bin;
1611
1612	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
1613	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
1614	bin = run->bin;
1615	malloc_mutex_lock(&bin->lock);
1616	arena_dalloc_bin_locked(arena, chunk, ptr, mapelm);
1617	malloc_mutex_unlock(&bin->lock);
1618}
1619
1620void
1621arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1622    size_t pageind)
1623{
1624	arena_chunk_map_t *mapelm;
1625
1626	if (config_debug) {
1627		/* arena_ptr_small_binind_get() does extra sanity checking. */
1628		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
1629		    pageind)) != BININD_INVALID);
1630	}
1631	mapelm = arena_mapp_get(chunk, pageind);
1632	arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm);
1633}
1634void
1635arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
1636    arena_stats_t *astats, malloc_bin_stats_t *bstats,
1637    malloc_large_stats_t *lstats)
1638{
1639	unsigned i;
1640
1641	malloc_mutex_lock(&arena->lock);
1642	*nactive += arena->nactive;
1643	*ndirty += arena->ndirty;
1644
1645	astats->mapped += arena->stats.mapped;
1646	astats->npurge += arena->stats.npurge;
1647	astats->nmadvise += arena->stats.nmadvise;
1648	astats->purged += arena->stats.purged;
1649	astats->allocated_large += arena->stats.allocated_large;
1650	astats->nmalloc_large += arena->stats.nmalloc_large;
1651	astats->ndalloc_large += arena->stats.ndalloc_large;
1652	astats->nrequests_large += arena->stats.nrequests_large;
1653
1654	for (i = 0; i < nlclasses; i++) {
1655		lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
1656		lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
1657		lstats[i].nrequests += arena->stats.lstats[i].nrequests;
1658		lstats[i].curruns += arena->stats.lstats[i].curruns;
1659	}
1660	malloc_mutex_unlock(&arena->lock);
1661
1662	for (i = 0; i < NBINS; i++) {
1663		arena_bin_t *bin = &arena->bins[i];
1664
1665		malloc_mutex_lock(&bin->lock);
1666		bstats[i].allocated += bin->stats.allocated;
1667		bstats[i].nmalloc += bin->stats.nmalloc;
1668		bstats[i].ndalloc += bin->stats.ndalloc;
1669		bstats[i].nrequests += bin->stats.nrequests;
1670		if (config_tcache) {
1671			bstats[i].nfills += bin->stats.nfills;
1672			bstats[i].nflushes += bin->stats.nflushes;
1673		}
1674		bstats[i].nruns += bin->stats.nruns;
1675		bstats[i].reruns += bin->stats.reruns;
1676		bstats[i].curruns += bin->stats.curruns;
1677		malloc_mutex_unlock(&bin->lock);
1678	}
1679}
1680
1681void
1682arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
1683{
1684
1685	if (config_fill || config_stats) {
1686		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1687		size_t size = arena_mapbits_large_size_get(chunk, pageind);
1688
1689		if (config_fill && config_stats && opt_junk)
1690			memset(ptr, 0x5a, size);
1691		if (config_stats) {
1692			arena->stats.ndalloc_large++;
1693			arena->stats.allocated_large -= size;
1694			arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++;
1695			arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--;
1696		}
1697	}
1698
1699	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
1700}
1701
1702void
1703arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
1704{
1705
1706	malloc_mutex_lock(&arena->lock);
1707	arena_dalloc_large_locked(arena, chunk, ptr);
1708	malloc_mutex_unlock(&arena->lock);
1709}
1710
1711static void
1712arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1713    size_t oldsize, size_t size)
1714{
1715
1716	assert(size < oldsize);
1717
1718	/*
1719	 * Shrink the run, and make trailing pages available for other
1720	 * allocations.
1721	 */
1722	malloc_mutex_lock(&arena->lock);
1723	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
1724	    true);
1725	if (config_stats) {
1726		arena->stats.ndalloc_large++;
1727		arena->stats.allocated_large -= oldsize;
1728		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
1729		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
1730
1731		arena->stats.nmalloc_large++;
1732		arena->stats.nrequests_large++;
1733		arena->stats.allocated_large += size;
1734		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1735		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1736		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1737	}
1738	malloc_mutex_unlock(&arena->lock);
1739}
1740
1741static bool
1742arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1743    size_t oldsize, size_t size, size_t extra, bool zero)
1744{
1745	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1746	size_t npages = oldsize >> LG_PAGE;
1747	size_t followsize;
1748
1749	assert(oldsize == arena_mapbits_large_size_get(chunk, pageind));
1750
1751	/* Try to extend the run. */
1752	assert(size + extra > oldsize);
1753	malloc_mutex_lock(&arena->lock);
1754	if (pageind + npages < chunk_npages &&
1755	    arena_mapbits_allocated_get(chunk, pageind+npages) == 0 &&
1756	    (followsize = arena_mapbits_unallocated_size_get(chunk,
1757	    pageind+npages)) >= size - oldsize) {
1758		/*
1759		 * The next run is available and sufficiently large.  Split the
1760		 * following run, then merge the first part with the existing
1761		 * allocation.
1762		 */
1763		size_t flag_dirty;
1764		size_t splitsize = (oldsize + followsize <= size + extra)
1765		    ? followsize : size + extra - oldsize;
1766		arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
1767		    ((pageind+npages) << LG_PAGE)), splitsize, true,
1768		    BININD_INVALID, zero);
1769
1770		size = oldsize + splitsize;
1771		npages = size >> LG_PAGE;
1772
1773		/*
1774		 * Mark the extended run as dirty if either portion of the run
1775		 * was dirty before allocation.  This is rather pedantic,
1776		 * because there's not actually any sequence of events that
1777		 * could cause the resulting run to be passed to
1778		 * arena_run_dalloc() with the dirty argument set to false
1779		 * (which is when dirty flag consistency would really matter).
1780		 */
1781		flag_dirty = arena_mapbits_dirty_get(chunk, pageind) |
1782		    arena_mapbits_dirty_get(chunk, pageind+npages-1);
1783		arena_mapbits_large_set(chunk, pageind, size, flag_dirty);
1784		arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty);
1785
1786		if (config_stats) {
1787			arena->stats.ndalloc_large++;
1788			arena->stats.allocated_large -= oldsize;
1789			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
1790			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
1791
1792			arena->stats.nmalloc_large++;
1793			arena->stats.nrequests_large++;
1794			arena->stats.allocated_large += size;
1795			arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1796			arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1797			arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1798		}
1799		malloc_mutex_unlock(&arena->lock);
1800		return (false);
1801	}
1802	malloc_mutex_unlock(&arena->lock);
1803
1804	return (true);
1805}
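
/*
 * Illustrative sketch (not part of jemalloc): how arena_ralloc_large_grow()
 * sizes the split taken from the following free run.  If absorbing the whole
 * free run still leaves the allocation within size + extra, the entire run is
 * consumed; otherwise only enough bytes to reach size + extra are split off.
 * The function name is hypothetical; the arithmetic mirrors the splitsize
 * computation above.
 */
static size_t
grow_splitsize_sketch(size_t oldsize, size_t followsize, size_t size,
    size_t extra)
{

	/* Caller has verified that the following free run covers the growth. */
	assert(oldsize + followsize >= size);
	return ((oldsize + followsize <= size + extra) ? followsize :
	    size + extra - oldsize);
}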
1806
1807/*
1808 * Try to resize a large allocation, in order to avoid copying.  This will
1809 * always fail when growing an object if the following run is already in use.
1810 */
1811static bool
1812arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
1813    bool zero)
1814{
1815	size_t psize;
1816
1817	psize = PAGE_CEILING(size + extra);
1818	if (psize == oldsize) {
1819		/* Same size class. */
1820		if (config_fill && opt_junk && size < oldsize) {
1821			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
1822			    size);
1823		}
1824		return (false);
1825	} else {
1826		arena_chunk_t *chunk;
1827		arena_t *arena;
1828
1829		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
1830		arena = chunk->arena;
1831
1832		if (psize < oldsize) {
1833			/* Fill before shrinking in order to avoid a race. */
1834			if (config_fill && opt_junk) {
1835				memset((void *)((uintptr_t)ptr + size), 0x5a,
1836				    oldsize - size);
1837			}
1838			arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
1839			    psize);
1840			return (false);
1841		} else {
1842			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
1843			    oldsize, PAGE_CEILING(size),
1844			    psize - PAGE_CEILING(size), zero);
1845			if (config_fill && ret == false && zero == false &&
1846			    opt_zero) {
1847				memset((void *)((uintptr_t)ptr + oldsize), 0,
1848				    size - oldsize);
1849			}
1850			return (ret);
1851		}
1852	}
1853}
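
/*
 * Illustrative sketch (not part of jemalloc): the page-granular decision made
 * by arena_ralloc_large() above.  PAGE_CEILING(size + extra) is compared with
 * the old run size to choose between leaving the run as is, trimming its
 * tail, or attempting to extend it in place.  The enum and function names are
 * hypothetical.
 */
typedef enum {
	RALLOC_SKETCH_SAME,	/* Rounded request matches the current run. */
	RALLOC_SKETCH_SHRINK,	/* Trim trailing pages from the run. */
	RALLOC_SKETCH_GROW	/* Try to extend into the following run. */
} ralloc_sketch_action_t;

static ralloc_sketch_action_t
ralloc_large_action_sketch(size_t oldsize, size_t size, size_t extra)
{
	size_t psize = PAGE_CEILING(size + extra);

	if (psize == oldsize)
		return (RALLOC_SKETCH_SAME);
	return ((psize < oldsize) ? RALLOC_SKETCH_SHRINK : RALLOC_SKETCH_GROW);
}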
1854
1855void *
1856arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
1857    bool zero)
1858{
1859
1860	/*
1861	 * Avoid moving the allocation if the size class can be left the same.
1862	 */
1863	if (oldsize <= arena_maxclass) {
1864		if (oldsize <= SMALL_MAXCLASS) {
1865			assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
1866			    == oldsize);
1867			if ((size + extra <= SMALL_MAXCLASS &&
1868			    SMALL_SIZE2BIN(size + extra) ==
1869			    SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
1870			    size + extra >= oldsize)) {
1871				if (config_fill && opt_junk && size < oldsize) {
1872					memset((void *)((uintptr_t)ptr + size),
1873					    0x5a, oldsize - size);
1874				}
1875				return (ptr);
1876			}
1877		} else {
1878			assert(size <= arena_maxclass);
1879			if (size + extra > SMALL_MAXCLASS) {
1880				if (arena_ralloc_large(ptr, oldsize, size,
1881				    extra, zero) == false)
1882					return (ptr);
1883			}
1884		}
1885	}
1886
1887	/* Reallocation would require a move. */
1888	return (NULL);
1889}
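
/*
 * Illustrative sketch (not part of jemalloc): the small-size test applied by
 * arena_ralloc_no_move() above.  An in-place reallocation is acceptable when
 * size + extra still maps to the old bin, or when the request shrinks but
 * oldsize remains within [size, size + extra].  The function name is
 * hypothetical; SMALL_SIZE2BIN() and SMALL_MAXCLASS are the lookup macro and
 * constant used above, and the caller is assumed to have checked that oldsize
 * is itself a small size class.
 */
static bool
small_ralloc_fits_sketch(size_t oldsize, size_t size, size_t extra)
{

	if (size + extra <= SMALL_MAXCLASS &&
	    SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize))
		return (true);
	return (size <= oldsize && size + extra >= oldsize);
}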
1890
1891void *
1892arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
1893    size_t alignment, bool zero, bool try_tcache)
1894{
1895	void *ret;
1896	size_t copysize;
1897
1898	/* Try to avoid moving the allocation. */
1899	ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero);
1900	if (ret != NULL)
1901		return (ret);
1902
1903	/*
1904	 * size and oldsize are different enough that we need to move the
1905	 * object.  In that case, fall back to allocating new space and
1906	 * copying.
1907	 */
1908	if (alignment != 0) {
1909		size_t usize = sa2u(size + extra, alignment);
1910		if (usize == 0)
1911			return (NULL);
1912		ret = ipalloc(usize, alignment, zero);
1913	} else
1914		ret = arena_malloc(NULL, size + extra, zero, try_tcache);
1915
1916	if (ret == NULL) {
1917		if (extra == 0)
1918			return (NULL);
1919		/* Try again, this time without extra. */
1920		if (alignment != 0) {
1921			size_t usize = sa2u(size, alignment);
1922			if (usize == 0)
1923				return (NULL);
1924			ret = ipalloc(usize, alignment, zero);
1925		} else
1926			ret = arena_malloc(NULL, size, zero, try_tcache);
1927
1928		if (ret == NULL)
1929			return (NULL);
1930	}
1931
1932	/* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
1933
1934	/*
1935	 * Copy at most size bytes (not size+extra), since the caller has no
1936	 * expectation that the extra bytes will be reliably preserved.
1937	 */
1938	copysize = (size < oldsize) ? size : oldsize;
1939	VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
1940	memcpy(ret, ptr, copysize);
1941	iqalloc(ptr);
1942	return (ret);
1943}
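
/*
 * Illustrative sketch (not part of jemalloc): the allocate-with-fallback
 * pattern used by arena_ralloc() above.  The first attempt requests
 * size + extra; if that fails and extra was nonzero, a second attempt
 * requests size alone before giving up.  alloc_fn and ctx are hypothetical
 * stand-ins for the ipalloc()/arena_malloc() calls above.
 */
static void *
alloc_with_extra_sketch(void *(*alloc_fn)(size_t size, void *ctx), void *ctx,
    size_t size, size_t extra)
{
	void *ret;

	ret = alloc_fn(size + extra, ctx);
	if (ret == NULL && extra != 0)
		ret = alloc_fn(size, ctx);
	return (ret);
}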
1944
1945bool
1946arena_new(arena_t *arena, unsigned ind)
1947{
1948	unsigned i;
1949	arena_bin_t *bin;
1950
1951	arena->ind = ind;
1952	arena->nthreads = 0;
1953
1954	if (malloc_mutex_init(&arena->lock))
1955		return (true);
1956
1957	if (config_stats) {
1958		memset(&arena->stats, 0, sizeof(arena_stats_t));
1959		arena->stats.lstats =
1960		    (malloc_large_stats_t *)base_alloc(nlclasses *
1961		    sizeof(malloc_large_stats_t));
1962		if (arena->stats.lstats == NULL)
1963			return (true);
1964		memset(arena->stats.lstats, 0, nlclasses *
1965		    sizeof(malloc_large_stats_t));
1966		if (config_tcache)
1967			ql_new(&arena->tcache_ql);
1968	}
1969
1970	if (config_prof)
1971		arena->prof_accumbytes = 0;
1972
1973	/* Initialize chunks. */
1974	ql_new(&arena->chunks_dirty);
1975	arena->spare = NULL;
1976
1977	arena->nactive = 0;
1978	arena->ndirty = 0;
1979	arena->npurgatory = 0;
1980
1981	arena_avail_tree_new(&arena->runs_avail_clean);
1982	arena_avail_tree_new(&arena->runs_avail_dirty);
1983
1984	/* Initialize bins. */
1985	for (i = 0; i < NBINS; i++) {
1986		bin = &arena->bins[i];
1987		if (malloc_mutex_init(&bin->lock))
1988			return (true);
1989		bin->runcur = NULL;
1990		arena_run_tree_new(&bin->runs);
1991		if (config_stats)
1992			memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
1993	}
1994
1995	return (false);
1996}
1997
1998/*
1999 * Calculate bin_info->run_size such that it meets the following constraints:
2000 *
2001 *   *) bin_info->run_size >= min_run_size
2002 *   *) bin_info->run_size <= arena_maxclass
2003 *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2004 *   *) bin_info->nregs <= RUN_MAXREGS
2005 *
2006 * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
2007 * calculated here, since these settings are all interdependent.
2008 */
2009static size_t
2010bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
2011{
2012	size_t pad_size;
2013	size_t try_run_size, good_run_size;
2014	uint32_t try_nregs, good_nregs;
2015	uint32_t try_hdr_size, good_hdr_size;
2016	uint32_t try_bitmap_offset, good_bitmap_offset;
2017	uint32_t try_ctx0_offset, good_ctx0_offset;
2018	uint32_t try_redzone0_offset, good_redzone0_offset;
2019
2020	assert(min_run_size >= PAGE);
2021	assert(min_run_size <= arena_maxclass);
2022
2023	/*
2024	 * Determine redzone size based on minimum alignment and minimum
2025	 * redzone size.  Add padding to the end of the run if it is needed to
2026	 * align the regions.  The padding allows each redzone to be half the
2027	 * minimum alignment; without the padding, each redzone would have to
2028	 * be twice as large in order to maintain alignment.
2029	 */
2030	if (config_fill && opt_redzone) {
2031		size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1);
2032		if (align_min <= REDZONE_MINSIZE) {
2033			bin_info->redzone_size = REDZONE_MINSIZE;
2034			pad_size = 0;
2035		} else {
2036			bin_info->redzone_size = align_min >> 1;
2037			pad_size = bin_info->redzone_size;
2038		}
2039	} else {
2040		bin_info->redzone_size = 0;
2041		pad_size = 0;
2042	}
2043	bin_info->reg_interval = bin_info->reg_size +
2044	    (bin_info->redzone_size << 1);
2045
2046	/*
2047	 * Calculate known-valid settings before entering the run_size
2048	 * expansion loop, so that the first part of the loop always copies
2049	 * valid settings.
2050	 *
2051	 * The do..while loop iteratively reduces the number of regions until
2052	 * the run header and the regions no longer overlap.  A closed formula
2053	 * would be quite messy, since there is an interdependency between the
2054	 * header's mask length and the number of regions.
2055	 */
2056	try_run_size = min_run_size;
2057	try_nregs = ((try_run_size - sizeof(arena_run_t)) /
2058	    bin_info->reg_interval)
2059	    + 1; /* Counter-act try_nregs-- in loop. */
2060	if (try_nregs > RUN_MAXREGS) {
2061		try_nregs = RUN_MAXREGS
2062		    + 1; /* Counter-act try_nregs-- in loop. */
2063	}
2064	do {
2065		try_nregs--;
2066		try_hdr_size = sizeof(arena_run_t);
2067		/* Pad to a long boundary. */
2068		try_hdr_size = LONG_CEILING(try_hdr_size);
2069		try_bitmap_offset = try_hdr_size;
2070		/* Add space for bitmap. */
2071		try_hdr_size += bitmap_size(try_nregs);
2072		if (config_prof && opt_prof && prof_promote == false) {
2073			/* Pad to a quantum boundary. */
2074			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
2075			try_ctx0_offset = try_hdr_size;
2076			/* Add space for one (prof_ctx_t *) per region. */
2077			try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
2078		} else
2079			try_ctx0_offset = 0;
2080		try_redzone0_offset = try_run_size - (try_nregs *
2081		    bin_info->reg_interval) - pad_size;
2082	} while (try_hdr_size > try_redzone0_offset);
2083
2084	/* run_size expansion loop. */
2085	do {
2086		/*
2087		 * Copy valid settings before trying more aggressive settings.
2088		 */
2089		good_run_size = try_run_size;
2090		good_nregs = try_nregs;
2091		good_hdr_size = try_hdr_size;
2092		good_bitmap_offset = try_bitmap_offset;
2093		good_ctx0_offset = try_ctx0_offset;
2094		good_redzone0_offset = try_redzone0_offset;
2095
2096		/* Try more aggressive settings. */
2097		try_run_size += PAGE;
2098		try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) /
2099		    bin_info->reg_interval)
2100		    + 1; /* Counter-act try_nregs-- in loop. */
2101		if (try_nregs > RUN_MAXREGS) {
2102			try_nregs = RUN_MAXREGS
2103			    + 1; /* Counter-act try_nregs-- in loop. */
2104		}
2105		do {
2106			try_nregs--;
2107			try_hdr_size = sizeof(arena_run_t);
2108			/* Pad to a long boundary. */
2109			try_hdr_size = LONG_CEILING(try_hdr_size);
2110			try_bitmap_offset = try_hdr_size;
2111			/* Add space for bitmap. */
2112			try_hdr_size += bitmap_size(try_nregs);
2113			if (config_prof && opt_prof && prof_promote == false) {
2114				/* Pad to a quantum boundary. */
2115				try_hdr_size = QUANTUM_CEILING(try_hdr_size);
2116				try_ctx0_offset = try_hdr_size;
2117				/*
2118				 * Add space for one (prof_ctx_t *) per region.
2119				 */
2120				try_hdr_size += try_nregs *
2121				    sizeof(prof_ctx_t *);
2122			}
2123			try_redzone0_offset = try_run_size - (try_nregs *
2124			    bin_info->reg_interval) - pad_size;
2125		} while (try_hdr_size > try_redzone0_offset);
2126	} while (try_run_size <= arena_maxclass
2128	    && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) >
2129	    RUN_MAX_OVRHD_RELAX
2130	    && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
2131	    && try_nregs < RUN_MAXREGS);
2132
2133	assert(good_hdr_size <= good_redzone0_offset);
2134
2135	/* Copy final settings. */
2136	bin_info->run_size = good_run_size;
2137	bin_info->nregs = good_nregs;
2138	bin_info->bitmap_offset = good_bitmap_offset;
2139	bin_info->ctx0_offset = good_ctx0_offset;
2140	bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
2141
2142	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
2143	    * bin_info->reg_interval) + pad_size == bin_info->run_size);
2144
2145	return (good_run_size);
2146}
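
/*
 * Illustrative sketches (not part of jemalloc) for two pieces of the
 * calculation above.  The first mirrors the redzone sizing rule: the redzone
 * is REDZONE_MINSIZE when the region's natural alignment is no larger than
 * that minimum; otherwise each redzone is half the natural alignment and the
 * run gains one redzone's worth of trailing padding.  The second mirrors the
 * fixed-point overhead test that drives the run_size expansion loop: growth
 * continues while the space preceding the first region exceeds
 * RUN_MAX_OVRHD/2^RUN_BFP of the run.  The struct and function names are
 * hypothetical.
 */
typedef struct {
	size_t redzone_size;
	size_t pad_size;
} redzone_layout_sketch_t;

static redzone_layout_sketch_t
redzone_layout_sketch(size_t reg_size)
{
	redzone_layout_sketch_t r;
	size_t align_min = ZU(1) << (ffs((int)reg_size) - 1);

	if (align_min <= REDZONE_MINSIZE) {
		r.redzone_size = REDZONE_MINSIZE;
		r.pad_size = 0;
	} else {
		r.redzone_size = align_min >> 1;
		r.pad_size = r.redzone_size;
	}
	return (r);
}

static bool
run_overhead_excessive_sketch(size_t redzone0_offset, size_t run_size)
{

	return ((redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * run_size);
}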
2147
2148static void
2149bin_info_init(void)
2150{
2151	arena_bin_info_t *bin_info;
2152	size_t prev_run_size = PAGE;
2153
2154#define	SIZE_CLASS(bin, delta, size)					\
2155	bin_info = &arena_bin_info[bin];				\
2156	bin_info->reg_size = size;					\
2157	prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
2158	bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
2159	SIZE_CLASSES
2160#undef SIZE_CLASS
2161}
2162
2163void
2164arena_boot(void)
2165{
2166	size_t header_size;
2167	unsigned i;
2168
2169	/*
2170	 * Compute the header size such that it is large enough to contain the
2171	 * page map.  The page map is biased to omit entries for the header
2172	 * itself, so some iteration is necessary to compute the map bias.
2173	 *
2174	 * 1) Compute safe header_size and map_bias values that include enough
2175	 *    space for an unbiased page map.
2176	 * 2) Refine map_bias based on (1) to omit the header pages in the page
2177	 *    map.  The resulting map_bias may be one too small.
2178	 * 3) Refine map_bias based on (2).  The result will be >= the result
2179	 *    from (2), and will always be correct.
2180	 */
2181	map_bias = 0;
2182	for (i = 0; i < 3; i++) {
2183		header_size = offsetof(arena_chunk_t, map) +
2184		    (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
2185		map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK)
2186		    != 0);
2187	}
2188	assert(map_bias > 0);
2189
2190	arena_maxclass = chunksize - (map_bias << LG_PAGE);
2191
2192	bin_info_init();
2193}
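
/*
 * Illustrative sketch (not part of jemalloc): the map_bias fixed point
 * computed in arena_boot() above.  Starting from a bias of zero, each pass
 * sizes a header holding (npages - bias) map entries and converts that header
 * size back into whole pages; per the comment above, three passes suffice for
 * the value to stabilize.  The parameter and function names are hypothetical
 * and stand in for offsetof(arena_chunk_t, map), sizeof(arena_chunk_map_t),
 * chunk_npages, and LG_PAGE.
 */
static size_t
map_bias_sketch(size_t hdr_fixed_size, size_t map_entry_size, size_t npages,
    size_t lg_page)
{
	size_t bias = 0;
	size_t page_mask = (ZU(1) << lg_page) - 1;
	unsigned i;

	for (i = 0; i < 3; i++) {
		size_t header_size = hdr_fixed_size +
		    (map_entry_size * (npages - bias));

		bias = (header_size >> lg_page) +
		    ((header_size & page_mask) != 0);
	}
	return (bias);
}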
2194
2195void
2196arena_prefork(arena_t *arena)
2197{
2198	unsigned i;
2199
2200	malloc_mutex_prefork(&arena->lock);
2201	for (i = 0; i < NBINS; i++)
2202		malloc_mutex_prefork(&arena->bins[i].lock);
2203}
2204
2205void
2206arena_postfork_parent(arena_t *arena)
2207{
2208	unsigned i;
2209
2210	for (i = 0; i < NBINS; i++)
2211		malloc_mutex_postfork_parent(&arena->bins[i].lock);
2212	malloc_mutex_postfork_parent(&arena->lock);
2213}
2214
2215void
2216arena_postfork_child(arena_t *arena)
2217{
2218	unsigned i;
2219
2220	for (i = 0; i < NBINS; i++)
2221		malloc_mutex_postfork_child(&arena->bins[i].lock);
2222	malloc_mutex_postfork_child(&arena->lock);
2223}
2224