1/*-
2 * Copyright (C) 2006-2010 Jason Evans <jasone@FreeBSD.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice(s), this list of conditions and the following disclaimer as
10 *    the first lines of this file unmodified other than the possible
11 *    addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice(s), this list of conditions and the following disclaimer in
14 *    the documentation and/or other materials provided with the
15 *    distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 *******************************************************************************
30 *
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems.  The following
33 * features are included for this purpose:
34 *
35 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 *     contention and cache sloshing.
37 *
38 *   + Thread-specific caching is used if there are multiple threads, which
39 *     reduces the amount of locking.
40 *
41 *   + Cache line sharing between arenas is avoided for internal data
42 *     structures.
43 *
44 *   + Memory is managed in chunks and runs (chunks can be split into runs),
45 *     rather than as individual pages.  This provides a constant-time
46 *     mechanism for associating allocations with particular arenas.
47 *
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained.  Allocations are broken into
50 * categories according to size class.  Assuming runtime defaults, 4 KiB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category
52 * are as follows:
53 *
54 *   |========================================|
55 *   | Category | Subcategory      |     Size |
56 *   |========================================|
57 *   | Small    | Tiny             |        2 |
58 *   |          |                  |        4 |
59 *   |          |                  |        8 |
60 *   |          |------------------+----------|
61 *   |          | Quantum-spaced   |       16 |
62 *   |          |                  |       32 |
63 *   |          |                  |       48 |
64 *   |          |                  |      ... |
65 *   |          |                  |       96 |
66 *   |          |                  |      112 |
67 *   |          |                  |      128 |
68 *   |          |------------------+----------|
69 *   |          | Cacheline-spaced |      192 |
70 *   |          |                  |      256 |
71 *   |          |                  |      320 |
72 *   |          |                  |      384 |
73 *   |          |                  |      448 |
74 *   |          |                  |      512 |
75 *   |          |------------------+----------|
76 *   |          | Sub-page         |      768 |
77 *   |          |                  |     1024 |
78 *   |          |                  |     1280 |
79 *   |          |                  |      ... |
80 *   |          |                  |     3328 |
81 *   |          |                  |     3584 |
82 *   |          |                  |     3840 |
83 *   |========================================|
84 *   | Medium                      |    4 KiB |
85 *   |                             |    6 KiB |
86 *   |                             |    8 KiB |
87 *   |                             |      ... |
88 *   |                             |   28 KiB |
89 *   |                             |   30 KiB |
90 *   |                             |   32 KiB |
91 *   |========================================|
92 *   | Large                       |   36 KiB |
93 *   |                             |   40 KiB |
94 *   |                             |   44 KiB |
95 *   |                             |      ... |
96 *   |                             | 1012 KiB |
97 *   |                             | 1016 KiB |
98 *   |                             | 1020 KiB |
99 *   |========================================|
100 *   | Huge                        |    1 MiB |
101 *   |                             |    2 MiB |
102 *   |                             |    3 MiB |
103 *   |                             |      ... |
104 *   |========================================|
105 *
106 * Different mechanisms are used according to category:
107 *
108 *   Small/medium : Each size class is segregated into its own set of runs.
109 *                  Each run maintains a bitmap of which regions are
110 *                  free/allocated.
111 *
112 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
113 *           in the associated arena chunk header maps.
114 *
115 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
116 *          Metadata are stored in a separate red-black tree.
117 *
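 * For example, assuming the same runtime defaults as in the table above:
 *
 *   malloc(100)   --> quantum-spaced small class, rounded up to 112 bytes
 *   malloc(1000)  --> sub-page small class, rounded up to 1024 bytes
 *   malloc(5000)  --> medium class, rounded up to 6 KiB
 *   malloc(40000) --> large class, rounded up to 40 KiB (a page multiple)
 *
 * Requests larger than the largest large size class are huge, and are rounded
 * up to a multiple of the chunk size.
 *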
118 *******************************************************************************
119 */
120
121/*
122 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
123 * defaults the A and J runtime options to off.  These settings are appropriate
124 * for production systems.
125 */
126#ifndef MALLOC_PRODUCTION
127#define	MALLOC_PRODUCTION
128#endif
129
130#ifndef MALLOC_PRODUCTION
131   /*
132    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
133    * inline functions.
134    */
135#  define MALLOC_DEBUG
136
137   /* MALLOC_STATS enables statistics calculation. */
138#  define MALLOC_STATS
139#endif
140
141/*
142 * MALLOC_TINY enables support for tiny objects, which are smaller than one
143 * quantum.
144 */
145#define	MALLOC_TINY
146
147/*
148 * MALLOC_TCACHE enables a thread-specific caching layer for small and medium
149 * objects.  This makes it possible to allocate/deallocate objects without any
150 * locking when the cache is in the steady state.
151 */
152#define	MALLOC_TCACHE
153
154/*
155 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
156 * segment (DSS).  In an ideal world, this functionality would be completely
157 * unnecessary, but we are burdened by history and the lack of resource limits
158 * for anonymous mapped memory.
159 */
160#define	MALLOC_DSS
161
162#include <sys/cdefs.h>
163__FBSDID("$FreeBSD$");
164
165#include "libc_private.h"
166#ifdef MALLOC_DEBUG
167#  define _LOCK_DEBUG
168#endif
169#include "spinlock.h"
170#include "namespace.h"
171#include <sys/mman.h>
172#include <sys/param.h>
173#include <sys/time.h>
174#include <sys/types.h>
175#include <sys/sysctl.h>
176#include <sys/uio.h>
177#include <sys/ktrace.h> /* Must come after several other sys/ includes. */
178
179#include <machine/cpufunc.h>
180#include <machine/param.h>
181#include <machine/vmparam.h>
182
183#include <errno.h>
184#include <limits.h>
185#include <link.h>
186#include <pthread.h>
187#include <sched.h>
188#include <stdarg.h>
189#include <stdbool.h>
190#include <stdio.h>
191#include <stdint.h>
192#include <inttypes.h>
193#include <stdlib.h>
194#include <string.h>
195#include <strings.h>
196#include <unistd.h>
197
198#include "un-namespace.h"
199
200#include "libc_private.h"
201
202#define	RB_COMPACT
203#include "rb.h"
204#if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
205#include "qr.h"
206#include "ql.h"
207#endif
208
209#ifdef MALLOC_DEBUG
210   /* Disable inlining to make debugging easier. */
211#  define inline
212#endif
213
214/* Size of stack-allocated buffer passed to strerror_r(). */
215#define	STRERROR_BUF		64
216
217/*
218 * Minimum alignment of allocations is 2^LG_QUANTUM bytes.
219 */
220#ifdef __i386__
221#  define LG_QUANTUM		4
222#  define LG_SIZEOF_PTR		2
223#  define CPU_SPINWAIT		__asm__ volatile("pause")
224#  ifdef __clang__
225#    define TLS_MODEL		/* clang does not support tls_model yet */
226#  else
227#    define TLS_MODEL		__attribute__((tls_model("initial-exec")))
228#  endif
229#endif
230#ifdef __ia64__
231#  define LG_QUANTUM		4
232#  define LG_SIZEOF_PTR		3
233#  define TLS_MODEL		/* default */
234#endif
235#ifdef __alpha__
236#  define LG_QUANTUM		4
237#  define LG_SIZEOF_PTR		3
238#  define NO_TLS
239#endif
240#ifdef __sparc64__
241#  define LG_QUANTUM		4
242#  define LG_SIZEOF_PTR		3
243#  define TLS_MODEL		__attribute__((tls_model("initial-exec")))
244#endif
245#ifdef __amd64__
246#  define LG_QUANTUM		4
247#  define LG_SIZEOF_PTR		3
248#  define CPU_SPINWAIT		__asm__ volatile("pause")
249#  ifdef __clang__
250#    define TLS_MODEL		/* clang does not support tls_model yet */
251#  else
252#    define TLS_MODEL		__attribute__((tls_model("initial-exec")))
253#  endif
254#endif
255#ifdef __arm__
256#  define LG_QUANTUM		3
257#  define LG_SIZEOF_PTR		2
258#  define NO_TLS
259#endif
260#ifdef __mips__
261#  define LG_QUANTUM		3
262#  define LG_SIZEOF_PTR		2
263#  define NO_TLS
264#endif
265#ifdef __powerpc64__
266#  define LG_QUANTUM		4
267#  define LG_SIZEOF_PTR		3
268#  define TLS_MODEL		/* default */
269#elif defined(__powerpc__)
270#  define LG_QUANTUM		4
271#  define LG_SIZEOF_PTR		2
272#  define TLS_MODEL		/* default */
273#endif
274#ifdef __s390x__
275#  define LG_QUANTUM		4
276#endif
277
278#define	QUANTUM			((size_t)(1U << LG_QUANTUM))
279#define	QUANTUM_MASK		(QUANTUM - 1)
280
281#define	SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
282
283/* sizeof(int) == (1U << LG_SIZEOF_INT). */
284#ifndef LG_SIZEOF_INT
285#  define LG_SIZEOF_INT	2
286#endif
287
288/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
289#if (!defined(PIC) && !defined(NO_TLS))
290#  define NO_TLS
291#endif
292
293#ifdef NO_TLS
294   /* MALLOC_TCACHE requires TLS. */
295#  ifdef MALLOC_TCACHE
296#    undef MALLOC_TCACHE
297#  endif
298#endif
299
300/*
301 * Size and alignment of memory chunks that are allocated by the OS's virtual
302 * memory system.
303 */
304#define	LG_CHUNK_DEFAULT	22
305
306/*
307 * The minimum ratio of active:dirty pages per arena is computed as:
308 *
309 *   (nactive >> opt_lg_dirty_mult) >= ndirty
310 *
311 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
312 * times as many active pages as dirty pages.
313 */
314#define	LG_DIRTY_MULT_DEFAULT	5
315
316/*
317 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
318 * In addition, this controls the spacing of cacheline-spaced size classes.
319 */
320#define	LG_CACHELINE		6
321#define	CACHELINE		((size_t)(1U << LG_CACHELINE))
322#define	CACHELINE_MASK		(CACHELINE - 1)
323
324/*
325 * Subpages are an artificially designated partitioning of pages.  Their only
326 * purpose is to support subpage-spaced size classes.
327 *
328 * There must be at least 4 subpages per page, due to the way size classes are
329 * handled.
330 */
331#define	LG_SUBPAGE		8
332#define	SUBPAGE			((size_t)(1U << LG_SUBPAGE))
333#define	SUBPAGE_MASK		(SUBPAGE - 1)
334
335#ifdef MALLOC_TINY
336   /* Smallest size class to support. */
337#  define LG_TINY_MIN		1
338#endif
339
340/*
341 * Maximum size class that is a multiple of the quantum, but not (necessarily)
342 * a power of 2.  Above this size, allocations are rounded up to the nearest
343 * power of 2.
344 */
345#define	LG_QSPACE_MAX_DEFAULT	7
346
347/*
348 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
349 * a power of 2.  Above this size, allocations are rounded up to the nearest
350 * power of 2.
351 */
352#define	LG_CSPACE_MAX_DEFAULT	9
353
354/*
355 * Maximum medium size class.  This must not be more than 1/4 of a chunk
356 * (LG_MEDIUM_MAX_DEFAULT <= LG_CHUNK_DEFAULT - 2).
357 */
358#define	LG_MEDIUM_MAX_DEFAULT	15
359
360/*
361 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
362 * as small as possible such that this setting is still honored, without
363 * violating other constraints.  The goal is to make runs as small as possible
364 * without exceeding a per run external fragmentation threshold.
365 *
366 * We use binary fixed point math for overhead computations, where the binary
367 * point is implicitly RUN_BFP bits to the left.
368 *
369 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
370 * honored for some/all object sizes, since there is one bit of header overhead
371 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
372 * that are so small that the per-region overhead is greater than:
373 *
374 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
375 */
376#define	RUN_BFP			12
377/*                                    \/   Implicit binary fixed point. */
378#define	RUN_MAX_OVRHD		0x0000003dU
379#define	RUN_MAX_OVRHD_RELAX	0x00001800U
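
/*
 * For example, with RUN_BFP == 12 the implicit denominator is 4096, so
 * RUN_MAX_OVRHD == 0x3d == 61 corresponds to a target maximum header
 * overhead of roughly 61/4096, i.e. about 1.5% of each run.
 */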
380
381/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
382#define	RUN_MAX_SMALL							\
383	(arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT))	\
384	    ? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE +		\
385	    PAGE_SHIFT)))
386
387/*
388 * Hyper-threaded CPUs may need a special instruction inside spin loops in
389 * order to yield to another virtual CPU.  If no such instruction is defined
390 * above, make CPU_SPINWAIT a no-op.
391 */
392#ifndef CPU_SPINWAIT
393#  define CPU_SPINWAIT
394#endif
395
396/*
397 * Adaptive spinning must eventually switch to blocking, in order to avoid the
398 * potential for priority inversion deadlock.  Backing off past a certain point
399 * can actually waste time.
400 */
401#define	LG_SPIN_LIMIT		11
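
/*
 * For example, with LG_SPIN_LIMIT == 11 the backoff loop in
 * malloc_spin_lock() executes at most 2^1 + 2^2 + ... + 2^11 == 4094
 * CPU_SPINWAIT iterations (roughly 2^12) before falling back to blocking on
 * the mutex.
 */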
402
403#ifdef MALLOC_TCACHE
404   /*
405    * Default number of cache slots for each bin in the thread cache (0:
406    * disabled).
407    */
408#  define LG_TCACHE_NSLOTS_DEFAULT	7
409   /*
410    * (1U << opt_lg_tcache_gc_sweep) is the approximate number of
411    * allocation events between full GC sweeps (-1: disabled).  Integer
412    * rounding may cause the actual number to be slightly higher, since GC is
413    * performed incrementally.
414    */
415#  define LG_TCACHE_GC_SWEEP_DEFAULT	13
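   /*
    * For example, with the default value of 13, a full sweep corresponds to
    * roughly 2^13 == 8192 allocation events; since GC is incremental (one
    * bin per pass), the interval between passes is approximately that count
    * divided by the number of bins being cached.
    */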
416#endif
417
418/******************************************************************************/
419
420/*
421 * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
422 * places, because they require malloc()ed memory, which causes bootstrapping
423 * issues in some cases.
424 */
425typedef struct {
426	spinlock_t	lock;
427} malloc_mutex_t;
428
429/* Set to true once the allocator has been initialized. */
430static bool malloc_initialized = false;
431
432/* Used to avoid initialization races. */
433static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
434
435/******************************************************************************/
436/*
437 * Statistics data structures.
438 */
439
440#ifdef MALLOC_STATS
441
442#ifdef MALLOC_TCACHE
443typedef struct tcache_bin_stats_s tcache_bin_stats_t;
444struct tcache_bin_stats_s {
445	/*
446	 * Number of allocation requests that corresponded to the size of this
447	 * bin.
448	 */
449	uint64_t	nrequests;
450};
451#endif
452
453typedef struct malloc_bin_stats_s malloc_bin_stats_t;
454struct malloc_bin_stats_s {
455	/*
456	 * Number of allocation requests that corresponded to the size of this
457	 * bin.
458	 */
459	uint64_t	nrequests;
460
461#ifdef MALLOC_TCACHE
462	/* Number of tcache fills from this bin. */
463	uint64_t	nfills;
464
465	/* Number of tcache flushes to this bin. */
466	uint64_t	nflushes;
467#endif
468
469	/* Total number of runs created for this bin's size class. */
470	uint64_t	nruns;
471
472	/*
473	 * Total number of runs reused by extracting them from the runs tree for
474	 * this bin's size class.
475	 */
476	uint64_t	reruns;
477
478	/* High-water mark for this bin. */
479	size_t		highruns;
480
481	/* Current number of runs in this bin. */
482	size_t		curruns;
483};
484
485typedef struct malloc_large_stats_s malloc_large_stats_t;
486struct malloc_large_stats_s {
487	/*
488	 * Number of allocation requests that corresponded to this size class.
489	 */
490	uint64_t	nrequests;
491
492	/* High-water mark for this size class. */
493	size_t		highruns;
494
495	/* Current number of runs of this size class. */
496	size_t		curruns;
497};
498
499typedef struct arena_stats_s arena_stats_t;
500struct arena_stats_s {
501	/* Number of bytes currently mapped. */
502	size_t		mapped;
503
504	/*
505	 * Total number of purge sweeps, total number of madvise calls made,
506	 * and total pages purged in order to keep dirty unused memory under
507	 * control.
508	 */
509	uint64_t	npurge;
510	uint64_t	nmadvise;
511	uint64_t	purged;
512
513	/* Per-size-category statistics. */
514	size_t		allocated_small;
515	uint64_t	nmalloc_small;
516	uint64_t	ndalloc_small;
517
518	size_t		allocated_medium;
519	uint64_t	nmalloc_medium;
520	uint64_t	ndalloc_medium;
521
522	size_t		allocated_large;
523	uint64_t	nmalloc_large;
524	uint64_t	ndalloc_large;
525
526	/*
527	 * One element for each possible size class, including sizes that
528	 * overlap with bin size classes.  This is necessary because ipalloc()
529	 * sometimes has to use such large objects in order to assure proper
530	 * alignment.
531	 */
532	malloc_large_stats_t	*lstats;
533};
534
535typedef struct chunk_stats_s chunk_stats_t;
536struct chunk_stats_s {
537	/* Number of chunks that were allocated. */
538	uint64_t	nchunks;
539
540	/* High-water mark for number of chunks allocated. */
541	size_t		highchunks;
542
543	/*
544	 * Current number of chunks allocated.  This value is not needed for any
545	 * other purpose; it is tracked only so that highchunks can be
546	 * maintained.
547	 */
548	size_t		curchunks;
549};
550
551#endif /* #ifdef MALLOC_STATS */
552
553/******************************************************************************/
554/*
555 * Extent data structures.
556 */
557
558/* Tree of extents. */
559typedef struct extent_node_s extent_node_t;
560struct extent_node_s {
561#ifdef MALLOC_DSS
562	/* Linkage for the size/address-ordered tree. */
563	rb_node(extent_node_t) link_szad;
564#endif
565
566	/* Linkage for the address-ordered tree. */
567	rb_node(extent_node_t) link_ad;
568
569	/* Pointer to the extent that this tree node is responsible for. */
570	void	*addr;
571
572	/* Total region size. */
573	size_t	size;
574};
575typedef rb_tree(extent_node_t) extent_tree_t;
576
577/******************************************************************************/
578/*
579 * Arena data structures.
580 */
581
582typedef struct arena_s arena_t;
583typedef struct arena_bin_s arena_bin_t;
584
585/* Each element of the chunk map corresponds to one page within the chunk. */
586typedef struct arena_chunk_map_s arena_chunk_map_t;
587struct arena_chunk_map_s {
588	/*
589	 * Linkage for run trees.  There are two disjoint uses:
590	 *
591	 * 1) arena_t's runs_avail tree.
592	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
593	 *    runs, rather than directly embedding linkage.
594	 */
595	rb_node(arena_chunk_map_t)	link;
596
597	/*
598	 * Run address (or size) and various flags are stored together.  The bit
599	 * layout looks like (assuming 32-bit system):
600	 *
601	 *   ???????? ???????? ????cccc ccccdzla
602	 *
603	 * ? : Unallocated: Run address for first/last pages, unset for internal
604	 *                  pages.
605	 *     Small/medium: Don't care.
606	 *     Large: Run size for first page, unset for trailing pages.
607	 * - : Unused.
608	 * c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
609	 * d : dirty?
610	 * z : zeroed?
611	 * l : large?
612	 * a : allocated?
613	 *
614	 * Following are example bit patterns for the three types of runs.
615	 *
616	 * p : run page offset
617	 * s : run size
618	 * x : don't care
619	 * - : 0
620	 * [dzla] : bit set
621	 *
622	 *   Unallocated:
623	 *     ssssssss ssssssss ssss---- --------
624	 *     xxxxxxxx xxxxxxxx xxxx---- ----d---
625	 *     ssssssss ssssssss ssss---- -----z--
626	 *
627	 *   Small/medium:
628	 *     pppppppp ppppcccc cccccccc cccc---a
629	 *     pppppppp ppppcccc cccccccc cccc---a
630	 *     pppppppp ppppcccc cccccccc cccc---a
631	 *
632	 *   Large:
633	 *     ssssssss ssssssss ssss---- ------la
634	 *     -------- -------- -------- ------la
635	 *     -------- -------- -------- ------la
636	 */
637	size_t				bits;
638#define	CHUNK_MAP_PG_MASK	((size_t)0xfff00000U)
639#define	CHUNK_MAP_PG_SHIFT	20
640#define	CHUNK_MAP_LG_PG_RANGE	12
641
642#define	CHUNK_MAP_RC_MASK	((size_t)0xffff0U)
643#define	CHUNK_MAP_RC_ONE	((size_t)0x00010U)
644
645#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
646#define	CHUNK_MAP_DIRTY		((size_t)0x8U)
647#define	CHUNK_MAP_ZEROED	((size_t)0x4U)
648#define	CHUNK_MAP_LARGE		((size_t)0x2U)
649#define	CHUNK_MAP_ALLOCATED	((size_t)0x1U)
650#define	CHUNK_MAP_KEY		(CHUNK_MAP_DIRTY | CHUNK_MAP_ALLOCATED)
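	/*
	 * For example, per the layout above, the map entry for a page at run
	 * page offset 3 within a small run, with a region refcount of 2,
	 * would be composed as:
	 *
	 *   ((size_t)3 << CHUNK_MAP_PG_SHIFT) | (2 * CHUNK_MAP_RC_ONE) |
	 *       CHUNK_MAP_ALLOCATED
	 */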
651};
652typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
653typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
654
655/* Arena chunk header. */
656typedef struct arena_chunk_s arena_chunk_t;
657struct arena_chunk_s {
658	/* Arena that owns the chunk. */
659	arena_t		*arena;
660
661	/* Linkage for the arena's chunks_dirty tree. */
662	rb_node(arena_chunk_t) link_dirty;
663
664	/*
665	 * True if the chunk is currently in the chunks_dirty tree, due to
666	 * having at some point contained one or more dirty pages.  Removal
667	 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
668	 */
669	bool		dirtied;
670
671	/* Number of dirty pages. */
672	size_t		ndirty;
673
674	/* Map of pages within chunk that keeps track of free/large/small. */
675	arena_chunk_map_t map[1]; /* Dynamically sized. */
676};
677typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
678
679typedef struct arena_run_s arena_run_t;
680struct arena_run_s {
681#ifdef MALLOC_DEBUG
682	uint32_t	magic;
683#  define ARENA_RUN_MAGIC 0x384adf93
684#endif
685
686	/* Bin this run is associated with. */
687	arena_bin_t	*bin;
688
689	/* Index of first element that might have a free region. */
690	unsigned	regs_minelm;
691
692	/* Number of free regions in run. */
693	unsigned	nfree;
694
695	/* Bitmask of in-use regions (0: in use, 1: free). */
696	unsigned	regs_mask[1]; /* Dynamically sized. */
697};
698
699struct arena_bin_s {
700	/*
701	 * Current run being used to service allocations of this bin's size
702	 * class.
703	 */
704	arena_run_t	*runcur;
705
706	/*
707	 * Tree of non-full runs.  This tree is used when looking for an
708	 * existing run when runcur is no longer usable.  We choose the
709	 * non-full run that is lowest in memory; this policy tends to keep
710	 * objects packed well, and it can also help reduce the number of
711	 * almost-empty chunks.
712	 */
713	arena_run_tree_t runs;
714
715	/* Size of regions in a run for this bin's size class. */
716	size_t		reg_size;
717
718	/* Total size of a run for this bin's size class. */
719	size_t		run_size;
720
721	/* Total number of regions in a run for this bin's size class. */
722	uint32_t	nregs;
723
724	/* Number of elements in a run's regs_mask for this bin's size class. */
725	uint32_t	regs_mask_nelms;
726
727	/* Offset of first region in a run for this bin's size class. */
728	uint32_t	reg0_offset;
729
730#ifdef MALLOC_STATS
731	/* Bin statistics. */
732	malloc_bin_stats_t stats;
733#endif
734};
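
/*
 * A simplified sketch (not the allocator's actual routine) of how a free
 * region can be located for a given run and its bin, using the fields
 * defined above; a set bit in regs_mask means "free":
 *
 *	for (i = run->regs_minelm; i < bin->regs_mask_nelms; i++) {
 *		unsigned msk = run->regs_mask[i];
 *		if (msk != 0) {
 *			bit = ffs((int)msk) - 1;
 *			regind = (i << (LG_SIZEOF_INT + 3)) + bit;
 *			return ((void *)((uintptr_t)run + bin->reg0_offset +
 *			    (bin->reg_size * regind)));
 *		}
 *	}
 */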
735
736#ifdef MALLOC_TCACHE
737typedef struct tcache_s tcache_t;
738#endif
739
740struct arena_s {
741#ifdef MALLOC_DEBUG
742	uint32_t		magic;
743#  define ARENA_MAGIC 0x947d3d24
744#endif
745
746	/* All operations on this arena require that lock be locked. */
747	pthread_mutex_t		lock;
748
749#ifdef MALLOC_STATS
750	arena_stats_t		stats;
751#  ifdef MALLOC_TCACHE
752	/*
753	 * List of tcaches for extant threads associated with this arena.
754	 * Stats from these are merged incrementally, and at exit.
755	 */
756	ql_head(tcache_t)	tcache_ql;
757#  endif
758#endif
759
760	/* Tree of dirty-page-containing chunks this arena manages. */
761	arena_chunk_tree_t	chunks_dirty;
762
763	/*
764	 * In order to avoid rapid chunk allocation/deallocation when an arena
765	 * oscillates right on the cusp of needing a new chunk, cache the most
766	 * recently freed chunk.  The spare is left in the arena's chunk trees
767	 * until it is deleted.
768	 *
769	 * There is one spare chunk per arena, rather than one spare total, in
770	 * order to avoid interactions between multiple threads that could make
771	 * a single spare inadequate.
772	 */
773	arena_chunk_t		*spare;
774
775	/* Number of pages in active runs. */
776	size_t			nactive;
777
778	/*
779	 * Current count of pages within unused runs that are potentially
780	 * dirty, and for which madvise(... MADV_FREE) has not been called.  By
781	 * tracking this, we can institute a limit on how much dirty unused
782	 * memory is mapped for each arena.
783	 */
784	size_t			ndirty;
785
786	/*
787	 * Size/address-ordered tree of this arena's available runs.  This tree
788	 * is used for first-best-fit run allocation.
789	 */
790	arena_avail_tree_t	runs_avail;
791
792	/*
793	 * bins is used to store trees of free regions of the following sizes,
794	 * assuming a 16-byte quantum, 4 KiB page size, and default
795	 * MALLOC_OPTIONS.
796	 *
797	 *   bins[i] |   size |
798	 *   --------+--------+
799	 *        0  |      2 |
800	 *        1  |      4 |
801	 *        2  |      8 |
802	 *   --------+--------+
803	 *        3  |     16 |
804	 *        4  |     32 |
805	 *        5  |     48 |
806	 *           :        :
807	 *        8  |     96 |
808	 *        9  |    112 |
809	 *       10  |    128 |
810	 *   --------+--------+
811	 *       11  |    192 |
812	 *       12  |    256 |
813	 *       13  |    320 |
814	 *       14  |    384 |
815	 *       15  |    448 |
816	 *       16  |    512 |
817	 *   --------+--------+
818	 *       17  |    768 |
819	 *       18  |   1024 |
820	 *       19  |   1280 |
821	 *           :        :
822	 *       27  |   3328 |
823	 *       28  |   3584 |
824	 *       29  |   3840 |
825	 *   --------+--------+
826	 *       30  |  4 KiB |
827	 *       31  |  6 KiB |
828	 *       32  |  8 KiB |
829	 *           :        :
830	 *       42  | 28 KiB |
831	 *       43  | 30 KiB |
832	 *       44  | 32 KiB |
833	 *   --------+--------+
834	 */
835	arena_bin_t		bins[1]; /* Dynamically sized. */
836};
837
838/******************************************************************************/
839/*
840 * Thread cache data structures.
841 */
842
843#ifdef MALLOC_TCACHE
844typedef struct tcache_bin_s tcache_bin_t;
845struct tcache_bin_s {
846#  ifdef MALLOC_STATS
847	tcache_bin_stats_t tstats;
848#  endif
849	unsigned	low_water;	/* Min # cached since last GC. */
850	unsigned	high_water;	/* Max # cached since last GC. */
851	unsigned	ncached;	/* # of cached objects. */
852	void		*slots[1];	/* Dynamically sized. */
853};
854
855struct tcache_s {
856#  ifdef MALLOC_STATS
857	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
858#  endif
859	arena_t		*arena;		/* This thread's arena. */
860	unsigned	ev_cnt;		/* Event count since incremental GC. */
861	unsigned	next_gc_bin;	/* Next bin to GC. */
862	tcache_bin_t	*tbins[1];	/* Dynamically sized. */
863};
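
/*
 * A simplified sketch (not the actual implementation) of the lockless fast
 * path that these structures make possible: for a small request that maps to
 * bin index binind, the common case is a pop from the per-thread slots
 * array, with no arena lock held:
 *
 *	tbin = tcache->tbins[binind];
 *	if (tbin->ncached > 0) {
 *		tbin->ncached--;
 *		ret = tbin->slots[tbin->ncached];
 *	}
 */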
864#endif
865
866/******************************************************************************/
867/*
868 * Data.
869 */
870
871/* Number of CPUs. */
872static unsigned		ncpus;
873
874/* Various bin-related settings. */
875#ifdef MALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
876#  define		ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
877#else
878#  define		ntbins	0
879#endif
880static unsigned		nqbins; /* Number of quantum-spaced bins. */
881static unsigned		ncbins; /* Number of cacheline-spaced bins. */
882static unsigned		nsbins; /* Number of subpage-spaced bins. */
883static unsigned		nmbins; /* Number of medium bins. */
884static unsigned		nbins;
885static unsigned		mbin0; /* mbin offset (nbins - nmbins). */
886#ifdef MALLOC_TINY
887#  define		tspace_max	((size_t)(QUANTUM >> 1))
888#endif
889#define			qspace_min	QUANTUM
890static size_t		qspace_max;
891static size_t		cspace_min;
892static size_t		cspace_max;
893static size_t		sspace_min;
894static size_t		sspace_max;
895#define			small_maxclass	sspace_max
896#define			medium_min	PAGE_SIZE
897static size_t		medium_max;
898#define			bin_maxclass	medium_max
899
900/*
901 * Soft limit on the number of medium size classes.  Spacing between medium
902 * size classes never exceeds pagesize, which can force more than NBINS_MAX
903 * medium size classes.
904 */
905#define	NMBINS_MAX	16
906/* Spacing between medium size classes. */
907static size_t		lg_mspace;
908static size_t		mspace_mask;
909
910static uint8_t const	*small_size2bin;
911/*
912 * const_small_size2bin is a static constant lookup table that in the common
913 * case can be used as-is for small_size2bin.  For dynamically linked programs,
914 * this avoids a page of memory overhead per process.
915 */
916#define	S2B_1(i)	i,
917#define	S2B_2(i)	S2B_1(i) S2B_1(i)
918#define	S2B_4(i)	S2B_2(i) S2B_2(i)
919#define	S2B_8(i)	S2B_4(i) S2B_4(i)
920#define	S2B_16(i)	S2B_8(i) S2B_8(i)
921#define	S2B_32(i)	S2B_16(i) S2B_16(i)
922#define	S2B_64(i)	S2B_32(i) S2B_32(i)
923#define	S2B_128(i)	S2B_64(i) S2B_64(i)
924#define	S2B_256(i)	S2B_128(i) S2B_128(i)
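/*
 * Each S2B_n() macro emits n copies of a bin index, so the initializer below
 * is a run-length-encoded mapping from request size (in bytes) to bin index.
 * For example, with a 16 byte quantum and MALLOC_TINY, S2B_8(3) emits eight
 * entries of value 3, covering requests of 9 through 16 bytes, all of which
 * are served from the 16-byte quantum-spaced bin.
 */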
925static const uint8_t	const_small_size2bin[PAGE_SIZE - 255] = {
926	S2B_1(0xffU)		/*    0 */
927#if (LG_QUANTUM == 4)
928/* 64-bit system ************************/
929#  ifdef MALLOC_TINY
930	S2B_2(0)		/*    2 */
931	S2B_2(1)		/*    4 */
932	S2B_4(2)		/*    8 */
933	S2B_8(3)		/*   16 */
934#    define S2B_QMIN 3
935#  else
936	S2B_16(0)		/*   16 */
937#    define S2B_QMIN 0
938#  endif
939	S2B_16(S2B_QMIN + 1)	/*   32 */
940	S2B_16(S2B_QMIN + 2)	/*   48 */
941	S2B_16(S2B_QMIN + 3)	/*   64 */
942	S2B_16(S2B_QMIN + 4)	/*   80 */
943	S2B_16(S2B_QMIN + 5)	/*   96 */
944	S2B_16(S2B_QMIN + 6)	/*  112 */
945	S2B_16(S2B_QMIN + 7)	/*  128 */
946#  define S2B_CMIN (S2B_QMIN + 8)
947#else
948/* 32-bit system ************************/
949#  ifdef MALLOC_TINY
950	S2B_2(0)		/*    2 */
951	S2B_2(1)		/*    4 */
952	S2B_4(2)		/*    8 */
953#    define S2B_QMIN 2
954#  else
955	S2B_8(0)		/*    8 */
956#    define S2B_QMIN 0
957#  endif
958	S2B_8(S2B_QMIN + 1)	/*   16 */
959	S2B_8(S2B_QMIN + 2)	/*   24 */
960	S2B_8(S2B_QMIN + 3)	/*   32 */
961	S2B_8(S2B_QMIN + 4)	/*   40 */
962	S2B_8(S2B_QMIN + 5)	/*   48 */
963	S2B_8(S2B_QMIN + 6)	/*   56 */
964	S2B_8(S2B_QMIN + 7)	/*   64 */
965	S2B_8(S2B_QMIN + 8)	/*   72 */
966	S2B_8(S2B_QMIN + 9)	/*   80 */
967	S2B_8(S2B_QMIN + 10)	/*   88 */
968	S2B_8(S2B_QMIN + 11)	/*   96 */
969	S2B_8(S2B_QMIN + 12)	/*  104 */
970	S2B_8(S2B_QMIN + 13)	/*  112 */
971	S2B_8(S2B_QMIN + 14)	/*  120 */
972	S2B_8(S2B_QMIN + 15)	/*  128 */
973#  define S2B_CMIN (S2B_QMIN + 16)
974#endif
975/****************************************/
976	S2B_64(S2B_CMIN + 0)	/*  192 */
977	S2B_64(S2B_CMIN + 1)	/*  256 */
978	S2B_64(S2B_CMIN + 2)	/*  320 */
979	S2B_64(S2B_CMIN + 3)	/*  384 */
980	S2B_64(S2B_CMIN + 4)	/*  448 */
981	S2B_64(S2B_CMIN + 5)	/*  512 */
982#  define S2B_SMIN (S2B_CMIN + 6)
983	S2B_256(S2B_SMIN + 0)	/*  768 */
984	S2B_256(S2B_SMIN + 1)	/* 1024 */
985	S2B_256(S2B_SMIN + 2)	/* 1280 */
986	S2B_256(S2B_SMIN + 3)	/* 1536 */
987	S2B_256(S2B_SMIN + 4)	/* 1792 */
988	S2B_256(S2B_SMIN + 5)	/* 2048 */
989	S2B_256(S2B_SMIN + 6)	/* 2304 */
990	S2B_256(S2B_SMIN + 7)	/* 2560 */
991	S2B_256(S2B_SMIN + 8)	/* 2816 */
992	S2B_256(S2B_SMIN + 9)	/* 3072 */
993	S2B_256(S2B_SMIN + 10)	/* 3328 */
994	S2B_256(S2B_SMIN + 11)	/* 3584 */
995	S2B_256(S2B_SMIN + 12)	/* 3840 */
996#if (PAGE_SHIFT == 13)
997	S2B_256(S2B_SMIN + 13)	/* 4096 */
998	S2B_256(S2B_SMIN + 14)	/* 4352 */
999	S2B_256(S2B_SMIN + 15)	/* 4608 */
1000	S2B_256(S2B_SMIN + 16)	/* 4864 */
1001	S2B_256(S2B_SMIN + 17)	/* 5120 */
1002	S2B_256(S2B_SMIN + 18)	/* 5376 */
1003	S2B_256(S2B_SMIN + 19)	/* 5632 */
1004	S2B_256(S2B_SMIN + 20)	/* 5888 */
1005	S2B_256(S2B_SMIN + 21)	/* 6144 */
1006	S2B_256(S2B_SMIN + 22)	/* 6400 */
1007	S2B_256(S2B_SMIN + 23)	/* 6656 */
1008	S2B_256(S2B_SMIN + 24)	/* 6912 */
1009	S2B_256(S2B_SMIN + 25)	/* 7168 */
1010	S2B_256(S2B_SMIN + 26)	/* 7424 */
1011	S2B_256(S2B_SMIN + 27)	/* 7680 */
1012	S2B_256(S2B_SMIN + 28)	/* 7936 */
1013#endif
1014};
1015#undef S2B_1
1016#undef S2B_2
1017#undef S2B_4
1018#undef S2B_8
1019#undef S2B_16
1020#undef S2B_32
1021#undef S2B_64
1022#undef S2B_128
1023#undef S2B_256
1024#undef S2B_QMIN
1025#undef S2B_CMIN
1026#undef S2B_SMIN
1027
1028/* Various chunk-related settings. */
1029static size_t		chunksize;
1030static size_t		chunksize_mask; /* (chunksize - 1). */
1031static size_t		chunk_npages;
1032static size_t		arena_chunk_header_npages;
1033static size_t		arena_maxclass; /* Max size class for arenas. */
1034
1035/********/
1036/*
1037 * Chunks.
1038 */
1039
1040/* Protects chunk-related data structures. */
1041static malloc_mutex_t	huge_mtx;
1042
1043/* Tree of chunks that are stand-alone huge allocations. */
1044static extent_tree_t	huge;
1045
1046#ifdef MALLOC_DSS
1047/*
1048 * Protects sbrk() calls.  This avoids malloc races among threads, though it
1049 * does not protect against races with threads that call sbrk() directly.
1050 */
1051static malloc_mutex_t	dss_mtx;
1052/* Base address of the DSS. */
1053static void		*dss_base;
1054/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
1055static void		*dss_prev;
1056/* Current upper limit on DSS addresses. */
1057static void		*dss_max;
1058
1059/*
1060 * Trees of chunks that were previously allocated (trees differ only in node
1061 * ordering).  These are used when allocating chunks, in an attempt to re-use
1062 * address space.  Depending on function, different tree orderings are needed,
1063 * which is why there are two trees with the same contents.
1064 */
1065static extent_tree_t	dss_chunks_szad;
1066static extent_tree_t	dss_chunks_ad;
1067#endif
1068
1069#ifdef MALLOC_STATS
1070/* Huge allocation statistics. */
1071static uint64_t		huge_nmalloc;
1072static uint64_t		huge_ndalloc;
1073static size_t		huge_allocated;
1074#endif
1075
1076/****************************/
1077/*
1078 * base (internal allocation).
1079 */
1080
1081/*
1082 * Current pages that are being used for internal memory allocations.  These
1083 * pages are carved up in cacheline-size quanta, so that there is no chance of
1084 * false cache line sharing.
1085 */
1086static void		*base_pages;
1087static void		*base_next_addr;
1088static void		*base_past_addr; /* Addr immediately past base_pages. */
1089static extent_node_t	*base_nodes;
1090static malloc_mutex_t	base_mtx;
1091#ifdef MALLOC_STATS
1092static size_t		base_mapped;
1093#endif
1094
1095/********/
1096/*
1097 * Arenas.
1098 */
1099
1100/*
1101 * Arenas that are used to service external requests.  Not all elements of the
1102 * arenas array are necessarily used; arenas are created lazily as needed.
1103 */
1104static arena_t		**arenas;
1105static unsigned		narenas;
1106#ifndef NO_TLS
1107static unsigned		next_arena;
1108#endif
1109static pthread_mutex_t	arenas_lock; /* Protects arenas initialization. */
1110
1111#ifndef NO_TLS
1112/*
1113 * Map of _pthread_self() --> arenas[???], used for selecting an arena to use
1114 * for allocations.
1115 */
1116static __thread arena_t		*arenas_map TLS_MODEL;
1117#endif
1118
1119#ifdef MALLOC_TCACHE
1120/* Map of thread-specific caches. */
1121static __thread tcache_t	*tcache_tls TLS_MODEL;
1122
1123/*
1124 * Number of cache slots for each bin in the thread cache, or 0 if tcache is
1125 * disabled.
1126 */
1127size_t				tcache_nslots;
1128
1129/* Number of tcache allocation/deallocation events between incremental GCs. */
1130unsigned			tcache_gc_incr;
1131#endif
1132
1133/*
1134 * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
1135 * potentially avoid some system calls.  We can get away without TLS here,
1136 * since the state of mmap_unaligned only affects performance, not
1137 * correctness.
1138 */
1139#ifndef NO_TLS
1140static __thread bool	mmap_unaligned TLS_MODEL;
1141#else
1142static		bool	mmap_unaligned;
1143#endif
1144
1145#ifdef MALLOC_STATS
1146static malloc_mutex_t	chunks_mtx;
1147/* Chunk statistics. */
1148static chunk_stats_t	stats_chunks;
1149#endif
1150
1151/*******************************/
1152/*
1153 * Runtime configuration options.
1154 */
1155const char	*_malloc_options;
1156
1157#ifndef MALLOC_PRODUCTION
1158static bool	opt_abort = true;
1159static bool	opt_junk = true;
1160#else
1161static bool	opt_abort = false;
1162static bool	opt_junk = false;
1163#endif
1164#ifdef MALLOC_TCACHE
1165static size_t	opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
1166static ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
1167#endif
1168#ifdef MALLOC_DSS
1169static bool	opt_dss = true;
1170static bool	opt_mmap = true;
1171#endif
1172static ssize_t	opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
1173static bool	opt_stats_print = false;
1174static size_t	opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
1175static size_t	opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
1176static size_t	opt_lg_medium_max = LG_MEDIUM_MAX_DEFAULT;
1177static size_t	opt_lg_chunk = LG_CHUNK_DEFAULT;
1178static bool	opt_utrace = false;
1179static bool	opt_sysv = false;
1180static bool	opt_xmalloc = false;
1181static bool	opt_zero = false;
1182static int	opt_narenas_lshift = 0;
1183
1184typedef struct {
1185	void	*p;
1186	size_t	s;
1187	void	*r;
1188} malloc_utrace_t;
1189
1190#define	UTRACE(a, b, c)							\
1191	if (opt_utrace) {						\
1192		malloc_utrace_t ut;					\
1193		ut.p = (a);						\
1194		ut.s = (b);						\
1195		ut.r = (c);						\
1196		utrace(&ut, sizeof(ut));				\
1197	}
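
/*
 * Illustrative usage at the allocation entry points, where the three fields
 * record the input pointer, the request size, and the result:
 *
 *	UTRACE(0, size, ret);		(malloc)
 *	UTRACE(ptr, size, ret);		(realloc)
 *	UTRACE(ptr, 0, 0);		(free)
 */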
1198
1199/******************************************************************************/
1200/*
1201 * Begin function prototypes for non-inline static functions.
1202 */
1203
1204static void	malloc_mutex_init(malloc_mutex_t *mutex);
1205static bool	malloc_spin_init(pthread_mutex_t *lock);
1206#ifdef MALLOC_TINY
1207static size_t	pow2_ceil(size_t x);
1208#endif
1209static void	wrtmessage(const char *p1, const char *p2, const char *p3,
1210		const char *p4);
1211#ifdef MALLOC_STATS
1212static void	malloc_printf(const char *format, ...);
1213#endif
1214static char	*umax2s(uintmax_t x, unsigned base, char *s);
1215#ifdef MALLOC_DSS
1216static bool	base_pages_alloc_dss(size_t minsize);
1217#endif
1218static bool	base_pages_alloc_mmap(size_t minsize);
1219static bool	base_pages_alloc(size_t minsize);
1220static void	*base_alloc(size_t size);
1221static void	*base_calloc(size_t number, size_t size);
1222static extent_node_t *base_node_alloc(void);
1223static void	base_node_dealloc(extent_node_t *node);
1224static void	*pages_map(void *addr, size_t size);
1225static void	pages_unmap(void *addr, size_t size);
1226#ifdef MALLOC_DSS
1227static void	*chunk_alloc_dss(size_t size, bool *zero);
1228static void	*chunk_recycle_dss(size_t size, bool *zero);
1229#endif
1230static void	*chunk_alloc_mmap_slow(size_t size, bool unaligned);
1231static void	*chunk_alloc_mmap(size_t size);
1232static void	*chunk_alloc(size_t size, bool *zero);
1233#ifdef MALLOC_DSS
1234static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1235static bool	chunk_dealloc_dss(void *chunk, size_t size);
1236#endif
1237static void	chunk_dealloc_mmap(void *chunk, size_t size);
1238static void	chunk_dealloc(void *chunk, size_t size);
1239#ifndef NO_TLS
1240static arena_t	*choose_arena_hard(void);
1241#endif
1242static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1243    bool large, bool zero);
1244static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1245static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1246static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1247    bool zero);
1248static void	arena_purge(arena_t *arena);
1249static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1250static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1251    arena_run_t *run, size_t oldsize, size_t newsize);
1252static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1253    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1254static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1255static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1256static size_t	arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1257#ifdef MALLOC_TCACHE
1258static void	tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin,
1259    size_t binind);
1260static void	*tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin,
1261    size_t binind);
1262#endif
1263static void	*arena_malloc_medium(arena_t *arena, size_t size, bool zero);
1264static void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
1265static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
1266    size_t alloc_size);
1267static bool	arena_is_large(const void *ptr);
1268static size_t	arena_salloc(const void *ptr);
1269static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
1270    arena_run_t *run, arena_bin_t *bin);
1272#ifdef MALLOC_STATS
1273static void	arena_stats_print(arena_t *arena);
1274#endif
1275static void	stats_print_atexit(void);
1276#ifdef MALLOC_TCACHE
1277static void	tcache_bin_flush(tcache_bin_t *tbin, size_t binind,
1278    unsigned rem);
1279#endif
1280static void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1281    void *ptr);
1282#ifdef MALLOC_TCACHE
1283static void	arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk,
1284    void *ptr, arena_chunk_map_t *mapelm, tcache_t *tcache);
1285#endif
1286static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1287    void *ptr, size_t size, size_t oldsize);
1288static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1289    void *ptr, size_t size, size_t oldsize);
1290static bool	arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1291static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
1292static bool	arena_new(arena_t *arena, unsigned ind);
1293static arena_t	*arenas_extend(unsigned ind);
1294#ifdef MALLOC_TCACHE
1295static tcache_bin_t	*tcache_bin_create(arena_t *arena);
1296static void	tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
1297    unsigned binind);
1298#  ifdef MALLOC_STATS
1299static void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
1300#  endif
1301static tcache_t	*tcache_create(arena_t *arena);
1302static void	tcache_destroy(tcache_t *tcache);
1303#endif
1304static void	*huge_malloc(size_t size, bool zero);
1305static void	*huge_palloc(size_t alignment, size_t size);
1306static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
1307static void	huge_dalloc(void *ptr);
1308static void	malloc_stats_print(void);
1309#ifdef MALLOC_DEBUG
1310static void	small_size2bin_validate(void);
1311#endif
1312static bool	small_size2bin_init(void);
1313static bool	small_size2bin_init_hard(void);
1314static unsigned	malloc_ncpus(void);
1315static bool	malloc_init_hard(void);
1316
1317/*
1318 * End function prototypes.
1319 */
1320/******************************************************************************/
1321
1322static void
1323wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1324{
1325
1326	if (_write(STDERR_FILENO, p1, strlen(p1)) < 0
1327	    || _write(STDERR_FILENO, p2, strlen(p2)) < 0
1328	    || _write(STDERR_FILENO, p3, strlen(p3)) < 0
1329	    || _write(STDERR_FILENO, p4, strlen(p4)) < 0)
1330		return;
1331}
1332
1333void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
1334	    const char *p4) = wrtmessage;
1335
1336/*
1337 * We don't want to depend on vsnprintf() for production builds, since that can
1338 * cause unnecessary bloat for static binaries.  umax2s() provides minimal
1339 * integer printing functionality, so that malloc_printf() use can be limited to
1340 * MALLOC_STATS code.
1341 */
1342#define	UMAX2S_BUFSIZE	65
1343static char *
1344umax2s(uintmax_t x, unsigned base, char *s)
1345{
1346	unsigned i;
1347
1348	i = UMAX2S_BUFSIZE - 1;
1349	s[i] = '\0';
1350	switch (base) {
1351	case 10:
1352		do {
1353			i--;
1354			s[i] = "0123456789"[x % 10];
1355			x /= 10;
1356		} while (x > 0);
1357		break;
1358	case 16:
1359		do {
1360			i--;
1361			s[i] = "0123456789abcdef"[x & 0xf];
1362			x >>= 4;
1363		} while (x > 0);
1364		break;
1365	default:
1366		do {
1367			i--;
1368			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
1369			x /= base;
1370		} while (x > 0);
1371	}
1372
1373	return (&s[i]);
1374}
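
/*
 * Example usage (the returned pointer refers into the caller-supplied
 * buffer, which must hold at least UMAX2S_BUFSIZE bytes):
 *
 *	char buf[UMAX2S_BUFSIZE];
 *
 *	_malloc_message("n = ", umax2s(42, 10, buf), "\n", "");
 *
 * writes "n = 42" to stderr.
 */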
1375
1376/*
1377 * Define a custom assert() in order to reduce the chances of deadlock during
1378 * assertion failure.
1379 */
1380#ifdef MALLOC_DEBUG
1381#  define assert(e) do {						\
1382	if (!(e)) {							\
1383		char line_buf[UMAX2S_BUFSIZE];				\
1384		_malloc_message(_getprogname(), ": (malloc) ",		\
1385		    __FILE__, ":");					\
1386		_malloc_message(umax2s(__LINE__, 10, line_buf),		\
1387		    ": Failed assertion: ", "\"", #e);			\
1388		_malloc_message("\"\n", "", "", "");			\
1389		abort();						\
1390	}								\
1391} while (0)
1392#else
1393#define assert(e)
1394#endif
1395
1396#ifdef MALLOC_STATS
1397/*
1398 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1399 */
1400static void
1401malloc_printf(const char *format, ...)
1402{
1403	char buf[4096];
1404	va_list ap;
1405
1406	va_start(ap, format);
1407	vsnprintf(buf, sizeof(buf), format, ap);
1408	va_end(ap);
1409	_malloc_message(buf, "", "", "");
1410}
1411#endif
1412
1413/******************************************************************************/
1414/*
1415 * Begin mutex.  We can't use normal pthread mutexes in all places, because
1416 * they require malloc()ed memory, which causes bootstrapping issues in some
1417 * cases.
1418 */
1419
1420static void
1421malloc_mutex_init(malloc_mutex_t *mutex)
1422{
1423	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1424
1425	mutex->lock = lock;
1426}
1427
1428static inline void
1429malloc_mutex_lock(malloc_mutex_t *mutex)
1430{
1431
1432	if (__isthreaded)
1433		_SPINLOCK(&mutex->lock);
1434}
1435
1436static inline void
1437malloc_mutex_unlock(malloc_mutex_t *mutex)
1438{
1439
1440	if (__isthreaded)
1441		_SPINUNLOCK(&mutex->lock);
1442}
1443
1444/*
1445 * End mutex.
1446 */
1447/******************************************************************************/
1448/*
1449 * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
1450 * after a period of spinning, because unbounded spinning would allow for
1451 * priority inversion.
1452 */
1453
1454/*
1455 * We use an unpublished interface to initialize pthread mutexes with an
1456 * allocation callback, in order to avoid infinite recursion.
1457 */
1458int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1459    void *(calloc_cb)(size_t, size_t));
1460
1461__weak_reference(_pthread_mutex_init_calloc_cb_stub,
1462    _pthread_mutex_init_calloc_cb);
1463
1464int
1465_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1466    void *(calloc_cb)(size_t, size_t))
1467{
1468
1469	return (0);
1470}
1471
1472static bool
1473malloc_spin_init(pthread_mutex_t *lock)
1474{
1475
1476	if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1477		return (true);
1478
1479	return (false);
1480}
1481
1482static inline void
1483malloc_spin_lock(pthread_mutex_t *lock)
1484{
1485
1486	if (__isthreaded) {
1487		if (_pthread_mutex_trylock(lock) != 0) {
1488			/* Exponentially back off if there are multiple CPUs. */
1489			if (ncpus > 1) {
1490				unsigned i;
1491				volatile unsigned j;
1492
1493				for (i = 1; i <= LG_SPIN_LIMIT; i++) {
1494					for (j = 0; j < (1U << i); j++) {
1495						CPU_SPINWAIT;
1496					}
1497
1498					if (_pthread_mutex_trylock(lock) == 0)
1499						return;
1500				}
1501			}
1502
1503			/*
1504			 * Spinning failed.  Block until the lock becomes
1505			 * available, in order to avoid indefinite priority
1506			 * inversion.
1507			 */
1508			_pthread_mutex_lock(lock);
1509		}
1510	}
1511}
1512
1513static inline void
1514malloc_spin_unlock(pthread_mutex_t *lock)
1515{
1516
1517	if (__isthreaded)
1518		_pthread_mutex_unlock(lock);
1519}
1520
1521/*
1522 * End spin lock.
1523 */
1524/******************************************************************************/
1525/*
1526 * Begin Utility functions/macros.
1527 */
1528
1529/* Return the chunk address for allocation address a. */
1530#define	CHUNK_ADDR2BASE(a)						\
1531	((void *)((uintptr_t)(a) & ~chunksize_mask))
1532
1533/* Return the chunk offset of address a. */
1534#define	CHUNK_ADDR2OFFSET(a)						\
1535	((size_t)((uintptr_t)(a) & chunksize_mask))
1536
1537/* Return the smallest chunk multiple that is >= s. */
1538#define	CHUNK_CEILING(s)						\
1539	(((s) + chunksize_mask) & ~chunksize_mask)
1540
1541/* Return the smallest quantum multiple that is >= a. */
1542#define	QUANTUM_CEILING(a)						\
1543	(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1544
1545/* Return the smallest cacheline multiple that is >= s. */
1546#define	CACHELINE_CEILING(s)						\
1547	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1548
1549/* Return the smallest subpage multiple that is >= s. */
1550#define	SUBPAGE_CEILING(s)						\
1551	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1552
1553/* Return the smallest medium size class that is >= s. */
1554#define	MEDIUM_CEILING(s)						\
1555	(((s) + mspace_mask) & ~mspace_mask)
1556
1557/* Return the smallest pagesize multiple that is >= s. */
1558#define	PAGE_CEILING(s)							\
1559	(((s) + PAGE_MASK) & ~PAGE_MASK)
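
/*
 * For example, with the default 4 MiB chunk size (LG_CHUNK_DEFAULT == 22),
 * chunksize_mask == 0x3fffff, so for a == 0x5012345:
 *
 *	CHUNK_ADDR2BASE(a)	== 0x5000000
 *	CHUNK_ADDR2OFFSET(a)	== 0x12345
 *	CHUNK_CEILING(a)	== 0x5400000
 */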
1560
1561#ifdef MALLOC_TINY
1562/* Compute the smallest power of 2 that is >= x. */
1563static size_t
1564pow2_ceil(size_t x)
1565{
1566
1567	x--;
1568	x |= x >> 1;
1569	x |= x >> 2;
1570	x |= x >> 4;
1571	x |= x >> 8;
1572	x |= x >> 16;
1573#if (SIZEOF_PTR == 8)
1574	x |= x >> 32;
1575#endif
1576	x++;
1577	return (x);
1578}
1579#endif
1580
1581/******************************************************************************/
1582
1583#ifdef MALLOC_DSS
1584static bool
1585base_pages_alloc_dss(size_t minsize)
1586{
1587
1588	/*
1589	 * Do special DSS allocation here, since base allocations don't need to
1590	 * be chunk-aligned.
1591	 */
1592	malloc_mutex_lock(&dss_mtx);
1593	if (dss_prev != (void *)-1) {
1594		intptr_t incr;
1595		size_t csize = CHUNK_CEILING(minsize);
1596
1597		do {
1598			/* Get the current end of the DSS. */
1599			dss_max = sbrk(0);
1600
1601			/*
1602			 * Calculate how much padding is necessary to
1603			 * chunk-align the end of the DSS.  Don't worry about
1604			 * dss_max not being chunk-aligned though.
1605			 */
1606			incr = (intptr_t)chunksize
1607			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1608			assert(incr >= 0);
1609			if ((size_t)incr < minsize)
1610				incr += csize;
1611
1612			dss_prev = sbrk(incr);
1613			if (dss_prev == dss_max) {
1614				/* Success. */
1615				dss_max = (void *)((intptr_t)dss_prev + incr);
1616				base_pages = dss_prev;
1617				base_next_addr = base_pages;
1618				base_past_addr = dss_max;
1619#ifdef MALLOC_STATS
1620				base_mapped += incr;
1621#endif
1622				malloc_mutex_unlock(&dss_mtx);
1623				return (false);
1624			}
1625		} while (dss_prev != (void *)-1);
1626	}
1627	malloc_mutex_unlock(&dss_mtx);
1628
1629	return (true);
1630}
1631#endif
1632
1633static bool
1634base_pages_alloc_mmap(size_t minsize)
1635{
1636	size_t csize;
1637
1638	assert(minsize != 0);
1639	csize = PAGE_CEILING(minsize);
1640	base_pages = pages_map(NULL, csize);
1641	if (base_pages == NULL)
1642		return (true);
1643	base_next_addr = base_pages;
1644	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1645#ifdef MALLOC_STATS
1646	base_mapped += csize;
1647#endif
1648
1649	return (false);
1650}
1651
1652static bool
1653base_pages_alloc(size_t minsize)
1654{
1655
1656#ifdef MALLOC_DSS
1657	if (opt_mmap && minsize != 0)
1658#endif
1659	{
1660		if (base_pages_alloc_mmap(minsize) == false)
1661			return (false);
1662	}
1663
1664#ifdef MALLOC_DSS
1665	if (opt_dss) {
1666		if (base_pages_alloc_dss(minsize) == false)
1667			return (false);
1668	}
1669
1670#endif
1671
1672	return (true);
1673}
1674
1675static void *
1676base_alloc(size_t size)
1677{
1678	void *ret;
1679	size_t csize;
1680
1681	/* Round size up to nearest multiple of the cacheline size. */
1682	csize = CACHELINE_CEILING(size);
1683
1684	malloc_mutex_lock(&base_mtx);
1685	/* Make sure there's enough space for the allocation. */
1686	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1687		if (base_pages_alloc(csize)) {
1688			malloc_mutex_unlock(&base_mtx);
1689			return (NULL);
1690		}
1691	}
1692	/* Allocate. */
1693	ret = base_next_addr;
1694	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1695	malloc_mutex_unlock(&base_mtx);
1696
1697	return (ret);
1698}
1699
1700static void *
1701base_calloc(size_t number, size_t size)
1702{
1703	void *ret;
1704
1705	ret = base_alloc(number * size);
1706	if (ret != NULL)
1707		memset(ret, 0, number * size);
1708
1709	return (ret);
1710}
1711
1712static extent_node_t *
1713base_node_alloc(void)
1714{
1715	extent_node_t *ret;
1716
1717	malloc_mutex_lock(&base_mtx);
1718	if (base_nodes != NULL) {
1719		ret = base_nodes;
1720		base_nodes = *(extent_node_t **)ret;
1721		malloc_mutex_unlock(&base_mtx);
1722	} else {
1723		malloc_mutex_unlock(&base_mtx);
1724		ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1725	}
1726
1727	return (ret);
1728}
1729
1730static void
1731base_node_dealloc(extent_node_t *node)
1732{
1733
1734	malloc_mutex_lock(&base_mtx);
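	/*
	 * Prepend the node to an intrusive free list: the first pointer-sized
	 * word of each free node stores the pointer to the next free node.
	 */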
1735	*(extent_node_t **)node = base_nodes;
1736	base_nodes = node;
1737	malloc_mutex_unlock(&base_mtx);
1738}
1739
1740/*
1741 * End Utility functions/macros.
1742 */
1743/******************************************************************************/
1744/*
1745 * Begin extent tree code.
1746 */
1747
1748#ifdef MALLOC_DSS
1749static inline int
1750extent_szad_comp(extent_node_t *a, extent_node_t *b)
1751{
1752	int ret;
1753	size_t a_size = a->size;
1754	size_t b_size = b->size;
1755
1756	ret = (a_size > b_size) - (a_size < b_size);
1757	if (ret == 0) {
1758		uintptr_t a_addr = (uintptr_t)a->addr;
1759		uintptr_t b_addr = (uintptr_t)b->addr;
1760
1761		ret = (a_addr > b_addr) - (a_addr < b_addr);
1762	}
1763
1764	return (ret);
1765}
1766
1767/* Wrap red-black tree macros in functions. */
1768rb_gen(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1769    link_szad, extent_szad_comp)
1770#endif
1771
1772static inline int
1773extent_ad_comp(extent_node_t *a, extent_node_t *b)
1774{
1775	uintptr_t a_addr = (uintptr_t)a->addr;
1776	uintptr_t b_addr = (uintptr_t)b->addr;
1777
1778	return ((a_addr > b_addr) - (a_addr < b_addr));
1779}
1780
1781/* Wrap red-black tree macros in functions. */
1782rb_gen(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1783    extent_ad_comp)
1784
1785/*
1786 * End extent tree code.
1787 */
1788/******************************************************************************/
1789/*
1790 * Begin chunk management functions.
1791 */
1792
1793static void *
1794pages_map(void *addr, size_t size)
1795{
1796	void *ret;
1797
1798	/*
1799	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1800	 * of existing mappings, and we only want to create new mappings.
1801	 */
1802	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1803	    -1, 0);
1804	assert(ret != NULL);
1805
1806	if (ret == MAP_FAILED)
1807		ret = NULL;
1808	else if (addr != NULL && ret != addr) {
1809		/*
1810		 * We succeeded in mapping memory, but not in the right place.
1811		 */
1812		if (munmap(ret, size) == -1) {
1813			char buf[STRERROR_BUF];
1814
1815			strerror_r(errno, buf, sizeof(buf));
1816			_malloc_message(_getprogname(),
1817			    ": (malloc) Error in munmap(): ", buf, "\n");
1818			if (opt_abort)
1819				abort();
1820		}
1821		ret = NULL;
1822	}
1823
1824	assert(ret == NULL || (addr == NULL && ret != addr)
1825	    || (addr != NULL && ret == addr));
1826	return (ret);
1827}
1828
1829static void
1830pages_unmap(void *addr, size_t size)
1831{
1832
1833	if (munmap(addr, size) == -1) {
1834		char buf[STRERROR_BUF];
1835
1836		strerror_r(errno, buf, sizeof(buf));
1837		_malloc_message(_getprogname(),
1838		    ": (malloc) Error in munmap(): ", buf, "\n");
1839		if (opt_abort)
1840			abort();
1841	}
1842}
1843
1844#ifdef MALLOC_DSS
1845static void *
1846chunk_alloc_dss(size_t size, bool *zero)
1847{
1848	void *ret;
1849
1850	ret = chunk_recycle_dss(size, zero);
1851	if (ret != NULL)
1852		return (ret);
1853
1854	/*
1855	 * sbrk() uses a signed increment argument, so take care not to
1856	 * interpret a huge allocation request as a negative increment.
1857	 */
1858	if ((intptr_t)size < 0)
1859		return (NULL);
1860
1861	malloc_mutex_lock(&dss_mtx);
1862	if (dss_prev != (void *)-1) {
1863		intptr_t incr;
1864
1865		/*
1866		 * The loop is necessary to recover from races with other
1867		 * threads that are using the DSS for something other than
1868		 * malloc.
1869		 */
1870		do {
1871			/* Get the current end of the DSS. */
1872			dss_max = sbrk(0);
1873
1874			/*
1875			 * Calculate how much padding is necessary to
1876			 * chunk-align the end of the DSS.
1877			 */
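			/*
			 * Illustrative example (assuming a 1 MiB chunk size):
			 * if dss_max == 0x00b01234 and size == 0x100000,
			 * then CHUNK_ADDR2OFFSET(dss_max) == 0x1234, so
			 * incr == 0xfedcc and ret == 0x00c00000, which is
			 * chunk-aligned; incr is then bumped to 0x1fedcc so
			 * that the new break ends up at ret + size.  If
			 * dss_max is already chunk-aligned, incr == size and
			 * ret == dss_max.
			 */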
1878			incr = (intptr_t)size
1879			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1880			if (incr == (intptr_t)size)
1881				ret = dss_max;
1882			else {
1883				ret = (void *)((intptr_t)dss_max + incr);
1884				incr += size;
1885			}
1886
1887			dss_prev = sbrk(incr);
1888			if (dss_prev == dss_max) {
1889				/* Success. */
1890				dss_max = (void *)((intptr_t)dss_prev + incr);
1891				malloc_mutex_unlock(&dss_mtx);
1892				*zero = true;
1893				return (ret);
1894			}
1895		} while (dss_prev != (void *)-1);
1896	}
1897	malloc_mutex_unlock(&dss_mtx);
1898
1899	return (NULL);
1900}
1901
1902static void *
1903chunk_recycle_dss(size_t size, bool *zero)
1904{
1905	extent_node_t *node, key;
1906
1907	key.addr = NULL;
1908	key.size = size;
1909	malloc_mutex_lock(&dss_mtx);
1910	node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1911	if (node != NULL) {
1912		void *ret = node->addr;
1913
1914		/* Remove node from the tree. */
1915		extent_tree_szad_remove(&dss_chunks_szad, node);
1916		if (node->size == size) {
1917			extent_tree_ad_remove(&dss_chunks_ad, node);
1918			base_node_dealloc(node);
1919		} else {
1920			/*
1921			 * Insert the remainder of node's address range as a
1922			 * smaller chunk.  Its position within dss_chunks_ad
1923			 * does not change.
1924			 */
1925			assert(node->size > size);
1926			node->addr = (void *)((uintptr_t)node->addr + size);
1927			node->size -= size;
1928			extent_tree_szad_insert(&dss_chunks_szad, node);
1929		}
1930		malloc_mutex_unlock(&dss_mtx);
1931
1932		if (*zero)
1933			memset(ret, 0, size);
1934		return (ret);
1935	}
1936	malloc_mutex_unlock(&dss_mtx);
1937
1938	return (NULL);
1939}
1940#endif
1941
1942static void *
1943chunk_alloc_mmap_slow(size_t size, bool unaligned)
1944{
1945	void *ret;
1946	size_t offset;
1947
1948	/* Beware size_t wrap-around. */
1949	if (size + chunksize <= size)
1950		return (NULL);
1951
1952	ret = pages_map(NULL, size + chunksize);
1953	if (ret == NULL)
1954		return (NULL);
1955
1956	/* Clean up unneeded leading/trailing space. */
1957	offset = CHUNK_ADDR2OFFSET(ret);
1958	if (offset != 0) {
1959		/* Note that mmap() returned an unaligned mapping. */
1960		unaligned = true;
1961
1962		/* Leading space. */
1963		pages_unmap(ret, chunksize - offset);
1964
1965		ret = (void *)((uintptr_t)ret +
1966		    (chunksize - offset));
1967
1968		/* Trailing space. */
1969		pages_unmap((void *)((uintptr_t)ret + size),
1970		    offset);
1971	} else {
1972		/* Trailing space only. */
1973		pages_unmap((void *)((uintptr_t)ret + size),
1974		    chunksize);
1975	}
1976
1977	/*
1978	 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
1979	 * the next chunk_alloc_mmap() execution tries the fast allocation
1980	 * method.
1981	 */
1982	if (unaligned == false)
1983		mmap_unaligned = false;
1984
1985	return (ret);
1986}
1987
1988static void *
1989chunk_alloc_mmap(size_t size)
1990{
1991	void *ret;
1992
1993	/*
1994	 * Ideally, there would be a way to specify alignment to mmap() (like
1995	 * NetBSD has), but in the absence of such a feature, we have to work
1996	 * hard to efficiently create aligned mappings.  The reliable, but
1997	 * slow method is to create a mapping that is over-sized, then trim the
1998	 * excess.  However, that always results in at least one call to
1999	 * pages_unmap().
2000	 *
2001	 * A more optimistic approach is to try mapping precisely the right
2002	 * amount, then try to append another mapping if alignment is off.  In
2003	 * practice, this works out well as long as the application is not
2004	 * interleaving mappings via direct mmap() calls.  If we do run into a
2005	 * situation where there is an interleaved mapping and we are unable to
2006	 * extend an unaligned mapping, our best option is to switch to the
2007	 * slow method until mmap() returns another aligned mapping.  This will
2008	 * tend to leave a gap in the memory map that is too small to cause
2009	 * later problems for the optimistic method.
2010	 *
2011	 * Another possible confounding factor is address space layout
2012	 * randomization (ASLR), which causes mmap(2) to disregard the
2013	 * requested address.  mmap_unaligned tracks whether the previous
2014	 * chunk_alloc_mmap() execution received any unaligned or relocated
2015	 * mappings, and if so, the current execution will immediately fall
2016	 * back to the slow method.  However, we keep track of whether the fast
2017	 * method would have succeeded, and if so, we make a note to try the
2018	 * fast method next time.
2019	 */
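	/*
	 * Rough sketch of the fast path below (illustrative only): map
	 * exactly size bytes; if the result happens to be chunk-aligned, use
	 * it as-is.  Otherwise, with offset == CHUNK_ADDR2OFFSET(ret), try to
	 * append (chunksize - offset) bytes and then unmap the leading
	 * (chunksize - offset) bytes, which leaves a chunk-aligned region of
	 * exactly size bytes.  For example, with a 1 MiB chunk size and ret
	 * at chunk offset 0xc0000, the mapping is extended by 0x40000 bytes
	 * and the first 0x40000 bytes are unmapped.
	 */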
2020
2021	if (mmap_unaligned == false) {
2022		size_t offset;
2023
2024		ret = pages_map(NULL, size);
2025		if (ret == NULL)
2026			return (NULL);
2027
2028		offset = CHUNK_ADDR2OFFSET(ret);
2029		if (offset != 0) {
2030			mmap_unaligned = true;
2031			/* Try to extend chunk boundary. */
2032			if (pages_map((void *)((uintptr_t)ret + size),
2033			    chunksize - offset) == NULL) {
2034				/*
2035				 * Extension failed.  Clean up, then revert to
2036				 * the reliable-but-expensive method.
2037				 */
2038				pages_unmap(ret, size);
2039				ret = chunk_alloc_mmap_slow(size, true);
2040			} else {
2041				/* Clean up unneeded leading space. */
2042				pages_unmap(ret, chunksize - offset);
2043				ret = (void *)((uintptr_t)ret + (chunksize -
2044				    offset));
2045			}
2046		}
2047	} else
2048		ret = chunk_alloc_mmap_slow(size, false);
2049
2050	return (ret);
2051}
2052
2053/*
2054 * If the caller specifies (*zero == false), it is still possible to receive
2055 * zeroed memory, in which case *zero is toggled to true.  arena_chunk_alloc()
2056 * takes advantage of this to avoid demanding zeroed chunks, while still
2057 * taking advantage of zeroed chunks if they are returned.
2058 */
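/*
 * Illustrative caller pattern (hypothetical; not taken verbatim from the
 * callers below):
 *
 *	bool zero = false;
 *	void *chunk = chunk_alloc(chunksize, &zero);
 *	if (chunk != NULL && zero)
 *		... the chunk is known to be zero-filled ...
 */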
2059static void *
2060chunk_alloc(size_t size, bool *zero)
2061{
2062	void *ret;
2063
2064	assert(size != 0);
2065	assert((size & chunksize_mask) == 0);
2066
2067#ifdef MALLOC_DSS
2068	if (opt_mmap)
2069#endif
2070	{
2071		ret = chunk_alloc_mmap(size);
2072		if (ret != NULL) {
2073			*zero = true;
2074			goto RETURN;
2075		}
2076	}
2077
2078#ifdef MALLOC_DSS
2079	if (opt_dss) {
2080		ret = chunk_alloc_dss(size, zero);
2081		if (ret != NULL)
2082			goto RETURN;
2083	}
2084#endif
2085
2086	/* All strategies for allocation failed. */
2087	ret = NULL;
2088RETURN:
2089#ifdef MALLOC_STATS
2090	if (ret != NULL) {
2091		malloc_mutex_lock(&chunks_mtx);
2092		stats_chunks.nchunks += (size / chunksize);
2093		stats_chunks.curchunks += (size / chunksize);
2094		if (stats_chunks.curchunks > stats_chunks.highchunks)
2095			stats_chunks.highchunks = stats_chunks.curchunks;
2096		malloc_mutex_unlock(&chunks_mtx);
2097	}
2098#endif
2099
2100	assert(CHUNK_ADDR2BASE(ret) == ret);
2101	return (ret);
2102}
2103
2104#ifdef MALLOC_DSS
2105static extent_node_t *
2106chunk_dealloc_dss_record(void *chunk, size_t size)
2107{
2108	extent_node_t *node, *prev, key;
2109
2110	key.addr = (void *)((uintptr_t)chunk + size);
2111	node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2112	/* Try to coalesce forward. */
2113	if (node != NULL && node->addr == key.addr) {
2114		/*
2115		 * Coalesce chunk with the following address range.  This does
2116		 * not change the position within dss_chunks_ad, so only
2117		 * remove/insert from/into dss_chunks_szad.
2118		 */
2119		extent_tree_szad_remove(&dss_chunks_szad, node);
2120		node->addr = chunk;
2121		node->size += size;
2122		extent_tree_szad_insert(&dss_chunks_szad, node);
2123	} else {
2124		/*
2125		 * Coalescing forward failed, so insert a new node.  Drop
2126		 * dss_mtx during node allocation, since it is possible that a
2127		 * new base chunk will be allocated.
2128		 */
2129		malloc_mutex_unlock(&dss_mtx);
2130		node = base_node_alloc();
2131		malloc_mutex_lock(&dss_mtx);
2132		if (node == NULL)
2133			return (NULL);
2134		node->addr = chunk;
2135		node->size = size;
2136		extent_tree_ad_insert(&dss_chunks_ad, node);
2137		extent_tree_szad_insert(&dss_chunks_szad, node);
2138	}
2139
2140	/* Try to coalesce backward. */
2141	prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2142	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2143	    chunk) {
2144		/*
2145		 * Coalesce chunk with the previous address range.  This does
2146		 * not change the position within dss_chunks_ad, so only
2147		 * remove/insert node from/into dss_chunks_szad.
2148		 */
2149		extent_tree_szad_remove(&dss_chunks_szad, prev);
2150		extent_tree_ad_remove(&dss_chunks_ad, prev);
2151
2152		extent_tree_szad_remove(&dss_chunks_szad, node);
2153		node->addr = prev->addr;
2154		node->size += prev->size;
2155		extent_tree_szad_insert(&dss_chunks_szad, node);
2156
2157		base_node_dealloc(prev);
2158	}
2159
2160	return (node);
2161}
2162
2163static bool
2164chunk_dealloc_dss(void *chunk, size_t size)
2165{
2166	bool ret;
2167
2168	malloc_mutex_lock(&dss_mtx);
2169	if ((uintptr_t)chunk >= (uintptr_t)dss_base
2170	    && (uintptr_t)chunk < (uintptr_t)dss_max) {
2171		extent_node_t *node;
2172
2173		/* Try to coalesce with other unused chunks. */
2174		node = chunk_dealloc_dss_record(chunk, size);
2175		if (node != NULL) {
2176			chunk = node->addr;
2177			size = node->size;
2178		}
2179
2180		/* Get the current end of the DSS. */
2181		dss_max = sbrk(0);
2182
2183		/*
2184		 * Try to shrink the DSS if this chunk is at the end of the
2185		 * DSS.  The sbrk() call here is subject to a race condition
2186		 * with threads that use brk(2) or sbrk(2) directly, but the
2187		 * alternative would be to leak memory for the sake of poorly
2188		 * designed multi-threaded programs.
2189		 */
2190		if ((void *)((uintptr_t)chunk + size) == dss_max
2191		    && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2192			/* Success. */
2193			dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2194
2195			if (node != NULL) {
2196				extent_tree_szad_remove(&dss_chunks_szad, node);
2197				extent_tree_ad_remove(&dss_chunks_ad, node);
2198				base_node_dealloc(node);
2199			}
2200		} else
2201			madvise(chunk, size, MADV_FREE);
2202
2203		ret = false;
2204		goto RETURN;
2205	}
2206
2207	ret = true;
2208RETURN:
2209	malloc_mutex_unlock(&dss_mtx);
2210	return (ret);
2211}
2212#endif
2213
2214static void
2215chunk_dealloc_mmap(void *chunk, size_t size)
2216{
2217
2218	pages_unmap(chunk, size);
2219}
2220
2221static void
2222chunk_dealloc(void *chunk, size_t size)
2223{
2224
2225	assert(chunk != NULL);
2226	assert(CHUNK_ADDR2BASE(chunk) == chunk);
2227	assert(size != 0);
2228	assert((size & chunksize_mask) == 0);
2229
2230#ifdef MALLOC_STATS
2231	malloc_mutex_lock(&chunks_mtx);
2232	stats_chunks.curchunks -= (size / chunksize);
2233	malloc_mutex_unlock(&chunks_mtx);
2234#endif
2235
2236#ifdef MALLOC_DSS
2237	if (opt_dss) {
2238		if (chunk_dealloc_dss(chunk, size) == false)
2239			return;
2240	}
2241
2242	if (opt_mmap)
2243#endif
2244		chunk_dealloc_mmap(chunk, size);
2245}
2246
2247/*
2248 * End chunk management functions.
2249 */
2250/******************************************************************************/
2251/*
2252 * Begin arena.
2253 */
2254
2255/*
2256 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2257 * code if necessary).
2258 */
2259static inline arena_t *
2260choose_arena(void)
2261{
2262	arena_t *ret;
2263
2264	/*
2265	 * We can only use TLS if this is a PIC library, since for the static
2266	 * library version, libc's malloc is used by TLS allocation, which
2267	 * introduces a bootstrapping issue.
2268	 */
2269#ifndef NO_TLS
2270	if (__isthreaded == false) {
2271		/* Avoid the overhead of TLS for single-threaded operation. */
2272		return (arenas[0]);
2273	}
2274
2275	ret = arenas_map;
2276	if (ret == NULL) {
2277		ret = choose_arena_hard();
2278		assert(ret != NULL);
2279	}
2280#else
2281	if (__isthreaded && narenas > 1) {
2282		unsigned long ind;
2283
2284		/*
2285		 * Hash _pthread_self() to one of the arenas.  There is a prime
2286		 * number of arenas, so this has a reasonable chance of
2287		 * working.  Even so, the hashing can be easily thwarted by
2288		 * inconvenient _pthread_self() values.  Without specific
2289		 * knowledge of how _pthread_self() calculates values, we can't
2290		 * easily do much better than this.
2291		 */
2292		ind = (unsigned long) _pthread_self() % narenas;
2293
2294		/*
2295		 * Optimistically assume that arenas[ind] has been initialized.
2296		 * At worst, we find out that some other thread has already
2297		 * done so, after acquiring the lock in preparation.  Note that
2298		 * this lazy locking also has the effect of lazily forcing
2299		 * cache coherency; without the lock acquisition, there's no
2300		 * guarantee that modification of arenas[ind] by another thread
2301		 * would be seen on this CPU for an arbitrary amount of time.
2302		 *
2303		 * In general, this approach to modifying a synchronized value
2304		 * isn't a good idea, but in this case we only ever modify the
2305		 * value once, so things work out well.
2306		 */
2307		ret = arenas[ind];
2308		if (ret == NULL) {
2309			/*
2310			 * Avoid races with another thread that may have already
2311			 * initialized arenas[ind].
2312			 */
2313			malloc_spin_lock(&arenas_lock);
2314			if (arenas[ind] == NULL)
2315				ret = arenas_extend((unsigned)ind);
2316			else
2317				ret = arenas[ind];
2318			malloc_spin_unlock(&arenas_lock);
2319		}
2320	} else
2321		ret = arenas[0];
2322#endif
2323
2324	assert(ret != NULL);
2325	return (ret);
2326}
2327
2328#ifndef NO_TLS
2329/*
2330 * Choose an arena based on a per-thread value (slow-path code only, called
2331 * only by choose_arena()).
2332 */
2333static arena_t *
2334choose_arena_hard(void)
2335{
2336	arena_t *ret;
2337
2338	assert(__isthreaded);
2339
2340	if (narenas > 1) {
2341		malloc_spin_lock(&arenas_lock);
2342		if ((ret = arenas[next_arena]) == NULL)
2343			ret = arenas_extend(next_arena);
2344		next_arena = (next_arena + 1) % narenas;
2345		malloc_spin_unlock(&arenas_lock);
2346	} else
2347		ret = arenas[0];
2348
2349	arenas_map = ret;
2350
2351	return (ret);
2352}
2353#endif
2354
2355static inline int
2356arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2357{
2358	uintptr_t a_chunk = (uintptr_t)a;
2359	uintptr_t b_chunk = (uintptr_t)b;
2360
2361	assert(a != NULL);
2362	assert(b != NULL);
2363
2364	return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2365}
2366
2367/* Wrap red-black tree macros in functions. */
2368rb_gen(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2369    arena_chunk_t, link_dirty, arena_chunk_comp)
2370
2371static inline int
2372arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2373{
2374	uintptr_t a_mapelm = (uintptr_t)a;
2375	uintptr_t b_mapelm = (uintptr_t)b;
2376
2377	assert(a != NULL);
2378	assert(b != NULL);
2379
2380	return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2381}
2382
2383/* Wrap red-black tree macros in functions. */
2384rb_gen(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2385    link, arena_run_comp)
2386
2387static inline int
2388arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2389{
2390	int ret;
2391	size_t a_size = a->bits & ~PAGE_MASK;
2392	size_t b_size = b->bits & ~PAGE_MASK;
2393
2394	ret = (a_size > b_size) - (a_size < b_size);
2395	if (ret == 0) {
2396		uintptr_t a_mapelm, b_mapelm;
2397
2398		if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
2399			a_mapelm = (uintptr_t)a;
2400		else {
2401			/*
2402			 * Treat keys as though they are lower than anything
2403			 * else.
2404			 */
2405			a_mapelm = 0;
2406		}
2407		b_mapelm = (uintptr_t)b;
2408
2409		ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2410	}
2411
2412	return (ret);
2413}
2414
2415/* Wrap red-black tree macros in functions. */
2416rb_gen(__unused static, arena_avail_tree_, arena_avail_tree_t,
2417    arena_chunk_map_t, link, arena_avail_comp)
2418
2419static inline void
2420arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2421{
2422	arena_chunk_t *chunk;
2423	arena_t *arena;
2424	size_t pagebeg, pageend, i;
2425
2426	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2427	arena = chunk->arena;
2428	pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2429	pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2430	    (uintptr_t)chunk) >> PAGE_SHIFT;
2431
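	/*
	 * Worked example (illustrative values, assuming 4 KiB pages): a
	 * region at chunk offset 0x3ff0 with bin->reg_size == 64 spans pages
	 * 3 and 4, since pagebeg == 0x3ff0 >> 12 == 3 and pageend ==
	 * (0x3ff0 + 63) >> 12 == 4; the loop below increments both pages'
	 * reference counts.
	 */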
2432	for (i = pagebeg; i <= pageend; i++) {
2433		size_t mapbits = chunk->map[i].bits;
2434
2435		if (mapbits & CHUNK_MAP_DIRTY) {
2436			assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2437			chunk->ndirty--;
2438			arena->ndirty--;
2439			mapbits ^= CHUNK_MAP_DIRTY;
2440		}
2441		assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
2442		mapbits += CHUNK_MAP_RC_ONE;
2443		chunk->map[i].bits = mapbits;
2444	}
2445}
2446
2447static inline void
2448arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2449{
2450	arena_chunk_t *chunk;
2451	arena_t *arena;
2452	size_t pagebeg, pageend, mapbits, i;
2453	bool dirtier = false;
2454
2455	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2456	arena = chunk->arena;
2457	pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2458	pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2459	    (uintptr_t)chunk) >> PAGE_SHIFT;
2460
2461	/* First page. */
2462	mapbits = chunk->map[pagebeg].bits;
2463	mapbits -= CHUNK_MAP_RC_ONE;
2464	if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2465		dirtier = true;
2466		assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2467		mapbits |= CHUNK_MAP_DIRTY;
2468		chunk->ndirty++;
2469		arena->ndirty++;
2470	}
2471	chunk->map[pagebeg].bits = mapbits;
2472
2473	if (pageend - pagebeg >= 1) {
2474		/*
2475		 * Interior pages are completely consumed by the object being
2476		 * deallocated, which means that the pages can be
2477		 * unconditionally marked dirty.
2478		 */
2479		for (i = pagebeg + 1; i < pageend; i++) {
2480			mapbits = chunk->map[i].bits;
2481			mapbits -= CHUNK_MAP_RC_ONE;
2482			assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2483			dirtier = true;
2484			assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2485			mapbits |= CHUNK_MAP_DIRTY;
2486			chunk->ndirty++;
2487			arena->ndirty++;
2488			chunk->map[i].bits = mapbits;
2489		}
2490
2491		/* Last page. */
2492		mapbits = chunk->map[pageend].bits;
2493		mapbits -= CHUNK_MAP_RC_ONE;
2494		if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2495			dirtier = true;
2496			assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2497			mapbits |= CHUNK_MAP_DIRTY;
2498			chunk->ndirty++;
2499			arena->ndirty++;
2500		}
2501		chunk->map[pageend].bits = mapbits;
2502	}
2503
2504	if (dirtier) {
2505		if (chunk->dirtied == false) {
2506			arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2507			    chunk);
2508			chunk->dirtied = true;
2509		}
2510
2511		/* Enforce opt_lg_dirty_mult. */
2512		if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
2513		    opt_lg_dirty_mult) < arena->ndirty)
2514			arena_purge(arena);
2515	}
2516}
2517
2518static inline void *
2519arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2520{
2521	void *ret;
2522	unsigned i, mask, bit, regind;
2523
2524	assert(run->magic == ARENA_RUN_MAGIC);
2525	assert(run->regs_minelm < bin->regs_mask_nelms);
2526
2527	/*
2528	 * Move the first check outside the loop, so that run->regs_minelm can
2529	 * be updated unconditionally, without the possibility of updating it
2530	 * multiple times.
2531	 */
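	/*
	 * Bitmap example (assuming 32-bit unsigned, i.e. LG_SIZEOF_INT == 2):
	 * if regs_minelm == 2 and regs_mask[2] == 0x28 (binary ...101000),
	 * then ffs() returns 4, bit == 3, and regind == (2 << 5) + 3 == 67;
	 * the returned region is at run + reg0_offset + 67 * reg_size, and
	 * clearing the bit leaves regs_mask[2] == 0x20.
	 */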
2532	i = run->regs_minelm;
2533	mask = run->regs_mask[i];
2534	if (mask != 0) {
2535		/* Usable allocation found. */
2536		bit = ffs((int)mask) - 1;
2537
2538		regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2539		assert(regind < bin->nregs);
2540		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2541		    + (bin->reg_size * regind));
2542
2543		/* Clear bit. */
2544		mask ^= (1U << bit);
2545		run->regs_mask[i] = mask;
2546
2547		arena_run_rc_incr(run, bin, ret);
2548
2549		return (ret);
2550	}
2551
2552	for (i++; i < bin->regs_mask_nelms; i++) {
2553		mask = run->regs_mask[i];
2554		if (mask != 0) {
2555			/* Usable allocation found. */
2556			bit = ffs((int)mask) - 1;
2557
2558			regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2559			assert(regind < bin->nregs);
2560			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2561			    + (bin->reg_size * regind));
2562
2563			/* Clear bit. */
2564			mask ^= (1U << bit);
2565			run->regs_mask[i] = mask;
2566
2567			/*
2568			 * Make a note that nothing before this element
2569			 * contains a free region.
2570			 */
2571			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2572
2573			arena_run_rc_incr(run, bin, ret);
2574
2575			return (ret);
2576		}
2577	}
2578	/* Not reached. */
2579	assert(0);
2580	return (NULL);
2581}
2582
2583static inline void
2584arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2585{
2586	unsigned shift, diff, regind, elm, bit;
2587
2588	assert(run->magic == ARENA_RUN_MAGIC);
2589
2590	/*
2591	 * Avoid doing division with a variable divisor if possible.  Using
2592	 * actual division here can reduce allocator throughput by over 20%!
2593	 */
2594	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2595
2596	/* Rescale (factor powers of 2 out of the numerator and denominator). */
2597	shift = ffs(size) - 1;
2598	diff >>= shift;
2599	size >>= shift;
2600
2601	if (size == 1) {
2602		/* The divisor was a power of 2. */
2603		regind = diff;
2604	} else {
2605		/*
2606		 * To divide by a number D that is not a power of two we
2607		 * multiply by (2^21 / D) and then right shift by 21 positions.
2608		 *
2609		 *   X / D
2610		 *
2611		 * becomes
2612		 *
2613		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
2614		 *
2615		 * We can omit the first three elements, because we never
2616		 * divide by 0, and 1 and 2 are both powers of two, which are
2617		 * handled above.
2618		 */
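		/*
		 * Worked example (values chosen for illustration): for
		 * bin->reg_size == 48, the rescaling above gives shift == 4
		 * and size == 3.  A pointer in the 8th region has
		 * diff == (7 * 48) >> 4 == 21, and with
		 * SIZE_INV(3) == (2^21 / 3) + 1 == 699051,
		 * (21 * 699051) >> 21 == 7 == regind, as expected.
		 */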
2619#define	SIZE_INV_SHIFT 21
2620#define	SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
2621		static const unsigned size_invs[] = {
2622		    SIZE_INV(3),
2623		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2624		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2625		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2626		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2627		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2628		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2629		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2630		};
2631
2632		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
2633			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
2634		else
2635			regind = diff / size;
2636#undef SIZE_INV
2637#undef SIZE_INV_SHIFT
2638	}
2639	assert(diff == regind * size);
2640	assert(regind < bin->nregs);
2641
2642	elm = regind >> (LG_SIZEOF_INT + 3);
2643	if (elm < run->regs_minelm)
2644		run->regs_minelm = elm;
2645	bit = regind - (elm << (LG_SIZEOF_INT + 3));
2646	assert((run->regs_mask[elm] & (1U << bit)) == 0);
2647	run->regs_mask[elm] |= (1U << bit);
2648
2649	arena_run_rc_decr(run, bin, ptr);
2650}
2651
2652static void
2653arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2654    bool zero)
2655{
2656	arena_chunk_t *chunk;
2657	size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2658
2659	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2660	old_ndirty = chunk->ndirty;
2661	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2662	    >> PAGE_SHIFT);
2663	total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2664	    PAGE_SHIFT;
2665	need_pages = (size >> PAGE_SHIFT);
2666	assert(need_pages > 0);
2667	assert(need_pages <= total_pages);
2668	rem_pages = total_pages - need_pages;
2669
2670	arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2671	arena->nactive += need_pages;
2672
2673	/* Keep track of trailing unused pages for later use. */
2674	if (rem_pages > 0) {
2675		chunk->map[run_ind+need_pages].bits = (rem_pages <<
2676		    PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2677		    CHUNK_MAP_FLAGS_MASK);
2678		chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2679		    PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2680		    CHUNK_MAP_FLAGS_MASK);
2681		arena_avail_tree_insert(&arena->runs_avail,
2682		    &chunk->map[run_ind+need_pages]);
2683	}
2684
2685	for (i = 0; i < need_pages; i++) {
2686		/* Zero if necessary. */
2687		if (zero) {
2688			if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2689			    == 0) {
2690				memset((void *)((uintptr_t)chunk + ((run_ind
2691				    + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2692				/* CHUNK_MAP_ZEROED is cleared below. */
2693			}
2694		}
2695
2696		/* Update dirty page accounting. */
2697		if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2698			chunk->ndirty--;
2699			arena->ndirty--;
2700			/* CHUNK_MAP_DIRTY is cleared below. */
2701		}
2702
2703		/* Initialize the chunk map. */
2704		if (large) {
2705			chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2706			    | CHUNK_MAP_ALLOCATED;
2707		} else {
2708			chunk->map[run_ind + i].bits = (i << CHUNK_MAP_PG_SHIFT)
2709			    | CHUNK_MAP_ALLOCATED;
2710		}
2711	}
2712
2713	if (large) {
2714		/*
2715		 * Set the run size only in the first element for large runs.
2716		 * This is primarily a debugging aid, since the lack of size
2717		 * info for trailing pages only matters if the application
2718		 * tries to operate on an interior pointer.
2719		 */
2720		chunk->map[run_ind].bits |= size;
2721	} else {
2722		/*
2723		 * Initialize the first page's refcount to 1, so that the run
2724		 * header is protected from dirty page purging.
2725		 */
2726		chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
2727	}
2728}
2729
2730static arena_chunk_t *
2731arena_chunk_alloc(arena_t *arena)
2732{
2733	arena_chunk_t *chunk;
2734	size_t i;
2735
2736	if (arena->spare != NULL) {
2737		chunk = arena->spare;
2738		arena->spare = NULL;
2739	} else {
2740		bool zero;
2741		size_t zeroed;
2742
2743		zero = false;
2744		chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
2745		if (chunk == NULL)
2746			return (NULL);
2747#ifdef MALLOC_STATS
2748		arena->stats.mapped += chunksize;
2749#endif
2750
2751		chunk->arena = arena;
2752		chunk->dirtied = false;
2753
2754		/*
2755		 * Claim that no pages are in use, since the header is merely
2756		 * overhead.
2757		 */
2758		chunk->ndirty = 0;
2759
2760		/*
2761		 * Initialize the map to contain one maximal free untouched run.
2762		 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
2763		 * chunk.
2764		 */
2765		zeroed = zero ? CHUNK_MAP_ZEROED : 0;
2766		for (i = 0; i < arena_chunk_header_npages; i++)
2767			chunk->map[i].bits = 0;
2768		chunk->map[i].bits = arena_maxclass | zeroed;
2769		for (i++; i < chunk_npages-1; i++)
2770			chunk->map[i].bits = zeroed;
2771		chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
2772	}
2773
2774	/* Insert the run into the runs_avail tree. */
2775	arena_avail_tree_insert(&arena->runs_avail,
2776	    &chunk->map[arena_chunk_header_npages]);
2777
2778	return (chunk);
2779}
2780
2781static void
2782arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2783{
2784
2785	if (arena->spare != NULL) {
2786		if (arena->spare->dirtied) {
2787			arena_chunk_tree_dirty_remove(
2788			    &chunk->arena->chunks_dirty, arena->spare);
2789			arena->ndirty -= arena->spare->ndirty;
2790		}
2791		chunk_dealloc((void *)arena->spare, chunksize);
2792#ifdef MALLOC_STATS
2793		arena->stats.mapped -= chunksize;
2794#endif
2795	}
2796
2797	/*
2798	 * Remove run from runs_avail, regardless of whether this chunk
2799	 * will be cached, so that the arena does not use it.  Dirty page
2800	 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2801	 * the chunks_* trees is sufficient for that purpose.
2802	 */
2803	arena_avail_tree_remove(&arena->runs_avail,
2804	    &chunk->map[arena_chunk_header_npages]);
2805
2806	arena->spare = chunk;
2807}
2808
2809static arena_run_t *
2810arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2811{
2812	arena_chunk_t *chunk;
2813	arena_run_t *run;
2814	arena_chunk_map_t *mapelm, key;
2815
2816	assert(size <= arena_maxclass);
2817	assert((size & PAGE_MASK) == 0);
2818
2819	/* Search the arena's chunks for the lowest best fit. */
2820	key.bits = size | CHUNK_MAP_KEY;
2821	mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2822	if (mapelm != NULL) {
2823		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2824		size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2825		    / sizeof(arena_chunk_map_t);
2826
2827		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2828		    << PAGE_SHIFT));
2829		arena_run_split(arena, run, size, large, zero);
2830		return (run);
2831	}
2832
2833	/*
2834	 * No usable runs.  Create a new chunk from which to allocate the run.
2835	 */
2836	chunk = arena_chunk_alloc(arena);
2837	if (chunk == NULL)
2838		return (NULL);
2839	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2840	    PAGE_SHIFT));
2841	/* Update page map. */
2842	arena_run_split(arena, run, size, large, zero);
2843	return (run);
2844}
2845
2846#ifdef MALLOC_DEBUG
2847static arena_chunk_t *
2848chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
2849{
2850	size_t *ndirty = (size_t *)arg;
2851
2852	assert(chunk->dirtied);
2853	*ndirty += chunk->ndirty;
2854	return (NULL);
2855}
2856#endif
2857
2858static void
2859arena_purge(arena_t *arena)
2860{
2861	arena_chunk_t *chunk;
2862	size_t i, npages;
2863#ifdef MALLOC_DEBUG
2864	size_t ndirty = 0;
2865
2866	arena_chunk_tree_dirty_iter(&arena->chunks_dirty, NULL,
2867	    chunks_dirty_iter_cb, (void *)&ndirty);
2868	assert(ndirty == arena->ndirty);
2869#endif
2870	assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
2871
2872#ifdef MALLOC_STATS
2873	arena->stats.npurge++;
2874#endif
2875
2876	/*
2877	 * Iterate downward through chunks until enough dirty memory has been
2878	 * purged.  Terminate as soon as possible in order to minimize the
2879	 * number of system calls, even if a chunk has only been partially
2880	 * purged.
2881	 */
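	/*
	 * Numeric example (assuming, say, opt_lg_dirty_mult == 5): purging is
	 * triggered once ndirty exceeds nactive / 32, and the loop below
	 * continues until ndirty drops to at most nactive / 64, i.e. half the
	 * trigger threshold, so that purging is not immediately re-triggered.
	 */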
2882
2883	while ((arena->nactive >> (opt_lg_dirty_mult + 1)) < arena->ndirty) {
2884		chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2885		assert(chunk != NULL);
2886
2887		for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2888			assert(i >= arena_chunk_header_npages);
2889			if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2890				chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2891				/* Find adjacent dirty run(s). */
2892				for (npages = 1; i > arena_chunk_header_npages
2893				    && (chunk->map[i - 1].bits &
2894				    CHUNK_MAP_DIRTY); npages++) {
2895					i--;
2896					chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2897				}
2898				chunk->ndirty -= npages;
2899				arena->ndirty -= npages;
2900
2901				madvise((void *)((uintptr_t)chunk + (i <<
2902				    PAGE_SHIFT)), (npages << PAGE_SHIFT),
2903				    MADV_FREE);
2904#ifdef MALLOC_STATS
2905				arena->stats.nmadvise++;
2906				arena->stats.purged += npages;
2907#endif
2908				if ((arena->nactive >> (opt_lg_dirty_mult + 1))
2909				    >= arena->ndirty)
2910					break;
2911			}
2912		}
2913
2914		if (chunk->ndirty == 0) {
2915			arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2916			    chunk);
2917			chunk->dirtied = false;
2918		}
2919	}
2920}
2921
2922static void
2923arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2924{
2925	arena_chunk_t *chunk;
2926	size_t size, run_ind, run_pages;
2927
2928	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2929	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2930	    >> PAGE_SHIFT);
2931	assert(run_ind >= arena_chunk_header_npages);
2932	assert(run_ind < chunk_npages);
2933	if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2934		size = chunk->map[run_ind].bits & ~PAGE_MASK;
2935	else
2936		size = run->bin->run_size;
2937	run_pages = (size >> PAGE_SHIFT);
2938	arena->nactive -= run_pages;
2939
2940	/* Mark pages as unallocated in the chunk map. */
2941	if (dirty) {
2942		size_t i;
2943
2944		for (i = 0; i < run_pages; i++) {
2945			/*
2946			 * When (dirty == true), *all* pages within the run
2947			 * need to have their dirty bits set, because only
2948			 * small runs can create a mixture of clean/dirty
2949			 * pages, and such runs are always passed to this
2950			 * function with (dirty == false).
2951			 */
2952			assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2953			    == 0);
2954			chunk->ndirty++;
2955			arena->ndirty++;
2956			chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2957		}
2958	} else {
2959		size_t i;
2960
2961		for (i = 0; i < run_pages; i++) {
2962			chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2963			    CHUNK_MAP_ALLOCATED);
2964		}
2965	}
2966	chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2967	    CHUNK_MAP_FLAGS_MASK);
2968	chunk->map[run_ind+run_pages-1].bits = size |
2969	    (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_FLAGS_MASK);
2970
2971	/* Try to coalesce forward. */
2972	if (run_ind + run_pages < chunk_npages &&
2973	    (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2974		size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2975		    ~PAGE_MASK;
2976
2977		/*
2978		 * Remove successor from runs_avail; the coalesced run is
2979		 * inserted later.
2980		 */
2981		arena_avail_tree_remove(&arena->runs_avail,
2982		    &chunk->map[run_ind+run_pages]);
2983
2984		size += nrun_size;
2985		run_pages = size >> PAGE_SHIFT;
2986
2987		assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2988		    == nrun_size);
2989		chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2990		    CHUNK_MAP_FLAGS_MASK);
2991		chunk->map[run_ind+run_pages-1].bits = size |
2992		    (chunk->map[run_ind+run_pages-1].bits &
2993		    CHUNK_MAP_FLAGS_MASK);
2994	}
2995
2996	/* Try to coalesce backward. */
2997	if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2998	    CHUNK_MAP_ALLOCATED) == 0) {
2999		size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
3000
3001		run_ind -= prun_size >> PAGE_SHIFT;
3002
3003		/*
3004		 * Remove predecessor from runs_avail; the coalesced run is
3005		 * inserted later.
3006		 */
3007		arena_avail_tree_remove(&arena->runs_avail,
3008		    &chunk->map[run_ind]);
3009
3010		size += prun_size;
3011		run_pages = size >> PAGE_SHIFT;
3012
3013		assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
3014		chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
3015		    CHUNK_MAP_FLAGS_MASK);
3016		chunk->map[run_ind+run_pages-1].bits = size |
3017		    (chunk->map[run_ind+run_pages-1].bits &
3018		    CHUNK_MAP_FLAGS_MASK);
3019	}
3020
3021	/* Insert into runs_avail, now that coalescing is complete. */
3022	arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
3023
3024	/*
3025	 * Deallocate chunk if it is now completely unused.  The bit
3026	 * manipulation checks whether the first run is unallocated and extends
3027	 * to the end of the chunk.
3028	 */
3029	if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
3030	    CHUNK_MAP_ALLOCATED)) == arena_maxclass)
3031		arena_chunk_dealloc(arena, chunk);
3032
3033	/*
3034	 * It is okay to do dirty page processing even if the chunk was
3035	 * deallocated above, since in that case it is the spare.  Waiting
3036	 * until after possible chunk deallocation to do dirty processing
3037	 * allows for an old spare to be fully deallocated, thus decreasing the
3038	 * chances of spuriously crossing the dirty page purging threshold.
3039	 */
3040	if (dirty) {
3041		if (chunk->dirtied == false) {
3042			arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
3043			    chunk);
3044			chunk->dirtied = true;
3045		}
3046
3047		/* Enforce opt_lg_dirty_mult. */
3048		if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
3049		    opt_lg_dirty_mult) < arena->ndirty)
3050			arena_purge(arena);
3051	}
3052}
3053
3054static void
3055arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3056    size_t oldsize, size_t newsize)
3057{
3058	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3059	size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
3060
3061	assert(oldsize > newsize);
3062
3063	/*
3064	 * Update the chunk map so that arena_run_dalloc() can treat the
3065	 * leading run as separately allocated.
3066	 */
3067	assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3068	chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
3069	    CHUNK_MAP_ALLOCATED;
3070	assert((chunk->map[pageind+head_npages].bits & CHUNK_MAP_DIRTY) == 0);
3071	chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
3072	    CHUNK_MAP_ALLOCATED;
3073
3074	arena_run_dalloc(arena, run, false);
3075}
3076
3077static void
3078arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3079    size_t oldsize, size_t newsize, bool dirty)
3080{
3081	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3082	size_t npages = newsize >> PAGE_SHIFT;
3083
3084	assert(oldsize > newsize);
3085
3086	/*
3087	 * Update the chunk map so that arena_run_dalloc() can treat the
3088	 * trailing run as separately allocated.
3089	 */
3090	assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3091	chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
3092	    CHUNK_MAP_ALLOCATED;
3093	assert((chunk->map[pageind+npages].bits & CHUNK_MAP_DIRTY) == 0);
3094	chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
3095	    | CHUNK_MAP_ALLOCATED;
3096
3097	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
3098	    dirty);
3099}
3100
3101static arena_run_t *
3102arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
3103{
3104	arena_chunk_map_t *mapelm;
3105	arena_run_t *run;
3106	unsigned i, remainder;
3107
3108	/* Look for a usable run. */
3109	mapelm = arena_run_tree_first(&bin->runs);
3110	if (mapelm != NULL) {
3111		arena_chunk_t *chunk;
3112		size_t pageind;
3113
3114		/* run is guaranteed to have available space. */
3115		arena_run_tree_remove(&bin->runs, mapelm);
3116
3117		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
3118		pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
3119		    sizeof(arena_chunk_map_t));
3120		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3121		    ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT))
3122		    << PAGE_SHIFT));
3123#ifdef MALLOC_STATS
3124		bin->stats.reruns++;
3125#endif
3126		return (run);
3127	}
3128	/* No existing runs have any space available. */
3129
3130	/* Allocate a new run. */
3131	run = arena_run_alloc(arena, bin->run_size, false, false);
3132	if (run == NULL)
3133		return (NULL);
3134
3135	/* Initialize run internals. */
3136	run->bin = bin;
3137
3138	for (i = 0; i < bin->regs_mask_nelms - 1; i++)
3139		run->regs_mask[i] = UINT_MAX;
3140	remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
3141	if (remainder == 0)
3142		run->regs_mask[i] = UINT_MAX;
3143	else {
3144		/* The last element has spare bits that need to be unset. */
3145		run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
3146		    - remainder));
3147	}
3148
3149	run->regs_minelm = 0;
3150
3151	run->nfree = bin->nregs;
3152#ifdef MALLOC_DEBUG
3153	run->magic = ARENA_RUN_MAGIC;
3154#endif
3155
3156#ifdef MALLOC_STATS
3157	bin->stats.nruns++;
3158	bin->stats.curruns++;
3159	if (bin->stats.curruns > bin->stats.highruns)
3160		bin->stats.highruns = bin->stats.curruns;
3161#endif
3162	return (run);
3163}
3164
3165/* bin->runcur must have space available before this function is called. */
3166static inline void *
3167arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3168{
3169	void *ret;
3170
3171	assert(run->magic == ARENA_RUN_MAGIC);
3172	assert(run->nfree > 0);
3173
3174	ret = arena_run_reg_alloc(run, bin);
3175	assert(ret != NULL);
3176	run->nfree--;
3177
3178	return (ret);
3179}
3180
3181/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3182static void *
3183arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3184{
3185
3186	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3187	if (bin->runcur == NULL)
3188		return (NULL);
3189	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3190	assert(bin->runcur->nfree > 0);
3191
3192	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3193}
3194
3195/*
3196 * Calculate bin->run_size such that it meets the following constraints:
3197 *
3198 *   *) bin->run_size >= min_run_size
3199 *   *) bin->run_size <= arena_maxclass
3200 *   *) bin->run_size <= RUN_MAX_SMALL
3201 *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3202 *   *) run header size < PAGE_SIZE
3203 *
3204 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3205 * also calculated here, since these settings are all interdependent.
3206 */
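/*
 * Reading the overhead test in the loop conditions below:
 * "(try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size" is a
 * fixed-point comparison equivalent to
 * try_reg0_offset / try_run_size > RUN_MAX_OVRHD / 2^RUN_BFP, i.e. run sizes
 * keep growing while the space lost to the run header (plus any padding
 * before region 0) exceeds the target overhead ratio.
 */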
3207static size_t
3208arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3209{
3210	size_t try_run_size, good_run_size;
3211	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3212	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3213
3214	assert(min_run_size >= PAGE_SIZE);
3215	assert(min_run_size <= arena_maxclass);
3216	assert(min_run_size <= RUN_MAX_SMALL);
3217
3218	/*
3219	 * Calculate known-valid settings before entering the run_size
3220	 * expansion loop, so that the first part of the loop always copies
3221	 * valid settings.
3222	 *
3223	 * The do..while loop iteratively reduces the number of regions until
3224	 * the run header and the regions no longer overlap.  A closed formula
3225	 * would be quite messy, since there is an interdependency between the
3226	 * header's mask length and the number of regions.
3227	 */
3228	try_run_size = min_run_size;
3229	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3230	    + 1; /* Counter-act try_nregs-- in loop. */
3231	do {
3232		try_nregs--;
3233		try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3234		    ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
3235		try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3236	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3237	    > try_reg0_offset);
3238
3239	/* run_size expansion loop. */
3240	do {
3241		/*
3242		 * Copy valid settings before trying more aggressive settings.
3243		 */
3244		good_run_size = try_run_size;
3245		good_nregs = try_nregs;
3246		good_mask_nelms = try_mask_nelms;
3247		good_reg0_offset = try_reg0_offset;
3248
3249		/* Try more aggressive settings. */
3250		try_run_size += PAGE_SIZE;
3251		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3252		    bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3253		do {
3254			try_nregs--;
3255			try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3256			    ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
3257			    1 : 0);
3258			try_reg0_offset = try_run_size - (try_nregs *
3259			    bin->reg_size);
3260		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
3261		    (try_mask_nelms - 1)) > try_reg0_offset);
3262	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3263	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3264	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
3265	    && (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)))
3266	    < PAGE_SIZE);
3267
3268	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3269	    <= good_reg0_offset);
3270	assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
3271
3272	/* Copy final settings. */
3273	bin->run_size = good_run_size;
3274	bin->nregs = good_nregs;
3275	bin->regs_mask_nelms = good_mask_nelms;
3276	bin->reg0_offset = good_reg0_offset;
3277
3278	return (good_run_size);
3279}
3280
3281#ifdef MALLOC_TCACHE
3282static inline void
3283tcache_event(tcache_t *tcache)
3284{
3285
3286	if (tcache_gc_incr == 0)
3287		return;
3288
3289	tcache->ev_cnt++;
3290	assert(tcache->ev_cnt <= tcache_gc_incr);
3291	if (tcache->ev_cnt >= tcache_gc_incr) {
3292		size_t binind = tcache->next_gc_bin;
3293		tcache_bin_t *tbin = tcache->tbins[binind];
3294
3295		if (tbin != NULL) {
3296			if (tbin->high_water == 0) {
3297				/*
3298				 * This bin went completely unused for an
3299				 * entire GC cycle, so throw away the tbin.
3300				 */
3301				assert(tbin->ncached == 0);
3302				tcache_bin_destroy(tcache, tbin, binind);
3303				tcache->tbins[binind] = NULL;
3304			} else {
3305				if (tbin->low_water > 0) {
3306					/*
3307					 * Flush (ceiling) half of the objects
3308					 * below the low water mark.
3309					 */
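					/*
					 * E.g. (hypothetical values), with
					 * ncached == 8 and low_water == 5,
					 * the expression below evaluates to
					 * 8 - 2 - 1 == 5, so ceil(5 / 2) == 3
					 * cached objects are flushed back to
					 * the arena.
					 */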
3310					tcache_bin_flush(tbin, binind,
3311					    tbin->ncached - (tbin->low_water >>
3312					    1) - (tbin->low_water & 1));
3313				}
3314				tbin->low_water = tbin->ncached;
3315				tbin->high_water = tbin->ncached;
3316			}
3317		}
3318
3319		tcache->next_gc_bin++;
3320		if (tcache->next_gc_bin == nbins)
3321			tcache->next_gc_bin = 0;
3322		tcache->ev_cnt = 0;
3323	}
3324}
3325
3326static inline void *
3327tcache_bin_alloc(tcache_bin_t *tbin)
3328{
3329
3330	if (tbin->ncached == 0)
3331		return (NULL);
3332	tbin->ncached--;
3333	if (tbin->ncached < tbin->low_water)
3334		tbin->low_water = tbin->ncached;
3335	return (tbin->slots[tbin->ncached]);
3336}
3337
3338static void
3339tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3340{
3341	arena_t *arena;
3342	arena_bin_t *bin;
3343	arena_run_t *run;
3344	void *ptr;
3345	unsigned i;
3346
3347	assert(tbin->ncached == 0);
3348
3349	arena = tcache->arena;
3350	bin = &arena->bins[binind];
3351	malloc_spin_lock(&arena->lock);
3352	for (i = 0; i < (tcache_nslots >> 1); i++) {
3353		if ((run = bin->runcur) != NULL && run->nfree > 0)
3354			ptr = arena_bin_malloc_easy(arena, bin, run);
3355		else
3356			ptr = arena_bin_malloc_hard(arena, bin);
3357		if (ptr == NULL)
3358			break;
3359		/*
3360		 * Fill tbin such that the objects lowest in memory are used
3361		 * first.
3362		 */
3363		tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
3364	}
3365#ifdef MALLOC_STATS
3366	bin->stats.nfills++;
3367	bin->stats.nrequests += tbin->tstats.nrequests;
3368	if (bin->reg_size <= small_maxclass) {
3369		arena->stats.nmalloc_small += (i - tbin->ncached);
3370		arena->stats.allocated_small += (i - tbin->ncached) *
3371		    bin->reg_size;
3372		arena->stats.nmalloc_small += tbin->tstats.nrequests;
3373	} else {
3374		arena->stats.nmalloc_medium += (i - tbin->ncached);
3375		arena->stats.allocated_medium += (i - tbin->ncached) *
3376		    bin->reg_size;
3377		arena->stats.nmalloc_medium += tbin->tstats.nrequests;
3378	}
3379	tbin->tstats.nrequests = 0;
3380#endif
3381	malloc_spin_unlock(&arena->lock);
3382	tbin->ncached = i;
3383	if (tbin->ncached > tbin->high_water)
3384		tbin->high_water = tbin->ncached;
3385}
3386
3387static inline void *
3388tcache_alloc(tcache_t *tcache, size_t size, bool zero)
3389{
3390	void *ret;
3391	tcache_bin_t *tbin;
3392	size_t binind;
3393
3394	if (size <= small_maxclass)
3395		binind = small_size2bin[size];
3396	else {
3397		binind = mbin0 + ((MEDIUM_CEILING(size) - medium_min) >>
3398		    lg_mspace);
3399	}
3400	assert(binind < nbins);
3401	tbin = tcache->tbins[binind];
3402	if (tbin == NULL) {
3403		tbin = tcache_bin_create(tcache->arena);
3404		if (tbin == NULL)
3405			return (NULL);
3406		tcache->tbins[binind] = tbin;
3407	}
3408
3409	ret = tcache_bin_alloc(tbin);
3410	if (ret == NULL) {
3411		ret = tcache_alloc_hard(tcache, tbin, binind);
3412		if (ret == NULL)
3413			return (NULL);
3414	}
3415
3416	if (zero == false) {
3417		if (opt_junk)
3418			memset(ret, 0xa5, size);
3419		else if (opt_zero)
3420			memset(ret, 0, size);
3421	} else
3422		memset(ret, 0, size);
3423
3424#ifdef MALLOC_STATS
3425	tbin->tstats.nrequests++;
3426#endif
3427	tcache_event(tcache);
3428	return (ret);
3429}
3430
3431static void *
3432tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3433{
3434	void *ret;
3435
3436	tcache_bin_fill(tcache, tbin, binind);
3437	ret = tcache_bin_alloc(tbin);
3438
3439	return (ret);
3440}
3441#endif
3442
3443static inline void *
3444arena_malloc_small(arena_t *arena, size_t size, bool zero)
3445{
3446	void *ret;
3447	arena_bin_t *bin;
3448	arena_run_t *run;
3449	size_t binind;
3450
3451	binind = small_size2bin[size];
3452	assert(binind < mbin0);
3453	bin = &arena->bins[binind];
3454	size = bin->reg_size;
3455
3456	malloc_spin_lock(&arena->lock);
3457	if ((run = bin->runcur) != NULL && run->nfree > 0)
3458		ret = arena_bin_malloc_easy(arena, bin, run);
3459	else
3460		ret = arena_bin_malloc_hard(arena, bin);
3461
3462	if (ret == NULL) {
3463		malloc_spin_unlock(&arena->lock);
3464		return (NULL);
3465	}
3466
3467#ifdef MALLOC_STATS
3468#  ifdef MALLOC_TCACHE
3469	if (__isthreaded == false) {
3470#  endif
3471		bin->stats.nrequests++;
3472		arena->stats.nmalloc_small++;
3473#  ifdef MALLOC_TCACHE
3474	}
3475#  endif
3476	arena->stats.allocated_small += size;
3477#endif
3478	malloc_spin_unlock(&arena->lock);
3479
3480	if (zero == false) {
3481		if (opt_junk)
3482			memset(ret, 0xa5, size);
3483		else if (opt_zero)
3484			memset(ret, 0, size);
3485	} else
3486		memset(ret, 0, size);
3487
3488	return (ret);
3489}
3490
3491static void *
3492arena_malloc_medium(arena_t *arena, size_t size, bool zero)
3493{
3494	void *ret;
3495	arena_bin_t *bin;
3496	arena_run_t *run;
3497	size_t binind;
3498
3499	size = MEDIUM_CEILING(size);
3500	binind = mbin0 + ((size - medium_min) >> lg_mspace);
3501	assert(binind < nbins);
3502	bin = &arena->bins[binind];
3503	assert(bin->reg_size == size);
3504
3505	malloc_spin_lock(&arena->lock);
3506	if ((run = bin->runcur) != NULL && run->nfree > 0)
3507		ret = arena_bin_malloc_easy(arena, bin, run);
3508	else
3509		ret = arena_bin_malloc_hard(arena, bin);
3510
3511	if (ret == NULL) {
3512		malloc_spin_unlock(&arena->lock);
3513		return (NULL);
3514	}
3515
3516#ifdef MALLOC_STATS
3517#  ifdef MALLOC_TCACHE
3518	if (__isthreaded == false) {
3519#  endif
3520		bin->stats.nrequests++;
3521		arena->stats.nmalloc_medium++;
3522#  ifdef MALLOC_TCACHE
3523	}
3524#  endif
3525	arena->stats.allocated_medium += size;
3526#endif
3527	malloc_spin_unlock(&arena->lock);
3528
3529	if (zero == false) {
3530		if (opt_junk)
3531			memset(ret, 0xa5, size);
3532		else if (opt_zero)
3533			memset(ret, 0, size);
3534	} else
3535		memset(ret, 0, size);
3536
3537	return (ret);
3538}
3539
3540static void *
3541arena_malloc_large(arena_t *arena, size_t size, bool zero)
3542{
3543	void *ret;
3544
3545	/* Large allocation. */
3546	size = PAGE_CEILING(size);
3547	malloc_spin_lock(&arena->lock);
3548	ret = (void *)arena_run_alloc(arena, size, true, zero);
3549	if (ret == NULL) {
3550		malloc_spin_unlock(&arena->lock);
3551		return (NULL);
3552	}
3553#ifdef MALLOC_STATS
3554	arena->stats.nmalloc_large++;
3555	arena->stats.allocated_large += size;
3556	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3557	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3558	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3559	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3560		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3561		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3562	}
3563#endif
3564	malloc_spin_unlock(&arena->lock);
3565
3566	if (zero == false) {
3567		if (opt_junk)
3568			memset(ret, 0xa5, size);
3569		else if (opt_zero)
3570			memset(ret, 0, size);
3571	}
3572
3573	return (ret);
3574}
3575
3576static inline void *
3577arena_malloc(size_t size, bool zero)
3578{
3579
3580	assert(size != 0);
3581	assert(QUANTUM_CEILING(size) <= arena_maxclass);
3582
3583	if (size <= bin_maxclass) {
3584#ifdef MALLOC_TCACHE
3585		if (__isthreaded && tcache_nslots) {
3586			tcache_t *tcache = tcache_tls;
3587			if ((uintptr_t)tcache > (uintptr_t)1)
3588				return (tcache_alloc(tcache, size, zero));
3589			else if (tcache == NULL) {
3590				tcache = tcache_create(choose_arena());
3591				if (tcache == NULL)
3592					return (NULL);
3593				return (tcache_alloc(tcache, size, zero));
3594			}
3595		}
3596#endif
3597		if (size <= small_maxclass) {
3598			return (arena_malloc_small(choose_arena(), size,
3599			    zero));
3600		} else {
3601			return (arena_malloc_medium(choose_arena(),
3602			    size, zero));
3603		}
3604	} else
3605		return (arena_malloc_large(choose_arena(), size, zero));
3606}
3607
3608static inline void *
3609imalloc(size_t size)
3610{
3611
3612	assert(size != 0);
3613
3614	if (size <= arena_maxclass)
3615		return (arena_malloc(size, false));
3616	else
3617		return (huge_malloc(size, false));
3618}
3619
3620static inline void *
3621icalloc(size_t size)
3622{
3623
3624	if (size <= arena_maxclass)
3625		return (arena_malloc(size, true));
3626	else
3627		return (huge_malloc(size, true));
3628}
3629
3630/* Only handles large allocations that require more than page alignment. */
3631static void *
3632arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3633{
3634	void *ret;
3635	size_t offset;
3636	arena_chunk_t *chunk;
3637
3638	assert((size & PAGE_MASK) == 0);
3639	assert((alignment & PAGE_MASK) == 0);
3640
3641	malloc_spin_lock(&arena->lock);
3642	ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3643	if (ret == NULL) {
3644		malloc_spin_unlock(&arena->lock);
3645		return (NULL);
3646	}
3647
3648	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3649
3650	offset = (uintptr_t)ret & (alignment - 1);
3651	assert((offset & PAGE_MASK) == 0);
3652	assert(offset < alloc_size);
3653	if (offset == 0)
3654		arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3655	else {
3656		size_t leadsize, trailsize;
3657
3658		leadsize = alignment - offset;
3659		if (leadsize > 0) {
3660			arena_run_trim_head(arena, chunk, ret, alloc_size,
3661			    alloc_size - leadsize);
3662			ret = (void *)((uintptr_t)ret + leadsize);
3663		}
3664
3665		trailsize = alloc_size - leadsize - size;
3666		if (trailsize != 0) {
3667			/* Trim trailing space. */
3668			assert(trailsize < alloc_size);
3669			arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3670			    size, false);
3671		}
3672	}
3673
3674#ifdef MALLOC_STATS
3675	arena->stats.nmalloc_large++;
3676	arena->stats.allocated_large += size;
3677	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3678	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3679	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3680	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3681		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3682		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3683	}
3684#endif
3685	malloc_spin_unlock(&arena->lock);
3686
3687	if (opt_junk)
3688		memset(ret, 0xa5, size);
3689	else if (opt_zero)
3690		memset(ret, 0, size);
3691	return (ret);
3692}
3693
3694static inline void *
3695ipalloc(size_t alignment, size_t size)
3696{
3697	void *ret;
3698	size_t ceil_size;
3699
3700	/*
3701	 * Round size up to the nearest multiple of alignment.
3702	 *
3703	 * This done, we can take advantage of the fact that for each small
3704	 * size class, every object is aligned at the smallest power of two
3705	 * that is non-zero in the base two representation of the size.  For
3706	 * example:
3707	 *
3708	 *   Size |   Base 2 | Minimum alignment
3709	 *   -----+----------+------------------
3710	 *     96 |  1100000 |  32
3711	 *    144 | 10100000 |  32
3712	 *    192 | 11000000 |  64
3713	 *
3714	 * Depending on runtime settings, it is possible that arena_malloc()
3715	 * will further round up to a power of two, but that never causes
3716	 * correctness issues.
3717	 */
3718	ceil_size = (size + (alignment - 1)) & (-alignment);
3719	/*
3720	 * (ceil_size < size) protects against the combination of maximal
3721	 * alignment and size greater than maximal alignment.
3722	 */
3723	if (ceil_size < size) {
3724		/* size_t overflow. */
3725		return (NULL);
3726	}
3727
3728	if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3729	    && ceil_size <= arena_maxclass))
3730		ret = arena_malloc(ceil_size, false);
3731	else {
3732		size_t run_size;
3733
3734		/*
3735		 * We can't achieve subpage alignment, so round up alignment
3736		 * permanently; it makes later calculations simpler.
3737		 */
3738		alignment = PAGE_CEILING(alignment);
3739		ceil_size = PAGE_CEILING(size);
3740		/*
3741		 * (ceil_size < size) protects against very large sizes within
3742		 * PAGE_SIZE of SIZE_T_MAX.
3743		 *
3744		 * (ceil_size + alignment < ceil_size) protects against the
3745		 * combination of maximal alignment and ceil_size large enough
3746		 * to cause overflow.  This is similar to the first overflow
3747		 * check above, but it needs to be repeated due to the new
3748		 * ceil_size value, which may now be *equal* to maximal
3749		 * alignment, whereas before we only detected overflow if the
3750		 * original size was *greater* than maximal alignment.
3751		 */
3752		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3753			/* size_t overflow. */
3754			return (NULL);
3755		}
3756
3757		/*
3758		 * Calculate the size of the over-size run that arena_palloc()
3759		 * would need to allocate in order to guarantee the alignment.
3760		 */
3761		if (ceil_size >= alignment)
3762			run_size = ceil_size + alignment - PAGE_SIZE;
3763		else {
3764			/*
3765			 * It is possible that (alignment << 1) will cause
3766			 * overflow, but it doesn't matter because we also
3767			 * subtract PAGE_SIZE, which in the case of overflow
3768			 * leaves us with a very large run_size.  That causes
3769			 * the first conditional below to fail, which means
3770			 * that the bogus run_size value never gets used for
3771			 * anything important.
3772			 */
3773			run_size = (alignment << 1) - PAGE_SIZE;
3774		}
3775
3776		if (run_size <= arena_maxclass) {
3777			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3778			    run_size);
3779		} else if (alignment <= chunksize)
3780			ret = huge_malloc(ceil_size, false);
3781		else
3782			ret = huge_palloc(alignment, ceil_size);
3783	}
3784
3785	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3786	return (ret);
3787}
3788
3789static bool
3790arena_is_large(const void *ptr)
3791{
3792	arena_chunk_t *chunk;
3793	size_t pageind, mapbits;
3794
3795	assert(ptr != NULL);
3796	assert(CHUNK_ADDR2BASE(ptr) != ptr);
3797
3798	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3799	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3800	mapbits = chunk->map[pageind].bits;
3801	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3802	return ((mapbits & CHUNK_MAP_LARGE) != 0);
3803}
3804
3805/* Return the size of the allocation pointed to by ptr. */
3806static size_t
3807arena_salloc(const void *ptr)
3808{
3809	size_t ret;
3810	arena_chunk_t *chunk;
3811	size_t pageind, mapbits;
3812
3813	assert(ptr != NULL);
3814	assert(CHUNK_ADDR2BASE(ptr) != ptr);
3815
3816	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3817	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3818	mapbits = chunk->map[pageind].bits;
3819	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3820	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3821		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
3822		    (uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
3823		    CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
3824		assert(run->magic == ARENA_RUN_MAGIC);
3825		ret = run->bin->reg_size;
3826	} else {
3827		ret = mapbits & ~PAGE_MASK;
3828		assert(ret != 0);
3829	}
3830
3831	return (ret);
3832}
3833
3834static inline size_t
3835isalloc(const void *ptr)
3836{
3837	size_t ret;
3838	arena_chunk_t *chunk;
3839
3840	assert(ptr != NULL);
3841
3842	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3843	if (chunk != ptr) {
3844		/* Region. */
3845		assert(chunk->arena->magic == ARENA_MAGIC);
3846
3847		ret = arena_salloc(ptr);
3848	} else {
3849		extent_node_t *node, key;
3850
3851		/* Chunk (huge allocation). */
3852
3853		malloc_mutex_lock(&huge_mtx);
3854
		/* Look up ptr in the tree of huge allocations. */
3856		key.addr = __DECONST(void *, ptr);
3857		node = extent_tree_ad_search(&huge, &key);
3858		assert(node != NULL);
3859
3860		ret = node->size;
3861
3862		malloc_mutex_unlock(&huge_mtx);
3863	}
3864
3865	return (ret);
3866}
3867
3868static inline void
3869arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3870    arena_chunk_map_t *mapelm)
3871{
3872	size_t pageind;
3873	arena_run_t *run;
3874	arena_bin_t *bin;
3875	size_t size;
3876
3877	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3878	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3879	    ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
3880	    PAGE_SHIFT));
3881	assert(run->magic == ARENA_RUN_MAGIC);
3882	bin = run->bin;
3883	size = bin->reg_size;
3884
3885	if (opt_junk)
3886		memset(ptr, 0x5a, size);
3887
3888	arena_run_reg_dalloc(run, bin, ptr, size);
3889	run->nfree++;
3890
3891	if (run->nfree == bin->nregs)
3892		arena_dalloc_bin_run(arena, chunk, run, bin);
3893	else if (run->nfree == 1 && run != bin->runcur) {
3894		/*
3895		 * Make sure that bin->runcur always refers to the lowest
3896		 * non-full run, if one exists.
3897		 */
3898		if (bin->runcur == NULL)
3899			bin->runcur = run;
3900		else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3901			/* Switch runcur. */
3902			if (bin->runcur->nfree > 0) {
3903				arena_chunk_t *runcur_chunk =
3904				    CHUNK_ADDR2BASE(bin->runcur);
3905				size_t runcur_pageind =
3906				    (((uintptr_t)bin->runcur -
3907				    (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3908				arena_chunk_map_t *runcur_mapelm =
3909				    &runcur_chunk->map[runcur_pageind];
3910
3911				/* Insert runcur. */
3912				arena_run_tree_insert(&bin->runs,
3913				    runcur_mapelm);
3914			}
3915			bin->runcur = run;
3916		} else {
3917			size_t run_pageind = (((uintptr_t)run -
3918			    (uintptr_t)chunk)) >> PAGE_SHIFT;
3919			arena_chunk_map_t *run_mapelm =
3920			    &chunk->map[run_pageind];
3921
3922			assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3923			    NULL);
3924			arena_run_tree_insert(&bin->runs, run_mapelm);
3925		}
3926	}
3927
3928#ifdef MALLOC_STATS
3929	if (size <= small_maxclass) {
3930		arena->stats.allocated_small -= size;
3931		arena->stats.ndalloc_small++;
3932	} else {
3933		arena->stats.allocated_medium -= size;
3934		arena->stats.ndalloc_medium++;
3935	}
3936#endif
3937}
3938
3939static void
3940arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3941    arena_bin_t *bin)
3942{
3943	size_t run_ind;
3944
3945	/* Deallocate run. */
3946	if (run == bin->runcur)
3947		bin->runcur = NULL;
3948	else if (bin->nregs != 1) {
3949		size_t run_pageind = (((uintptr_t)run -
3950		    (uintptr_t)chunk)) >> PAGE_SHIFT;
3951		arena_chunk_map_t *run_mapelm =
3952		    &chunk->map[run_pageind];
3953		/*
3954		 * This block's conditional is necessary because if the
3955		 * run only contains one region, then it never gets
3956		 * inserted into the non-full runs tree.
3957		 */
3958		arena_run_tree_remove(&bin->runs, run_mapelm);
3959	}
3960	/*
3961	 * Mark the first page as dirty.  The dirty bit for every other page in
3962	 * the run is already properly set, which means we can call
3963	 * arena_run_dalloc(..., false), thus potentially avoiding the needless
3964	 * creation of many dirty pages.
3965	 */
3966	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
3967	assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
3968	chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
3969	chunk->ndirty++;
3970	arena->ndirty++;
3971
3972#ifdef MALLOC_DEBUG
3973	run->magic = 0;
3974#endif
3975	arena_run_dalloc(arena, run, false);
3976#ifdef MALLOC_STATS
3977	bin->stats.curruns--;
3978#endif
3979
3980	if (chunk->dirtied == false) {
3981		arena_chunk_tree_dirty_insert(&arena->chunks_dirty, chunk);
3982		chunk->dirtied = true;
3983	}
3984	/* Enforce opt_lg_dirty_mult. */
3985	if (opt_lg_dirty_mult >= 0 && (arena->nactive >> opt_lg_dirty_mult) <
3986	    arena->ndirty)
3987		arena_purge(arena);
3988}
3989
3990#ifdef MALLOC_STATS
3991static void
3992arena_stats_print(arena_t *arena)
3993{
3994
3995	malloc_printf("dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
3996	    " %"PRIu64" madvise%s, %"PRIu64" purged\n",
3997	    arena->nactive, arena->ndirty,
3998	    arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
3999	    arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
4000	    arena->stats.purged);
4001
4002	malloc_printf("            allocated      nmalloc      ndalloc\n");
4003	malloc_printf("small:   %12zu %12"PRIu64" %12"PRIu64"\n",
4004	    arena->stats.allocated_small, arena->stats.nmalloc_small,
4005	    arena->stats.ndalloc_small);
4006	malloc_printf("medium:  %12zu %12"PRIu64" %12"PRIu64"\n",
4007	    arena->stats.allocated_medium, arena->stats.nmalloc_medium,
4008	    arena->stats.ndalloc_medium);
4009	malloc_printf("large:   %12zu %12"PRIu64" %12"PRIu64"\n",
4010	    arena->stats.allocated_large, arena->stats.nmalloc_large,
4011	    arena->stats.ndalloc_large);
4012	malloc_printf("total:   %12zu %12"PRIu64" %12"PRIu64"\n",
4013	    arena->stats.allocated_small + arena->stats.allocated_medium +
4014	    arena->stats.allocated_large, arena->stats.nmalloc_small +
4015	    arena->stats.nmalloc_medium + arena->stats.nmalloc_large,
4016	    arena->stats.ndalloc_small + arena->stats.ndalloc_medium +
4017	    arena->stats.ndalloc_large);
4018	malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
4019
4020	if (arena->stats.nmalloc_small + arena->stats.nmalloc_medium > 0) {
4021		unsigned i, gap_start;
4022#ifdef MALLOC_TCACHE
4023		malloc_printf("bins:     bin    size regs pgs  requests    "
4024		    "nfills  nflushes   newruns    reruns maxruns curruns\n");
4025#else
4026		malloc_printf("bins:     bin    size regs pgs  requests   "
4027		    "newruns    reruns maxruns curruns\n");
4028#endif
4029		for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
4030			if (arena->bins[i].stats.nruns == 0) {
4031				if (gap_start == UINT_MAX)
4032					gap_start = i;
4033			} else {
4034				if (gap_start != UINT_MAX) {
4035					if (i > gap_start + 1) {
4036						/*
4037						 * Gap of more than one size
4038						 * class.
4039						 */
4040						malloc_printf("[%u..%u]\n",
4041						    gap_start, i - 1);
4042					} else {
4043						/* Gap of one size class. */
4044						malloc_printf("[%u]\n",
4045						    gap_start);
4046					}
4047					gap_start = UINT_MAX;
4048				}
4049				malloc_printf(
4050				    "%13u %1s %5u %4u %3u %9"PRIu64" %9"PRIu64
4051#ifdef MALLOC_TCACHE
4052				    " %9"PRIu64" %9"PRIu64
4053#endif
4054				    " %9"PRIu64" %7zu %7zu\n",
4055				    i,
4056				    i < ntbins ? "T" : i < ntbins + nqbins ?
4057				    "Q" : i < ntbins + nqbins + ncbins ? "C" :
4058				    i < ntbins + nqbins + ncbins + nsbins ? "S"
4059				    : "M",
4060				    arena->bins[i].reg_size,
4061				    arena->bins[i].nregs,
4062				    arena->bins[i].run_size >> PAGE_SHIFT,
4063				    arena->bins[i].stats.nrequests,
4064#ifdef MALLOC_TCACHE
4065				    arena->bins[i].stats.nfills,
4066				    arena->bins[i].stats.nflushes,
4067#endif
4068				    arena->bins[i].stats.nruns,
4069				    arena->bins[i].stats.reruns,
4070				    arena->bins[i].stats.highruns,
4071				    arena->bins[i].stats.curruns);
4072			}
4073		}
4074		if (gap_start != UINT_MAX) {
4075			if (i > gap_start + 1) {
4076				/* Gap of more than one size class. */
4077				malloc_printf("[%u..%u]\n", gap_start, i - 1);
4078			} else {
4079				/* Gap of one size class. */
4080				malloc_printf("[%u]\n", gap_start);
4081			}
4082		}
4083	}
4084
4085	if (arena->stats.nmalloc_large > 0) {
4086		size_t i;
4087		ssize_t gap_start;
4088		size_t nlclasses = (chunksize - PAGE_SIZE) >> PAGE_SHIFT;
4089
4090		malloc_printf(
4091		    "large:   size pages nrequests   maxruns   curruns\n");
4092
4093		for (i = 0, gap_start = -1; i < nlclasses; i++) {
4094			if (arena->stats.lstats[i].nrequests == 0) {
4095				if (gap_start == -1)
4096					gap_start = i;
4097			} else {
4098				if (gap_start != -1) {
4099					malloc_printf("[%zu]\n", i - gap_start);
4100					gap_start = -1;
4101				}
4102				malloc_printf(
4103				    "%13zu %5zu %9"PRIu64" %9zu %9zu\n",
4104				    (i+1) << PAGE_SHIFT, i+1,
4105				    arena->stats.lstats[i].nrequests,
4106				    arena->stats.lstats[i].highruns,
4107				    arena->stats.lstats[i].curruns);
4108			}
4109		}
4110		if (gap_start != -1)
4111			malloc_printf("[%zu]\n", i - gap_start);
4112	}
4113}
4114#endif
4115
4116static void
4117stats_print_atexit(void)
4118{
4119
4120#if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
4121	unsigned i;
4122
4123	/*
4124	 * Merge stats from extant threads.  This is racy, since individual
4125	 * threads do not lock when recording tcache stats events.  As a
4126	 * consequence, the final stats may be slightly out of date by the time
4127	 * they are reported, if other threads continue to allocate.
4128	 */
4129	for (i = 0; i < narenas; i++) {
4130		arena_t *arena = arenas[i];
4131		if (arena != NULL) {
4132			tcache_t *tcache;
4133
4134			malloc_spin_lock(&arena->lock);
4135			ql_foreach(tcache, &arena->tcache_ql, link) {
4136				tcache_stats_merge(tcache, arena);
4137			}
4138			malloc_spin_unlock(&arena->lock);
4139		}
4140	}
4141#endif
4142	malloc_stats_print();
4143}
4144
4145#ifdef MALLOC_TCACHE
4146static void
4147tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
4148{
4149	arena_chunk_t *chunk;
4150	arena_t *arena;
4151	void *ptr;
4152	unsigned i, ndeferred, ncached;
4153
4154	for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
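		/*
		 * Each pass locks the arena that owns the first remaining
		 * object, frees every cached object belonging to that
		 * arena, and defers the rest to the next pass.  In the
		 * common case all cached objects share one arena, so a
		 * single pass (and lock acquisition) suffices.
		 */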
4155		ncached = ndeferred;
4156		/* Lock the arena associated with the first object. */
4157		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
4158		arena = chunk->arena;
4159		malloc_spin_lock(&arena->lock);
4160		/* Deallocate every object that belongs to the locked arena. */
4161		for (i = ndeferred = 0; i < ncached; i++) {
4162			ptr = tbin->slots[i];
4163			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4164			if (chunk->arena == arena) {
4165				size_t pageind = (((uintptr_t)ptr -
4166				    (uintptr_t)chunk) >> PAGE_SHIFT);
4167				arena_chunk_map_t *mapelm =
4168				    &chunk->map[pageind];
4169				arena_dalloc_bin(arena, chunk, ptr, mapelm);
4170			} else {
4171				/*
4172				 * This object was allocated via a different
4173				 * arena than the one that is currently locked.
4174				 * Stash the object, so that it can be handled
4175				 * in a future pass.
4176				 */
4177				tbin->slots[ndeferred] = ptr;
4178				ndeferred++;
4179			}
4180		}
4181#ifdef MALLOC_STATS
4182		arena->bins[binind].stats.nflushes++;
4183		{
4184			arena_bin_t *bin = &arena->bins[binind];
4185			bin->stats.nrequests += tbin->tstats.nrequests;
4186			if (bin->reg_size <= small_maxclass) {
4187				arena->stats.nmalloc_small +=
4188				    tbin->tstats.nrequests;
4189			} else {
4190				arena->stats.nmalloc_medium +=
4191				    tbin->tstats.nrequests;
4192			}
4193			tbin->tstats.nrequests = 0;
4194		}
4195#endif
4196		malloc_spin_unlock(&arena->lock);
4197	}
4198
4199	if (rem > 0) {
4200		/*
4201		 * Shift the remaining valid pointers to the base of the slots
4202		 * array.
4203		 */
4204		memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
4205		    rem * sizeof(void *));
4206	}
4207	tbin->ncached = rem;
4208}
4209
4210static inline void
4211tcache_dalloc(tcache_t *tcache, void *ptr)
4212{
4213	arena_t *arena;
4214	arena_chunk_t *chunk;
4215	arena_run_t *run;
4216	arena_bin_t *bin;
4217	tcache_bin_t *tbin;
4218	size_t pageind, binind;
4219	arena_chunk_map_t *mapelm;
4220
4221	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4222	arena = chunk->arena;
4223	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4224	mapelm = &chunk->map[pageind];
4225	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
4226	    ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
4227	    PAGE_SHIFT));
4228	assert(run->magic == ARENA_RUN_MAGIC);
4229	bin = run->bin;
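	/* Recover the bin index from the bin's offset within arena->bins. */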
4230	binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
4231	    sizeof(arena_bin_t);
4232	assert(binind < nbins);
4233
4234	if (opt_junk)
4235		memset(ptr, 0x5a, arena->bins[binind].reg_size);
4236
4237	tbin = tcache->tbins[binind];
4238	if (tbin == NULL) {
4239		tbin = tcache_bin_create(choose_arena());
4240		if (tbin == NULL) {
4241			malloc_spin_lock(&arena->lock);
4242			arena_dalloc_bin(arena, chunk, ptr, mapelm);
4243			malloc_spin_unlock(&arena->lock);
4244			return;
4245		}
4246		tcache->tbins[binind] = tbin;
4247	}
4248
4249	if (tbin->ncached == tcache_nslots)
4250		tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
4251	assert(tbin->ncached < tcache_nslots);
4252	tbin->slots[tbin->ncached] = ptr;
4253	tbin->ncached++;
4254	if (tbin->ncached > tbin->high_water)
4255		tbin->high_water = tbin->ncached;
4256
4257	tcache_event(tcache);
4258}
4259#endif
4260
4261static void
4262arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4263{
4264
4265	/* Large allocation. */
4266	malloc_spin_lock(&arena->lock);
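	/*
	 * The page map lookup below is always needed when statistics are
	 * enabled (to update the counters), but only for junk filling
	 * otherwise; the preprocessor conditionals arrange exactly that.
	 */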
4267
4268#ifndef MALLOC_STATS
4269	if (opt_junk)
4270#endif
4271	{
4272		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
4273		    PAGE_SHIFT;
4274		size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
4275
4276#ifdef MALLOC_STATS
4277		if (opt_junk)
4278#endif
4279			memset(ptr, 0x5a, size);
4280#ifdef MALLOC_STATS
4281		arena->stats.ndalloc_large++;
4282		arena->stats.allocated_large -= size;
4283		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
4284#endif
4285	}
4286
4287	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
4288	malloc_spin_unlock(&arena->lock);
4289}
4290
4291static inline void
4292arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4293{
4294	size_t pageind;
4295	arena_chunk_map_t *mapelm;
4296
4297	assert(arena != NULL);
4298	assert(arena->magic == ARENA_MAGIC);
4299	assert(chunk->arena == arena);
4300	assert(ptr != NULL);
4301	assert(CHUNK_ADDR2BASE(ptr) != ptr);
4302
4303	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4304	mapelm = &chunk->map[pageind];
4305	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
4306	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
4307		/* Small allocation. */
4308#ifdef MALLOC_TCACHE
4309		if (__isthreaded && tcache_nslots) {
4310			tcache_t *tcache = tcache_tls;
4311			if ((uintptr_t)tcache > (uintptr_t)1)
4312				tcache_dalloc(tcache, ptr);
4313			else {
4314				arena_dalloc_hard(arena, chunk, ptr, mapelm,
4315				    tcache);
4316			}
4317		} else {
4318#endif
4319			malloc_spin_lock(&arena->lock);
4320			arena_dalloc_bin(arena, chunk, ptr, mapelm);
4321			malloc_spin_unlock(&arena->lock);
4322#ifdef MALLOC_TCACHE
4323		}
4324#endif
4325	} else
4326		arena_dalloc_large(arena, chunk, ptr);
4327}
4328
4329#ifdef MALLOC_TCACHE
4330static void
4331arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4332    arena_chunk_map_t *mapelm, tcache_t *tcache)
4333{
4334
4335	if (tcache == NULL) {
4336		tcache = tcache_create(arena);
4337		if (tcache == NULL) {
4338			malloc_spin_lock(&arena->lock);
4339			arena_dalloc_bin(arena, chunk, ptr, mapelm);
4340			malloc_spin_unlock(&arena->lock);
4341		} else
4342			tcache_dalloc(tcache, ptr);
4343	} else {
4344		/* This thread is currently exiting, so directly deallocate. */
4345		assert(tcache == (void *)(uintptr_t)1);
4346		malloc_spin_lock(&arena->lock);
4347		arena_dalloc_bin(arena, chunk, ptr, mapelm);
4348		malloc_spin_unlock(&arena->lock);
4349	}
4350}
4351#endif
4352
4353static inline void
4354idalloc(void *ptr)
4355{
4356	arena_chunk_t *chunk;
4357
4358	assert(ptr != NULL);
4359
4360	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4361	if (chunk != ptr)
4362		arena_dalloc(chunk->arena, chunk, ptr);
4363	else
4364		huge_dalloc(ptr);
4365}
4366
4367static void
4368arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4369    size_t size, size_t oldsize)
4370{
4371
4372	assert(size < oldsize);
4373
4374	/*
4375	 * Shrink the run, and make trailing pages available for other
4376	 * allocations.
4377	 */
4378	malloc_spin_lock(&arena->lock);
4379	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
4380	    true);
4381#ifdef MALLOC_STATS
4382	arena->stats.ndalloc_large++;
4383	arena->stats.allocated_large -= oldsize;
4384	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4385
4386	arena->stats.nmalloc_large++;
4387	arena->stats.allocated_large += size;
4388	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4389	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4390	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4391	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4392		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4393		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4394	}
4395#endif
4396	malloc_spin_unlock(&arena->lock);
4397}
4398
4399static bool
4400arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4401    size_t size, size_t oldsize)
4402{
4403	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
4404	size_t npages = oldsize >> PAGE_SHIFT;
4405
4406	assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
4407
4408	/* Try to extend the run. */
4409	assert(size > oldsize);
4410	malloc_spin_lock(&arena->lock);
4411	if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
4412	    & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
4413	    ~PAGE_MASK) >= size - oldsize) {
4414		/*
4415		 * The next run is available and sufficiently large.  Split the
4416		 * following run, then merge the first part with the existing
4417		 * allocation.
4418		 */
4419		arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
4420		    ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
4421		    false);
4422
4423		chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
4424		    CHUNK_MAP_ALLOCATED;
4425		chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
4426		    CHUNK_MAP_ALLOCATED;
4427
4428#ifdef MALLOC_STATS
4429		arena->stats.ndalloc_large++;
4430		arena->stats.allocated_large -= oldsize;
4431		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4432
4433		arena->stats.nmalloc_large++;
4434		arena->stats.allocated_large += size;
4435		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4436		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4437		if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4438		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4439			arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4440			    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4441		}
4442#endif
4443		malloc_spin_unlock(&arena->lock);
4444		return (false);
4445	}
4446	malloc_spin_unlock(&arena->lock);
4447
4448	return (true);
4449}
4450
4451/*
 * Try to resize a large allocation, in order to avoid copying.  Growing an
 * object always fails if the run immediately following it is already in use.
4454 */
4455static bool
4456arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
4457{
4458	size_t psize;
4459
4460	psize = PAGE_CEILING(size);
4461	if (psize == oldsize) {
4462		/* Same size class. */
4463		if (opt_junk && size < oldsize) {
4464			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
4465			    size);
4466		}
4467		return (false);
4468	} else {
4469		arena_chunk_t *chunk;
4470		arena_t *arena;
4471
4472		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4473		arena = chunk->arena;
4474		assert(arena->magic == ARENA_MAGIC);
4475
4476		if (psize < oldsize) {
			/* Fill before shrinking in order to avoid a race. */
4478			if (opt_junk) {
4479				memset((void *)((uintptr_t)ptr + size), 0x5a,
4480				    oldsize - size);
4481			}
4482			arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4483			    oldsize);
4484			return (false);
4485		} else {
4486			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4487			    psize, oldsize);
4488			if (ret == false && opt_zero) {
4489				memset((void *)((uintptr_t)ptr + oldsize), 0,
4490				    size - oldsize);
4491			}
4492			return (ret);
4493		}
4494	}
4495}
4496
4497static void *
4498arena_ralloc(void *ptr, size_t size, size_t oldsize)
4499{
4500	void *ret;
4501	size_t copysize;
4502
4503	/*
4504	 * Try to avoid moving the allocation.
4505	 *
4506	 * posix_memalign() can cause allocation of "large" objects that are
4507	 * smaller than bin_maxclass (in order to meet alignment requirements).
4508	 * Therefore, do not assume that (oldsize <= bin_maxclass) indicates
4509	 * ptr refers to a bin-allocated object.
4510	 */
4511	if (oldsize <= arena_maxclass) {
		if (arena_is_large(ptr) == false) {
4513			if (size <= small_maxclass) {
4514				if (oldsize <= small_maxclass &&
4515				    small_size2bin[size] ==
4516				    small_size2bin[oldsize])
4517					goto IN_PLACE;
4518			} else if (size <= bin_maxclass) {
4519				if (small_maxclass < oldsize && oldsize <=
4520				    bin_maxclass && MEDIUM_CEILING(size) ==
4521				    MEDIUM_CEILING(oldsize))
4522					goto IN_PLACE;
4523			}
4524		} else {
4525			assert(size <= arena_maxclass);
4526			if (size > bin_maxclass) {
4527				if (arena_ralloc_large(ptr, size, oldsize) ==
4528				    false)
4529					return (ptr);
4530			}
4531		}
4532	}
4533
4551	/*
4552	 * If we get here, then size and oldsize are different enough that we
4553	 * need to move the object.  In that case, fall back to allocating new
4554	 * space and copying.
4555	 */
4556	ret = arena_malloc(size, false);
4557	if (ret == NULL)
4558		return (NULL);
4559
	/* Junk/zero filling was already done by arena_malloc(). */
4561	copysize = (size < oldsize) ? size : oldsize;
4562	memcpy(ret, ptr, copysize);
4563	idalloc(ptr);
4564	return (ret);
4565IN_PLACE:
4566	if (opt_junk && size < oldsize)
4567		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4568	else if (opt_zero && size > oldsize)
4569		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4570	return (ptr);
4571}
4572
4573static inline void *
4574iralloc(void *ptr, size_t size)
4575{
4576	size_t oldsize;
4577
4578	assert(ptr != NULL);
4579	assert(size != 0);
4580
4581	oldsize = isalloc(ptr);
4582
4583	if (size <= arena_maxclass)
4584		return (arena_ralloc(ptr, size, oldsize));
4585	else
4586		return (huge_ralloc(ptr, size, oldsize));
4587}
4588
4589static bool
4590arena_new(arena_t *arena, unsigned ind)
4591{
4592	unsigned i;
4593	arena_bin_t *bin;
4594	size_t prev_run_size;
4595
4596	if (malloc_spin_init(&arena->lock))
4597		return (true);
4598
4599#ifdef MALLOC_STATS
4600	memset(&arena->stats, 0, sizeof(arena_stats_t));
4601	arena->stats.lstats = (malloc_large_stats_t *)base_alloc(
4602	    sizeof(malloc_large_stats_t) * ((chunksize - PAGE_SIZE) >>
4603	        PAGE_SHIFT));
4604	if (arena->stats.lstats == NULL)
4605		return (true);
4606	memset(arena->stats.lstats, 0, sizeof(malloc_large_stats_t) *
4607	    ((chunksize - PAGE_SIZE) >> PAGE_SHIFT));
4608#  ifdef MALLOC_TCACHE
4609	ql_new(&arena->tcache_ql);
4610#  endif
4611#endif
4612
4613	/* Initialize chunks. */
4614	arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4615	arena->spare = NULL;
4616
4617	arena->nactive = 0;
4618	arena->ndirty = 0;
4619
4620	arena_avail_tree_new(&arena->runs_avail);
4621
4622	/* Initialize bins. */
4623	prev_run_size = PAGE_SIZE;
4624
4625	i = 0;
4626#ifdef MALLOC_TINY
4627	/* (2^n)-spaced tiny bins. */
4628	for (; i < ntbins; i++) {
4629		bin = &arena->bins[i];
4630		bin->runcur = NULL;
4631		arena_run_tree_new(&bin->runs);
4632
4633		bin->reg_size = (1U << (LG_TINY_MIN + i));
4634
4635		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4636
4637#ifdef MALLOC_STATS
4638		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4639#endif
4640	}
4641#endif
4642
4643	/* Quantum-spaced bins. */
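	/* E.g., with a 16 byte quantum: 16, 32, 48, ..., qspace_max. */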
4644	for (; i < ntbins + nqbins; i++) {
4645		bin = &arena->bins[i];
4646		bin->runcur = NULL;
4647		arena_run_tree_new(&bin->runs);
4648
4649		bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
4650
4651		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4652
4653#ifdef MALLOC_STATS
4654		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4655#endif
4656	}
4657
4658	/* Cacheline-spaced bins. */
4659	for (; i < ntbins + nqbins + ncbins; i++) {
4660		bin = &arena->bins[i];
4661		bin->runcur = NULL;
4662		arena_run_tree_new(&bin->runs);
4663
4664		bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4665		    LG_CACHELINE);
4666
4667		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4668
4669#ifdef MALLOC_STATS
4670		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4671#endif
4672	}
4673
4674	/* Subpage-spaced bins. */
4675	for (; i < ntbins + nqbins + ncbins + nsbins; i++) {
4676		bin = &arena->bins[i];
4677		bin->runcur = NULL;
4678		arena_run_tree_new(&bin->runs);
4679
4680		bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4681		    << LG_SUBPAGE);
4682
4683		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4684
4685#ifdef MALLOC_STATS
4686		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4687#endif
4688	}
4689
4690	/* Medium bins. */
4691	for (; i < nbins; i++) {
4692		bin = &arena->bins[i];
4693		bin->runcur = NULL;
4694		arena_run_tree_new(&bin->runs);
4695
4696		bin->reg_size = medium_min + ((i - (ntbins + nqbins + ncbins +
4697		    nsbins)) << lg_mspace);
4698
4699		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4700
4701#ifdef MALLOC_STATS
4702		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4703#endif
4704	}
4705
4706#ifdef MALLOC_DEBUG
4707	arena->magic = ARENA_MAGIC;
4708#endif
4709
4710	return (false);
4711}
4712
4713/* Create a new arena and insert it into the arenas array at index ind. */
4714static arena_t *
4715arenas_extend(unsigned ind)
4716{
4717	arena_t *ret;
4718
4719	/* Allocate enough space for trailing bins. */
4720	ret = (arena_t *)base_alloc(sizeof(arena_t)
4721	    + (sizeof(arena_bin_t) * (nbins - 1)));
4722	if (ret != NULL && arena_new(ret, ind) == false) {
4723		arenas[ind] = ret;
4724		return (ret);
4725	}
4726	/* Only reached if there is an OOM error. */
4727
4728	/*
4729	 * OOM here is quite inconvenient to propagate, since dealing with it
4730	 * would require a check for failure in the fast path.  Instead, punt
4731	 * by using arenas[0].  In practice, this is an extremely unlikely
4732	 * failure.
4733	 */
4734	_malloc_message(_getprogname(),
4735	    ": (malloc) Error initializing arena\n", "", "");
4736	if (opt_abort)
4737		abort();
4738
4739	return (arenas[0]);
4740}
4741
4742#ifdef MALLOC_TCACHE
4743static tcache_bin_t *
4744tcache_bin_create(arena_t *arena)
4745{
4746	tcache_bin_t *ret;
4747	size_t tsize;
4748
4749	tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4750	if (tsize <= small_maxclass)
4751		ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
4752	else if (tsize <= bin_maxclass)
4753		ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
4754	else
4755		ret = (tcache_bin_t *)imalloc(tsize);
4756	if (ret == NULL)
4757		return (NULL);
4758#ifdef MALLOC_STATS
4759	memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
4760#endif
4761	ret->low_water = 0;
4762	ret->high_water = 0;
4763	ret->ncached = 0;
4764
4765	return (ret);
4766}
4767
4768static void
4769tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
4770{
4771	arena_t *arena;
4772	arena_chunk_t *chunk;
4773	size_t pageind, tsize;
4774	arena_chunk_map_t *mapelm;
4775
4776	chunk = CHUNK_ADDR2BASE(tbin);
4777	arena = chunk->arena;
4778	pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
4779	mapelm = &chunk->map[pageind];
4780
4781#ifdef MALLOC_STATS
4782	if (tbin->tstats.nrequests != 0) {
4783		arena_t *arena = tcache->arena;
4784		arena_bin_t *bin = &arena->bins[binind];
4785		malloc_spin_lock(&arena->lock);
4786		bin->stats.nrequests += tbin->tstats.nrequests;
4787		if (bin->reg_size <= small_maxclass)
4788			arena->stats.nmalloc_small += tbin->tstats.nrequests;
4789		else
4790			arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4791		malloc_spin_unlock(&arena->lock);
4792	}
4793#endif
4794
4795	assert(tbin->ncached == 0);
4796	tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4797	if (tsize <= bin_maxclass) {
4798		malloc_spin_lock(&arena->lock);
4799		arena_dalloc_bin(arena, chunk, tbin, mapelm);
4800		malloc_spin_unlock(&arena->lock);
4801	} else
4802		idalloc(tbin);
4803}
4804
4805#ifdef MALLOC_STATS
4806static void
4807tcache_stats_merge(tcache_t *tcache, arena_t *arena)
4808{
4809	unsigned i;
4810
4811	/* Merge and reset tcache stats. */
4812	for (i = 0; i < mbin0; i++) {
4813		arena_bin_t *bin = &arena->bins[i];
4814		tcache_bin_t *tbin = tcache->tbins[i];
4815		if (tbin != NULL) {
4816			bin->stats.nrequests += tbin->tstats.nrequests;
4817			arena->stats.nmalloc_small += tbin->tstats.nrequests;
4818			tbin->tstats.nrequests = 0;
4819		}
4820	}
4821	for (; i < nbins; i++) {
4822		arena_bin_t *bin = &arena->bins[i];
4823		tcache_bin_t *tbin = tcache->tbins[i];
4824		if (tbin != NULL) {
4825			bin->stats.nrequests += tbin->tstats.nrequests;
4826			arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4827			tbin->tstats.nrequests = 0;
4828		}
4829	}
4830}
4831#endif
4832
4833static tcache_t *
4834tcache_create(arena_t *arena)
4835{
4836	tcache_t *tcache;
4837
4838	if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4839	    small_maxclass) {
4840		tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
4841		    + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4842	} else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4843	    bin_maxclass) {
4844		tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
4845		    + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4846	} else {
4847		tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
4848		    (sizeof(tcache_bin_t *) * (nbins - 1)));
4849	}
4850
4851	if (tcache == NULL)
4852		return (NULL);
4853
4854#ifdef MALLOC_STATS
4855	/* Link into list of extant tcaches. */
4856	malloc_spin_lock(&arena->lock);
4857	ql_elm_new(tcache, link);
4858	ql_tail_insert(&arena->tcache_ql, tcache, link);
4859	malloc_spin_unlock(&arena->lock);
4860#endif
4861
4862	tcache->arena = arena;
4863
4864	tcache_tls = tcache;
4865
4866	return (tcache);
4867}
4868
4869static void
4870tcache_destroy(tcache_t *tcache)
4871{
4872	unsigned i;
4873
4874#ifdef MALLOC_STATS
4875	/* Unlink from list of extant tcaches. */
4876	malloc_spin_lock(&tcache->arena->lock);
4877	ql_remove(&tcache->arena->tcache_ql, tcache, link);
4878	tcache_stats_merge(tcache, tcache->arena);
4879	malloc_spin_unlock(&tcache->arena->lock);
4880#endif
4881
4882	for (i = 0; i < nbins; i++) {
4883		tcache_bin_t *tbin = tcache->tbins[i];
4884		if (tbin != NULL) {
4885			tcache_bin_flush(tbin, i, 0);
4886			tcache_bin_destroy(tcache, tbin, i);
4887		}
4888	}
4889
4890	if (arena_salloc(tcache) <= bin_maxclass) {
4891		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
4892		arena_t *arena = chunk->arena;
4893		size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
4894		    PAGE_SHIFT);
4895		arena_chunk_map_t *mapelm = &chunk->map[pageind];
4896
4897		malloc_spin_lock(&arena->lock);
4898		arena_dalloc_bin(arena, chunk, tcache, mapelm);
4899		malloc_spin_unlock(&arena->lock);
4900	} else
4901		idalloc(tcache);
4902}
4903#endif
4904
4905/*
4906 * End arena.
4907 */
4908/******************************************************************************/
4909/*
4910 * Begin general internal functions.
4911 */
4912
4913static void *
4914huge_malloc(size_t size, bool zero)
4915{
4916	void *ret;
4917	size_t csize;
4918	extent_node_t *node;
4919
4920	/* Allocate one or more contiguous chunks for this request. */
4921
4922	csize = CHUNK_CEILING(size);
4923	if (csize == 0) {
4924		/* size is large enough to cause size_t wrap-around. */
4925		return (NULL);
4926	}
4927
4928	/* Allocate an extent node with which to track the chunk. */
4929	node = base_node_alloc();
4930	if (node == NULL)
4931		return (NULL);
4932
4933	ret = chunk_alloc(csize, &zero);
4934	if (ret == NULL) {
4935		base_node_dealloc(node);
4936		return (NULL);
4937	}
4938
4939	/* Insert node into huge. */
4940	node->addr = ret;
4941	node->size = csize;
4942
4943	malloc_mutex_lock(&huge_mtx);
4944	extent_tree_ad_insert(&huge, node);
4945#ifdef MALLOC_STATS
4946	huge_nmalloc++;
4947	huge_allocated += csize;
4948#endif
4949	malloc_mutex_unlock(&huge_mtx);
4950
4951	if (zero == false) {
4952		if (opt_junk)
4953			memset(ret, 0xa5, csize);
4954		else if (opt_zero)
4955			memset(ret, 0, csize);
4956	}
4957
4958	return (ret);
4959}
4960
4961/* Only handles large allocations that require more than chunk alignment. */
4962static void *
4963huge_palloc(size_t alignment, size_t size)
4964{
4965	void *ret;
4966	size_t alloc_size, chunk_size, offset;
4967	extent_node_t *node;
4968	bool zero;
4969
4970	/*
4971	 * This allocation requires alignment that is even larger than chunk
4972	 * alignment.  This means that huge_malloc() isn't good enough.
4973	 *
4974	 * Allocate almost twice as many chunks as are demanded by the size or
4975	 * alignment, in order to assure the alignment can be achieved, then
4976	 * unmap leading and trailing chunks.
4977	 */
4978	assert(alignment >= chunksize);
4979
4980	chunk_size = CHUNK_CEILING(size);
4981
4982	if (size >= alignment)
4983		alloc_size = chunk_size + alignment - chunksize;
4984	else
4985		alloc_size = (alignment << 1) - chunksize;
4986
4987	/* Allocate an extent node with which to track the chunk. */
4988	node = base_node_alloc();
4989	if (node == NULL)
4990		return (NULL);
4991
4992	zero = false;
4993	ret = chunk_alloc(alloc_size, &zero);
4994	if (ret == NULL) {
4995		base_node_dealloc(node);
4996		return (NULL);
4997	}
4998
4999	offset = (uintptr_t)ret & (alignment - 1);
5000	assert((offset & chunksize_mask) == 0);
5001	assert(offset < alloc_size);
5002	if (offset == 0) {
5003		/* Trim trailing space. */
5004		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
5005		    - chunk_size);
5006	} else {
5007		size_t trailsize;
5008
5009		/* Trim leading space. */
5010		chunk_dealloc(ret, alignment - offset);
5011
5012		ret = (void *)((uintptr_t)ret + (alignment - offset));
5013
5014		trailsize = alloc_size - (alignment - offset) - chunk_size;
5015		if (trailsize != 0) {
			/* Trim trailing space. */
			assert(trailsize < alloc_size);
			chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
			    trailsize);
5020		}
5021	}
5022
5023	/* Insert node into huge. */
5024	node->addr = ret;
5025	node->size = chunk_size;
5026
5027	malloc_mutex_lock(&huge_mtx);
5028	extent_tree_ad_insert(&huge, node);
5029#ifdef MALLOC_STATS
5030	huge_nmalloc++;
5031	huge_allocated += chunk_size;
5032#endif
5033	malloc_mutex_unlock(&huge_mtx);
5034
5035	if (opt_junk)
5036		memset(ret, 0xa5, chunk_size);
5037	else if (opt_zero)
5038		memset(ret, 0, chunk_size);
5039
5040	return (ret);
5041}
5042
5043static void *
5044huge_ralloc(void *ptr, size_t size, size_t oldsize)
5045{
5046	void *ret;
5047	size_t copysize;
5048
5049	/* Avoid moving the allocation if the size class would not change. */
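	/*
	 * For example (hypothetical 4 MiB chunks), growing from 5 MiB to
	 * 7 MiB leaves CHUNK_CEILING() at 8 MiB for both sizes, so the
	 * existing mapping is reused and only junk/zero filling (if
	 * enabled) is needed.
	 */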
5050	if (oldsize > arena_maxclass &&
5051	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
5052		if (opt_junk && size < oldsize) {
5053			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
5054			    - size);
5055		} else if (opt_zero && size > oldsize) {
5056			memset((void *)((uintptr_t)ptr + oldsize), 0, size
5057			    - oldsize);
5058		}
5059		return (ptr);
5060	}
5061
5062	/*
5063	 * If we get here, then size and oldsize are different enough that we
5064	 * need to use a different size class.  In that case, fall back to
5065	 * allocating new space and copying.
5066	 */
5067	ret = huge_malloc(size, false);
5068	if (ret == NULL)
5069		return (NULL);
5070
5071	copysize = (size < oldsize) ? size : oldsize;
5072	memcpy(ret, ptr, copysize);
5073	idalloc(ptr);
5074	return (ret);
5075}
5076
5077static void
5078huge_dalloc(void *ptr)
5079{
5080	extent_node_t *node, key;
5081
5082	malloc_mutex_lock(&huge_mtx);
5083
5084	/* Extract from tree of huge allocations. */
5085	key.addr = ptr;
5086	node = extent_tree_ad_search(&huge, &key);
5087	assert(node != NULL);
5088	assert(node->addr == ptr);
5089	extent_tree_ad_remove(&huge, node);
5090
5091#ifdef MALLOC_STATS
5092	huge_ndalloc++;
5093	huge_allocated -= node->size;
5094#endif
5095
5096	malloc_mutex_unlock(&huge_mtx);
5097
5098	/* Unmap chunk. */
5099#ifdef MALLOC_DSS
5100	if (opt_dss && opt_junk)
5101		memset(node->addr, 0x5a, node->size);
5102#endif
5103	chunk_dealloc(node->addr, node->size);
5104
5105	base_node_dealloc(node);
5106}
5107
5108static void
5109malloc_stats_print(void)
5110{
5111	char s[UMAX2S_BUFSIZE];
5112
5113	_malloc_message("___ Begin malloc statistics ___\n", "", "", "");
5114	_malloc_message("Assertions ",
5115#ifdef NDEBUG
5116	    "disabled",
5117#else
5118	    "enabled",
5119#endif
5120	    "\n", "");
5121	_malloc_message("Boolean MALLOC_OPTIONS: ", opt_abort ? "A" : "a", "", "");
5122#ifdef MALLOC_DSS
5123	_malloc_message(opt_dss ? "D" : "d", "", "", "");
5124#endif
5125	_malloc_message(opt_junk ? "J" : "j", "", "", "");
5126#ifdef MALLOC_DSS
5127	_malloc_message(opt_mmap ? "M" : "m", "", "", "");
5128#endif
5129	_malloc_message("P", "", "", "");
5130	_malloc_message(opt_utrace ? "U" : "u", "", "", "");
5131	_malloc_message(opt_sysv ? "V" : "v", "", "", "");
5132	_malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
5133	_malloc_message(opt_zero ? "Z" : "z", "", "", "");
5134	_malloc_message("\n", "", "", "");
5135
5136	_malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
5137	_malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
5138	_malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n", "");
5139	_malloc_message("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", "");
5140	_malloc_message("Cacheline size (assumed): ",
5141	    umax2s(CACHELINE, 10, s), "\n", "");
5142	_malloc_message("Subpage spacing: ", umax2s(SUBPAGE, 10, s), "\n", "");
5143	_malloc_message("Medium spacing: ", umax2s((1U << lg_mspace), 10, s), "\n",
5144	    "");
5145#ifdef MALLOC_TINY
5146	_malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U << LG_TINY_MIN), 10,
5147	    s), "..", "");
5148	_malloc_message(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
5149#endif
5150	_malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min, 10, s), "..",
5151	    "");
5152	_malloc_message(umax2s(qspace_max, 10, s), "]\n", "", "");
5153	_malloc_message("Cacheline-spaced sizes: [",
5154	    umax2s(cspace_min, 10, s), "..", "");
5155	_malloc_message(umax2s(cspace_max, 10, s), "]\n", "", "");
5156	_malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min, 10, s), "..",
5157	    "");
5158	_malloc_message(umax2s(sspace_max, 10, s), "]\n", "", "");
5159	_malloc_message("Medium sizes: [", umax2s(medium_min, 10, s), "..", "");
5160	_malloc_message(umax2s(medium_max, 10, s), "]\n", "", "");
5161	if (opt_lg_dirty_mult >= 0) {
5162		_malloc_message("Min active:dirty page ratio per arena: ",
5163		    umax2s((1U << opt_lg_dirty_mult), 10, s), ":1\n", "");
5164	} else {
5165		_malloc_message("Min active:dirty page ratio per arena: N/A\n", "",
5166		    "", "");
5167	}
5168#ifdef MALLOC_TCACHE
5169	_malloc_message("Thread cache slots per size class: ",
5170	    tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A", "\n", "");
5171	_malloc_message("Thread cache GC sweep interval: ",
5172	    (tcache_nslots && tcache_gc_incr > 0) ?
5173	    umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A", "", "");
5174	_malloc_message(" (increment interval: ",
5175	    (tcache_nslots && tcache_gc_incr > 0) ?  umax2s(tcache_gc_incr, 10, s)
5176	    : "N/A", ")\n", "");
5177#endif
5178	_malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "", "");
5179	_malloc_message(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
5180
5181#ifdef MALLOC_STATS
5182	{
5183		size_t allocated, mapped;
5184		unsigned i;
5185		arena_t *arena;
5186
5187		/* Calculate and print allocated/mapped stats. */
5188
5189		/* arenas. */
5190		for (i = 0, allocated = 0; i < narenas; i++) {
5191			if (arenas[i] != NULL) {
5192				malloc_spin_lock(&arenas[i]->lock);
5193				allocated += arenas[i]->stats.allocated_small;
5194				allocated += arenas[i]->stats.allocated_large;
5195				malloc_spin_unlock(&arenas[i]->lock);
5196			}
5197		}
5198
5199		/* huge/base. */
5200		malloc_mutex_lock(&huge_mtx);
5201		allocated += huge_allocated;
5202		mapped = stats_chunks.curchunks * chunksize;
5203		malloc_mutex_unlock(&huge_mtx);
5204
5205		malloc_mutex_lock(&base_mtx);
5206		mapped += base_mapped;
5207		malloc_mutex_unlock(&base_mtx);
5208
5209		malloc_printf("Allocated: %zu, mapped: %zu\n", allocated,
5210		    mapped);
5211
5212		/* Print chunk stats. */
5213		{
5214			chunk_stats_t chunks_stats;
5215
5216			malloc_mutex_lock(&huge_mtx);
5217			chunks_stats = stats_chunks;
5218			malloc_mutex_unlock(&huge_mtx);
5219
5220			malloc_printf("chunks: nchunks   "
5221			    "highchunks    curchunks\n");
5222			malloc_printf("  %13"PRIu64"%13zu%13zu\n",
5223			    chunks_stats.nchunks, chunks_stats.highchunks,
5224			    chunks_stats.curchunks);
5225		}
5226
		/* Print huge allocation stats. */
5228		malloc_printf(
5229		    "huge: nmalloc      ndalloc    allocated\n");
5230		malloc_printf(" %12"PRIu64" %12"PRIu64" %12zu\n", huge_nmalloc,
5231		    huge_ndalloc, huge_allocated);
5232
5233		/* Print stats for each arena. */
5234		for (i = 0; i < narenas; i++) {
5235			arena = arenas[i];
5236			if (arena != NULL) {
5237				malloc_printf("\narenas[%u]:\n", i);
5238				malloc_spin_lock(&arena->lock);
5239				arena_stats_print(arena);
5240				malloc_spin_unlock(&arena->lock);
5241			}
5242		}
5243	}
5244#endif /* #ifdef MALLOC_STATS */
5245	_malloc_message("--- End malloc statistics ---\n", "", "", "");
5246}
5247
5248#ifdef MALLOC_DEBUG
5249static void
5250small_size2bin_validate(void)
5251{
5252	size_t i, size, binind;
5253
5254	assert(small_size2bin[0] == 0xffU);
5255	i = 1;
5256#  ifdef MALLOC_TINY
5257	/* Tiny. */
5258	for (; i < (1U << LG_TINY_MIN); i++) {
5259		size = pow2_ceil(1U << LG_TINY_MIN);
5260		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5261		assert(small_size2bin[i] == binind);
5262	}
5263	for (; i < qspace_min; i++) {
5264		size = pow2_ceil(i);
5265		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5266		assert(small_size2bin[i] == binind);
5267	}
5268#  endif
5269	/* Quantum-spaced. */
5270	for (; i <= qspace_max; i++) {
5271		size = QUANTUM_CEILING(i);
5272		binind = ntbins + (size >> LG_QUANTUM) - 1;
5273		assert(small_size2bin[i] == binind);
5274	}
5275	/* Cacheline-spaced. */
5276	for (; i <= cspace_max; i++) {
5277		size = CACHELINE_CEILING(i);
5278		binind = ntbins + nqbins + ((size - cspace_min) >>
5279		    LG_CACHELINE);
5280		assert(small_size2bin[i] == binind);
5281	}
5282	/* Sub-page. */
5283	for (; i <= sspace_max; i++) {
5284		size = SUBPAGE_CEILING(i);
5285		binind = ntbins + nqbins + ncbins + ((size - sspace_min)
5286		    >> LG_SUBPAGE);
5287		assert(small_size2bin[i] == binind);
5288	}
5289}
5290#endif
5291
5292static bool
5293small_size2bin_init(void)
5294{
5295
5296	if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5297	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5298	    || sizeof(const_small_size2bin) != small_maxclass + 1)
5299		return (small_size2bin_init_hard());
5300
5301	small_size2bin = const_small_size2bin;
5302#ifdef MALLOC_DEBUG
5303	assert(sizeof(const_small_size2bin) == small_maxclass + 1);
5304	small_size2bin_validate();
5305#endif
5306	return (false);
5307}
5308
5309static bool
5310small_size2bin_init_hard(void)
5311{
5312	size_t i, size, binind;
5313	uint8_t *custom_small_size2bin;
5314
5315	assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5316	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5317	    || sizeof(const_small_size2bin) != small_maxclass + 1);
5318
5319	custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
5320	if (custom_small_size2bin == NULL)
5321		return (true);
5322
5323	custom_small_size2bin[0] = 0xffU;
5324	i = 1;
5325#ifdef MALLOC_TINY
5326	/* Tiny. */
5327	for (; i < (1U << LG_TINY_MIN); i++) {
5328		size = pow2_ceil(1U << LG_TINY_MIN);
5329		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5330		custom_small_size2bin[i] = binind;
5331	}
5332	for (; i < qspace_min; i++) {
5333		size = pow2_ceil(i);
5334		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5335		custom_small_size2bin[i] = binind;
5336	}
5337#endif
5338	/* Quantum-spaced. */
5339	for (; i <= qspace_max; i++) {
5340		size = QUANTUM_CEILING(i);
5341		binind = ntbins + (size >> LG_QUANTUM) - 1;
5342		custom_small_size2bin[i] = binind;
5343	}
5344	/* Cacheline-spaced. */
5345	for (; i <= cspace_max; i++) {
5346		size = CACHELINE_CEILING(i);
5347		binind = ntbins + nqbins + ((size - cspace_min) >>
5348		    LG_CACHELINE);
5349		custom_small_size2bin[i] = binind;
5350	}
5351	/* Sub-page. */
5352	for (; i <= sspace_max; i++) {
5353		size = SUBPAGE_CEILING(i);
5354		binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
5355		    LG_SUBPAGE);
5356		custom_small_size2bin[i] = binind;
5357	}
5358
5359	small_size2bin = custom_small_size2bin;
5360#ifdef MALLOC_DEBUG
5361	small_size2bin_validate();
5362#endif
5363	return (false);
5364}
5365
5366static unsigned
5367malloc_ncpus(void)
5368{
5369	int mib[2];
5370	unsigned ret;
5371	int error;
5372	size_t len;
5373
5374	error = _elf_aux_info(AT_NCPUS, &ret, sizeof(ret));
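	/*
	 * The ELF auxiliary vector avoids a system call; fall back to the
	 * hw.ncpu sysctl if AT_NCPUS is unavailable or reports zero CPUs.
	 */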
5375	if (error != 0 || ret == 0) {
5376		mib[0] = CTL_HW;
5377		mib[1] = HW_NCPU;
5378		len = sizeof(ret);
5379		if (sysctl(mib, 2, &ret, &len, (void *)NULL, 0) == -1) {
5380			/* Error. */
5381			ret = 1;
5382		}
5383	}
5384
5385	return (ret);
5386}
5387
5388/*
5389 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
5390 * implementation has to take pains to avoid infinite recursion during
5391 * initialization.
5392 */
5393static inline bool
5394malloc_init(void)
5395{
5396
5397	if (malloc_initialized == false)
5398		return (malloc_init_hard());
5399
5400	return (false);
5401}
5402
5403static bool
5404malloc_init_hard(void)
5405{
5406	unsigned i;
5407	int linklen;
5408	char buf[PATH_MAX + 1];
5409	const char *opts;
5410
5411	malloc_mutex_lock(&init_lock);
5412	if (malloc_initialized) {
5413		/*
5414		 * Another thread initialized the allocator before this one
5415		 * acquired init_lock.
5416		 */
5417		malloc_mutex_unlock(&init_lock);
5418		return (false);
5419	}
5420
5421	/* Get number of CPUs. */
5422	ncpus = malloc_ncpus();
5423
5424	/*
5425	 * Increase the chunk size to the largest page size that is greater
5426	 * than the default chunk size and less than or equal to 4MB.
5427	 */
5428	{
5429		size_t pagesizes[MAXPAGESIZES];
5430		int k, nsizes;
5431
5432		nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
5433		for (k = 0; k < nsizes; k++)
5434			if (pagesizes[k] <= (1LU << 22))
5435				while ((1LU << opt_lg_chunk) < pagesizes[k])
5436					opt_lg_chunk++;
5437	}
5438
5439	for (i = 0; i < 3; i++) {
5440		unsigned j;
5441
5442		/* Get runtime configuration. */
5443		switch (i) {
5444		case 0:
5445			if ((linklen = readlink("/etc/malloc.conf", buf,
5446						sizeof(buf) - 1)) != -1) {
5447				/*
5448				 * Use the contents of the "/etc/malloc.conf"
5449				 * symbolic link's name.
5450				 */
5451				buf[linklen] = '\0';
5452				opts = buf;
5453			} else {
5454				/* No configuration specified. */
5455				buf[0] = '\0';
5456				opts = buf;
5457			}
5458			break;
5459		case 1:
5460			if (issetugid() == 0 && (opts =
5461			    getenv("MALLOC_OPTIONS")) != NULL) {
5462				/*
5463				 * Do nothing; opts is already initialized to
5464				 * the value of the MALLOC_OPTIONS environment
5465				 * variable.
5466				 */
5467			} else {
5468				/* No configuration specified. */
5469				buf[0] = '\0';
5470				opts = buf;
5471			}
5472			break;
5473		case 2:
5474			if (_malloc_options != NULL) {
5475				/*
5476				 * Use options that were compiled into the
5477				 * program.
5478				 */
5479				opts = _malloc_options;
5480			} else {
5481				/* No configuration specified. */
5482				buf[0] = '\0';
5483				opts = buf;
5484			}
5485			break;
5486		default:
5487			/* NOTREACHED */
5488			assert(false);
5489			buf[0] = '\0';
5490			opts = buf;
5491		}
5492
5493		for (j = 0; opts[j] != '\0'; j++) {
5494			unsigned k, nreps;
5495			bool nseen;
5496
5497			/* Parse repetition count, if any. */
5498			for (nreps = 0, nseen = false;; j++, nseen = true) {
5499				switch (opts[j]) {
5500					case '0': case '1': case '2': case '3':
5501					case '4': case '5': case '6': case '7':
5502					case '8': case '9':
5503						nreps *= 10;
5504						nreps += opts[j] - '0';
5505						break;
5506					default:
5507						goto MALLOC_OUT;
5508				}
5509			}
5510MALLOC_OUT:
5511			if (nseen == false)
5512				nreps = 1;
5513
5514			for (k = 0; k < nreps; k++) {
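				/*
				 * Apply the option character nreps times;
				 * e.g. "3N" behaves the same as "NNN".
				 */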
5515				switch (opts[j]) {
5516				case 'a':
5517					opt_abort = false;
5518					break;
5519				case 'A':
5520					opt_abort = true;
5521					break;
5522				case 'c':
5523					if (opt_lg_cspace_max - 1 >
5524					    opt_lg_qspace_max &&
5525					    opt_lg_cspace_max >
5526					    LG_CACHELINE)
5527						opt_lg_cspace_max--;
5528					break;
5529				case 'C':
5530					if (opt_lg_cspace_max < PAGE_SHIFT
5531					    - 1)
5532						opt_lg_cspace_max++;
5533					break;
5534				case 'd':
5535#ifdef MALLOC_DSS
5536					opt_dss = false;
5537#endif
5538					break;
5539				case 'D':
5540#ifdef MALLOC_DSS
5541					opt_dss = true;
5542#endif
5543					break;
5544				case 'e':
5545					if (opt_lg_medium_max > PAGE_SHIFT)
5546						opt_lg_medium_max--;
5547					break;
5548				case 'E':
5549					if (opt_lg_medium_max + 1 <
5550					    opt_lg_chunk)
5551						opt_lg_medium_max++;
5552					break;
5553				case 'f':
5554					if (opt_lg_dirty_mult + 1 <
5555					    (sizeof(size_t) << 3))
5556						opt_lg_dirty_mult++;
5557					break;
5558				case 'F':
5559					if (opt_lg_dirty_mult >= 0)
5560						opt_lg_dirty_mult--;
5561					break;
5562#ifdef MALLOC_TCACHE
5563				case 'g':
5564					if (opt_lg_tcache_gc_sweep >= 0)
5565						opt_lg_tcache_gc_sweep--;
5566					break;
5567				case 'G':
5568					if (opt_lg_tcache_gc_sweep + 1 <
5569					    (sizeof(size_t) << 3))
5570						opt_lg_tcache_gc_sweep++;
5571					break;
5572				case 'h':
5573					if (opt_lg_tcache_nslots > 0)
5574						opt_lg_tcache_nslots--;
5575					break;
5576				case 'H':
5577					if (opt_lg_tcache_nslots + 1 <
5578					    (sizeof(size_t) << 3))
5579						opt_lg_tcache_nslots++;
5580					break;
5581#endif
5582				case 'j':
5583					opt_junk = false;
5584					break;
5585				case 'J':
5586					opt_junk = true;
5587					break;
5588				case 'k':
5589					/*
5590					 * Chunks always require at least one
5591					 * header page, plus enough room to
5592					 * hold a run for the largest medium
5593					 * size class (one page more than the
5594					 * size).
5595					 */
5596					if ((1U << (opt_lg_chunk - 1)) >=
5597					    (2U << PAGE_SHIFT) + (1U <<
5598					    opt_lg_medium_max))
5599						opt_lg_chunk--;
5600					break;
5601				case 'K':
5602					if (opt_lg_chunk + 1 <
5603					    (sizeof(size_t) << 3))
5604						opt_lg_chunk++;
5605					break;
5606				case 'm':
5607#ifdef MALLOC_DSS
5608					opt_mmap = false;
5609#endif
5610					break;
5611				case 'M':
5612#ifdef MALLOC_DSS
5613					opt_mmap = true;
5614#endif
5615					break;
5616				case 'n':
5617					opt_narenas_lshift--;
5618					break;
5619				case 'N':
5620					opt_narenas_lshift++;
5621					break;
5622				case 'p':
5623					opt_stats_print = false;
5624					break;
5625				case 'P':
5626					opt_stats_print = true;
5627					break;
5628				case 'q':
5629					if (opt_lg_qspace_max > LG_QUANTUM)
5630						opt_lg_qspace_max--;
5631					break;
5632				case 'Q':
5633					if (opt_lg_qspace_max + 1 <
5634					    opt_lg_cspace_max)
5635						opt_lg_qspace_max++;
5636					break;
5637				case 'u':
5638					opt_utrace = false;
5639					break;
5640				case 'U':
5641					opt_utrace = true;
5642					break;
5643				case 'v':
5644					opt_sysv = false;
5645					break;
5646				case 'V':
5647					opt_sysv = true;
5648					break;
5649				case 'x':
5650					opt_xmalloc = false;
5651					break;
5652				case 'X':
5653					opt_xmalloc = true;
5654					break;
5655				case 'z':
5656					opt_zero = false;
5657					break;
5658				case 'Z':
5659					opt_zero = true;
5660					break;
5661				default: {
5662					char cbuf[2];
5663
5664					cbuf[0] = opts[j];
5665					cbuf[1] = '\0';
5666					_malloc_message(_getprogname(),
5667					    ": (malloc) Unsupported character "
5668					    "in malloc options: '", cbuf,
5669					    "'\n");
5670				}
5671				}
5672			}
5673		}
5674	}
5675
5676#ifdef MALLOC_DSS
5677	/* Make sure that there is some method for acquiring memory. */
5678	if (opt_dss == false && opt_mmap == false)
5679		opt_mmap = true;
5680#endif
5681	if (opt_stats_print) {
5682		/* Print statistics at exit. */
5683		atexit(stats_print_atexit);
5684	}
5685
5687	/* Set variables according to the value of opt_lg_[qc]space_max. */
5688	qspace_max = (1U << opt_lg_qspace_max);
5689	cspace_min = CACHELINE_CEILING(qspace_max);
5690	if (cspace_min == qspace_max)
5691		cspace_min += CACHELINE;
5692	cspace_max = (1U << opt_lg_cspace_max);
5693	sspace_min = SUBPAGE_CEILING(cspace_max);
5694	if (sspace_min == cspace_max)
5695		sspace_min += SUBPAGE;
5696	assert(sspace_min < PAGE_SIZE);
5697	sspace_max = PAGE_SIZE - SUBPAGE;
5698	medium_max = (1U << opt_lg_medium_max);
5699
5700#ifdef MALLOC_TINY
5701	assert(LG_QUANTUM >= LG_TINY_MIN);
5702#endif
5703	assert(ntbins <= LG_QUANTUM);
5704	nqbins = qspace_max >> LG_QUANTUM;
5705	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
5706	nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
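	/*
	 * A worked example, assuming LG_QUANTUM == 4, LG_CACHELINE == 6,
	 * LG_SUBPAGE == 8, 4 KiB pages, opt_lg_qspace_max == 7, and
	 * opt_lg_cspace_max == 9: qspace_max == 128, cspace_min == 192,
	 * cspace_max == 512, sspace_min == 768, and sspace_max == 3840,
	 * which yields nqbins == 8, ncbins == 6, and nsbins == 13.
	 */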
5707
	/*
	 * Compute the medium size class spacing and the number of medium
	 * size classes.  Limit the spacing to no more than the page size,
	 * but otherwise use the smallest spacing that yields no more than
	 * NMBINS_MAX medium size classes.
	 */
5714	lg_mspace = LG_SUBPAGE;
5715	nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5716	while (lg_mspace < PAGE_SHIFT && nmbins > NMBINS_MAX) {
5717		lg_mspace = lg_mspace + 1;
5718		nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5719	}
5720	mspace_mask = (1U << lg_mspace) - 1U;
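	/*
	 * For example, assuming medium_min == 4096, medium_max == 32768,
	 * and LG_SUBPAGE == 8, the initial candidate is nmbins == 113; the
	 * spacing is doubled (and nmbins recomputed) only while that count
	 * exceeds NMBINS_MAX, and it never grows beyond one page.
	 */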
5721
5722	mbin0 = ntbins + nqbins + ncbins + nsbins;
5723	nbins = mbin0 + nmbins;
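	/*
	 * Continuing the worked example above, and assuming ntbins == 3
	 * (tiny classes of 2, 4, and 8 bytes), mbin0 == 3 + 8 + 6 + 13 == 30
	 * and nbins == 30 + nmbins.
	 */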
	/*
	 * The small_size2bin lookup table uses uint8_t to encode each bin
	 * index, so we cannot support more than 256 small size classes.
	 * Exceeding this limit requires an unusual and impractical
	 * configuration (it is not even possible with a 16-byte quantum and
	 * 4 KiB pages), but we protect against it anyway in order to avoid
	 * undefined behavior.
	 */
	if (mbin0 > 256) {
		char line_buf[UMAX2S_BUFSIZE];

		_malloc_message(_getprogname(),
		    ": (malloc) Too many small size classes (",
		    umax2s(mbin0, 10, line_buf), " > max 256)\n");
		abort();
	}
5739
5740	if (small_size2bin_init()) {
5741		malloc_mutex_unlock(&init_lock);
5742		return (true);
5743	}
5744
5745#ifdef MALLOC_TCACHE
5746	if (opt_lg_tcache_nslots > 0) {
5747		tcache_nslots = (1U << opt_lg_tcache_nslots);
5748
5749		/* Compute incremental GC event threshold. */
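		/*
		 * The computation below is
		 * ceil((1U << opt_lg_tcache_gc_sweep) / nbins): the number
		 * of tcache events between incremental GC passes.  For
		 * example, assuming a sweep interval of 2^13 == 8192 events
		 * and nbins == 100, tcache_gc_incr == 82, so each bin is
		 * swept roughly once per 8192 events.
		 */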
5750		if (opt_lg_tcache_gc_sweep >= 0) {
5751			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
5752			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
5753			    0) ? 0 : 1);
5754		} else
5755			tcache_gc_incr = 0;
5756	} else
5757		tcache_nslots = 0;
5758#endif
5759
5760	/* Set variables according to the value of opt_lg_chunk. */
5761	chunksize = (1LU << opt_lg_chunk);
5762	chunksize_mask = chunksize - 1;
5763	chunk_npages = (chunksize >> PAGE_SHIFT);
5764	{
5765		size_t header_size;
5766
5767		/*
5768		 * Compute the header size such that it is large enough to
5769		 * contain the page map.
5770		 */
5771		header_size = sizeof(arena_chunk_t) +
5772		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5773		arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5774		    ((header_size & PAGE_MASK) != 0);
5775	}
5776	arena_maxclass = chunksize - (arena_chunk_header_npages <<
5777	    PAGE_SHIFT);
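	/*
	 * For example, assuming 4 KiB pages, a 1 MiB chunk gives
	 * chunk_npages == 256; if the computed header_size were, say, 5 KiB,
	 * then arena_chunk_header_npages == 2 and arena_maxclass would be
	 * chunksize minus two pages.
	 */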
5778
5779	UTRACE((void *)(intptr_t)(-1), 0, 0);
5780
5781#ifdef MALLOC_STATS
5782	malloc_mutex_init(&chunks_mtx);
5783	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5784#endif
5785
	/* Various sanity checks regarding configuration. */
5787	assert(chunksize >= PAGE_SIZE);
5788
5789	/* Initialize chunks data. */
5790	malloc_mutex_init(&huge_mtx);
5791	extent_tree_ad_new(&huge);
5792#ifdef MALLOC_DSS
5793	malloc_mutex_init(&dss_mtx);
5794	dss_base = sbrk(0);
5795	i = (uintptr_t)dss_base & QUANTUM_MASK;
5796	if (i != 0)
5797		dss_base = sbrk(QUANTUM - i);
5798	dss_prev = dss_base;
5799	dss_max = dss_base;
5800	extent_tree_szad_new(&dss_chunks_szad);
5801	extent_tree_ad_new(&dss_chunks_ad);
5802#endif
5803#ifdef MALLOC_STATS
5804	huge_nmalloc = 0;
5805	huge_ndalloc = 0;
5806	huge_allocated = 0;
5807#endif
5808
5809	/* Initialize base allocation data structures. */
5810#ifdef MALLOC_STATS
5811	base_mapped = 0;
5812#endif
5813#ifdef MALLOC_DSS
5814	/*
5815	 * Allocate a base chunk here, since it doesn't actually have to be
5816	 * chunk-aligned.  Doing this before allocating any other chunks allows
5817	 * the use of space that would otherwise be wasted.
5818	 */
5819	if (opt_dss)
5820		base_pages_alloc(0);
5821#endif
5822	base_nodes = NULL;
5823	malloc_mutex_init(&base_mtx);
5824
5825	if (ncpus > 1) {
5826		/*
5827		 * For SMP systems, create more than one arena per CPU by
5828		 * default.
5829		 */
5830#ifdef MALLOC_TCACHE
5831		if (tcache_nslots) {
5832			/*
5833			 * Only large object allocation/deallocation is
5834			 * guaranteed to acquire an arena mutex, so we can get
5835			 * away with fewer arenas than without thread caching.
5836			 */
5837			opt_narenas_lshift += 1;
5838		} else {
5839#endif
5840			/*
5841			 * All allocations must acquire an arena mutex, so use
5842			 * plenty of arenas.
5843			 */
5844			opt_narenas_lshift += 2;
5845#ifdef MALLOC_TCACHE
5846		}
5847#endif
5848	}
5849
5850	/* Determine how many arenas to use. */
5851	narenas = ncpus;
5852	if (opt_narenas_lshift > 0) {
5853		if ((narenas << opt_narenas_lshift) > narenas)
5854			narenas <<= opt_narenas_lshift;
5855		/*
5856		 * Make sure not to exceed the limits of what base_alloc() can
5857		 * handle.
5858		 */
5859		if (narenas * sizeof(arena_t *) > chunksize)
5860			narenas = chunksize / sizeof(arena_t *);
5861	} else if (opt_narenas_lshift < 0) {
5862		if ((narenas >> -opt_narenas_lshift) < narenas)
5863			narenas >>= -opt_narenas_lshift;
5864		/* Make sure there is at least one arena. */
5865		if (narenas == 0)
5866			narenas = 1;
5867	}
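	/*
	 * For example, assuming ncpus == 4 and a final opt_narenas_lshift of
	 * 2, narenas == 16.  The checks above only matter for extreme shift
	 * values: they keep the arenas array within what base_alloc() can
	 * provide and guarantee at least one arena.
	 */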
5868
5869#ifdef NO_TLS
5870	if (narenas > 1) {
5871		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5872		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5873		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5874		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5875		    223, 227, 229, 233, 239, 241, 251, 257, 263};
5876		unsigned nprimes, parenas;
5877
		/*
		 * Pick a prime number of arenas that is larger than narenas,
		 * so that direct hashing of pthread_self() pointers tends to
		 * spread allocations evenly among them.
		 */
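		/*
		 * For example, if narenas == 16, the loop below selects 17,
		 * the smallest listed prime greater than 16.
		 */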
5883		assert((narenas & 1) == 0); /* narenas must be even. */
5884		nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
5885		parenas = primes[nprimes - 1]; /* In case not enough primes. */
5886		for (i = 1; i < nprimes; i++) {
5887			if (primes[i] > narenas) {
5888				parenas = primes[i];
5889				break;
5890			}
5891		}
5892		narenas = parenas;
5893	}
5894#endif
5895
5896#ifndef NO_TLS
5897	next_arena = 0;
5898#endif
5899
5900	/* Allocate and initialize arenas. */
5901	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5902	if (arenas == NULL) {
5903		malloc_mutex_unlock(&init_lock);
5904		return (true);
5905	}
5906	/*
5907	 * Zero the array.  In practice, this should always be pre-zeroed,
5908	 * since it was just mmap()ed, but let's be sure.
5909	 */
5910	memset(arenas, 0, sizeof(arena_t *) * narenas);
5911
5912	/*
5913	 * Initialize one arena here.  The rest are lazily created in
5914	 * choose_arena_hard().
5915	 */
5916	arenas_extend(0);
5917	if (arenas[0] == NULL) {
5918		malloc_mutex_unlock(&init_lock);
5919		return (true);
5920	}
5921#ifndef NO_TLS
5922	/*
5923	 * Assign the initial arena to the initial thread, in order to avoid
5924	 * spurious creation of an extra arena if the application switches to
5925	 * threaded mode.
5926	 */
5927	arenas_map = arenas[0];
5928#endif
5929	malloc_spin_init(&arenas_lock);
5930
5931	malloc_initialized = true;
5932	malloc_mutex_unlock(&init_lock);
5933	return (false);
5934}
5935
5936/*
5937 * End general internal functions.
5938 */
5939/******************************************************************************/
5940/*
5941 * Begin malloc(3)-compatible functions.
5942 */
5943
5944void *
5945malloc(size_t size)
5946{
5947	void *ret;
5948
5949	if (malloc_init()) {
5950		ret = NULL;
5951		goto OOM;
5952	}
5953
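	/*
	 * With the "V" (sysv) option enabled, malloc(0) returns NULL;
	 * otherwise a zero-size request is treated as a request for one
	 * byte.
	 */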
5954	if (size == 0) {
5955		if (opt_sysv == false)
5956			size = 1;
5957		else {
5958			if (opt_xmalloc) {
5959				_malloc_message(_getprogname(),
5960				    ": (malloc) Error in malloc(): "
5961				    "invalid size 0\n", "", "");
5962				abort();
5963			}
5964			ret = NULL;
5965			goto RETURN;
5966		}
5967	}
5968
5969	ret = imalloc(size);
5970
5971OOM:
5972	if (ret == NULL) {
5973		if (opt_xmalloc) {
5974			_malloc_message(_getprogname(),
5975			    ": (malloc) Error in malloc(): out of memory\n", "",
5976			    "");
5977			abort();
5978		}
5979		errno = ENOMEM;
5980	}
5981
5982RETURN:
5983	UTRACE(0, size, ret);
5984	return (ret);
5985}
5986
5987int
5988posix_memalign(void **memptr, size_t alignment, size_t size)
5989{
5990	int ret;
5991	void *result;
5992
5993	if (malloc_init())
5994		result = NULL;
5995	else {
5996		if (size == 0) {
5997			if (opt_sysv == false)
5998				size = 1;
5999			else {
6000				if (opt_xmalloc) {
6001					_malloc_message(_getprogname(),
6002					    ": (malloc) Error in "
6003					    "posix_memalign(): invalid "
6004					    "size 0\n", "", "");
6005					abort();
6006				}
6007				result = NULL;
6008				*memptr = NULL;
6009				ret = 0;
6010				goto RETURN;
6011			}
6012		}
6013
6014		/* Make sure that alignment is a large enough power of 2. */
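		/*
		 * ((alignment - 1) & alignment) is zero exactly when
		 * alignment is a power of two (or zero); e.g. 16 & 15 == 0,
		 * whereas 24 & 23 == 16.  A zero alignment is rejected by
		 * the sizeof(void *) test that follows.
		 */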
6015		if (((alignment - 1) & alignment) != 0
6016		    || alignment < sizeof(void *)) {
6017			if (opt_xmalloc) {
6018				_malloc_message(_getprogname(),
6019				    ": (malloc) Error in posix_memalign(): "
6020				    "invalid alignment\n", "", "");
6021				abort();
6022			}
6023			result = NULL;
6024			ret = EINVAL;
6025			goto RETURN;
6026		}
6027
6028		result = ipalloc(alignment, size);
6029	}
6030
6031	if (result == NULL) {
6032		if (opt_xmalloc) {
			_malloc_message(_getprogname(),
			    ": (malloc) Error in posix_memalign(): out of "
			    "memory\n", "", "");
6036			abort();
6037		}
6038		ret = ENOMEM;
6039		goto RETURN;
6040	}
6041
6042	*memptr = result;
6043	ret = 0;
6044
6045RETURN:
6046	UTRACE(0, size, result);
6047	return (ret);
6048}
6049
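/*
 * C11-style aligned_alloc(), implemented as a thin wrapper around
 * posix_memalign().  Note that posix_memalign() additionally requires the
 * alignment to be at least sizeof(void *), which is slightly stricter than
 * what aligned_alloc() itself mandates.
 */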
6050void *
6051aligned_alloc(size_t alignment, size_t size)
6052{
6053	void *memptr;
6054	int ret;
6055
6056	ret = posix_memalign(&memptr, alignment, size);
6057	if (ret != 0) {
6058		errno = ret;
6059		return (NULL);
6060	}
6061	return (memptr);
6062}
6063
6064void *
6065calloc(size_t num, size_t size)
6066{
6067	void *ret;
6068	size_t num_size;
6069
6070	if (malloc_init()) {
6071		num_size = 0;
6072		ret = NULL;
6073		goto RETURN;
6074	}
6075
6076	num_size = num * size;
6077	if (num_size == 0) {
6078		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
6079			num_size = 1;
6080		else {
6081			ret = NULL;
6082			goto RETURN;
6083		}
6084	/*
6085	 * Try to avoid division here.  We know that it isn't possible to
6086	 * overflow during multiplication if neither operand uses any of the
6087	 * most significant half of the bits in a size_t.
6088	 */
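	/*
	 * For example, on a 32-bit system the mask below is 0xffff0000; if
	 * both num and size are below 65536, their product cannot overflow
	 * and the division check is skipped.
	 */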
6089	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
6090	    && (num_size / size != num)) {
6091		/* size_t overflow. */
6092		ret = NULL;
6093		goto RETURN;
6094	}
6095
6096	ret = icalloc(num_size);
6097
6098RETURN:
6099	if (ret == NULL) {
6100		if (opt_xmalloc) {
6101			_malloc_message(_getprogname(),
6102			    ": (malloc) Error in calloc(): out of memory\n", "",
6103			    "");
6104			abort();
6105		}
6106		errno = ENOMEM;
6107	}
6108
6109	UTRACE(0, num_size, ret);
6110	return (ret);
6111}
6112
6113void *
6114realloc(void *ptr, size_t size)
6115{
6116	void *ret;
6117
6118	if (size == 0) {
6119		if (opt_sysv == false)
6120			size = 1;
6121		else {
6122			if (ptr != NULL)
6123				idalloc(ptr);
6124			ret = NULL;
6125			goto RETURN;
6126		}
6127	}
6128
6129	if (ptr != NULL) {
6130		assert(malloc_initialized);
6131
6132		ret = iralloc(ptr, size);
6133
6134		if (ret == NULL) {
6135			if (opt_xmalloc) {
6136				_malloc_message(_getprogname(),
6137				    ": (malloc) Error in realloc(): out of "
6138				    "memory\n", "", "");
6139				abort();
6140			}
6141			errno = ENOMEM;
6142		}
6143	} else {
6144		if (malloc_init())
6145			ret = NULL;
6146		else
6147			ret = imalloc(size);
6148
6149		if (ret == NULL) {
6150			if (opt_xmalloc) {
6151				_malloc_message(_getprogname(),
6152				    ": (malloc) Error in realloc(): out of "
6153				    "memory\n", "", "");
6154				abort();
6155			}
6156			errno = ENOMEM;
6157		}
6158	}
6159
6160RETURN:
6161	UTRACE(ptr, size, ret);
6162	return (ret);
6163}
6164
6165void
6166free(void *ptr)
6167{
6168
6169	UTRACE(ptr, 0, 0);
6170	if (ptr != NULL) {
6171		assert(malloc_initialized);
6172
6173		idalloc(ptr);
6174	}
6175}
6176
6177/*
6178 * End malloc(3)-compatible functions.
6179 */
6180/******************************************************************************/
6181/*
6182 * Begin non-standard functions.
6183 */
6184
6185size_t
6186malloc_usable_size(const void *ptr)
6187{
6188
6189	assert(ptr != NULL);
6190
6191	return (isalloc(ptr));
6192}
6193
6194/*
6195 * End non-standard functions.
6196 */
6197/******************************************************************************/
6198/*
6199 * Begin library-private functions.
6200 */
6201
6202/*
6203 * We provide an unpublished interface in order to receive notifications from
6204 * the pthreads library whenever a thread exits.  This allows us to clean up
6205 * thread caches.
6206 */
6207void
6208_malloc_thread_cleanup(void)
6209{
6210
6211#ifdef MALLOC_TCACHE
6212	tcache_t *tcache = tcache_tls;
6213
6214	if (tcache != NULL) {
6215		assert(tcache != (void *)(uintptr_t)1);
6216		tcache_destroy(tcache);
6217		tcache_tls = (void *)(uintptr_t)1;
6218	}
6219#endif
6220}
6221
6222/*
6223 * The following functions are used by threading libraries for protection of
6224 * malloc during fork().  These functions are only called if the program is
6225 * running in threaded mode, so there is no need to check whether the program
6226 * is threaded here.
6227 */
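/*
 * Acquiring every allocator mutex in _malloc_prefork() ensures that no other
 * thread holds one of them while the address space is copied;
 * _malloc_postfork() releases the same mutexes in the reverse order.
 */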
6228
6229void
6230_malloc_prefork(void)
6231{
6232	unsigned i;
6233
6234	/* Acquire all mutexes in a safe order. */
6235	malloc_spin_lock(&arenas_lock);
6236	for (i = 0; i < narenas; i++) {
6237		if (arenas[i] != NULL)
6238			malloc_spin_lock(&arenas[i]->lock);
6239	}
6240
6241	malloc_mutex_lock(&base_mtx);
6242
6243	malloc_mutex_lock(&huge_mtx);
6244
6245#ifdef MALLOC_DSS
6246	malloc_mutex_lock(&dss_mtx);
6247#endif
6248}
6249
6250void
6251_malloc_postfork(void)
6252{
6253	unsigned i;
6254
6255	/* Release all mutexes, now that fork() has completed. */
6256
6257#ifdef MALLOC_DSS
6258	malloc_mutex_unlock(&dss_mtx);
6259#endif
6260
6261	malloc_mutex_unlock(&huge_mtx);
6262
6263	malloc_mutex_unlock(&base_mtx);
6264
6265	for (i = 0; i < narenas; i++) {
6266		if (arenas[i] != NULL)
6267			malloc_spin_unlock(&arenas[i]->lock);
6268	}
6269	malloc_spin_unlock(&arenas_lock);
6270}
6271
6272/*
6273 * End library-private functions.
6274 */
6275/******************************************************************************/
6276