#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex.h"

/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
/*
 * We have a circular dependency -- jemalloc_internal.h tells us if we should
 * use libgcc's unwinding functionality, but after we've included that, we've
 * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
 */
#undef _Unwind_Backtrace
#include <unwind.h>
#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
#endif

/******************************************************************************/
/* Data. */

bool		opt_prof = false;
bool		opt_prof_active = true;
bool		opt_prof_thread_active_init = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = false;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Initialized as opt_prof_active, and accessed via
 * prof_active_[gs]et{_unlocked,}().
 */
bool			prof_active;
static malloc_mutex_t	prof_active_mtx;

/*
 * Initialized as opt_prof_thread_active_init, and accessed via
 * prof_thread_active_init_[gs]et().
 */
static bool		prof_thread_active_init;
static malloc_mutex_t	prof_thread_active_init_mtx;

/*
 * Initialized as opt_prof_gdump, and accessed via
 * prof_gdump_[gs]et{_unlocked,}().
 */
bool			prof_gdump_val;
static malloc_mutex_t	prof_gdump_mtx;

uint64_t	prof_interval = 0;

size_t		lg_prof_sample;

/*
 * Table of mutexes that are shared among gctx's.  These are leaf locks, so
 * there is no problem with using them for more than one gctx at the same time.
 * The primary motivation for this sharing though is that gctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*gctx_locks;
#ifdef JEMALLOC_PROF
static atomic_u_t	cum_gctxs; /* Atomic counter. */
#endif

/*
 * Table of mutexes that are shared among tdata's.  No operations require
 * holding multiple tdata locks, so there is no problem with using them for more
 * than one tdata at the same time, even though a gctx lock may be acquired
 * while holding a tdata lock.
 */
static malloc_mutex_t	*tdata_locks;

/*
 * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2gctx;
/* Non static to enable profiling. */
malloc_mutex_t		bt2gctx_mtx;

/*
 * Tree of all extant prof_tdata_t structures, regardless of state,
 * {attached,detached,expired}.
 */
static prof_tdata_tree_t	tdatas;
static malloc_mutex_t	tdatas_mtx;

#ifdef JEMALLOC_PROF
static uint64_t		next_thr_uid;
#endif
static malloc_mutex_t	next_thr_uid_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
#ifdef JEMALLOC_PROF
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;
#endif

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.
 */
static malloc_mutex_t	prof_dump_mtx;
static char		prof_dump_buf[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PROF_DUMP_BUFSIZE
#else
    1
#endif
];
static size_t		prof_dump_buf_end;
static int		prof_dump_fd;

#ifdef JEMALLOC_PROF
/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;
#endif

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
    bool even_if_attached);
static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
    bool even_if_attached);
#ifdef JEMALLOC_PROF
static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
#endif

/******************************************************************************/
/* Red-black trees. */

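/* Order tctx's by (thr_uid, thr_discrim, tctx_uid), in that precedence. */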
static int
prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
	uint64_t a_thr_uid = a->thr_uid;
	uint64_t b_thr_uid = b->thr_uid;
	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
	if (ret == 0) {
		uint64_t a_thr_discrim = a->thr_discrim;
		uint64_t b_thr_discrim = b->thr_discrim;
		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
		    b_thr_discrim);
		if (ret == 0) {
			uint64_t a_tctx_uid = a->tctx_uid;
			uint64_t b_tctx_uid = b->tctx_uid;
			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
			    b_tctx_uid);
		}
	}
	return ret;
}

rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
    tctx_link, prof_tctx_comp)

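/* Order gctx's by raw backtrace contents, breaking ties by length. */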
static int
prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
	unsigned a_len = a->bt.len;
	unsigned b_len = b->bt.len;
	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
	if (ret == 0) {
		ret = (a_len > b_len) - (a_len < b_len);
	}
	return ret;
}

rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
    prof_gctx_comp)

static int
prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
	int ret;
	uint64_t a_uid = a->thr_uid;
	uint64_t b_uid = b->thr_uid;

	ret = ((a_uid > b_uid) - (a_uid < b_uid));
	if (ret == 0) {
		uint64_t a_discrim = a->thr_discrim;
		uint64_t b_discrim = b->thr_discrim;

		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
	}
	return ret;
}

rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
    prof_tdata_comp)

/******************************************************************************/

JEMALLOC_PROF_NORETURN void
prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (updated) {
		/*
		 * Compute a new sample threshold.  This isn't very important in
		 * practice, because this function is rarely executed, so the
		 * potential for sample bias is minimal except in contrived
		 * programs.
		 */
		tdata = prof_tdata_get(tsd, true);
		if (tdata != NULL) {
			prof_sample_threshold_update(tdata);
		}
	}

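	/*
	 * tctx values of 0 and 1 are sentinel markers (e.g. an unsampled
	 * allocation) rather than pointers to real records.
	 */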
	if ((uintptr_t)tctx > (uintptr_t)1U) {
		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
		tctx->prepared = false;
		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
			prof_tctx_destroy(tsd, tctx);
		} else {
			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
		}
	}
}

JEMALLOC_PROF_NORETURN void
prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx) {
	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);

	malloc_mutex_lock(tsdn, tctx->tdata->lock);
	tctx->cnts.curobjs++;
	tctx->cnts.curbytes += usize;
	if (opt_prof_accum) {
		tctx->cnts.accumobjs++;
		tctx->cnts.accumbytes += usize;
	}
	tctx->prepared = false;
	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
}

void
prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) {
	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->cnts.curobjs > 0);
	assert(tctx->cnts.curbytes >= usize);
	tctx->cnts.curobjs--;
	tctx->cnts.curbytes -= usize;

	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
		prof_tctx_destroy(tsd, tctx);
	} else {
		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
	}
}

JEMALLOC_PROF_NORETURN void
bt_init(prof_bt_t *bt, void **vec) {
	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static JEMALLOC_PROF_NORETURN void
prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	if (tdata != NULL) {
		assert(!tdata->enq);
		tdata->enq = true;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
}

static JEMALLOC_PROF_NORETURN void
prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	if (tdata != NULL) {
		bool idump, gdump;

		assert(tdata->enq);
		tdata->enq = false;
		idump = tdata->enq_idump;
		tdata->enq_idump = false;
		gdump = tdata->enq_gdump;
		tdata->enq_gdump = false;

		if (idump) {
			prof_idump(tsd_tsdn(tsd));
		}
		if (gdump) {
			prof_gdump(tsd_tsdn(tsd));
		}
	}
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt) {
	int nframes;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
	if (nframes <= 0) {
		return;
	}
	bt->len = nframes;
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
	cassert(config_prof);

	return _URC_NO_REASON;
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
	void *ip;

	cassert(config_prof);

	ip = (void *)_Unwind_GetIP(context);
	if (ip == NULL) {
		return _URC_END_OF_STACK;
	}
	data->bt->vec[data->bt->len] = ip;
	data->bt->len++;
	if (data->bt->len == data->max) {
		return _URC_END_OF_STACK;
	}

	return _URC_NO_REASON;
}

void
prof_backtrace(prof_bt_t *bt) {
	prof_unwind_data_t data = {bt, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt) {
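	/*
	 * Manually unrolled frame walk: each BT_FRAME(i) records frame i and
	 * returns at the first missing frame, or once PROF_BT_MAX frames have
	 * been captured.
	 */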
#define BT_FRAME(i)							\
	if ((i) < PROF_BT_MAX) {					\
		void *p;						\
		if (__builtin_frame_address(i) == 0) {			\
			return;						\
		}							\
		p = __builtin_return_address(i);			\
		if (p == NULL) {					\
			return;						\
		}							\
		bt->vec[(i)] = p;					\
		bt->len = (i) + 1;					\
	} else {							\
		return;							\
	}

	cassert(config_prof);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)
#undef BT_FRAME
}
#else
JEMALLOC_NORETURN void
prof_backtrace(prof_bt_t *bt) {
	cassert(config_prof);
	not_reached();
}
#endif

#ifdef JEMALLOC_PROF
static malloc_mutex_t *
prof_gctx_mutex_choose(void) {
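	/* Assign gctx locks round-robin via a global creation counter. */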
	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);

	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
}

static malloc_mutex_t *
prof_tdata_mutex_choose(uint64_t thr_uid) {
	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
}

static prof_gctx_t *
prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
	/*
	 * Create a single allocation that has space for vec of length bt->len.
	 */
	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
	    true);
	if (gctx == NULL) {
		return NULL;
	}
	gctx->lock = prof_gctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_tctx_destroy()/prof_gctx_try_destroy().
	 */
	gctx->nlimbo = 1;
	tctx_tree_new(&gctx->tctxs);
	/* Duplicate bt. */
	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
	gctx->bt.vec = gctx->vec;
	gctx->bt.len = bt->len;
	return gctx;
}
#endif

static JEMALLOC_PROF_NORETURN void
prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
    prof_tdata_t *tdata) {
	cassert(config_prof);

	/*
	 * Check that gctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_tctx_destroy() in order to
	 * avoid a race between the main body of prof_tctx_destroy() and entry
	 * into this function.
	 */
	prof_enter(tsd, tdata_self);
	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	assert(gctx->nlimbo != 0);
	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
		/* Remove gctx from bt2gctx. */
		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
			not_reached();
		}
		prof_leave(tsd, tdata_self);
		/* Destroy gctx. */
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
	} else {
		/*
		 * Compensate for increment in prof_tctx_destroy() or
		 * prof_lookup().
		 */
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		prof_leave(tsd, tdata_self);
	}
}

static bool
prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);

	if (opt_prof_accum) {
		return false;
	}
	if (tctx->cnts.curobjs != 0) {
		return false;
	}
	if (tctx->prepared) {
		return false;
	}
	return true;
}

static bool
prof_gctx_should_destroy(prof_gctx_t *gctx) {
	if (opt_prof_accum) {
		return false;
	}
	if (!tctx_tree_empty(&gctx->tctxs)) {
		return false;
	}
	if (gctx->nlimbo != 0) {
		return false;
	}
	return true;
}

static void
prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
	prof_tdata_t *tdata = tctx->tdata;
	prof_gctx_t *gctx = tctx->gctx;
	bool destroy_tdata, destroy_tctx, destroy_gctx;

	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	assert(tctx->cnts.curobjs == 0);
	assert(tctx->cnts.curbytes == 0);
	assert(!opt_prof_accum);
	assert(tctx->cnts.accumobjs == 0);
	assert(tctx->cnts.accumbytes == 0);

	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);

	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	switch (tctx->state) {
	case prof_tctx_state_nominal:
		tctx_tree_remove(&gctx->tctxs, tctx);
		destroy_tctx = true;
		if (prof_gctx_should_destroy(gctx)) {
			/*
			 * Increment gctx->nlimbo in order to keep another
			 * thread from winning the race to destroy gctx while
			 * this one has gctx->lock dropped.  Without this, it
			 * would be possible for another thread to:
			 *
			 * 1) Sample an allocation associated with gctx.
			 * 2) Deallocate the sampled object.
			 * 3) Successfully prof_gctx_try_destroy(gctx).
			 *
			 * The result would be that gctx no longer exists by the
			 * time this thread accesses it in
			 * prof_gctx_try_destroy().
			 */
			gctx->nlimbo++;
			destroy_gctx = true;
		} else {
			destroy_gctx = false;
		}
		break;
	case prof_tctx_state_dumping:
		/*
		 * A dumping thread needs tctx to remain valid until dumping
		 * has finished.  Change state such that the dumping thread will
		 * complete destruction during a late dump iteration phase.
		 */
		tctx->state = prof_tctx_state_purgatory;
		destroy_tctx = false;
		destroy_gctx = false;
		break;
	default:
		not_reached();
		destroy_tctx = false;
		destroy_gctx = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	if (destroy_gctx) {
		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
		    tdata);
	}

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	if (destroy_tdata) {
		prof_tdata_destroy(tsd, tdata, false);
	}

	if (destroy_tctx) {
		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
	}
}

#ifdef JEMALLOC_PROF
static bool
prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
    void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
	union {
		prof_gctx_t	*p;
		void		*v;
	} gctx, tgctx;
	union {
		prof_bt_t	*p;
		void		*v;
	} btkey;
	bool new_gctx;

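	/*
	 * Look up bt while holding bt2gctx_mtx.  If it is missing, drop the
	 * lock to allocate a new gctx, then recheck under the lock in case
	 * another thread inserted it in the meantime.
	 */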
	prof_enter(tsd, tdata);
	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
		/* bt has never been seen before.  Insert it. */
		prof_leave(tsd, tdata);
		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
		if (tgctx.v == NULL) {
			return true;
		}
		prof_enter(tsd, tdata);
		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
			gctx.p = tgctx.p;
			btkey.p = &gctx.p->bt;
			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
				/* OOM. */
				prof_leave(tsd, tdata);
				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
				    true, true);
				return true;
			}
			new_gctx = true;
		} else {
			new_gctx = false;
		}
	} else {
		tgctx.v = NULL;
		new_gctx = false;
	}

	if (!new_gctx) {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_tctx_destroy()/prof_gctx_try_destroy().
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
		gctx.p->nlimbo++;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
		new_gctx = false;

		if (tgctx.v != NULL) {
			/* Lost race to insert. */
			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
			    true);
		}
	}
	prof_leave(tsd, tdata);

	*p_btkey = btkey.v;
	*p_gctx = gctx.p;
	*p_new_gctx = new_gctx;
	return false;
}
#endif

JEMALLOC_PROF_NORETURN prof_tctx_t *
prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	union {
		prof_tctx_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *tdata;
	bool not_found;

	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return NULL;
	}

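	/* Consult this thread's private bt-->tctx cache first. */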
	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
	if (!not_found) { /* Note double negative! */
		ret.p->prepared = true;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (not_found) {
		void *btkey;
		prof_gctx_t *gctx;
		bool new_gctx, error;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
		    &new_gctx)) {
			return NULL;
		}

		/* Link a prof_tctx_t into gctx for this thread. */
		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
		    arena_ichoose(tsd, NULL), true);
		if (ret.p == NULL) {
			if (new_gctx) {
				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
			}
			return NULL;
		}
		ret.p->tdata = tdata;
		ret.p->thr_uid = tdata->thr_uid;
		ret.p->thr_discrim = tdata->thr_discrim;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		ret.p->gctx = gctx;
		ret.p->tctx_uid = tdata->tctx_uid_next++;
		ret.p->prepared = true;
		ret.p->state = prof_tctx_state_initializing;
		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
		if (error) {
			if (new_gctx) {
				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
			}
			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
			return NULL;
		}
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		ret.p->state = prof_tctx_state_nominal;
		tctx_tree_insert(&gctx->tctxs, ret.p);
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	}

	return ret.p;
#endif
}

/*
 * The bodies of this function and prof_leakcheck() are compiled out unless heap
 * profiling is enabled, so that it is possible to compile jemalloc with
 * floating point support completely disabled.  Avoiding floating point code is
 * important on memory-constrained systems, but it also enables a workaround for
 * versions of glibc that don't properly save/restore floating point registers
 * during dynamic lazy symbol loading (which internally calls into whatever
 * malloc implementation happens to be integrated into the application).  Note
 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast
 * memory moves, so jemalloc must be compiled with such optimizations disabled
 * (e.g. -mno-sse) in order for the workaround to be complete.
 */
void
prof_sample_threshold_update(prof_tdata_t *tdata) {
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	if (!config_prof) {
		return;
	}

	if (lg_prof_sample == 0) {
		tdata->bytes_until_sample = 0;
		return;
	}

	/*
	 * Compute sample interval as a geometrically distributed random
	 * variable with mean (2^lg_prof_sample).
	 *
	 *                             __        __
	 *                             |  log(u)  |                     1
	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
	 *                             | log(1-p) |             lg_prof_sample
	 *                                                     2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
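	/*
	 * r holds 53 random bits; 9007199254740992 == 2^53, so u is uniform
	 * over [0, 1).
	 */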
	r = prng_lg_range_u64(&tdata->prng_state, 53);
	u = (double)r * (1.0/9007199254740992.0L);
	tdata->bytes_until_sample = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}

#ifdef JEMALLOC_JET
static prof_tdata_t *
prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
    void *arg) {
	size_t *tdata_count = (size_t *)arg;

	(*tdata_count)++;

	return NULL;
}

size_t
prof_tdata_count(void) {
	size_t tdata_count = 0;
	tsdn_t *tsdn;

	tsdn = tsdn_fetch();
	malloc_mutex_lock(tsdn, &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
	    (void *)&tdata_count);
	malloc_mutex_unlock(tsdn, &tdatas_mtx);

	return tdata_count;
}

size_t
prof_bt_count(void) {
	size_t bt_count;
	tsd_t *tsd;
	prof_tdata_t *tdata;

	tsd = tsd_fetch();
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return 0;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
	bt_count = ckh_count(&bt2gctx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	return bt_count;
}
#endif

static int
prof_dump_open_impl(bool propagate_err, const char *filename) {
	int fd;

	fd = creat(filename, 0644);
	if (fd == -1 && !propagate_err) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
		    filename);
		if (opt_abort) {
			abort();
		}
	}

	return fd;
}
prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;

static bool
prof_dump_flush(bool propagate_err) {
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (!propagate_err) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort) {
				abort();
			}
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return ret;
}

#ifdef JEMALLOC_PROF
static bool
prof_dump_close(bool propagate_err) {
	bool ret;

	assert(prof_dump_fd != -1);
	ret = prof_dump_flush(propagate_err);
	close(prof_dump_fd);
	prof_dump_fd = -1;

	return ret;
}
#endif

static bool
prof_dump_write(bool propagate_err, const char *s) {
	size_t i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
			if (prof_dump_flush(propagate_err) && propagate_err) {
				return true;
			}
		}

		if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return false;
}

JEMALLOC_FORMAT_PRINTF(2, 3)
static bool
prof_dump_printf(bool propagate_err, const char *format, ...) {
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_dump_write(propagate_err, buf);

	return ret;
}

#ifdef JEMALLOC_PROF
static void
prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);

	malloc_mutex_lock(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
		return;
	case prof_tctx_state_nominal:
		tctx->state = prof_tctx_state_dumping;
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);

		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));

		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
		if (opt_prof_accum) {
			tdata->cnt_summed.accumobjs +=
			    tctx->dump_cnts.accumobjs;
			tdata->cnt_summed.accumbytes +=
			    tctx->dump_cnts.accumbytes;
		}
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		not_reached();
	}
}

static void
prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
	malloc_mutex_assert_owner(tsdn, gctx->lock);

	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
	if (opt_prof_accum) {
		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
	}
}

static prof_tctx_t *
prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
	tsdn_t *tsdn = (tsdn_t *)arg;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
		break;
	default:
		not_reached();
	}

	return NULL;
}

struct prof_tctx_dump_iter_arg_s {
	tsdn_t	*tsdn;
	bool	propagate_err;
};

static prof_tctx_t *
prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
	struct prof_tctx_dump_iter_arg_s *arg =
	    (struct prof_tctx_dump_iter_arg_s *)opaque;

	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
	case prof_tctx_state_nominal:
		/* Not captured by this dump. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		if (prof_dump_printf(arg->propagate_err,
		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
		    tctx->dump_cnts.accumbytes)) {
			return tctx;
		}
		break;
	default:
		not_reached();
	}
	return NULL;
}

static prof_tctx_t *
prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
	tsdn_t *tsdn = (tsdn_t *)arg;
	prof_tctx_t *ret;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
		tctx->state = prof_tctx_state_nominal;
		break;
	case prof_tctx_state_purgatory:
		ret = tctx;
		goto label_return;
	default:
		not_reached();
	}

	ret = NULL;
label_return:
	return ret;
}

static void
prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {

	malloc_mutex_lock(tsdn, gctx->lock);

	/*
	 * Increment nlimbo so that gctx won't go away before dump.
	 * Additionally, link gctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
	 */
	gctx->nlimbo++;
	gctx_tree_insert(gctxs, gctx);

	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));

	malloc_mutex_unlock(tsdn, gctx->lock);
}

struct prof_gctx_merge_iter_arg_s {
	tsdn_t	*tsdn;
	size_t	leak_ngctx;
};

static prof_gctx_t *
prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
	struct prof_gctx_merge_iter_arg_s *arg =
	    (struct prof_gctx_merge_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, gctx->lock);
	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
	    (void *)arg->tsdn);
	if (gctx->cnt_summed.curobjs != 0) {
		arg->leak_ngctx++;
	}
	malloc_mutex_unlock(arg->tsdn, gctx->lock);

	return NULL;
}

static void
prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
	prof_gctx_t *gctx;

	/*
	 * Standard tree iteration won't work here, because as soon as we
	 * decrement gctx->nlimbo and unlock gctx, another thread can
	 * concurrently destroy it, which will corrupt the tree.  Therefore,
	 * tear down the tree one node at a time during iteration.
	 */
	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
		gctx_tree_remove(gctxs, gctx);
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		{
			prof_tctx_t *next;

			next = NULL;
			do {
				prof_tctx_t *to_destroy =
				    tctx_tree_iter(&gctx->tctxs, next,
				    prof_tctx_finish_iter,
				    (void *)tsd_tsdn(tsd));
				if (to_destroy != NULL) {
					next = tctx_tree_next(&gctx->tctxs,
					    to_destroy);
					tctx_tree_remove(&gctx->tctxs,
					    to_destroy);
					idalloctm(tsd_tsdn(tsd), to_destroy,
					    NULL, NULL, true, true);
				} else {
					next = NULL;
				}
			} while (next != NULL);
		}
		gctx->nlimbo--;
		if (prof_gctx_should_destroy(gctx)) {
			gctx->nlimbo++;
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
		} else {
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		}
	}
}

struct prof_tdata_merge_iter_arg_s {
	tsdn_t		*tsdn;
	prof_cnt_t	cnt_all;
};

static prof_tdata_t *
prof_tdata_merge_iter(prof_tdata_tree_t *tdatasunused, prof_tdata_t *tdata,
    void *opaque) {
	struct prof_tdata_merge_iter_arg_s *arg =
	    (struct prof_tdata_merge_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, tdata->lock);
	if (!tdata->expired) {
		size_t tabind;
		union {
			prof_tctx_t	*p;
			void		*v;
		} tctx;

		tdata->dumping = true;
		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
		    &tctx.v);) {
			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
		}

		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
		if (opt_prof_accum) {
			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
		}
	} else {
		tdata->dumping = false;
	}
	malloc_mutex_unlock(arg->tsdn, tdata->lock);

	return NULL;
}
#endif

static prof_tdata_t *
prof_tdata_dump_iter(prof_tdata_tree_t *tdatasunused, prof_tdata_t *tdata,
    void *arg) {
	bool propagate_err = *(bool *)arg;

	if (!tdata->dumping) {
		return NULL;
	}

	if (prof_dump_printf(propagate_err,
	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
	    tdata->thr_uid, tdata->cnt_summed.curobjs,
	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
	    tdata->cnt_summed.accumbytes,
	    (tdata->thread_name != NULL) ? " " : "",
	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
		return tdata;
	}
	return NULL;
}

static bool
prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
    const prof_cnt_t *cnt_all) {
	bool ret;

	if (prof_dump_printf(propagate_err,
	    "heap_v2/%"FMTu64"\n"
	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
		return true;
	}

	malloc_mutex_lock(tsdn, &tdatas_mtx);
	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
	    (void *)&propagate_err) != NULL);
	malloc_mutex_unlock(tsdn, &tdatas_mtx);
	return ret;
}
prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;

#ifdef JEMALLOC_PROF
static bool
prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
    const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
	bool ret;
	unsigned i;
	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;

	cassert(config_prof);
	malloc_mutex_assert_owner(tsdn, gctx->lock);

	/* Avoid dumping gctx's that have no useful data. */
	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
		assert(gctx->cnt_summed.curobjs == 0);
		assert(gctx->cnt_summed.curbytes == 0);
		assert(gctx->cnt_summed.accumobjs == 0);
		assert(gctx->cnt_summed.accumbytes == 0);
		ret = false;
		goto label_return;
	}

	if (prof_dump_printf(propagate_err, "@")) {
		ret = true;
		goto label_return;
	}
	for (i = 0; i < bt->len; i++) {
		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
		    (uintptr_t)bt->vec[i])) {
			ret = true;
			goto label_return;
		}
	}

	if (prof_dump_printf(propagate_err,
	    "\n"
	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
		ret = true;
		goto label_return;
	}

	prof_tctx_dump_iter_arg.tsdn = tsdn;
	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	return ret;
}

#ifndef _WIN32
JEMALLOC_FORMAT_PRINTF(1, 2)
static int
prof_open_maps(const char *format, ...) {
	int mfd;
	va_list ap;
	char filename[PATH_MAX + 1];

	va_start(ap, format);
	malloc_vsnprintf(filename, sizeof(filename), format, ap);
	va_end(ap);

#if defined(O_CLOEXEC)
	mfd = open(filename, O_RDONLY | O_CLOEXEC);
#else
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
	}
#endif

	return mfd;
}
#endif

static int
prof_getpid(void) {
#ifdef _WIN32
	return GetCurrentProcessId();
#else
	return getpid();
#endif
}

static bool
prof_dump_maps(bool propagate_err) {
	bool ret;
	int mfd;

	cassert(config_prof);
#ifdef __FreeBSD__
	mfd = prof_open_maps("/proc/curproc/map");
#elif defined(_WIN32)
	mfd = -1; // Not implemented
#else
	{
		int pid = prof_getpid();

		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
		if (mfd == -1) {
			mfd = prof_open_maps("/proc/%d/maps", pid);
		}
	}
#endif
	if (mfd != -1) {
		ssize_t nread;

		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			ret = true;
			goto label_return;
		}
		nread = 0;
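		/*
		 * Copy the maps file into the dump buffer in chunks, flushing
		 * the buffer to the dump file whenever it fills.
		 */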
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_dump_flush(propagate_err) &&
				    propagate_err) {
					ret = true;
					goto label_return;
				}
			}
			nread = malloc_read_fd(mfd,
			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
			    - prof_dump_buf_end);
		} while (nread > 0);
	} else {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	if (mfd != -1) {
		close(mfd);
	}
	return ret;
}

/*
 * See prof_sample_threshold_update() comment for why the body of this function
 * is conditionally compiled.
 */
static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
    const char *filename) {
	/*
	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result
	 * may differ slightly from what jeprof reports, because here we scale
	 * the summary values, whereas jeprof scales each context individually
	 * and reports the sums of the scaled values.
	 */
	if (cnt_all->curbytes != 0) {
		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
		double ratio = (((double)cnt_all->curbytes) /
		    (double)cnt_all->curobjs) / sample_period;
		double scale_factor = 1.0 / (1.0 - exp(-ratio));
		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
		    * scale_factor);
		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
		    scale_factor);

		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
		    filename);
	}
}

struct prof_gctx_dump_iter_arg_s {
	tsdn_t	*tsdn;
	bool	propagate_err;
};

static prof_gctx_t *
prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
	prof_gctx_t *ret;
	struct prof_gctx_dump_iter_arg_s *arg =
	    (struct prof_gctx_dump_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, gctx->lock);

	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
	    gctxs)) {
		ret = gctx;
		goto label_return;
	}

	ret = NULL;
label_return:
	malloc_mutex_unlock(arg->tsdn, gctx->lock);
	return ret;
}

static void
prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
    struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
    struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
    prof_gctx_tree_t *gctxs) {
	size_t tabind;
	union {
		prof_gctx_t	*p;
		void		*v;
	} gctx;

	prof_enter(tsd, tdata);

	/*
	 * Put gctx's in limbo and clear their counters in preparation for
	 * summing.
	 */
	gctx_tree_new(gctxs);
	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
	}

	/*
	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
	 * stats and merge them into the associated gctx's.
	 */
	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
	    (void *)prof_tdata_merge_iter_arg);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);

	/* Merge tctx stats into gctx's. */
	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
	prof_gctx_merge_iter_arg->leak_ngctx = 0;
	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
	    (void *)prof_gctx_merge_iter_arg);

	prof_leave(tsd, tdata);
}

static bool
prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
    bool leakcheck, prof_tdata_t *tdata,
    struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
    struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
    struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
    prof_gctx_tree_t *gctxs) {
	/* Create dump file. */
	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
		return true;
	}

	/* Dump profile header. */
	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
	    &prof_tdata_merge_iter_arg->cnt_all)) {
		goto label_write_error;
	}

	/* Dump per gctx profile stats. */
	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
	    (void *)prof_gctx_dump_iter_arg) != NULL) {
		goto label_write_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err)) {
		goto label_write_error;
	}

	if (prof_dump_close(propagate_err)) {
		return true;
	}

	return false;
label_write_error:
	prof_dump_close(propagate_err);
	return true;
}

static bool
prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
    bool leakcheck) {
	cassert(config_prof);
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return true;
	}

	pre_reentrancy(tsd, NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);

	prof_gctx_tree_t gctxs;
	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
	    &prof_gctx_merge_iter_arg, &gctxs);
	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
	    &prof_gctx_dump_iter_arg, &gctxs);
	prof_gctx_finish(tsd, &gctxs);

	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
	post_reentrancy(tsd);

	if (err) {
		return true;
	}

	if (leakcheck) {
		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
	}
	return false;
}
#endif

#ifdef JEMALLOC_JET
void
prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
    uint64_t *accumbytes) {
	tsd_t *tsd;
	prof_tdata_t *tdata;
	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
	prof_gctx_tree_t gctxs;

	tsd = tsd_fetch();
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		if (curobjs != NULL) {
			*curobjs = 0;
		}
		if (curbytes != NULL) {
			*curbytes = 0;
		}
		if (accumobjs != NULL) {
			*accumobjs = 0;
		}
		if (accumbytes != NULL) {
			*accumbytes = 0;
		}
		return;
	}

	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
	    &prof_gctx_merge_iter_arg, &gctxs);
	prof_gctx_finish(tsd, &gctxs);

	if (curobjs != NULL) {
		*curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
	}
	if (curbytes != NULL) {
		*curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
	}
	if (accumobjs != NULL) {
		*accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
	}
	if (accumbytes != NULL) {
		*accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
	}
}
#endif

#ifdef JEMALLOC_PROF
#define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
#define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
static void
prof_dump_filename(char *filename, char v, uint64_t vseq) {
	cassert(config_prof);

	if (vseq != VSEQ_INVALID) {
	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
	} else {
	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"FMTu64".%c.heap",
		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void) {
	cassert(config_prof);
	tsd_t *tsd;
	char filename[DUMP_FILENAME_BUFSIZE];

	assert(opt_prof_final);
	assert(opt_prof_prefix[0] != '\0');

	if (!prof_booted) {
		return;
	}
	tsd = tsd_fetch();
	assert(tsd_reentrancy_level_get(tsd) == 0);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
	prof_dump_filename(filename, 'f', VSEQ_INVALID);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
	prof_dump(tsd, false, filename, opt_prof_leak);
}
#endif

JEMALLOC_PROF_NORETURN bool
prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
#ifndef JEMALLOC_ATOMIC_U64
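	/* Without 64-bit atomics, a mutex protects the accumulator instead. */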
	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
		return true;
	}
	prof_accum->accumbytes = 0;
#else
	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
#endif
	return false;
#endif
}

JEMALLOC_PROF_NORETURN void
prof_idump(tsdn_t *tsdn) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	tsd_t *tsd;
	prof_tdata_t *tdata;

	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
		return;
	}
	tsd = tsdn_tsd(tsdn);
	if (tsd_reentrancy_level_get(tsd) > 0) {
		return;
	}

	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return;
	}
	if (tdata->enq) {
		tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		char filename[PATH_MAX + 1];
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump(tsd, false, filename, false);
	}
#endif
}

JEMALLOC_PROF_NORETURN bool
prof_mdump(tsd_t *tsd, const char *filename) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	assert(tsd_reentrancy_level_get(tsd) == 0);

	if (!opt_prof || !prof_booted) {
		return true;
	}
	char filename_buf[DUMP_FILENAME_BUFSIZE];
	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0') {
			return true;
		}
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return prof_dump(tsd, true, filename, false);
#endif
}

JEMALLOC_PROF_NORETURN void
prof_gdump(tsdn_t *tsdn) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	tsd_t *tsd;
	prof_tdata_t *tdata;

	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
		return;
	}
	tsd = tsdn_tsd(tsdn);
	if (tsd_reentrancy_level_get(tsd) > 0) {
		return;
	}

	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return;
	}
	if (tdata->enq) {
		tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		char filename[DUMP_FILENAME_BUFSIZE];
		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
		prof_dump(tsd, false, filename, false);
	}
#endif
}

#ifdef JEMALLOC_PROF
static void
prof_bt_hash(const void *key, size_t r_hash[2]) {
	const prof_bt_t *bt = (const prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2) {
	const prof_bt_t *bt1 = (const prof_bt_t *)k1;
	const prof_bt_t *bt2 = (const prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len) {
		return false;
	}
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

static uint64_t
prof_thr_uid_alloc(tsdn_t *tsdn) {
	uint64_t thr_uid;

	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
	thr_uid = next_thr_uid;
	next_thr_uid++;
	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);

	return thr_uid;
}

static prof_tdata_t *
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
    char *thread_name, bool active) {
	prof_tdata_t *tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (tdata == NULL) {
		return NULL;
	}

	tdata->lock = prof_tdata_mutex_choose(thr_uid);
	tdata->thr_uid = thr_uid;
	tdata->thr_discrim = thr_discrim;
	tdata->thread_name = thread_name;
	tdata->attached = true;
	tdata->expired = false;
	tdata->tctx_uid_next = 0;

	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
	    prof_bt_keycomp)) {
		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
		return NULL;
	}

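	/* Seed the sampling PRNG from this tdata's address. */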
1957	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
1958	prof_sample_threshold_update(tdata);
1959
1960	tdata->enq = false;
1961	tdata->enq_idump = false;
1962	tdata->enq_gdump = false;
1963
1964	tdata->dumping = false;
1965	tdata->active = active;
1966
1967	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1968	tdata_tree_insert(&tdatas, tdata);
1969	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1970
1971	return tdata;
1972}
1973#endif
1974
1975JEMALLOC_PROF_NORETURN prof_tdata_t *
1976prof_tdata_init(tsd_t *tsd) {
1977	cassert(config_prof);
1978#ifdef JEMALLOC_PROF
1979	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
1980	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
1981#endif
1982}
1983
1984static bool
1985prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
1986	if (tdata->attached && !even_if_attached) {
1987		return false;
1988	}
1989	if (ckh_count(&tdata->bt2tctx) != 0) {
1990		return false;
1991	}
1992	return true;
1993}
1994
1995static bool
1996prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
1997    bool even_if_attached) {
1998	malloc_mutex_assert_owner(tsdn, tdata->lock);
1999
2000	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
2001}
2002
2003static void
2004prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
2005    bool even_if_attached) {
2006	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
2007
2008	tdata_tree_remove(&tdatas, tdata);
2009
2010	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
2011
2012	if (tdata->thread_name != NULL) {
2013		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2014		    true);
2015	}
2016	ckh_delete(tsd, &tdata->bt2tctx);
2017	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2018}
2019
2020static void
2021prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
2022	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2023	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
2024	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2025}
2026
static void
prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
	bool destroy_tdata;

	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	if (tdata->attached) {
		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
		    true);
		/*
		 * Only detach if !destroy_tdata, because detaching would allow
		 * another thread to win the race to destroy tdata.
		 */
		if (!destroy_tdata) {
			tdata->attached = false;
		}
		tsd_prof_tdata_set(tsd, NULL);
	} else {
		destroy_tdata = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (destroy_tdata) {
		prof_tdata_destroy(tsd, tdata, true);
	}
}

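/*
 * Replace tdata with a fresh structure that carries over the thread's uid,
 * name (copied), and active flag, and increments thr_discrim so that the new
 * incarnation remains distinguishable from the old one.
 */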
JEMALLOC_PROF_NORETURN prof_tdata_t *
prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	uint64_t thr_uid = tdata->thr_uid;
	uint64_t thr_discrim = tdata->thr_discrim + 1;
	char *thread_name = (tdata->thread_name != NULL) ?
	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
	bool active = tdata->active;

	prof_tdata_detach(tsd, tdata);
	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
	    active);
#endif
}

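/*
 * Mark tdata as expired, and return whether the caller should destroy it
 * (true only if it is already detached and holds no tctx's).
 */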
static bool
prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
	bool destroy_tdata;

	malloc_mutex_lock(tsdn, tdata->lock);
	if (!tdata->expired) {
		tdata->expired = true;
		destroy_tdata = tdata->attached ? false :
		    prof_tdata_should_destroy(tsdn, tdata, false);
	} else {
		destroy_tdata = false;
	}
	malloc_mutex_unlock(tsdn, tdata->lock);

	return destroy_tdata;
}

static prof_tdata_t *
prof_tdata_reset_iter(prof_tdata_tree_t *tdatasunused, prof_tdata_t *tdata,
    void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	return (prof_tdata_expire(tsd_tsdn(tsd), tdata) ? tdata : NULL);
}

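/*
 * Switch to a new sample rate and expire all existing tdatas.  Expired tdatas
 * that are already unreferenced are destroyed here; the rest are destroyed
 * once their owners detach and their last tctx is released.
 */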
void
prof_reset(tsd_t *tsd, size_t lg_sample) {
	prof_tdata_t *next;

	assert(lg_sample < (sizeof(uint64_t) << 3));

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);

	lg_prof_sample = lg_sample;

	next = NULL;
	do {
		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
		    prof_tdata_reset_iter, (void *)tsd);
		if (to_destroy != NULL) {
			next = tdata_tree_next(&tdatas, to_destroy);
			prof_tdata_destroy_locked(tsd, to_destroy, false);
		} else {
			next = NULL;
		}
	} while (next != NULL);

	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
}

void
prof_tdata_cleanup(tsd_t *tsd) {
	prof_tdata_t *tdata;

	if (!config_prof) {
		return;
	}

	tdata = tsd_prof_tdata_get(tsd);
	if (tdata != NULL) {
		prof_tdata_detach(tsd, tdata);
	}
}

bool
prof_active_get(tsdn_t *tsdn) {
	bool prof_active_current;

	malloc_mutex_lock(tsdn, &prof_active_mtx);
	prof_active_current = prof_active;
	malloc_mutex_unlock(tsdn, &prof_active_mtx);
	return prof_active_current;
}

bool
prof_active_set(tsdn_t *tsdn, bool active) {
	bool prof_active_old;

	malloc_mutex_lock(tsdn, &prof_active_mtx);
	prof_active_old = prof_active;
	prof_active = active;
	malloc_mutex_unlock(tsdn, &prof_active_mtx);
	return prof_active_old;
}

JEMALLOC_PROF_NORETURN const char *
prof_thread_name_get(tsd_t *tsd) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return "";
	}
	return (tdata->thread_name != NULL ? tdata->thread_name : "");
#endif
}

#ifdef JEMALLOC_PROF
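/*
 * Return a copy of thread_name suitable for storing in tdata->thread_name:
 * NULL for a NULL input, a shared empty string for "", and otherwise a heap
 * copy allocated via iallocztm().
 */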
static char *
prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
	char *ret;
	size_t size;

	if (thread_name == NULL) {
		return NULL;
	}

	size = strlen(thread_name) + 1;
	if (size == 1) {
		/*
		 * XXX: Cast away const.  Empty names are never stored in
		 * tdata->thread_name (see prof_thread_name_set()), so this
		 * non-heap string is never passed to idalloctm().
		 */
		return __UNCONST("");
	}

	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (ret == NULL) {
		return NULL;
	}
	memcpy(ret, thread_name, size);
	return ret;
}
#endif

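/*
 * Set the calling thread's profiling name.  Names are restricted to printable
 * and blank characters; an empty name clears the current one.  Returns 0 on
 * success, EFAULT for invalid input, or EAGAIN on allocation failure.
 */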
JEMALLOC_PROF_NORETURN int
prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	prof_tdata_t *tdata;
	unsigned i;
	char *s;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return EAGAIN;
	}

	/* Validate input. */
	if (thread_name == NULL) {
		return EFAULT;
	}
	for (i = 0; thread_name[i] != '\0'; i++) {
		char c = thread_name[i];
		if (!isgraph((unsigned char)c) && !isblank((unsigned char)c)) {
			return EFAULT;
		}
	}

	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
	if (s == NULL) {
		return EAGAIN;
	}

	if (tdata->thread_name != NULL) {
		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
		    true);
		tdata->thread_name = NULL;
	}
	if (strlen(s) > 0) {
		tdata->thread_name = s;
	}
	return 0;
#endif
}

JEMALLOC_PROF_NORETURN bool
prof_thread_active_get(tsd_t *tsd) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return false;
	}
	return tdata->active;
#endif
}

JEMALLOC_PROF_NORETURN bool
prof_thread_active_set(tsd_t *tsd, bool active) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return true;
	}
	tdata->active = active;
	return false;
#endif
}

bool
prof_thread_active_init_get(tsdn_t *tsdn) {
	bool active_init;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init = prof_thread_active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init;
}

bool
prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
	bool active_init_old;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init_old = prof_thread_active_init;
	prof_thread_active_init = active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init_old;
}

bool
prof_gdump_get(tsdn_t *tsdn) {
	bool prof_gdump_current;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_current = prof_gdump_val;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_current;
}

bool
prof_gdump_set(tsdn_t *tsdn, bool gdump) {
	bool prof_gdump_old;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_old = prof_gdump_val;
	prof_gdump_val = gdump;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_old;
}

JEMALLOC_PROF_NORETURN void
prof_boot0(void) {
	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

JEMALLOC_PROF_NORETURN void
prof_boot1(void) {
	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && !opt_prof) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}

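/*
 * Complete profiling bootstrap: initialize the global backtrace hash, tdatas
 * tree, mutexes, shared lock tables, and (if a final dump was requested) the
 * atexit() hook.  Returns true on error.
 */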
JEMALLOC_PROF_NORETURN bool
prof_boot2(tsd_t *tsd) {
	cassert(config_prof);
#ifdef JEMALLOC_PROF
	if (opt_prof) {
		unsigned i;

		lg_prof_sample = opt_lg_prof_sample;

		prof_active = opt_prof_active;
		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
			return true;
		}

		prof_gdump_val = opt_prof_gdump;
		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
			return true;
		}

		prof_thread_active_init = opt_prof_thread_active_init;
		if (malloc_mutex_init(&prof_thread_active_init_mtx,
		    "prof_thread_active_init",
		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
		    malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp)) {
			return true;
		}
		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
			return true;
		}

		tdata_tree_new(&tdatas);
		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
			return true;
		}

		next_thr_uid = 0;
		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
			return true;
		}
		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
			return true;
		}

		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
		    atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort) {
				abort();
			}
		}

		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
		    CACHELINE);
		if (gctx_locks == NULL) {
			return true;
		}
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
			    WITNESS_RANK_PROF_GCTX,
			    malloc_mutex_rank_exclusive)) {
				return true;
			}
		}

		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
		    CACHELINE);
		if (tdata_locks == NULL) {
			return true;
		}
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
			    WITNESS_RANK_PROF_TDATA,
			    malloc_mutex_rank_exclusive)) {
				return true;
			}
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return false;
#endif
}

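/*
 * Fork handling: prof_prefork0()/prof_prefork1() acquire all profiling
 * mutexes before fork; the postfork functions release them in the parent
 * (or reset them in the child, as appropriate for the platform) in reverse
 * acquisition order.
 */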
void
prof_prefork0(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
		malloc_mutex_prefork(tsdn, &tdatas_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
		}
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
		}
	}
}

void
prof_prefork1(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		malloc_mutex_prefork(tsdn, &prof_active_mtx);
		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
	}
}

void
prof_postfork_parent(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_postfork_parent(tsdn,
		    &prof_thread_active_init_mtx);
		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
		}
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
		}
		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
	}
}

void
prof_postfork_child(tsdn_t *tsdn) {
	if (config_prof && opt_prof) {
		unsigned i;

		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
		}
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
		}
		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
	}
}

/******************************************************************************/
