1#define	JEMALLOC_PROF_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3/******************************************************************************/
4
5#ifdef JEMALLOC_PROF_LIBUNWIND
6#define	UNW_LOCAL_ONLY
7#include <libunwind.h>
8#endif
9
10#ifdef JEMALLOC_PROF_LIBGCC
11#include <unwind.h>
12#endif
13
14/******************************************************************************/
15/* Data. */
16
17bool		opt_prof = false;
18bool		opt_prof_active = true;
19bool		opt_prof_thread_active_init = true;
20size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
21ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
22bool		opt_prof_gdump = false;
23bool		opt_prof_final = false;
24bool		opt_prof_leak = false;
25bool		opt_prof_accum = false;
26char		opt_prof_prefix[
27    /* Minimize memory bloat for non-prof builds. */
28#ifdef JEMALLOC_PROF
29    PATH_MAX +
30#endif
31    1];
32
33/*
34 * Initialized as opt_prof_active, and accessed via
35 * prof_active_[gs]et{_unlocked,}().
36 */
37bool			prof_active;
38static malloc_mutex_t	prof_active_mtx;
39
40/*
41 * Initialized as opt_prof_thread_active_init, and accessed via
42 * prof_thread_active_init_[gs]et().
43 */
44static bool		prof_thread_active_init;
45static malloc_mutex_t	prof_thread_active_init_mtx;
46
47/*
48 * Initialized as opt_prof_gdump, and accessed via
49 * prof_gdump_[gs]et{_unlocked,}().
50 */
51bool			prof_gdump_val;
52static malloc_mutex_t	prof_gdump_mtx;
53
54uint64_t	prof_interval = 0;
55
56size_t		lg_prof_sample;
57
58/*
59 * Table of mutexes that are shared among gctx's.  These are leaf locks, so
60 * there is no problem with using them for more than one gctx at the same time.
61 * The primary motivation for this sharing, though, is that gctx's are ephemeral,
62 * and destroying mutexes causes complications for systems that allocate when
63 * creating/destroying mutexes.
64 */
65static malloc_mutex_t	*gctx_locks;
66static unsigned		cum_gctxs; /* Atomic counter. */
67
68/*
69 * Table of mutexes that are shared among tdata's.  No operations require
70 * holding multiple tdata locks, so there is no problem with using them for more
71 * than one tdata at the same time, even though a gctx lock may be acquired
72 * while holding a tdata lock.
73 */
74static malloc_mutex_t	*tdata_locks;
75
76/*
77 * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
78 * structure that knows about all backtraces currently captured.
79 */
80static ckh_t		bt2gctx;
81static malloc_mutex_t	bt2gctx_mtx;
82
83/*
84 * Tree of all extant prof_tdata_t structures, regardless of state,
85 * {attached,detached,expired}.
86 */
87static prof_tdata_tree_t	tdatas;
88static malloc_mutex_t	tdatas_mtx;
89
90static uint64_t		next_thr_uid;
91static malloc_mutex_t	next_thr_uid_mtx;
92
93static malloc_mutex_t	prof_dump_seq_mtx;
94static uint64_t		prof_dump_seq;
95static uint64_t		prof_dump_iseq;
96static uint64_t		prof_dump_mseq;
97static uint64_t		prof_dump_useq;
98
99/*
100 * This buffer is rather large for stack allocation, so use a single buffer for
101 * all profile dumps.
102 */
103static malloc_mutex_t	prof_dump_mtx;
104static char		prof_dump_buf[
105    /* Minimize memory bloat for non-prof builds. */
106#ifdef JEMALLOC_PROF
107    PROF_DUMP_BUFSIZE
108#else
109    1
110#endif
111];
112static size_t		prof_dump_buf_end;
113static int		prof_dump_fd;
114
115/* Do not dump any profiles until bootstrapping is complete. */
116static bool		prof_booted = false;
117
118/******************************************************************************/
119/*
120 * Function prototypes for static functions that are referenced prior to
121 * definition.
122 */
123
124static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
125static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
126static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
127    bool even_if_attached);
128static void	prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
129    bool even_if_attached);
130static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
131
132/******************************************************************************/
133/* Red-black trees. */
134
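/*
 * The comparators below use the (a > b) - (a < b) idiom, which yields
 * -1/0/1 without the overflow hazards of subtracting unsigned 64-bit
 * values.  prof_tctx_t's are ordered by owning thread (thr_uid, then
 * thr_discrim) and finally by per-thread creation order (tctx_uid), so
 * tree iteration order is deterministic for a given set of contexts.
 */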
135JEMALLOC_INLINE_C int
136prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b)
137{
138	uint64_t a_thr_uid = a->thr_uid;
139	uint64_t b_thr_uid = b->thr_uid;
140	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
141	if (ret == 0) {
142		uint64_t a_thr_discrim = a->thr_discrim;
143		uint64_t b_thr_discrim = b->thr_discrim;
144		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
145		    b_thr_discrim);
146		if (ret == 0) {
147			uint64_t a_tctx_uid = a->tctx_uid;
148			uint64_t b_tctx_uid = b->tctx_uid;
149			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
150			    b_tctx_uid);
151		}
152	}
153	return (ret);
154}
155
156rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
157    tctx_link, prof_tctx_comp)
158
159JEMALLOC_INLINE_C int
160prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b)
161{
162	unsigned a_len = a->bt.len;
163	unsigned b_len = b->bt.len;
164	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
165	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
166	if (ret == 0)
167		ret = (a_len > b_len) - (a_len < b_len);
168	return (ret);
169}
170
171rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
172    prof_gctx_comp)
173
174JEMALLOC_INLINE_C int
175prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b)
176{
177	int ret;
178	uint64_t a_uid = a->thr_uid;
179	uint64_t b_uid = b->thr_uid;
180
181	ret = ((a_uid > b_uid) - (a_uid < b_uid));
182	if (ret == 0) {
183		uint64_t a_discrim = a->thr_discrim;
184		uint64_t b_discrim = b->thr_discrim;
185
186		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
187	}
188	return (ret);
189}
190
191rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
192    prof_tdata_comp)
193
194/******************************************************************************/
195
196void
197prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated)
198{
199	prof_tdata_t *tdata;
200
201	cassert(config_prof);
202
203	if (updated) {
204		/*
205		 * Compute a new sample threshold.  This isn't very important in
206		 * practice, because this function is rarely executed, so the
207		 * potential for sample bias is minimal except in contrived
208		 * programs.
209		 */
210		tdata = prof_tdata_get(tsd, true);
211		if (tdata != NULL)
212			prof_sample_threshold_update(tdata);
213	}
214
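	/*
	 * tctx values of 0 and 1 are sentinels for untracked/unsampled
	 * allocations; only real prof_tctx_t pointers are rolled back here.
	 */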
215	if ((uintptr_t)tctx > (uintptr_t)1U) {
216		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
217		tctx->prepared = false;
218		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
219			prof_tctx_destroy(tsd, tctx);
220		else
221			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
222	}
223}
224
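/*
 * cnts.curobjs/curbytes track currently live sampled allocations and are
 * decremented in prof_free_sampled_object(); cnts.accumobjs/accumbytes are
 * cumulative totals that are only maintained when opt_prof_accum is enabled.
 */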
225void
226prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
227    prof_tctx_t *tctx)
228{
229
230	prof_tctx_set(tsdn, ptr, usize, tctx);
231
232	malloc_mutex_lock(tsdn, tctx->tdata->lock);
233	tctx->cnts.curobjs++;
234	tctx->cnts.curbytes += usize;
235	if (opt_prof_accum) {
236		tctx->cnts.accumobjs++;
237		tctx->cnts.accumbytes += usize;
238	}
239	tctx->prepared = false;
240	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
241}
242
243void
244prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
245{
246
247	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
248	assert(tctx->cnts.curobjs > 0);
249	assert(tctx->cnts.curbytes >= usize);
250	tctx->cnts.curobjs--;
251	tctx->cnts.curbytes -= usize;
252
253	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
254		prof_tctx_destroy(tsd, tctx);
255	else
256		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
257}
258
259void
260bt_init(prof_bt_t *bt, void **vec)
261{
262
263	cassert(config_prof);
264
265	bt->vec = vec;
266	bt->len = 0;
267}
268
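/*
 * prof_enter()/prof_leave() bracket accesses to bt2gctx.  While bt2gctx_mtx
 * is held, idump/gdump requests made on this thread are merely queued via
 * enq_idump/enq_gdump; prof_leave() performs them after the mutex is
 * dropped, which keeps prof_idump()/prof_gdump() from re-entering the
 * backtrace table machinery while it is locked.
 */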
269JEMALLOC_INLINE_C void
270prof_enter(tsd_t *tsd, prof_tdata_t *tdata)
271{
272
273	cassert(config_prof);
274	assert(tdata == prof_tdata_get(tsd, false));
275
276	if (tdata != NULL) {
277		assert(!tdata->enq);
278		tdata->enq = true;
279	}
280
281	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
282}
283
284JEMALLOC_INLINE_C void
285prof_leave(tsd_t *tsd, prof_tdata_t *tdata)
286{
287
288	cassert(config_prof);
289	assert(tdata == prof_tdata_get(tsd, false));
290
291	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
292
293	if (tdata != NULL) {
294		bool idump, gdump;
295
296		assert(tdata->enq);
297		tdata->enq = false;
298		idump = tdata->enq_idump;
299		tdata->enq_idump = false;
300		gdump = tdata->enq_gdump;
301		tdata->enq_gdump = false;
302
303		if (idump)
304			prof_idump(tsd_tsdn(tsd));
305		if (gdump)
306			prof_gdump(tsd_tsdn(tsd));
307	}
308}
309
310#ifdef JEMALLOC_PROF_LIBUNWIND
311void
312prof_backtrace(prof_bt_t *bt)
313{
314	int nframes;
315
316	cassert(config_prof);
317	assert(bt->len == 0);
318	assert(bt->vec != NULL);
319
320	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
321	if (nframes <= 0)
322		return;
323	bt->len = nframes;
324}
325#elif (defined(JEMALLOC_PROF_LIBGCC))
326static _Unwind_Reason_Code
327prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
328{
329
330	cassert(config_prof);
331
332	return (_URC_NO_REASON);
333}
334
335static _Unwind_Reason_Code
336prof_unwind_callback(struct _Unwind_Context *context, void *arg)
337{
338	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
339	void *ip;
340
341	cassert(config_prof);
342
343	ip = (void *)_Unwind_GetIP(context);
344	if (ip == NULL)
345		return (_URC_END_OF_STACK);
346	data->bt->vec[data->bt->len] = ip;
347	data->bt->len++;
348	if (data->bt->len == data->max)
349		return (_URC_END_OF_STACK);
350
351	return (_URC_NO_REASON);
352}
353
354void
355prof_backtrace(prof_bt_t *bt)
356{
357	prof_unwind_data_t data = {bt, PROF_BT_MAX};
358
359	cassert(config_prof);
360
361	_Unwind_Backtrace(prof_unwind_callback, &data);
362}
363#elif (defined(JEMALLOC_PROF_GCC))
364void
365prof_backtrace(prof_bt_t *bt)
366{
367#define	BT_FRAME(i)							\
368	if ((i) < PROF_BT_MAX) {					\
369		void *p;						\
370		if (__builtin_frame_address(i) == 0)			\
371			return;						\
372		p = __builtin_return_address(i);			\
373		if (p == NULL)						\
374			return;						\
375		bt->vec[(i)] = p;					\
376		bt->len = (i) + 1;					\
377	} else								\
378		return;
379
380	cassert(config_prof);
381
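	/*
	 * __builtin_frame_address()/__builtin_return_address() require a
	 * compile-time constant argument, so the walk over frames is unrolled
	 * manually: one BT_FRAME() invocation per potential frame, bounded by
	 * PROF_BT_MAX.
	 */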
382	BT_FRAME(0)
383	BT_FRAME(1)
384	BT_FRAME(2)
385	BT_FRAME(3)
386	BT_FRAME(4)
387	BT_FRAME(5)
388	BT_FRAME(6)
389	BT_FRAME(7)
390	BT_FRAME(8)
391	BT_FRAME(9)
392
393	BT_FRAME(10)
394	BT_FRAME(11)
395	BT_FRAME(12)
396	BT_FRAME(13)
397	BT_FRAME(14)
398	BT_FRAME(15)
399	BT_FRAME(16)
400	BT_FRAME(17)
401	BT_FRAME(18)
402	BT_FRAME(19)
403
404	BT_FRAME(20)
405	BT_FRAME(21)
406	BT_FRAME(22)
407	BT_FRAME(23)
408	BT_FRAME(24)
409	BT_FRAME(25)
410	BT_FRAME(26)
411	BT_FRAME(27)
412	BT_FRAME(28)
413	BT_FRAME(29)
414
415	BT_FRAME(30)
416	BT_FRAME(31)
417	BT_FRAME(32)
418	BT_FRAME(33)
419	BT_FRAME(34)
420	BT_FRAME(35)
421	BT_FRAME(36)
422	BT_FRAME(37)
423	BT_FRAME(38)
424	BT_FRAME(39)
425
426	BT_FRAME(40)
427	BT_FRAME(41)
428	BT_FRAME(42)
429	BT_FRAME(43)
430	BT_FRAME(44)
431	BT_FRAME(45)
432	BT_FRAME(46)
433	BT_FRAME(47)
434	BT_FRAME(48)
435	BT_FRAME(49)
436
437	BT_FRAME(50)
438	BT_FRAME(51)
439	BT_FRAME(52)
440	BT_FRAME(53)
441	BT_FRAME(54)
442	BT_FRAME(55)
443	BT_FRAME(56)
444	BT_FRAME(57)
445	BT_FRAME(58)
446	BT_FRAME(59)
447
448	BT_FRAME(60)
449	BT_FRAME(61)
450	BT_FRAME(62)
451	BT_FRAME(63)
452	BT_FRAME(64)
453	BT_FRAME(65)
454	BT_FRAME(66)
455	BT_FRAME(67)
456	BT_FRAME(68)
457	BT_FRAME(69)
458
459	BT_FRAME(70)
460	BT_FRAME(71)
461	BT_FRAME(72)
462	BT_FRAME(73)
463	BT_FRAME(74)
464	BT_FRAME(75)
465	BT_FRAME(76)
466	BT_FRAME(77)
467	BT_FRAME(78)
468	BT_FRAME(79)
469
470	BT_FRAME(80)
471	BT_FRAME(81)
472	BT_FRAME(82)
473	BT_FRAME(83)
474	BT_FRAME(84)
475	BT_FRAME(85)
476	BT_FRAME(86)
477	BT_FRAME(87)
478	BT_FRAME(88)
479	BT_FRAME(89)
480
481	BT_FRAME(90)
482	BT_FRAME(91)
483	BT_FRAME(92)
484	BT_FRAME(93)
485	BT_FRAME(94)
486	BT_FRAME(95)
487	BT_FRAME(96)
488	BT_FRAME(97)
489	BT_FRAME(98)
490	BT_FRAME(99)
491
492	BT_FRAME(100)
493	BT_FRAME(101)
494	BT_FRAME(102)
495	BT_FRAME(103)
496	BT_FRAME(104)
497	BT_FRAME(105)
498	BT_FRAME(106)
499	BT_FRAME(107)
500	BT_FRAME(108)
501	BT_FRAME(109)
502
503	BT_FRAME(110)
504	BT_FRAME(111)
505	BT_FRAME(112)
506	BT_FRAME(113)
507	BT_FRAME(114)
508	BT_FRAME(115)
509	BT_FRAME(116)
510	BT_FRAME(117)
511	BT_FRAME(118)
512	BT_FRAME(119)
513
514	BT_FRAME(120)
515	BT_FRAME(121)
516	BT_FRAME(122)
517	BT_FRAME(123)
518	BT_FRAME(124)
519	BT_FRAME(125)
520	BT_FRAME(126)
521	BT_FRAME(127)
522#undef BT_FRAME
523}
524#else
525void
526prof_backtrace(prof_bt_t *bt)
527{
528
529	cassert(config_prof);
530	not_reached();
531}
532#endif
533
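/*
 * gctx lock assignment is round-robin: each new gctx atomically bumps
 * cum_gctxs and takes the slot indexed by that count modulo PROF_NCTX_LOCKS,
 * so for example the first gctx and the (PROF_NCTX_LOCKS+1)-th gctx share
 * gctx_locks[0].  tdata locks are instead keyed on thr_uid, which is stable
 * for the life of a thread.
 */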
534static malloc_mutex_t *
535prof_gctx_mutex_choose(void)
536{
537	unsigned ngctxs = atomic_add_u(&cum_gctxs, 1);
538
539	return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]);
540}
541
542static malloc_mutex_t *
543prof_tdata_mutex_choose(uint64_t thr_uid)
544{
545
546	return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]);
547}
548
549static prof_gctx_t *
550prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt)
551{
552	/*
553	 * Create a single allocation that has space for vec of length bt->len.
554	 */
555	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
556	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
557	    size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
558	    true);
559	if (gctx == NULL)
560		return (NULL);
561	gctx->lock = prof_gctx_mutex_choose();
562	/*
563	 * Set nlimbo to 1, in order to avoid a race condition with
564	 * prof_tctx_destroy()/prof_gctx_try_destroy().
565	 */
566	gctx->nlimbo = 1;
567	tctx_tree_new(&gctx->tctxs);
568	/* Duplicate bt. */
569	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
570	gctx->bt.vec = gctx->vec;
571	gctx->bt.len = bt->len;
572	return (gctx);
573}
574
575static void
576prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
577    prof_tdata_t *tdata)
578{
579
580	cassert(config_prof);
581
582	/*
583	 * Check that gctx is still unused by any thread cache before destroying
584	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
585	 * condition with this function, as does prof_tctx_destroy() in order to
586	 * avoid a race between the main body of prof_tctx_destroy() and entry
587	 * into this function.
588	 */
589	prof_enter(tsd, tdata_self);
590	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
591	assert(gctx->nlimbo != 0);
592	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
593		/* Remove gctx from bt2gctx. */
594		if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL))
595			not_reached();
596		prof_leave(tsd, tdata_self);
597		/* Destroy gctx. */
598		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
599		idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true);
600	} else {
601		/*
602		 * Compensate for increment in prof_tctx_destroy() or
603		 * prof_lookup().
604		 */
605		gctx->nlimbo--;
606		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
607		prof_leave(tsd, tdata_self);
608	}
609}
610
611static bool
612prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx)
613{
614
615	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
616
617	if (opt_prof_accum)
618		return (false);
619	if (tctx->cnts.curobjs != 0)
620		return (false);
621	if (tctx->prepared)
622		return (false);
623	return (true);
624}
625
626static bool
627prof_gctx_should_destroy(prof_gctx_t *gctx)
628{
629
630	if (opt_prof_accum)
631		return (false);
632	if (!tctx_tree_empty(&gctx->tctxs))
633		return (false);
634	if (gctx->nlimbo != 0)
635		return (false);
636	return (true);
637}
638
639static void
640prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
641{
642	prof_tdata_t *tdata = tctx->tdata;
643	prof_gctx_t *gctx = tctx->gctx;
644	bool destroy_tdata, destroy_tctx, destroy_gctx;
645
646	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
647
648	assert(tctx->cnts.curobjs == 0);
649	assert(tctx->cnts.curbytes == 0);
650	assert(!opt_prof_accum);
651	assert(tctx->cnts.accumobjs == 0);
652	assert(tctx->cnts.accumbytes == 0);
653
654	ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL);
655	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
656	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
657
658	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
659	switch (tctx->state) {
660	case prof_tctx_state_nominal:
661		tctx_tree_remove(&gctx->tctxs, tctx);
662		destroy_tctx = true;
663		if (prof_gctx_should_destroy(gctx)) {
664			/*
665			 * Increment gctx->nlimbo in order to keep another
666			 * thread from winning the race to destroy gctx while
667			 * this one has gctx->lock dropped.  Without this, it
668			 * would be possible for another thread to:
669			 *
670			 * 1) Sample an allocation associated with gctx.
671			 * 2) Deallocate the sampled object.
672			 * 3) Successfully prof_gctx_try_destroy(gctx).
673			 *
674			 * The result would be that gctx no longer exists by the
675			 * time this thread accesses it in
676			 * prof_gctx_try_destroy().
677			 */
678			gctx->nlimbo++;
679			destroy_gctx = true;
680		} else
681			destroy_gctx = false;
682		break;
683	case prof_tctx_state_dumping:
684		/*
685		 * A dumping thread needs tctx to remain valid until dumping
686		 * has finished.  Change state such that the dumping thread will
687		 * complete destruction during a late dump iteration phase.
688		 */
689		tctx->state = prof_tctx_state_purgatory;
690		destroy_tctx = false;
691		destroy_gctx = false;
692		break;
693	default:
694		not_reached();
695		destroy_tctx = false;
696		destroy_gctx = false;
697	}
698	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
699	if (destroy_gctx) {
700		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
701		    tdata);
702	}
703
704	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
705
706	if (destroy_tdata)
707		prof_tdata_destroy(tsd_tsdn(tsd), tdata, false);
708
709	if (destroy_tctx)
710		idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true);
711}
712
713static bool
714prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
715    void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx)
716{
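	/*
	 * The unions below let the same storage be handed to the ckh_*()
	 * hash table API, which traffics in void *, while still being usable
	 * as typed prof_bt_t and prof_gctx_t pointers without casts.
	 */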
717	union {
718		prof_gctx_t	*p;
719		void		*v;
720	} gctx;
721	union {
722		prof_bt_t	*p;
723		void		*v;
724	} btkey;
725	bool new_gctx;
726
727	prof_enter(tsd, tdata);
728	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
729		/* bt has never been seen before.  Insert it. */
730		gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
731		if (gctx.v == NULL) {
732			prof_leave(tsd, tdata);
733			return (true);
734		}
735		btkey.p = &gctx.p->bt;
736		if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) {
737			/* OOM. */
738			prof_leave(tsd, tdata);
739			idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true);
740			return (true);
741		}
742		new_gctx = true;
743	} else {
744		/*
745		 * Increment nlimbo, in order to avoid a race condition with
746		 * prof_tctx_destroy()/prof_gctx_try_destroy().
747		 */
748		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
749		gctx.p->nlimbo++;
750		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
751		new_gctx = false;
752	}
753	prof_leave(tsd, tdata);
754
755	*p_btkey = btkey.v;
756	*p_gctx = gctx.p;
757	*p_new_gctx = new_gctx;
758	return (false);
759}
760
761prof_tctx_t *
762prof_lookup(tsd_t *tsd, prof_bt_t *bt)
763{
764	union {
765		prof_tctx_t	*p;
766		void		*v;
767	} ret;
768	prof_tdata_t *tdata;
769	bool not_found;
770
771	cassert(config_prof);
772
773	tdata = prof_tdata_get(tsd, false);
774	if (tdata == NULL)
775		return (NULL);
776
777	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
778	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
779	if (!not_found) /* Note double negative! */
780		ret.p->prepared = true;
781	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
782	if (not_found) {
783		void *btkey;
784		prof_gctx_t *gctx;
785		bool new_gctx, error;
786
787		/*
788		 * This thread's cache lacks bt.  Look for it in the global
789		 * cache.
790		 */
791		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
792		    &new_gctx))
793			return (NULL);
794
795		/* Link a prof_tctx_t into gctx for this thread. */
796		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
797		    size2index(sizeof(prof_tctx_t)), false, NULL, true,
798		    arena_ichoose(tsd_tsdn(tsd), NULL), true);
799		if (ret.p == NULL) {
800			if (new_gctx)
801				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
802			return (NULL);
803		}
804		ret.p->tdata = tdata;
805		ret.p->thr_uid = tdata->thr_uid;
806		ret.p->thr_discrim = tdata->thr_discrim;
807		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
808		ret.p->gctx = gctx;
809		ret.p->tctx_uid = tdata->tctx_uid_next++;
810		ret.p->prepared = true;
811		ret.p->state = prof_tctx_state_initializing;
812		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
813		error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey,
814		    ret.v);
815		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
816		if (error) {
817			if (new_gctx)
818				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
819			idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true);
820			return (NULL);
821		}
822		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
823		ret.p->state = prof_tctx_state_nominal;
824		tctx_tree_insert(&gctx->tctxs, ret.p);
825		gctx->nlimbo--;
826		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
827	}
828
829	return (ret.p);
830}
831
832/*
833 * The bodies of this function and prof_leakcheck() are compiled out unless heap
834 * profiling is enabled, so that it is possible to compile jemalloc with
835 * floating point support completely disabled.  Avoiding floating point code is
836 * important on memory-constrained systems, but it also enables a workaround for
837 * versions of glibc that don't properly save/restore floating point registers
838 * during dynamic lazy symbol loading (which internally calls into whatever
839 * malloc implementation happens to be integrated into the application).  Note
840 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast
841 * memory moves, so jemalloc must be compiled with such optimizations disabled
842 * (e.g. -mno-sse) in order for the workaround to be complete.
844 */
845void
846prof_sample_threshold_update(prof_tdata_t *tdata)
847{
848#ifdef JEMALLOC_PROF
849	uint64_t r;
850	double u;
851
852	if (!config_prof)
853		return;
854
855	if (lg_prof_sample == 0) {
856		tdata->bytes_until_sample = 0;
857		return;
858	}
859
860	/*
861	 * Compute sample interval as a geometrically distributed random
862	 * variable with mean (2^lg_prof_sample).
863	 *
864	 *                             __        __
865	 *                             |  log(u)  |                     1
866	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
867	 *                             | log(1-p) |             lg_prof_sample
868	 *                                                     2
869	 *
870	 * For more information on the math, see:
871	 *
872	 *   Non-Uniform Random Variate Generation
873	 *   Luc Devroye
874	 *   Springer-Verlag, New York, 1986
875	 *   pp 500
876	 *   (http://luc.devroye.org/rnbookindex.html)
877	 */
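	/*
	 * Illustrative numbers only: with the default lg_prof_sample of 19,
	 * p = 1/2^19, so thresholds average 512 KiB of allocation activity
	 * between samples.  r below is a 53-bit random value, and scaling by
	 * 1/2^53 maps it onto u in [0, 1).
	 */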
878	r = prng_lg_range(&tdata->prng_state, 53);
879	u = (double)r * (1.0/9007199254740992.0L);
880	tdata->bytes_until_sample = (uint64_t)(log(u) /
881	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
882	    + (uint64_t)1U;
883#endif
884}
885
886#ifdef JEMALLOC_JET
887static prof_tdata_t *
888prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
889{
890	size_t *tdata_count = (size_t *)arg;
891
892	(*tdata_count)++;
893
894	return (NULL);
895}
896
897size_t
898prof_tdata_count(void)
899{
900	size_t tdata_count = 0;
901	tsdn_t *tsdn;
902
903	tsdn = tsdn_fetch();
904	malloc_mutex_lock(tsdn, &tdatas_mtx);
905	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
906	    (void *)&tdata_count);
907	malloc_mutex_unlock(tsdn, &tdatas_mtx);
908
909	return (tdata_count);
910}
911#endif
912
913#ifdef JEMALLOC_JET
914size_t
915prof_bt_count(void)
916{
917	size_t bt_count;
918	tsd_t *tsd;
919	prof_tdata_t *tdata;
920
921	tsd = tsd_fetch();
922	tdata = prof_tdata_get(tsd, false);
923	if (tdata == NULL)
924		return (0);
925
926	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
927	bt_count = ckh_count(&bt2gctx);
928	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
929
930	return (bt_count);
931}
932#endif
933
934#ifdef JEMALLOC_JET
935#undef prof_dump_open
936#define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
937#endif
938static int
939prof_dump_open(bool propagate_err, const char *filename)
940{
941	int fd;
942
943	fd = creat(filename, 0644);
944	if (fd == -1 && !propagate_err) {
945		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
946		    filename);
947		if (opt_abort)
948			abort();
949	}
950
951	return (fd);
952}
953#ifdef JEMALLOC_JET
954#undef prof_dump_open
955#define	prof_dump_open JEMALLOC_N(prof_dump_open)
956prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
957#endif
958
959static bool
960prof_dump_flush(bool propagate_err)
961{
962	bool ret = false;
963	ssize_t err;
964
965	cassert(config_prof);
966
967	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
968	if (err == -1) {
969		if (!propagate_err) {
970			malloc_write("<jemalloc>: write() failed during heap "
971			    "profile flush\n");
972			if (opt_abort)
973				abort();
974		}
975		ret = true;
976	}
977	prof_dump_buf_end = 0;
978
979	return (ret);
980}
981
982static bool
983prof_dump_close(bool propagate_err)
984{
985	bool ret;
986
987	assert(prof_dump_fd != -1);
988	ret = prof_dump_flush(propagate_err);
989	close(prof_dump_fd);
990	prof_dump_fd = -1;
991
992	return (ret);
993}
994
995static bool
996prof_dump_write(bool propagate_err, const char *s)
997{
998	size_t i, slen, n;
999
1000	cassert(config_prof);
1001
1002	i = 0;
1003	slen = strlen(s);
1004	while (i < slen) {
1005		/* Flush the buffer if it is full. */
1006		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
1007			if (prof_dump_flush(propagate_err) && propagate_err)
1008				return (true);
1009
1010		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
1011			/* Finish writing. */
1012			n = slen - i;
1013		} else {
1014			/* Write as much of s as will fit. */
1015			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1016		}
1017		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1018		prof_dump_buf_end += n;
1019		i += n;
1020	}
1021
1022	return (false);
1023}
1024
1025JEMALLOC_FORMAT_PRINTF(2, 3)
1026static bool
1027prof_dump_printf(bool propagate_err, const char *format, ...)
1028{
1029	bool ret;
1030	va_list ap;
1031	char buf[PROF_PRINTF_BUFSIZE];
1032
1033	va_start(ap, format);
1034	malloc_vsnprintf(buf, sizeof(buf), format, ap);
1035	va_end(ap);
1036	ret = prof_dump_write(propagate_err, buf);
1037
1038	return (ret);
1039}
1040
1041static void
1042prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata)
1043{
1044
1045	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1046
1047	malloc_mutex_lock(tsdn, tctx->gctx->lock);
1048
1049	switch (tctx->state) {
1050	case prof_tctx_state_initializing:
1051		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1052		return;
1053	case prof_tctx_state_nominal:
1054		tctx->state = prof_tctx_state_dumping;
1055		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1056
1057		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1058
1059		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1060		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1061		if (opt_prof_accum) {
1062			tdata->cnt_summed.accumobjs +=
1063			    tctx->dump_cnts.accumobjs;
1064			tdata->cnt_summed.accumbytes +=
1065			    tctx->dump_cnts.accumbytes;
1066		}
1067		break;
1068	case prof_tctx_state_dumping:
1069	case prof_tctx_state_purgatory:
1070		not_reached();
1071	}
1072}
1073
1074static void
1075prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx)
1076{
1077
1078	malloc_mutex_assert_owner(tsdn, gctx->lock);
1079
1080	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1081	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1082	if (opt_prof_accum) {
1083		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1084		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1085	}
1086}
1087
1088static prof_tctx_t *
1089prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
1090{
1091	tsdn_t *tsdn = (tsdn_t *)arg;
1092
1093	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1094
1095	switch (tctx->state) {
1096	case prof_tctx_state_nominal:
1097		/* New since dumping started; ignore. */
1098		break;
1099	case prof_tctx_state_dumping:
1100	case prof_tctx_state_purgatory:
1101		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1102		break;
1103	default:
1104		not_reached();
1105	}
1106
1107	return (NULL);
1108}
1109
1110struct prof_tctx_dump_iter_arg_s {
1111	tsdn_t	*tsdn;
1112	bool	propagate_err;
1113};
1114
1115static prof_tctx_t *
1116prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque)
1117{
1118	struct prof_tctx_dump_iter_arg_s *arg =
1119	    (struct prof_tctx_dump_iter_arg_s *)opaque;
1120
1121	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1122
1123	switch (tctx->state) {
1124	case prof_tctx_state_initializing:
1125	case prof_tctx_state_nominal:
1126		/* Not captured by this dump. */
1127		break;
1128	case prof_tctx_state_dumping:
1129	case prof_tctx_state_purgatory:
1130		if (prof_dump_printf(arg->propagate_err,
1131		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1132		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1133		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1134		    tctx->dump_cnts.accumbytes))
1135			return (tctx);
1136		break;
1137	default:
1138		not_reached();
1139	}
1140	return (NULL);
1141}
1142
1143static prof_tctx_t *
1144prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
1145{
1146	tsdn_t *tsdn = (tsdn_t *)arg;
1147	prof_tctx_t *ret;
1148
1149	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1150
1151	switch (tctx->state) {
1152	case prof_tctx_state_nominal:
1153		/* New since dumping started; ignore. */
1154		break;
1155	case prof_tctx_state_dumping:
1156		tctx->state = prof_tctx_state_nominal;
1157		break;
1158	case prof_tctx_state_purgatory:
1159		ret = tctx;
1160		goto label_return;
1161	default:
1162		not_reached();
1163	}
1164
1165	ret = NULL;
1166label_return:
1167	return (ret);
1168}
1169
1170static void
1171prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
1172{
1173
1174	cassert(config_prof);
1175
1176	malloc_mutex_lock(tsdn, gctx->lock);
1177
1178	/*
1179	 * Increment nlimbo so that gctx won't go away before dump.
1180	 * Additionally, link gctx into the dump list so that it is included in
1181	 * prof_dump()'s second pass.
1182	 */
1183	gctx->nlimbo++;
1184	gctx_tree_insert(gctxs, gctx);
1185
1186	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1187
1188	malloc_mutex_unlock(tsdn, gctx->lock);
1189}
1190
1191struct prof_gctx_merge_iter_arg_s {
1192	tsdn_t	*tsdn;
1193	size_t	leak_ngctx;
1194};
1195
1196static prof_gctx_t *
1197prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
1198{
1199	struct prof_gctx_merge_iter_arg_s *arg =
1200	    (struct prof_gctx_merge_iter_arg_s *)opaque;
1201
1202	malloc_mutex_lock(arg->tsdn, gctx->lock);
1203	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1204	    (void *)arg->tsdn);
1205	if (gctx->cnt_summed.curobjs != 0)
1206		arg->leak_ngctx++;
1207	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1208
1209	return (NULL);
1210}
1211
1212static void
1213prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
1214{
1215	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1216	prof_gctx_t *gctx;
1217
1218	/*
1219	 * Standard tree iteration won't work here, because as soon as we
1220	 * decrement gctx->nlimbo and unlock gctx, another thread can
1221	 * concurrently destroy it, which will corrupt the tree.  Therefore,
1222	 * tear down the tree one node at a time during iteration.
1223	 */
1224	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1225		gctx_tree_remove(gctxs, gctx);
1226		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1227		{
1228			prof_tctx_t *next;
1229
1230			next = NULL;
1231			do {
1232				prof_tctx_t *to_destroy =
1233				    tctx_tree_iter(&gctx->tctxs, next,
1234				    prof_tctx_finish_iter,
1235				    (void *)tsd_tsdn(tsd));
1236				if (to_destroy != NULL) {
1237					next = tctx_tree_next(&gctx->tctxs,
1238					    to_destroy);
1239					tctx_tree_remove(&gctx->tctxs,
1240					    to_destroy);
1241					idalloctm(tsd_tsdn(tsd), to_destroy,
1242					    NULL, true, true);
1243				} else
1244					next = NULL;
1245			} while (next != NULL);
1246		}
1247		gctx->nlimbo--;
1248		if (prof_gctx_should_destroy(gctx)) {
1249			gctx->nlimbo++;
1250			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1251			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1252		} else
1253			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1254	}
1255}
1256
1257struct prof_tdata_merge_iter_arg_s {
1258	tsdn_t		*tsdn;
1259	prof_cnt_t	cnt_all;
1260};
1261
1262static prof_tdata_t *
1263prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1264    void *opaque)
1265{
1266	struct prof_tdata_merge_iter_arg_s *arg =
1267	    (struct prof_tdata_merge_iter_arg_s *)opaque;
1268
1269	malloc_mutex_lock(arg->tsdn, tdata->lock);
1270	if (!tdata->expired) {
1271		size_t tabind;
1272		union {
1273			prof_tctx_t	*p;
1274			void		*v;
1275		} tctx;
1276
1277		tdata->dumping = true;
1278		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1279		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1280		    &tctx.v);)
1281			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1282
1283		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1284		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1285		if (opt_prof_accum) {
1286			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1287			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1288		}
1289	} else
1290		tdata->dumping = false;
1291	malloc_mutex_unlock(arg->tsdn, tdata->lock);
1292
1293	return (NULL);
1294}
1295
1296static prof_tdata_t *
1297prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
1298{
1299	bool propagate_err = *(bool *)arg;
1300
1301	if (!tdata->dumping)
1302		return (NULL);
1303
1304	if (prof_dump_printf(propagate_err,
1305	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1306	    tdata->thr_uid, tdata->cnt_summed.curobjs,
1307	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1308	    tdata->cnt_summed.accumbytes,
1309	    (tdata->thread_name != NULL) ? " " : "",
1310	    (tdata->thread_name != NULL) ? tdata->thread_name : ""))
1311		return (tdata);
1312	return (NULL);
1313}
1314
1315#ifdef JEMALLOC_JET
1316#undef prof_dump_header
1317#define	prof_dump_header JEMALLOC_N(prof_dump_header_impl)
1318#endif
1319static bool
1320prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all)
1321{
1322	bool ret;
1323
1324	if (prof_dump_printf(propagate_err,
1325	    "heap_v2/%"FMTu64"\n"
1326	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1327	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1328	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes))
1329		return (true);
1330
1331	malloc_mutex_lock(tsdn, &tdatas_mtx);
1332	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1333	    (void *)&propagate_err) != NULL);
1334	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1335	return (ret);
1336}
1337#ifdef JEMALLOC_JET
1338#undef prof_dump_header
1339#define	prof_dump_header JEMALLOC_N(prof_dump_header)
1340prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl);
1341#endif
1342
1343static bool
1344prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1345    const prof_bt_t *bt, prof_gctx_tree_t *gctxs)
1346{
1347	bool ret;
1348	unsigned i;
1349	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1350
1351	cassert(config_prof);
1352	malloc_mutex_assert_owner(tsdn, gctx->lock);
1353
1354	/* Avoid dumping gctx's that have no useful data. */
1355	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1356	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1357		assert(gctx->cnt_summed.curobjs == 0);
1358		assert(gctx->cnt_summed.curbytes == 0);
1359		assert(gctx->cnt_summed.accumobjs == 0);
1360		assert(gctx->cnt_summed.accumbytes == 0);
1361		ret = false;
1362		goto label_return;
1363	}
1364
1365	if (prof_dump_printf(propagate_err, "@")) {
1366		ret = true;
1367		goto label_return;
1368	}
1369	for (i = 0; i < bt->len; i++) {
1370		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1371		    (uintptr_t)bt->vec[i])) {
1372			ret = true;
1373			goto label_return;
1374		}
1375	}
1376
1377	if (prof_dump_printf(propagate_err,
1378	    "\n"
1379	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1380	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1381	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1382		ret = true;
1383		goto label_return;
1384	}
1385
1386	prof_tctx_dump_iter_arg.tsdn = tsdn;
1387	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1388	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1389	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
1390		ret = true;
1391		goto label_return;
1392	}
1393
1394	ret = false;
1395label_return:
1396	return (ret);
1397}
1398
1399#ifndef _WIN32
1400JEMALLOC_FORMAT_PRINTF(1, 2)
1401static int
1402prof_open_maps(const char *format, ...)
1403{
1404	int mfd;
1405	va_list ap;
1406	char filename[PATH_MAX + 1];
1407
1408	va_start(ap, format);
1409	malloc_vsnprintf(filename, sizeof(filename), format, ap);
1410	va_end(ap);
1411	mfd = open(filename, O_RDONLY);
1412
1413	return (mfd);
1414}
1415#endif
1416
1417static int
1418prof_getpid(void)
1419{
1420
1421#ifdef _WIN32
1422	return (GetCurrentProcessId());
1423#else
1424	return (getpid());
1425#endif
1426}
1427
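/*
 * Append the process's memory mappings to the profile under a
 * "MAPPED_LIBRARIES:" header; jeprof relies on this section to map sampled
 * return addresses back to the shared objects that contain them.
 */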
1428static bool
1429prof_dump_maps(bool propagate_err)
1430{
1431	bool ret;
1432	int mfd;
1433
1434	cassert(config_prof);
1435#ifdef __FreeBSD__
1436	mfd = prof_open_maps("/proc/curproc/map");
1437#elif defined(_WIN32)
1438	mfd = -1; /* Not implemented. */
1439#else
1440	{
1441		int pid = prof_getpid();
1442
1443		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1444		if (mfd == -1)
1445			mfd = prof_open_maps("/proc/%d/maps", pid);
1446	}
1447#endif
1448	if (mfd != -1) {
1449		ssize_t nread;
1450
1451		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1452		    propagate_err) {
1453			ret = true;
1454			goto label_return;
1455		}
1456		nread = 0;
1457		do {
1458			prof_dump_buf_end += nread;
1459			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1460				/* Make space in prof_dump_buf before read(). */
1461				if (prof_dump_flush(propagate_err) &&
1462				    propagate_err) {
1463					ret = true;
1464					goto label_return;
1465				}
1466			}
1467			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
1468			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
1469		} while (nread > 0);
1470	} else {
1471		ret = true;
1472		goto label_return;
1473	}
1474
1475	ret = false;
1476label_return:
1477	if (mfd != -1)
1478		close(mfd);
1479	return (ret);
1480}
1481
1482/*
1483 * See prof_sample_threshold_update() comment for why the body of this function
1484 * is conditionally compiled.
1485 */
1486static void
1487prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1488    const char *filename)
1489{
1490
1491#ifdef JEMALLOC_PROF
1492	/*
1493	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
1494	 * differ slightly from what jeprof reports, because here we scale the
1495	 * summary values, whereas jeprof scales each context individually and
1496	 * reports the sums of the scaled values.
1497	 */
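	/*
	 * Rough worked example (illustrative only): with a 512 KiB sample
	 * period and an average sampled object size of 64 KiB, ratio = 1/8
	 * and scale_factor = 1/(1 - e^(-1/8)) ~= 8.5, i.e. each sampled
	 * object/byte stands in for roughly 8.5 actual ones in the summary.
	 */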
1498	if (cnt_all->curbytes != 0) {
1499		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1500		double ratio = (((double)cnt_all->curbytes) /
1501		    (double)cnt_all->curobjs) / sample_period;
1502		double scale_factor = 1.0 / (1.0 - exp(-ratio));
1503		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1504		    * scale_factor);
1505		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1506		    scale_factor);
1507
1508		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1509		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1510		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1511		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1512		malloc_printf(
1513		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1514		    filename);
1515	}
1516#endif
1517}
1518
1519struct prof_gctx_dump_iter_arg_s {
1520	tsdn_t	*tsdn;
1521	bool	propagate_err;
1522};
1523
1524static prof_gctx_t *
1525prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
1526{
1527	prof_gctx_t *ret;
1528	struct prof_gctx_dump_iter_arg_s *arg =
1529	    (struct prof_gctx_dump_iter_arg_s *)opaque;
1530
1531	malloc_mutex_lock(arg->tsdn, gctx->lock);
1532
1533	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1534	    gctxs)) {
1535		ret = gctx;
1536		goto label_return;
1537	}
1538
1539	ret = NULL;
1540label_return:
1541	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1542	return (ret);
1543}
1544
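/*
 * prof_dump() proceeds in two passes under prof_dump_mtx: first, while
 * bt2gctx is held stable via prof_enter(), every gctx is put in limbo and
 * each live tdata's tctx counters are merged into the corresponding gctx;
 * second, the header, per-gctx records, and the MAPPED_LIBRARIES section are
 * written out, after which prof_gctx_finish() drops the limbo references and
 * destroys any gctx that became unused during the dump.
 */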
1545static bool
1546prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck)
1547{
1548	prof_tdata_t *tdata;
1549	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1550	size_t tabind;
1551	union {
1552		prof_gctx_t	*p;
1553		void		*v;
1554	} gctx;
1555	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1556	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1557	prof_gctx_tree_t gctxs;
1558
1559	cassert(config_prof);
1560
1561	tdata = prof_tdata_get(tsd, true);
1562	if (tdata == NULL)
1563		return (true);
1564
1565	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1566	prof_enter(tsd, tdata);
1567
1568	/*
1569	 * Put gctx's in limbo and clear their counters in preparation for
1570	 * summing.
1571	 */
1572	gctx_tree_new(&gctxs);
1573	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);)
1574		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs);
1575
1576	/*
1577	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1578	 * stats and merge them into the associated gctx's.
1579	 */
1580	prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd);
1581	memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t));
1582	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1583	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1584	    (void *)&prof_tdata_merge_iter_arg);
1585	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1586
1587	/* Merge tctx stats into gctx's. */
1588	prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd);
1589	prof_gctx_merge_iter_arg.leak_ngctx = 0;
1590	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter,
1591	    (void *)&prof_gctx_merge_iter_arg);
1592
1593	prof_leave(tsd, tdata);
1594
1595	/* Create dump file. */
1596	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
1597		goto label_open_close_error;
1598
1599	/* Dump profile header. */
1600	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1601	    &prof_tdata_merge_iter_arg.cnt_all))
1602		goto label_write_error;
1603
1604	/* Dump per gctx profile stats. */
1605	prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd);
1606	prof_gctx_dump_iter_arg.propagate_err = propagate_err;
1607	if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter,
1608	    (void *)&prof_gctx_dump_iter_arg) != NULL)
1609		goto label_write_error;
1610
1611	/* Dump /proc/<pid>/maps if possible. */
1612	if (prof_dump_maps(propagate_err))
1613		goto label_write_error;
1614
1615	if (prof_dump_close(propagate_err))
1616		goto label_open_close_error;
1617
1618	prof_gctx_finish(tsd, &gctxs);
1619	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1620
1621	if (leakcheck) {
1622		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1623		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
1624	}
1625	return (false);
1626label_write_error:
1627	prof_dump_close(propagate_err);
1628label_open_close_error:
1629	prof_gctx_finish(tsd, &gctxs);
1630	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1631	return (true);
1632}
1633
1634#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1635#define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
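/*
 * Dump files are named "<prefix>.<pid>.<seq>.<v><vseq>.heap", where <v> is
 * 'f' (final), 'i' (interval), 'm' (manual), or 'u' (gdump), and <vseq> is
 * the per-trigger sequence number (omitted for 'f').  With the default
 * "jeprof" prefix this yields names such as "jeprof.1234.0.i0.heap" (the pid
 * and sequence numbers here are only illustrative).
 */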
1636static void
1637prof_dump_filename(char *filename, char v, uint64_t vseq)
1638{
1639
1640	cassert(config_prof);
1641
1642	if (vseq != VSEQ_INVALID) {
1643		/* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
1644		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1645		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
1646		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
1647	} else {
1648		/* "<prefix>.<pid>.<seq>.<v>.heap" */
1649		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1650		    "%s.%d.%"FMTu64".%c.heap",
1651		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
1652	}
1653	prof_dump_seq++;
1654}
1655
1656static void
1657prof_fdump(void)
1658{
1659	tsd_t *tsd;
1660	char filename[DUMP_FILENAME_BUFSIZE];
1661
1662	cassert(config_prof);
1663	assert(opt_prof_final);
1664	assert(opt_prof_prefix[0] != '\0');
1665
1666	if (!prof_booted)
1667		return;
1668	tsd = tsd_fetch();
1669
1670	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1671	prof_dump_filename(filename, 'f', VSEQ_INVALID);
1672	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1673	prof_dump(tsd, false, filename, opt_prof_leak);
1674}
1675
1676void
1677prof_idump(tsdn_t *tsdn)
1678{
1679	tsd_t *tsd;
1680	prof_tdata_t *tdata;
1681
1682	cassert(config_prof);
1683
1684	if (!prof_booted || tsdn_null(tsdn))
1685		return;
1686	tsd = tsdn_tsd(tsdn);
1687	tdata = prof_tdata_get(tsd, false);
1688	if (tdata == NULL)
1689		return;
1690	if (tdata->enq) {
1691		tdata->enq_idump = true;
1692		return;
1693	}
1694
1695	if (opt_prof_prefix[0] != '\0') {
1696		char filename[PATH_MAX + 1];
1697		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1698		prof_dump_filename(filename, 'i', prof_dump_iseq);
1699		prof_dump_iseq++;
1700		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1701		prof_dump(tsd, false, filename, false);
1702	}
1703}
1704
1705bool
1706prof_mdump(tsd_t *tsd, const char *filename)
1707{
1708	char filename_buf[DUMP_FILENAME_BUFSIZE];
1709
1710	cassert(config_prof);
1711
1712	if (!opt_prof || !prof_booted)
1713		return (true);
1714
1715	if (filename == NULL) {
1716		/* No filename specified, so automatically generate one. */
1717		if (opt_prof_prefix[0] == '\0')
1718			return (true);
1719		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1720		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1721		prof_dump_mseq++;
1722		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1723		filename = filename_buf;
1724	}
1725	return (prof_dump(tsd, true, filename, false));
1726}
1727
1728void
1729prof_gdump(tsdn_t *tsdn)
1730{
1731	tsd_t *tsd;
1732	prof_tdata_t *tdata;
1733
1734	cassert(config_prof);
1735
1736	if (!prof_booted || tsdn_null(tsdn))
1737		return;
1738	tsd = tsdn_tsd(tsdn);
1739	tdata = prof_tdata_get(tsd, false);
1740	if (tdata == NULL)
1741		return;
1742	if (tdata->enq) {
1743		tdata->enq_gdump = true;
1744		return;
1745	}
1746
1747	if (opt_prof_prefix[0] != '\0') {
1748		char filename[DUMP_FILENAME_BUFSIZE];
1749		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
1750		prof_dump_filename(filename, 'u', prof_dump_useq);
1751		prof_dump_useq++;
1752		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
1753		prof_dump(tsd, false, filename, false);
1754	}
1755}
1756
1757static void
1758prof_bt_hash(const void *key, size_t r_hash[2])
1759{
1760	prof_bt_t *bt = (prof_bt_t *)key;
1761
1762	cassert(config_prof);
1763
1764	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1765}
1766
1767static bool
1768prof_bt_keycomp(const void *k1, const void *k2)
1769{
1770	const prof_bt_t *bt1 = (prof_bt_t *)k1;
1771	const prof_bt_t *bt2 = (prof_bt_t *)k2;
1772
1773	cassert(config_prof);
1774
1775	if (bt1->len != bt2->len)
1776		return (false);
1777	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1778}
1779
1780JEMALLOC_INLINE_C uint64_t
1781prof_thr_uid_alloc(tsdn_t *tsdn)
1782{
1783	uint64_t thr_uid;
1784
1785	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
1786	thr_uid = next_thr_uid;
1787	next_thr_uid++;
1788	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
1789
1790	return (thr_uid);
1791}
1792
1793static prof_tdata_t *
1794prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
1795    char *thread_name, bool active)
1796{
1797	prof_tdata_t *tdata;
1798
1799	cassert(config_prof);
1800
1801	/* Initialize an empty cache for this thread. */
1802	tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t),
1803	    size2index(sizeof(prof_tdata_t)), false, NULL, true,
1804	    arena_get(TSDN_NULL, 0, true), true);
1805	if (tdata == NULL)
1806		return (NULL);
1807
1808	tdata->lock = prof_tdata_mutex_choose(thr_uid);
1809	tdata->thr_uid = thr_uid;
1810	tdata->thr_discrim = thr_discrim;
1811	tdata->thread_name = thread_name;
1812	tdata->attached = true;
1813	tdata->expired = false;
1814	tdata->tctx_uid_next = 0;
1815
1816	if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS,
1817	    prof_bt_hash, prof_bt_keycomp)) {
1818		idalloctm(tsdn, tdata, NULL, true, true);
1819		return (NULL);
1820	}
1821
1822	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
1823	prof_sample_threshold_update(tdata);
1824
1825	tdata->enq = false;
1826	tdata->enq_idump = false;
1827	tdata->enq_gdump = false;
1828
1829	tdata->dumping = false;
1830	tdata->active = active;
1831
1832	malloc_mutex_lock(tsdn, &tdatas_mtx);
1833	tdata_tree_insert(&tdatas, tdata);
1834	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1835
1836	return (tdata);
1837}
1838
1839prof_tdata_t *
1840prof_tdata_init(tsdn_t *tsdn)
1841{
1842
1843	return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL,
1844	    prof_thread_active_init_get(tsdn)));
1845}
1846
1847static bool
1848prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached)
1849{
1850
1851	if (tdata->attached && !even_if_attached)
1852		return (false);
1853	if (ckh_count(&tdata->bt2tctx) != 0)
1854		return (false);
1855	return (true);
1856}
1857
1858static bool
1859prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
1860    bool even_if_attached)
1861{
1862
1863	malloc_mutex_assert_owner(tsdn, tdata->lock);
1864
1865	return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
1866}
1867
1868static void
1869prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata,
1870    bool even_if_attached)
1871{
1872
1873	malloc_mutex_assert_owner(tsdn, &tdatas_mtx);
1874
1875	assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata);
1876
1877	tdata_tree_remove(&tdatas, tdata);
1878
1879	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
1880
1881	if (tdata->thread_name != NULL)
1882		idalloctm(tsdn, tdata->thread_name, NULL, true, true);
1883	ckh_delete(tsdn, &tdata->bt2tctx);
1884	idalloctm(tsdn, tdata, NULL, true, true);
1885}
1886
1887static void
1888prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached)
1889{
1890
1891	malloc_mutex_lock(tsdn, &tdatas_mtx);
1892	prof_tdata_destroy_locked(tsdn, tdata, even_if_attached);
1893	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1894}
1895
1896static void
1897prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata)
1898{
1899	bool destroy_tdata;
1900
1901	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1902	if (tdata->attached) {
1903		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
1904		    true);
1905		/*
1906		 * Only detach if !destroy_tdata, because detaching would allow
1907		 * another thread to win the race to destroy tdata.
1908		 */
1909		if (!destroy_tdata)
1910			tdata->attached = false;
1911		tsd_prof_tdata_set(tsd, NULL);
1912	} else
1913		destroy_tdata = false;
1914	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1915	if (destroy_tdata)
1916		prof_tdata_destroy(tsd_tsdn(tsd), tdata, true);
1917}
1918
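/*
 * Reinitialization keeps the thread's thr_uid but bumps thr_discrim, so the
 * replacement tdata compares as distinct from (and sorts after) any
 * still-referenced predecessor in the tdatas tree and in dump output.
 */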
1919prof_tdata_t *
1920prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata)
1921{
1922	uint64_t thr_uid = tdata->thr_uid;
1923	uint64_t thr_discrim = tdata->thr_discrim + 1;
1924	char *thread_name = (tdata->thread_name != NULL) ?
1925	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
1926	bool active = tdata->active;
1927
1928	prof_tdata_detach(tsd, tdata);
1929	return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim,
1930	    thread_name, active));
1931}
1932
1933static bool
1934prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata)
1935{
1936	bool destroy_tdata;
1937
1938	malloc_mutex_lock(tsdn, tdata->lock);
1939	if (!tdata->expired) {
1940		tdata->expired = true;
1941		destroy_tdata = tdata->attached ? false :
1942		    prof_tdata_should_destroy(tsdn, tdata, false);
1943	} else
1944		destroy_tdata = false;
1945	malloc_mutex_unlock(tsdn, tdata->lock);
1946
1947	return (destroy_tdata);
1948}
1949
1950static prof_tdata_t *
1951prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
1952{
1953	tsdn_t *tsdn = (tsdn_t *)arg;
1954
1955	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
1956}
1957
1958void
1959prof_reset(tsdn_t *tsdn, size_t lg_sample)
1960{
1961	prof_tdata_t *next;
1962
1963	assert(lg_sample < (sizeof(uint64_t) << 3));
1964
1965	malloc_mutex_lock(tsdn, &prof_dump_mtx);
1966	malloc_mutex_lock(tsdn, &tdatas_mtx);
1967
1968	lg_prof_sample = lg_sample;
1969
1970	next = NULL;
1971	do {
1972		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
1973		    prof_tdata_reset_iter, (void *)tsdn);
1974		if (to_destroy != NULL) {
1975			next = tdata_tree_next(&tdatas, to_destroy);
1976			prof_tdata_destroy_locked(tsdn, to_destroy, false);
1977		} else
1978			next = NULL;
1979	} while (next != NULL);
1980
1981	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1982	malloc_mutex_unlock(tsdn, &prof_dump_mtx);
1983}
1984
1985void
1986prof_tdata_cleanup(tsd_t *tsd)
1987{
1988	prof_tdata_t *tdata;
1989
1990	if (!config_prof)
1991		return;
1992
1993	tdata = tsd_prof_tdata_get(tsd);
1994	if (tdata != NULL)
1995		prof_tdata_detach(tsd, tdata);
1996}
1997
1998bool
1999prof_active_get(tsdn_t *tsdn)
2000{
2001	bool prof_active_current;
2002
2003	malloc_mutex_lock(tsdn, &prof_active_mtx);
2004	prof_active_current = prof_active;
2005	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2006	return (prof_active_current);
2007}
2008
2009bool
2010prof_active_set(tsdn_t *tsdn, bool active)
2011{
2012	bool prof_active_old;
2013
2014	malloc_mutex_lock(tsdn, &prof_active_mtx);
2015	prof_active_old = prof_active;
2016	prof_active = active;
2017	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2018	return (prof_active_old);
2019}
2020
2021const char *
2022prof_thread_name_get(tsd_t *tsd)
2023{
2024	prof_tdata_t *tdata;
2025
2026	tdata = prof_tdata_get(tsd, true);
2027	if (tdata == NULL)
2028		return ("");
2029	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2030}
2031
2032static char *
2033prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name)
2034{
2035	char *ret;
2036	size_t size;
2037
2038	if (thread_name == NULL)
2039		return (NULL);
2040
2041	size = strlen(thread_name) + 1;
2042	if (size == 1)
2043		return ("");
2044
2045	ret = iallocztm(tsdn, size, size2index(size), false, NULL, true,
2046	    arena_get(TSDN_NULL, 0, true), true);
2047	if (ret == NULL)
2048		return (NULL);
2049	memcpy(ret, thread_name, size);
2050	return (ret);
2051}
2052
2053int
2054prof_thread_name_set(tsd_t *tsd, const char *thread_name)
2055{
2056	prof_tdata_t *tdata;
2057	unsigned i;
2058	char *s;
2059
2060	tdata = prof_tdata_get(tsd, true);
2061	if (tdata == NULL)
2062		return (EAGAIN);
2063
2064	/* Validate input. */
2065	if (thread_name == NULL)
2066		return (EFAULT);
2067	for (i = 0; thread_name[i] != '\0'; i++) {
2068		char c = thread_name[i];
2069		if (!isgraph(c) && !isblank(c))
2070			return (EFAULT);
2071	}
2072
2073	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2074	if (s == NULL)
2075		return (EAGAIN);
2076
2077	if (tdata->thread_name != NULL) {
2078		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
2079		tdata->thread_name = NULL;
2080	}
2081	if (strlen(s) > 0)
2082		tdata->thread_name = s;
2083	return (0);
2084}
2085
bool
prof_thread_active_get(tsd_t *tsd)
{
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL)
		return (false);
	return (tdata->active);
}

bool
prof_thread_active_set(tsd_t *tsd, bool active)
{
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL)
		return (true);
	tdata->active = active;
	return (false);
}

bool
prof_thread_active_init_get(tsdn_t *tsdn)
{
	bool active_init;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init = prof_thread_active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return (active_init);
}

bool
prof_thread_active_init_set(tsdn_t *tsdn, bool active_init)
{
	bool active_init_old;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init_old = prof_thread_active_init;
	prof_thread_active_init = active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return (active_init_old);
}

bool
prof_gdump_get(tsdn_t *tsdn)
{
	bool prof_gdump_current;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_current = prof_gdump_val;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return (prof_gdump_current);
}

bool
prof_gdump_set(tsdn_t *tsdn, bool gdump)
{
	bool prof_gdump_old;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_old = prof_gdump_val;
	prof_gdump_val = gdump;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return (prof_gdump_old);
}

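/*
 * Profiling bootstrap is split into three phases (prof_boot0/1/2).
 * prof_boot0() only installs the compile-time default dump filename prefix
 * into opt_prof_prefix.
 */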
void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

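/*
 * prof_boot1() finalizes option interactions before any arenas exist:
 * opt_prof_leak forces profiling on without automatic dumping, and when
 * profiling was already requested, a non-negative opt_lg_prof_interval is
 * expanded into prof_interval.
 */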
void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && !opt_prof) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}

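/*
 * prof_boot2() performs the allocation-dependent part of bootstrapping:
 * initializing the profiling mutexes, the bt2gctx hash, the tdatas tree, and
 * the shared gctx/tdata lock tables, and registering the final-dump atexit()
 * handler when opt_prof_final is enabled.  Returns true on error.
 */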
bool
prof_boot2(tsdn_t *tsdn)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		lg_prof_sample = opt_lg_prof_sample;

		prof_active = opt_prof_active;
		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
		    WITNESS_RANK_PROF_ACTIVE))
			return (true);

		prof_gdump_val = opt_prof_gdump;
		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
		    WITNESS_RANK_PROF_GDUMP))
			return (true);

		prof_thread_active_init = opt_prof_thread_active_init;
		if (malloc_mutex_init(&prof_thread_active_init_mtx,
		    "prof_thread_active_init",
		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT))
			return (true);

		if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
		    WITNESS_RANK_PROF_BT2GCTX))
			return (true);

		tdata_tree_new(&tdatas);
		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
		    WITNESS_RANK_PROF_TDATAS))
			return (true);

		next_thr_uid = 0;
		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
		    WITNESS_RANK_PROF_NEXT_THR_UID))
			return (true);

		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
		    WITNESS_RANK_PROF_DUMP_SEQ))
			return (true);
		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
		    WITNESS_RANK_PROF_DUMP))
			return (true);

		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
		    atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS
		    * sizeof(malloc_mutex_t));
		if (gctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
			    WITNESS_RANK_PROF_GCTX))
				return (true);
		}

		tdata_locks = (malloc_mutex_t *)base_alloc(tsdn,
		    PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t));
		if (tdata_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
			    WITNESS_RANK_PROF_TDATA))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

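/*
 * Fork handlers.  The prefork functions acquire every profiling mutex so that
 * no lock is left held across fork() by a thread that will not exist in the
 * child; the postfork functions release them in the reverse order, in both the
 * parent and the child.
 */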
void
prof_prefork0(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
		malloc_mutex_prefork(tsdn, &tdatas_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
	}
}

void
prof_prefork1(tsdn_t *tsdn)
{

	if (opt_prof) {
		malloc_mutex_prefork(tsdn, &prof_active_mtx);
		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
	}
}

void
prof_postfork_parent(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_postfork_parent(tsdn,
		    &prof_thread_active_init_mtx);
		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
	}
}

void
prof_postfork_child(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
	}
}

/******************************************************************************/
