1#define JEMALLOC_PROF_C_
2#include "jemalloc/internal/jemalloc_preamble.h"
3#include "jemalloc/internal/jemalloc_internal_includes.h"
4
5#include "jemalloc/internal/assert.h"
6#include "jemalloc/internal/ckh.h"
7#include "jemalloc/internal/hash.h"
8#include "jemalloc/internal/malloc_io.h"
9#include "jemalloc/internal/mutex.h"
10#include "jemalloc/internal/emitter.h"
11
12/******************************************************************************/
13
14#ifdef JEMALLOC_PROF_LIBUNWIND
15#define UNW_LOCAL_ONLY
16#include <libunwind.h>
17#endif
18
19#ifdef JEMALLOC_PROF_LIBGCC
20/*
21 * We have a circular dependency -- jemalloc_internal.h tells us if we should
22 * use libgcc's unwinding functionality, but after we've included that, we've
23 * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
24 */
25#undef _Unwind_Backtrace
26#include <unwind.h>
27#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
28#endif
29
30/******************************************************************************/
31/* Data. */
32
33bool		opt_prof = false;
34bool		opt_prof_active = true;
35bool		opt_prof_thread_active_init = true;
36size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
37ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
38bool		opt_prof_gdump = false;
39bool		opt_prof_final = false;
40bool		opt_prof_leak = false;
41bool		opt_prof_accum = false;
42bool		opt_prof_log = false;
43char		opt_prof_prefix[
44    /* Minimize memory bloat for non-prof builds. */
45#ifdef JEMALLOC_PROF
46    PATH_MAX +
47#endif
48    1];
49
50/*
51 * Initialized as opt_prof_active, and accessed via
52 * prof_active_[gs]et{_unlocked,}().
53 */
54bool			prof_active;
55static malloc_mutex_t	prof_active_mtx;
56
57/*
58 * Initialized as opt_prof_thread_active_init, and accessed via
59 * prof_thread_active_init_[gs]et().
60 */
61static bool		prof_thread_active_init;
62static malloc_mutex_t	prof_thread_active_init_mtx;
63
64/*
65 * Initialized as opt_prof_gdump, and accessed via
66 * prof_gdump_[gs]et{_unlocked,}().
67 */
68bool			prof_gdump_val;
69static malloc_mutex_t	prof_gdump_mtx;
70
71uint64_t	prof_interval = 0;
72
73size_t		lg_prof_sample;
74
75typedef enum prof_logging_state_e prof_logging_state_t;
76enum prof_logging_state_e {
77	prof_logging_state_stopped,
78	prof_logging_state_started,
79	prof_logging_state_dumping
80};
81
82/*
83 * - stopped: log_start never called, or previous log_stop has completed.
84 * - started: log_start called, log_stop not called yet. Allocations are logged.
85 * - dumping: log_stop called but not finished; samples are not logged anymore.
86 */
87prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
88
89#ifdef JEMALLOC_JET
90static bool prof_log_dummy = false;
91#endif
92
93/* Incremented for every log file that is output. */
94static uint64_t log_seq = 0;
95static char log_filename[
96    /* Minimize memory bloat for non-prof builds. */
97#ifdef JEMALLOC_PROF
98    PATH_MAX +
99#endif
100    1];
101
102/* Timestamp for most recent call to log_start(). */
103static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER;
104
105/* Increment these when adding to the log_bt and log_thr linked lists. */
106static size_t log_bt_index = 0;
107static size_t log_thr_index = 0;
108
109/* Linked list node definitions. These are only used in prof.c. */
110typedef struct prof_bt_node_s prof_bt_node_t;
111
112struct prof_bt_node_s {
113	prof_bt_node_t *next;
114	size_t index;
115	prof_bt_t bt;
116	/* Variable size backtrace vector pointed to by bt. */
117	void *vec[1];
118};
119
120typedef struct prof_thr_node_s prof_thr_node_t;
121
122struct prof_thr_node_s {
123	prof_thr_node_t *next;
124	size_t index;
125	uint64_t thr_uid;
126	/* Variable size based on thr_name_sz. */
127	char name[1];
128};
129
130typedef struct prof_alloc_node_s prof_alloc_node_t;
131
132/* This is output when logging sampled allocations. */
133struct prof_alloc_node_s {
134	prof_alloc_node_t *next;
135	/* Indices into an array of thread data. */
136	size_t alloc_thr_ind;
137	size_t free_thr_ind;
138
139	/* Indices into an array of backtraces. */
140	size_t alloc_bt_ind;
141	size_t free_bt_ind;
142
143	uint64_t alloc_time_ns;
144	uint64_t free_time_ns;
145
146	size_t usize;
147};
148
/*
 * Created lazily, on the first sampled allocation after prof_log_start(), and
 * deleted by prof_log_stop().  These record the backtraces and threads that
 * have already been logged by an allocation.
 */
154static bool log_tables_initialized = false;
155static ckh_t log_bt_node_set;
156static ckh_t log_thr_node_set;
157
158/* Store linked lists for logged data. */
159static prof_bt_node_t *log_bt_first = NULL;
160static prof_bt_node_t *log_bt_last = NULL;
161static prof_thr_node_t *log_thr_first = NULL;
162static prof_thr_node_t *log_thr_last = NULL;
163static prof_alloc_node_t *log_alloc_first = NULL;
164static prof_alloc_node_t *log_alloc_last = NULL;
165
/* Protects prof_logging_state and all of the log_{...} variables above. */
167static malloc_mutex_t log_mtx;
168
169/*
170 * Table of mutexes that are shared among gctx's.  These are leaf locks, so
171 * there is no problem with using them for more than one gctx at the same time.
172 * The primary motivation for this sharing though is that gctx's are ephemeral,
173 * and destroying mutexes causes complications for systems that allocate when
174 * creating/destroying mutexes.
175 */
176static malloc_mutex_t	*gctx_locks;
177static atomic_u_t	cum_gctxs; /* Atomic counter. */
178
179/*
180 * Table of mutexes that are shared among tdata's.  No operations require
181 * holding multiple tdata locks, so there is no problem with using them for more
182 * than one tdata at the same time, even though a gctx lock may be acquired
183 * while holding a tdata lock.
184 */
185static malloc_mutex_t	*tdata_locks;
186
187/*
188 * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
189 * structure that knows about all backtraces currently captured.
190 */
191static ckh_t		bt2gctx;
/* Non-static to enable profiling. */
193malloc_mutex_t		bt2gctx_mtx;
194
195/*
196 * Tree of all extant prof_tdata_t structures, regardless of state,
197 * {attached,detached,expired}.
198 */
199static prof_tdata_tree_t	tdatas;
200static malloc_mutex_t	tdatas_mtx;
201
202static uint64_t		next_thr_uid;
203static malloc_mutex_t	next_thr_uid_mtx;
204
205static malloc_mutex_t	prof_dump_seq_mtx;
206static uint64_t		prof_dump_seq;
207static uint64_t		prof_dump_iseq;
208static uint64_t		prof_dump_mseq;
209static uint64_t		prof_dump_useq;
210
211/*
212 * This buffer is rather large for stack allocation, so use a single buffer for
213 * all profile dumps.
214 */
215static malloc_mutex_t	prof_dump_mtx;
216static char		prof_dump_buf[
217    /* Minimize memory bloat for non-prof builds. */
218#ifdef JEMALLOC_PROF
219    PROF_DUMP_BUFSIZE
220#else
221    1
222#endif
223];
224static size_t		prof_dump_buf_end;
225static int		prof_dump_fd;
226
227/* Do not dump any profiles until bootstrapping is complete. */
228static bool		prof_booted = false;
229
230/******************************************************************************/
231/*
232 * Function prototypes for static functions that are referenced prior to
233 * definition.
234 */
235
236static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
237static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
238static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
239    bool even_if_attached);
240static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
241    bool even_if_attached);
242static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
243
244/* Hashtable functions for log_bt_node_set and log_thr_node_set. */
245static void prof_thr_node_hash(const void *key, size_t r_hash[2]);
246static bool prof_thr_node_keycomp(const void *k1, const void *k2);
247static void prof_bt_node_hash(const void *key, size_t r_hash[2]);
248static bool prof_bt_node_keycomp(const void *k1, const void *k2);
249
250/******************************************************************************/
251/* Red-black trees. */
252
253static int
254prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
255	uint64_t a_thr_uid = a->thr_uid;
256	uint64_t b_thr_uid = b->thr_uid;
257	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
258	if (ret == 0) {
259		uint64_t a_thr_discrim = a->thr_discrim;
260		uint64_t b_thr_discrim = b->thr_discrim;
261		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
262		    b_thr_discrim);
263		if (ret == 0) {
264			uint64_t a_tctx_uid = a->tctx_uid;
265			uint64_t b_tctx_uid = b->tctx_uid;
266			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
267			    b_tctx_uid);
268		}
269	}
270	return ret;
271}
272
273rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
274    tctx_link, prof_tctx_comp)
275
276static int
277prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
278	unsigned a_len = a->bt.len;
279	unsigned b_len = b->bt.len;
280	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
281	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
282	if (ret == 0) {
283		ret = (a_len > b_len) - (a_len < b_len);
284	}
285	return ret;
286}
287
288rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
289    prof_gctx_comp)
290
291static int
292prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
293	int ret;
294	uint64_t a_uid = a->thr_uid;
295	uint64_t b_uid = b->thr_uid;
296
297	ret = ((a_uid > b_uid) - (a_uid < b_uid));
298	if (ret == 0) {
299		uint64_t a_discrim = a->thr_discrim;
300		uint64_t b_discrim = b->thr_discrim;
301
302		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
303	}
304	return ret;
305}
306
307rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
308    prof_tdata_comp)
309
310/******************************************************************************/
311
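/*
 * Roll back the sampling bookkeeping for an allocation that is not going to
 * use tctx after all.  If the per-thread byte counter was already updated,
 * pick a fresh sample threshold; then drop this thread's reference on tctx,
 * destroying it if nothing else needs it.
 */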
312void
313prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
314	prof_tdata_t *tdata;
315
316	cassert(config_prof);
317
318	if (updated) {
319		/*
320		 * Compute a new sample threshold.  This isn't very important in
321		 * practice, because this function is rarely executed, so the
322		 * potential for sample bias is minimal except in contrived
323		 * programs.
324		 */
325		tdata = prof_tdata_get(tsd, true);
326		if (tdata != NULL) {
327			prof_sample_threshold_update(tdata);
328		}
329	}
330
331	if ((uintptr_t)tctx > (uintptr_t)1U) {
332		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
333		tctx->prepared = false;
334		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
335			prof_tctx_destroy(tsd, tctx);
336		} else {
337			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
338		}
339	}
340}
341
342void
343prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
344    prof_tctx_t *tctx) {
345	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
346
	/*
	 * Get the current time and set this in the extent_t.  We'll read this
	 * when free() is called.
	 */
349	nstime_t t = NSTIME_ZERO_INITIALIZER;
350	nstime_update(&t);
351	prof_alloc_time_set(tsdn, ptr, NULL, t);
352
353	malloc_mutex_lock(tsdn, tctx->tdata->lock);
354	tctx->cnts.curobjs++;
355	tctx->cnts.curbytes += usize;
356	if (opt_prof_accum) {
357		tctx->cnts.accumobjs++;
358		tctx->cnts.accumbytes += usize;
359	}
360	tctx->prepared = false;
361	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
362}
363
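/*
 * Return the index assigned to bt in the log's backtrace list, copying bt into
 * a newly allocated node (and appending it to the list) if it has not been
 * seen before.  Caller must hold log_mtx.
 */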
364static size_t
365prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
366	assert(prof_logging_state == prof_logging_state_started);
367	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
368
369	prof_bt_node_t dummy_node;
370	dummy_node.bt = *bt;
371	prof_bt_node_t *node;
372
373	/* See if this backtrace is already cached in the table. */
374	if (ckh_search(&log_bt_node_set, (void *)(&dummy_node),
375	    (void **)(&node), NULL)) {
376		size_t sz = offsetof(prof_bt_node_t, vec) +
377			        (bt->len * sizeof(void *));
378		prof_bt_node_t *new_node = (prof_bt_node_t *)
379		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
380		    true, arena_get(TSDN_NULL, 0, true), true);
381		if (log_bt_first == NULL) {
382			log_bt_first = new_node;
383			log_bt_last = new_node;
384		} else {
385			log_bt_last->next = new_node;
386			log_bt_last = new_node;
387		}
388
389		new_node->next = NULL;
390		new_node->index = log_bt_index;
391		/*
392		 * Copy the backtrace: bt is inside a tdata or gctx, which
393		 * might die before prof_log_stop is called.
394		 */
395		new_node->bt.len = bt->len;
396		memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *));
397		new_node->bt.vec = new_node->vec;
398
399		log_bt_index++;
400		ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL);
401		return new_node->index;
402	} else {
403		return node->index;
404	}
405}
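
/*
 * Analogous to prof_log_bt_index(): return the index assigned to
 * (thr_uid, name) in the log's thread list, appending a new node if this
 * thread has not been seen before.  Caller must hold log_mtx.
 */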
406static size_t
407prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
408	assert(prof_logging_state == prof_logging_state_started);
409	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
410
411	prof_thr_node_t dummy_node;
412	dummy_node.thr_uid = thr_uid;
413	prof_thr_node_t *node;
414
415	/* See if this thread is already cached in the table. */
416	if (ckh_search(&log_thr_node_set, (void *)(&dummy_node),
417	    (void **)(&node), NULL)) {
418		size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
419		prof_thr_node_t *new_node = (prof_thr_node_t *)
420		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
421		    true, arena_get(TSDN_NULL, 0, true), true);
422		if (log_thr_first == NULL) {
423			log_thr_first = new_node;
424			log_thr_last = new_node;
425		} else {
426			log_thr_last->next = new_node;
427			log_thr_last = new_node;
428		}
429
430		new_node->next = NULL;
431		new_node->index = log_thr_index;
432		new_node->thr_uid = thr_uid;
433		strcpy(new_node->name, name);
434
435		log_thr_index++;
436		ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL);
437		return new_node->index;
438	} else {
439		return node->index;
440	}
441}
442
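/*
 * If allocation logging is active, append an (alloc, free) record for this
 * sampled object to the in-memory log.  Called with tctx->tdata->lock held;
 * takes log_mtx and lazily creates the backtrace/thread hash tables.
 */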
443static void
444prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
445	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
446
447	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
448	if (cons_tdata == NULL) {
449		/*
450		 * We decide not to log these allocations. cons_tdata will be
451		 * NULL only when the current thread is in a weird state (e.g.
452		 * it's being destroyed).
453		 */
454		return;
455	}
456
457	malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);
458
459	if (prof_logging_state != prof_logging_state_started) {
460		goto label_done;
461	}
462
463	if (!log_tables_initialized) {
464		bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
465				prof_bt_node_hash, prof_bt_node_keycomp);
466		bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
467				prof_thr_node_hash, prof_thr_node_keycomp);
468		if (err1 || err2) {
469			goto label_done;
470		}
471		log_tables_initialized = true;
472	}
473
474	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr,
475			          (alloc_ctx_t *)NULL);
476	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
477	nstime_update(&free_time);
478
479	size_t sz = sizeof(prof_alloc_node_t);
480	prof_alloc_node_t *new_node = (prof_alloc_node_t *)
481	    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
482	    arena_get(TSDN_NULL, 0, true), true);
483
	const char *prod_thr_name = (tctx->tdata->thread_name == NULL) ?
	    "" : tctx->tdata->thread_name;
486	const char *cons_thr_name = prof_thread_name_get(tsd);
487
488	prof_bt_t bt;
489	/* Initialize the backtrace, using the buffer in tdata to store it. */
490	bt_init(&bt, cons_tdata->vec);
491	prof_backtrace(&bt);
492	prof_bt_t *cons_bt = &bt;
493
494	/* We haven't destroyed tctx yet, so gctx should be good to read. */
495	prof_bt_t *prod_bt = &tctx->gctx->bt;
496
497	new_node->next = NULL;
498	new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
499				      prod_thr_name);
500	new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
501				     cons_thr_name);
502	new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
503	new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
504	new_node->alloc_time_ns = nstime_ns(&alloc_time);
505	new_node->free_time_ns = nstime_ns(&free_time);
506	new_node->usize = usize;
507
508	if (log_alloc_first == NULL) {
509		log_alloc_first = new_node;
510		log_alloc_last = new_node;
511	} else {
512		log_alloc_last->next = new_node;
513		log_alloc_last = new_node;
514	}
515
516label_done:
517	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
518}
519
520void
521prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
522    prof_tctx_t *tctx) {
523	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
524
525	assert(tctx->cnts.curobjs > 0);
526	assert(tctx->cnts.curbytes >= usize);
527	tctx->cnts.curobjs--;
528	tctx->cnts.curbytes -= usize;
529
530	prof_try_log(tsd, ptr, usize, tctx);
531
532	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
533		prof_tctx_destroy(tsd, tctx);
534	} else {
535		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
536	}
537}
538
539void
540bt_init(prof_bt_t *bt, void **vec) {
541	cassert(config_prof);
542
543	bt->vec = vec;
544	bt->len = 0;
545}
546
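/*
 * prof_enter()/prof_leave() bracket accesses to the global bt2gctx table.
 * While enq is set, idump/gdump requests triggered on this thread are deferred
 * (recorded in enq_idump/enq_gdump) and serviced by prof_leave() after
 * bt2gctx_mtx has been dropped, so that dumping never runs under that lock.
 */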
547static void
548prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
549	cassert(config_prof);
550	assert(tdata == prof_tdata_get(tsd, false));
551
552	if (tdata != NULL) {
553		assert(!tdata->enq);
554		tdata->enq = true;
555	}
556
557	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
558}
559
560static void
561prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
562	cassert(config_prof);
563	assert(tdata == prof_tdata_get(tsd, false));
564
565	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
566
567	if (tdata != NULL) {
568		bool idump, gdump;
569
570		assert(tdata->enq);
571		tdata->enq = false;
572		idump = tdata->enq_idump;
573		tdata->enq_idump = false;
574		gdump = tdata->enq_gdump;
575		tdata->enq_gdump = false;
576
577		if (idump) {
578			prof_idump(tsd_tsdn(tsd));
579		}
580		if (gdump) {
581			prof_gdump(tsd_tsdn(tsd));
582		}
583	}
584}
585
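/*
 * prof_backtrace() fills bt->vec with up to PROF_BT_MAX return addresses.  The
 * implementation is chosen at configure time: libunwind, libgcc's
 * _Unwind_Backtrace(), or a chain of gcc __builtin_return_address() calls; a
 * build with none of these gets a stub that must never be reached.
 */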
586#ifdef JEMALLOC_PROF_LIBUNWIND
587void
588prof_backtrace(prof_bt_t *bt) {
589	int nframes;
590
591	cassert(config_prof);
592	assert(bt->len == 0);
593	assert(bt->vec != NULL);
594
595	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
596	if (nframes <= 0) {
597		return;
598	}
599	bt->len = nframes;
600}
601#elif (defined(JEMALLOC_PROF_LIBGCC))
602static _Unwind_Reason_Code
603prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
604	cassert(config_prof);
605
606	return _URC_NO_REASON;
607}
608
609static _Unwind_Reason_Code
610prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
611	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
612	void *ip;
613
614	cassert(config_prof);
615
616	ip = (void *)_Unwind_GetIP(context);
617	if (ip == NULL) {
618		return _URC_END_OF_STACK;
619	}
620	data->bt->vec[data->bt->len] = ip;
621	data->bt->len++;
622	if (data->bt->len == data->max) {
623		return _URC_END_OF_STACK;
624	}
625
626	return _URC_NO_REASON;
627}
628
629void
630prof_backtrace(prof_bt_t *bt) {
631	prof_unwind_data_t data = {bt, PROF_BT_MAX};
632
633	cassert(config_prof);
634
635	_Unwind_Backtrace(prof_unwind_callback, &data);
636}
637#elif (defined(JEMALLOC_PROF_GCC))
638void
639prof_backtrace(prof_bt_t *bt) {
640#define BT_FRAME(i)							\
641	if ((i) < PROF_BT_MAX) {					\
642		void *p;						\
643		if (__builtin_frame_address(i) == 0) {			\
644			return;						\
645		}							\
646		p = __builtin_return_address(i);			\
647		if (p == NULL) {					\
648			return;						\
649		}							\
650		bt->vec[(i)] = p;					\
651		bt->len = (i) + 1;					\
652	} else {							\
653		return;							\
654	}
655
656	cassert(config_prof);
657
658	BT_FRAME(0)
659	BT_FRAME(1)
660	BT_FRAME(2)
661	BT_FRAME(3)
662	BT_FRAME(4)
663	BT_FRAME(5)
664	BT_FRAME(6)
665	BT_FRAME(7)
666	BT_FRAME(8)
667	BT_FRAME(9)
668
669	BT_FRAME(10)
670	BT_FRAME(11)
671	BT_FRAME(12)
672	BT_FRAME(13)
673	BT_FRAME(14)
674	BT_FRAME(15)
675	BT_FRAME(16)
676	BT_FRAME(17)
677	BT_FRAME(18)
678	BT_FRAME(19)
679
680	BT_FRAME(20)
681	BT_FRAME(21)
682	BT_FRAME(22)
683	BT_FRAME(23)
684	BT_FRAME(24)
685	BT_FRAME(25)
686	BT_FRAME(26)
687	BT_FRAME(27)
688	BT_FRAME(28)
689	BT_FRAME(29)
690
691	BT_FRAME(30)
692	BT_FRAME(31)
693	BT_FRAME(32)
694	BT_FRAME(33)
695	BT_FRAME(34)
696	BT_FRAME(35)
697	BT_FRAME(36)
698	BT_FRAME(37)
699	BT_FRAME(38)
700	BT_FRAME(39)
701
702	BT_FRAME(40)
703	BT_FRAME(41)
704	BT_FRAME(42)
705	BT_FRAME(43)
706	BT_FRAME(44)
707	BT_FRAME(45)
708	BT_FRAME(46)
709	BT_FRAME(47)
710	BT_FRAME(48)
711	BT_FRAME(49)
712
713	BT_FRAME(50)
714	BT_FRAME(51)
715	BT_FRAME(52)
716	BT_FRAME(53)
717	BT_FRAME(54)
718	BT_FRAME(55)
719	BT_FRAME(56)
720	BT_FRAME(57)
721	BT_FRAME(58)
722	BT_FRAME(59)
723
724	BT_FRAME(60)
725	BT_FRAME(61)
726	BT_FRAME(62)
727	BT_FRAME(63)
728	BT_FRAME(64)
729	BT_FRAME(65)
730	BT_FRAME(66)
731	BT_FRAME(67)
732	BT_FRAME(68)
733	BT_FRAME(69)
734
735	BT_FRAME(70)
736	BT_FRAME(71)
737	BT_FRAME(72)
738	BT_FRAME(73)
739	BT_FRAME(74)
740	BT_FRAME(75)
741	BT_FRAME(76)
742	BT_FRAME(77)
743	BT_FRAME(78)
744	BT_FRAME(79)
745
746	BT_FRAME(80)
747	BT_FRAME(81)
748	BT_FRAME(82)
749	BT_FRAME(83)
750	BT_FRAME(84)
751	BT_FRAME(85)
752	BT_FRAME(86)
753	BT_FRAME(87)
754	BT_FRAME(88)
755	BT_FRAME(89)
756
757	BT_FRAME(90)
758	BT_FRAME(91)
759	BT_FRAME(92)
760	BT_FRAME(93)
761	BT_FRAME(94)
762	BT_FRAME(95)
763	BT_FRAME(96)
764	BT_FRAME(97)
765	BT_FRAME(98)
766	BT_FRAME(99)
767
768	BT_FRAME(100)
769	BT_FRAME(101)
770	BT_FRAME(102)
771	BT_FRAME(103)
772	BT_FRAME(104)
773	BT_FRAME(105)
774	BT_FRAME(106)
775	BT_FRAME(107)
776	BT_FRAME(108)
777	BT_FRAME(109)
778
779	BT_FRAME(110)
780	BT_FRAME(111)
781	BT_FRAME(112)
782	BT_FRAME(113)
783	BT_FRAME(114)
784	BT_FRAME(115)
785	BT_FRAME(116)
786	BT_FRAME(117)
787	BT_FRAME(118)
788	BT_FRAME(119)
789
790	BT_FRAME(120)
791	BT_FRAME(121)
792	BT_FRAME(122)
793	BT_FRAME(123)
794	BT_FRAME(124)
795	BT_FRAME(125)
796	BT_FRAME(126)
797	BT_FRAME(127)
798#undef BT_FRAME
799}
800#else
801void
802prof_backtrace(prof_bt_t *bt) {
803	cassert(config_prof);
804	not_reached();
805}
806#endif
807
808static malloc_mutex_t *
809prof_gctx_mutex_choose(void) {
810	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
811
812	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
813}
814
815static malloc_mutex_t *
816prof_tdata_mutex_choose(uint64_t thr_uid) {
817	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
818}
819
820static prof_gctx_t *
821prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
822	/*
823	 * Create a single allocation that has space for vec of length bt->len.
824	 */
825	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
826	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
827	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
828	    true);
829	if (gctx == NULL) {
830		return NULL;
831	}
832	gctx->lock = prof_gctx_mutex_choose();
833	/*
834	 * Set nlimbo to 1, in order to avoid a race condition with
835	 * prof_tctx_destroy()/prof_gctx_try_destroy().
836	 */
837	gctx->nlimbo = 1;
838	tctx_tree_new(&gctx->tctxs);
839	/* Duplicate bt. */
840	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
841	gctx->bt.vec = gctx->vec;
842	gctx->bt.len = bt->len;
843	return gctx;
844}
845
846static void
847prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
848    prof_tdata_t *tdata) {
849	cassert(config_prof);
850
851	/*
852	 * Check that gctx is still unused by any thread cache before destroying
853	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
854	 * condition with this function, as does prof_tctx_destroy() in order to
855	 * avoid a race between the main body of prof_tctx_destroy() and entry
856	 * into this function.
857	 */
858	prof_enter(tsd, tdata_self);
859	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
860	assert(gctx->nlimbo != 0);
861	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
862		/* Remove gctx from bt2gctx. */
863		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
864			not_reached();
865		}
866		prof_leave(tsd, tdata_self);
867		/* Destroy gctx. */
868		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
869		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
870	} else {
871		/*
872		 * Compensate for increment in prof_tctx_destroy() or
873		 * prof_lookup().
874		 */
875		gctx->nlimbo--;
876		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
877		prof_leave(tsd, tdata_self);
878	}
879}
880
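/*
 * A tctx may be destroyed only once it tracks no live objects, has no update
 * in flight (prepared), and cumulative stats (opt_prof_accum) are disabled.
 */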
881static bool
882prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
883	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
884
885	if (opt_prof_accum) {
886		return false;
887	}
888	if (tctx->cnts.curobjs != 0) {
889		return false;
890	}
891	if (tctx->prepared) {
892		return false;
893	}
894	return true;
895}
896
897static bool
898prof_gctx_should_destroy(prof_gctx_t *gctx) {
899	if (opt_prof_accum) {
900		return false;
901	}
902	if (!tctx_tree_empty(&gctx->tctxs)) {
903		return false;
904	}
905	if (gctx->nlimbo != 0) {
906		return false;
907	}
908	return true;
909}
910
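/*
 * Tear down a tctx: remove it from its tdata's bt2tctx table and, unless a
 * dump is using it, from its gctx's tctx tree before freeing it; destruction
 * may cascade into the now-unused tdata and/or gctx.  Called with tdata->lock
 * held; the lock is dropped before returning.
 */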
911static void
912prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
913	prof_tdata_t *tdata = tctx->tdata;
914	prof_gctx_t *gctx = tctx->gctx;
915	bool destroy_tdata, destroy_tctx, destroy_gctx;
916
917	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
918
919	assert(tctx->cnts.curobjs == 0);
920	assert(tctx->cnts.curbytes == 0);
921	assert(!opt_prof_accum);
922	assert(tctx->cnts.accumobjs == 0);
923	assert(tctx->cnts.accumbytes == 0);
924
925	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
926	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
927	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
928
929	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
930	switch (tctx->state) {
931	case prof_tctx_state_nominal:
932		tctx_tree_remove(&gctx->tctxs, tctx);
933		destroy_tctx = true;
934		if (prof_gctx_should_destroy(gctx)) {
935			/*
936			 * Increment gctx->nlimbo in order to keep another
937			 * thread from winning the race to destroy gctx while
938			 * this one has gctx->lock dropped.  Without this, it
939			 * would be possible for another thread to:
940			 *
941			 * 1) Sample an allocation associated with gctx.
942			 * 2) Deallocate the sampled object.
943			 * 3) Successfully prof_gctx_try_destroy(gctx).
944			 *
945			 * The result would be that gctx no longer exists by the
946			 * time this thread accesses it in
947			 * prof_gctx_try_destroy().
948			 */
949			gctx->nlimbo++;
950			destroy_gctx = true;
951		} else {
952			destroy_gctx = false;
953		}
954		break;
955	case prof_tctx_state_dumping:
956		/*
957		 * A dumping thread needs tctx to remain valid until dumping
958		 * has finished.  Change state such that the dumping thread will
959		 * complete destruction during a late dump iteration phase.
960		 */
961		tctx->state = prof_tctx_state_purgatory;
962		destroy_tctx = false;
963		destroy_gctx = false;
964		break;
965	default:
966		not_reached();
967		destroy_tctx = false;
968		destroy_gctx = false;
969	}
970	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
971	if (destroy_gctx) {
972		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
973		    tdata);
974	}
975
976	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
977
978	if (destroy_tdata) {
979		prof_tdata_destroy(tsd, tdata, false);
980	}
981
982	if (destroy_tctx) {
983		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
984	}
985}
986
987static bool
988prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
989    void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
990	union {
991		prof_gctx_t	*p;
992		void		*v;
993	} gctx, tgctx;
994	union {
995		prof_bt_t	*p;
996		void		*v;
997	} btkey;
998	bool new_gctx;
999
1000	prof_enter(tsd, tdata);
1001	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1002		/* bt has never been seen before.  Insert it. */
1003		prof_leave(tsd, tdata);
1004		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
1005		if (tgctx.v == NULL) {
1006			return true;
1007		}
1008		prof_enter(tsd, tdata);
1009		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1010			gctx.p = tgctx.p;
1011			btkey.p = &gctx.p->bt;
1012			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
1013				/* OOM. */
1014				prof_leave(tsd, tdata);
1015				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
1016				    true, true);
1017				return true;
1018			}
1019			new_gctx = true;
1020		} else {
1021			new_gctx = false;
1022		}
1023	} else {
1024		tgctx.v = NULL;
1025		new_gctx = false;
1026	}
1027
1028	if (!new_gctx) {
1029		/*
1030		 * Increment nlimbo, in order to avoid a race condition with
1031		 * prof_tctx_destroy()/prof_gctx_try_destroy().
1032		 */
1033		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
1034		gctx.p->nlimbo++;
1035		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
1036		new_gctx = false;
1037
1038		if (tgctx.v != NULL) {
1039			/* Lost race to insert. */
1040			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
1041			    true);
1042		}
1043	}
1044	prof_leave(tsd, tdata);
1045
1046	*p_btkey = btkey.v;
1047	*p_gctx = gctx.p;
1048	*p_new_gctx = new_gctx;
1049	return false;
1050}
1051
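/*
 * Map bt to this thread's prof_tctx_t, creating and registering a new tctx
 * (and, if needed, a new global gctx) the first time this thread samples bt.
 * Returns NULL on failure.
 */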
1052prof_tctx_t *
1053prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
1054	union {
1055		prof_tctx_t	*p;
1056		void		*v;
1057	} ret;
1058	prof_tdata_t *tdata;
1059	bool not_found;
1060
1061	cassert(config_prof);
1062
1063	tdata = prof_tdata_get(tsd, false);
1064	if (tdata == NULL) {
1065		return NULL;
1066	}
1067
1068	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1069	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
1070	if (!not_found) { /* Note double negative! */
1071		ret.p->prepared = true;
1072	}
1073	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1074	if (not_found) {
1075		void *btkey;
1076		prof_gctx_t *gctx;
1077		bool new_gctx, error;
1078
1079		/*
1080		 * This thread's cache lacks bt.  Look for it in the global
1081		 * cache.
1082		 */
1083		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
1084		    &new_gctx)) {
1085			return NULL;
1086		}
1087
1088		/* Link a prof_tctx_t into gctx for this thread. */
1089		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
1090		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
1091		    arena_ichoose(tsd, NULL), true);
1092		if (ret.p == NULL) {
1093			if (new_gctx) {
1094				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1095			}
1096			return NULL;
1097		}
1098		ret.p->tdata = tdata;
1099		ret.p->thr_uid = tdata->thr_uid;
1100		ret.p->thr_discrim = tdata->thr_discrim;
1101		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
1102		ret.p->gctx = gctx;
1103		ret.p->tctx_uid = tdata->tctx_uid_next++;
1104		ret.p->prepared = true;
1105		ret.p->state = prof_tctx_state_initializing;
1106		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1107		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
1108		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1109		if (error) {
1110			if (new_gctx) {
1111				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1112			}
1113			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
1114			return NULL;
1115		}
1116		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1117		ret.p->state = prof_tctx_state_nominal;
1118		tctx_tree_insert(&gctx->tctxs, ret.p);
1119		gctx->nlimbo--;
1120		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1121	}
1122
1123	return ret.p;
1124}
1125
1126/*
1127 * The bodies of this function and prof_leakcheck() are compiled out unless heap
1128 * profiling is enabled, so that it is possible to compile jemalloc with
1129 * floating point support completely disabled.  Avoiding floating point code is
1130 * important on memory-constrained systems, but it also enables a workaround for
1131 * versions of glibc that don't properly save/restore floating point registers
1132 * during dynamic lazy symbol loading (which internally calls into whatever
1133 * malloc implementation happens to be integrated into the application).  Note
1134 * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
1135 * memory moves, so jemalloc must be compiled with such optimizations disabled
 * (e.g. -mno-sse) in order for the workaround to be complete.
1138 */
1139void
1140prof_sample_threshold_update(prof_tdata_t *tdata) {
1141#ifdef JEMALLOC_PROF
1142	if (!config_prof) {
1143		return;
1144	}
1145
1146	if (lg_prof_sample == 0) {
1147		tsd_bytes_until_sample_set(tsd_fetch(), 0);
1148		return;
1149	}
1150
1151	/*
1152	 * Compute sample interval as a geometrically distributed random
1153	 * variable with mean (2^lg_prof_sample).
1154	 *
1155	 *                             __        __
1156	 *                             |  log(u)  |                     1
1157	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
1158	 *                             | log(1-p) |             lg_prof_sample
1159	 *                                                     2
1160	 *
1161	 * For more information on the math, see:
1162	 *
1163	 *   Non-Uniform Random Variate Generation
1164	 *   Luc Devroye
1165	 *   Springer-Verlag, New York, 1986
1166	 *   pp 500
1167	 *   (http://luc.devroye.org/rnbookindex.html)
1168	 */
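	/*
	 * For example, with the default lg_prof_sample of 19 this yields one
	 * sample per 512 KiB of allocation activity on average (assuming
	 * LG_PROF_SAMPLE_DEFAULT has not been overridden at build time).
	 */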
1169	uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
1170	double u = (double)r * (1.0/9007199254740992.0L);
1171	uint64_t bytes_until_sample = (uint64_t)(log(u) /
1172	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
1173	    + (uint64_t)1U;
1174	if (bytes_until_sample > SSIZE_MAX) {
1175		bytes_until_sample = SSIZE_MAX;
1176	}
1177	tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample);
1178
1179#endif
1180}
1181
1182#ifdef JEMALLOC_JET
1183static prof_tdata_t *
1184prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1185    void *arg) {
1186	size_t *tdata_count = (size_t *)arg;
1187
1188	(*tdata_count)++;
1189
1190	return NULL;
1191}
1192
1193size_t
1194prof_tdata_count(void) {
1195	size_t tdata_count = 0;
1196	tsdn_t *tsdn;
1197
1198	tsdn = tsdn_fetch();
1199	malloc_mutex_lock(tsdn, &tdatas_mtx);
1200	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
1201	    (void *)&tdata_count);
1202	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1203
1204	return tdata_count;
1205}
1206
1207size_t
1208prof_bt_count(void) {
1209	size_t bt_count;
1210	tsd_t *tsd;
1211	prof_tdata_t *tdata;
1212
1213	tsd = tsd_fetch();
1214	tdata = prof_tdata_get(tsd, false);
1215	if (tdata == NULL) {
1216		return 0;
1217	}
1218
1219	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
1220	bt_count = ckh_count(&bt2gctx);
1221	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
1222
1223	return bt_count;
1224}
1225#endif
1226
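/*
 * Create the dump file and return its descriptor.  prof_dump_open is
 * JET_MUTABLE so that tests can interpose their own implementation.
 */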
1227static int
1228prof_dump_open_impl(bool propagate_err, const char *filename) {
1229	int fd;
1230
1231	fd = creat(filename, 0644);
1232	if (fd == -1 && !propagate_err) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
1234		    filename);
1235		if (opt_abort) {
1236			abort();
1237		}
1238	}
1239
1240	return fd;
1241}
1242prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
1243
1244static bool
1245prof_dump_flush(bool propagate_err) {
1246	bool ret = false;
1247	ssize_t err;
1248
1249	cassert(config_prof);
1250
1251	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
1252	if (err == -1) {
1253		if (!propagate_err) {
1254			malloc_write("<jemalloc>: write() failed during heap "
1255			    "profile flush\n");
1256			if (opt_abort) {
1257				abort();
1258			}
1259		}
1260		ret = true;
1261	}
1262	prof_dump_buf_end = 0;
1263
1264	return ret;
1265}
1266
1267static bool
1268prof_dump_close(bool propagate_err) {
1269	bool ret;
1270
1271	assert(prof_dump_fd != -1);
1272	ret = prof_dump_flush(propagate_err);
1273	close(prof_dump_fd);
1274	prof_dump_fd = -1;
1275
1276	return ret;
1277}
1278
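/*
 * Append s to prof_dump_buf, flushing the buffer to prof_dump_fd whenever it
 * fills.  Returns true if a write error occurred and propagate_err is set.
 */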
1279static bool
1280prof_dump_write(bool propagate_err, const char *s) {
1281	size_t i, slen, n;
1282
1283	cassert(config_prof);
1284
1285	i = 0;
1286	slen = strlen(s);
1287	while (i < slen) {
1288		/* Flush the buffer if it is full. */
1289		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1290			if (prof_dump_flush(propagate_err) && propagate_err) {
1291				return true;
1292			}
1293		}
1294
1295		if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
1296			/* Finish writing. */
1297			n = slen - i;
1298		} else {
1299			/* Write as much of s as will fit. */
1300			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1301		}
1302		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1303		prof_dump_buf_end += n;
1304		i += n;
1305	}
1306	assert(i == slen);
1307
1308	return false;
1309}
1310
1311JEMALLOC_FORMAT_PRINTF(2, 3)
1312static bool
1313prof_dump_printf(bool propagate_err, const char *format, ...) {
1314	bool ret;
1315	va_list ap;
1316	char buf[PROF_PRINTF_BUFSIZE];
1317
1318	va_start(ap, format);
1319	malloc_vsnprintf(buf, sizeof(buf), format, ap);
1320	va_end(ap);
1321	ret = prof_dump_write(propagate_err, buf);
1322
1323	return ret;
1324}
1325
1326static void
1327prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
1328	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1329
1330	malloc_mutex_lock(tsdn, tctx->gctx->lock);
1331
1332	switch (tctx->state) {
1333	case prof_tctx_state_initializing:
1334		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1335		return;
1336	case prof_tctx_state_nominal:
1337		tctx->state = prof_tctx_state_dumping;
1338		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1339
1340		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1341
1342		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1343		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1344		if (opt_prof_accum) {
1345			tdata->cnt_summed.accumobjs +=
1346			    tctx->dump_cnts.accumobjs;
1347			tdata->cnt_summed.accumbytes +=
1348			    tctx->dump_cnts.accumbytes;
1349		}
1350		break;
1351	case prof_tctx_state_dumping:
1352	case prof_tctx_state_purgatory:
1353		not_reached();
1354	}
1355}
1356
1357static void
1358prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
1359	malloc_mutex_assert_owner(tsdn, gctx->lock);
1360
1361	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1362	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1363	if (opt_prof_accum) {
1364		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1365		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1366	}
1367}
1368
1369static prof_tctx_t *
1370prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1371	tsdn_t *tsdn = (tsdn_t *)arg;
1372
1373	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1374
1375	switch (tctx->state) {
1376	case prof_tctx_state_nominal:
1377		/* New since dumping started; ignore. */
1378		break;
1379	case prof_tctx_state_dumping:
1380	case prof_tctx_state_purgatory:
1381		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1382		break;
1383	default:
1384		not_reached();
1385	}
1386
1387	return NULL;
1388}
1389
1390struct prof_tctx_dump_iter_arg_s {
1391	tsdn_t	*tsdn;
1392	bool	propagate_err;
1393};
1394
1395static prof_tctx_t *
1396prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
1397	struct prof_tctx_dump_iter_arg_s *arg =
1398	    (struct prof_tctx_dump_iter_arg_s *)opaque;
1399
1400	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1401
1402	switch (tctx->state) {
1403	case prof_tctx_state_initializing:
1404	case prof_tctx_state_nominal:
1405		/* Not captured by this dump. */
1406		break;
1407	case prof_tctx_state_dumping:
1408	case prof_tctx_state_purgatory:
1409		if (prof_dump_printf(arg->propagate_err,
1410		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1411		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1412		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1413		    tctx->dump_cnts.accumbytes)) {
1414			return tctx;
1415		}
1416		break;
1417	default:
1418		not_reached();
1419	}
1420	return NULL;
1421}
1422
1423static prof_tctx_t *
1424prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1425	tsdn_t *tsdn = (tsdn_t *)arg;
1426	prof_tctx_t *ret;
1427
1428	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1429
1430	switch (tctx->state) {
1431	case prof_tctx_state_nominal:
1432		/* New since dumping started; ignore. */
1433		break;
1434	case prof_tctx_state_dumping:
1435		tctx->state = prof_tctx_state_nominal;
1436		break;
1437	case prof_tctx_state_purgatory:
1438		ret = tctx;
1439		goto label_return;
1440	default:
1441		not_reached();
1442	}
1443
1444	ret = NULL;
1445label_return:
1446	return ret;
1447}
1448
1449static void
1450prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
1451	cassert(config_prof);
1452
1453	malloc_mutex_lock(tsdn, gctx->lock);
1454
1455	/*
1456	 * Increment nlimbo so that gctx won't go away before dump.
1457	 * Additionally, link gctx into the dump list so that it is included in
1458	 * prof_dump()'s second pass.
1459	 */
1460	gctx->nlimbo++;
1461	gctx_tree_insert(gctxs, gctx);
1462
1463	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1464
1465	malloc_mutex_unlock(tsdn, gctx->lock);
1466}
1467
1468struct prof_gctx_merge_iter_arg_s {
1469	tsdn_t	*tsdn;
1470	size_t	leak_ngctx;
1471};
1472
1473static prof_gctx_t *
1474prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1475	struct prof_gctx_merge_iter_arg_s *arg =
1476	    (struct prof_gctx_merge_iter_arg_s *)opaque;
1477
1478	malloc_mutex_lock(arg->tsdn, gctx->lock);
1479	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1480	    (void *)arg->tsdn);
1481	if (gctx->cnt_summed.curobjs != 0) {
1482		arg->leak_ngctx++;
1483	}
1484	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1485
1486	return NULL;
1487}
1488
1489static void
1490prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
1491	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1492	prof_gctx_t *gctx;
1493
1494	/*
1495	 * Standard tree iteration won't work here, because as soon as we
1496	 * decrement gctx->nlimbo and unlock gctx, another thread can
1497	 * concurrently destroy it, which will corrupt the tree.  Therefore,
1498	 * tear down the tree one node at a time during iteration.
1499	 */
1500	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1501		gctx_tree_remove(gctxs, gctx);
1502		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1503		{
1504			prof_tctx_t *next;
1505
1506			next = NULL;
1507			do {
1508				prof_tctx_t *to_destroy =
1509				    tctx_tree_iter(&gctx->tctxs, next,
1510				    prof_tctx_finish_iter,
1511				    (void *)tsd_tsdn(tsd));
1512				if (to_destroy != NULL) {
1513					next = tctx_tree_next(&gctx->tctxs,
1514					    to_destroy);
1515					tctx_tree_remove(&gctx->tctxs,
1516					    to_destroy);
1517					idalloctm(tsd_tsdn(tsd), to_destroy,
1518					    NULL, NULL, true, true);
1519				} else {
1520					next = NULL;
1521				}
1522			} while (next != NULL);
1523		}
1524		gctx->nlimbo--;
1525		if (prof_gctx_should_destroy(gctx)) {
1526			gctx->nlimbo++;
1527			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1528			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1529		} else {
1530			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1531		}
1532	}
1533}
1534
1535struct prof_tdata_merge_iter_arg_s {
1536	tsdn_t		*tsdn;
1537	prof_cnt_t	cnt_all;
1538};
1539
1540static prof_tdata_t *
1541prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1542    void *opaque) {
1543	struct prof_tdata_merge_iter_arg_s *arg =
1544	    (struct prof_tdata_merge_iter_arg_s *)opaque;
1545
1546	malloc_mutex_lock(arg->tsdn, tdata->lock);
1547	if (!tdata->expired) {
1548		size_t tabind;
1549		union {
1550			prof_tctx_t	*p;
1551			void		*v;
1552		} tctx;
1553
1554		tdata->dumping = true;
1555		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1556		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1557		    &tctx.v);) {
1558			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1559		}
1560
1561		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1562		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1563		if (opt_prof_accum) {
1564			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1565			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1566		}
1567	} else {
1568		tdata->dumping = false;
1569	}
1570	malloc_mutex_unlock(arg->tsdn, tdata->lock);
1571
1572	return NULL;
1573}
1574
1575static prof_tdata_t *
1576prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1577    void *arg) {
1578	bool propagate_err = *(bool *)arg;
1579
1580	if (!tdata->dumping) {
1581		return NULL;
1582	}
1583
1584	if (prof_dump_printf(propagate_err,
1585	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1586	    tdata->thr_uid, tdata->cnt_summed.curobjs,
1587	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1588	    tdata->cnt_summed.accumbytes,
1589	    (tdata->thread_name != NULL) ? " " : "",
1590	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
1591		return tdata;
1592	}
1593	return NULL;
1594}
1595
1596static bool
1597prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
1598    const prof_cnt_t *cnt_all) {
1599	bool ret;
1600
1601	if (prof_dump_printf(propagate_err,
1602	    "heap_v2/%"FMTu64"\n"
1603	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1604	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1605	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
1606		return true;
1607	}
1608
1609	malloc_mutex_lock(tsdn, &tdatas_mtx);
1610	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1611	    (void *)&propagate_err) != NULL);
1612	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1613	return ret;
1614}
1615prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
1616
1617static bool
1618prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1619    const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
1620	bool ret;
1621	unsigned i;
1622	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1623
1624	cassert(config_prof);
1625	malloc_mutex_assert_owner(tsdn, gctx->lock);
1626
	/* Avoid dumping any gctx that has no useful data. */
1628	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1629	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1630		assert(gctx->cnt_summed.curobjs == 0);
1631		assert(gctx->cnt_summed.curbytes == 0);
1632		assert(gctx->cnt_summed.accumobjs == 0);
1633		assert(gctx->cnt_summed.accumbytes == 0);
1634		ret = false;
1635		goto label_return;
1636	}
1637
1638	if (prof_dump_printf(propagate_err, "@")) {
1639		ret = true;
1640		goto label_return;
1641	}
1642	for (i = 0; i < bt->len; i++) {
1643		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1644		    (uintptr_t)bt->vec[i])) {
1645			ret = true;
1646			goto label_return;
1647		}
1648	}
1649
1650	if (prof_dump_printf(propagate_err,
1651	    "\n"
1652	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1653	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1654	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1655		ret = true;
1656		goto label_return;
1657	}
1658
1659	prof_tctx_dump_iter_arg.tsdn = tsdn;
1660	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1661	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1662	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
1663		ret = true;
1664		goto label_return;
1665	}
1666
1667	ret = false;
1668label_return:
1669	return ret;
1670}
1671
1672#ifndef _WIN32
1673JEMALLOC_FORMAT_PRINTF(1, 2)
1674static int
1675prof_open_maps(const char *format, ...) {
1676	int mfd;
1677	va_list ap;
1678	char filename[PATH_MAX + 1];
1679
1680	va_start(ap, format);
1681	malloc_vsnprintf(filename, sizeof(filename), format, ap);
1682	va_end(ap);
1683
1684#if defined(O_CLOEXEC)
1685	mfd = open(filename, O_RDONLY | O_CLOEXEC);
1686#else
1687	mfd = open(filename, O_RDONLY);
1688	if (mfd != -1) {
1689		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
1690	}
1691#endif
1692
1693	return mfd;
1694}
1695#endif
1696
1697static int
1698prof_getpid(void) {
1699#ifdef _WIN32
1700	return GetCurrentProcessId();
1701#else
1702	return getpid();
1703#endif
1704}
1705
1706static bool
1707prof_dump_maps(bool propagate_err) {
1708	bool ret;
1709	int mfd;
1710
1711	cassert(config_prof);
1712#ifdef __FreeBSD__
1713	mfd = prof_open_maps("/proc/curproc/map");
1714#elif defined(_WIN32)
	mfd = -1; /* Not implemented. */
1716#else
1717	{
1718		int pid = prof_getpid();
1719
1720		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1721		if (mfd == -1) {
1722			mfd = prof_open_maps("/proc/%d/maps", pid);
1723		}
1724	}
1725#endif
1726	if (mfd != -1) {
1727		ssize_t nread;
1728
1729		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1730		    propagate_err) {
1731			ret = true;
1732			goto label_return;
1733		}
1734		nread = 0;
1735		do {
1736			prof_dump_buf_end += nread;
1737			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1738				/* Make space in prof_dump_buf before read(). */
1739				if (prof_dump_flush(propagate_err) &&
1740				    propagate_err) {
1741					ret = true;
1742					goto label_return;
1743				}
1744			}
1745			nread = malloc_read_fd(mfd,
1746			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
1747			    - prof_dump_buf_end);
1748		} while (nread > 0);
1749	} else {
1750		ret = true;
1751		goto label_return;
1752	}
1753
1754	ret = false;
1755label_return:
1756	if (mfd != -1) {
1757		close(mfd);
1758	}
1759	return ret;
1760}
1761
1762/*
1763 * See prof_sample_threshold_update() comment for why the body of this function
1764 * is conditionally compiled.
1765 */
1766static void
1767prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1768    const char *filename) {
1769#ifdef JEMALLOC_PROF
1770	/*
	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
1772	 * differ slightly from what jeprof reports, because here we scale the
1773	 * summary values, whereas jeprof scales each context individually and
1774	 * reports the sums of the scaled values.
1775	 */
1776	if (cnt_all->curbytes != 0) {
1777		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1778		double ratio = (((double)cnt_all->curbytes) /
1779		    (double)cnt_all->curobjs) / sample_period;
1780		double scale_factor = 1.0 / (1.0 - exp(-ratio));
1781		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1782		    * scale_factor);
1783		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1784		    scale_factor);
1785
1786		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1787		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1788		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1789		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1790		malloc_printf(
1791		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1792		    filename);
1793	}
1794#endif
1795}
1796
1797struct prof_gctx_dump_iter_arg_s {
1798	tsdn_t	*tsdn;
1799	bool	propagate_err;
1800};
1801
1802static prof_gctx_t *
1803prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1804	prof_gctx_t *ret;
1805	struct prof_gctx_dump_iter_arg_s *arg =
1806	    (struct prof_gctx_dump_iter_arg_s *)opaque;
1807
1808	malloc_mutex_lock(arg->tsdn, gctx->lock);
1809
1810	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1811	    gctxs)) {
1812		ret = gctx;
1813		goto label_return;
1814	}
1815
1816	ret = NULL;
1817label_return:
1818	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1819	return ret;
1820}
1821
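/*
 * First pass of a dump, performed between prof_enter() and prof_leave(): put
 * every gctx in limbo and clear its summed counts, then merge per-thread tctx
 * stats into the gctx's, accumulating process-wide totals and the number of
 * potentially leaked contexts into the iterator args.
 */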
1822static void
1823prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
1824    struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1825    struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1826    prof_gctx_tree_t *gctxs) {
1827	size_t tabind;
1828	union {
1829		prof_gctx_t	*p;
1830		void		*v;
1831	} gctx;
1832
1833	prof_enter(tsd, tdata);
1834
1835	/*
1836	 * Put gctx's in limbo and clear their counters in preparation for
1837	 * summing.
1838	 */
1839	gctx_tree_new(gctxs);
1840	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
1841		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
1842	}
1843
1844	/*
1845	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1846	 * stats and merge them into the associated gctx's.
1847	 */
1848	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1849	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
1850	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1851	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1852	    (void *)prof_tdata_merge_iter_arg);
1853	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1854
1855	/* Merge tctx stats into gctx's. */
1856	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1857	prof_gctx_merge_iter_arg->leak_ngctx = 0;
1858	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
1859	    (void *)prof_gctx_merge_iter_arg);
1860
1861	prof_leave(tsd, tdata);
1862}
1863
1864static bool
1865prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
1866    bool leakcheck, prof_tdata_t *tdata,
1867    struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1868    struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1869    struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
1870    prof_gctx_tree_t *gctxs) {
1871	/* Create dump file. */
1872	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
1873		return true;
1874	}
1875
1876	/* Dump profile header. */
1877	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1878	    &prof_tdata_merge_iter_arg->cnt_all)) {
1879		goto label_write_error;
1880	}
1881
1882	/* Dump per gctx profile stats. */
1883	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
1884	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
1885	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
1886	    (void *)prof_gctx_dump_iter_arg) != NULL) {
1887		goto label_write_error;
1888	}
1889
1890	/* Dump /proc/<pid>/maps if possible. */
1891	if (prof_dump_maps(propagate_err)) {
1892		goto label_write_error;
1893	}
1894
1895	if (prof_dump_close(propagate_err)) {
1896		return true;
1897	}
1898
1899	return false;
1900label_write_error:
1901	prof_dump_close(propagate_err);
1902	return true;
1903}
1904
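/*
 * Write a complete heap profile to filename: header, one record per gctx with
 * useful data, and the process map.  Dumps are serialized by prof_dump_mtx;
 * when leakcheck is set, a leak summary is printed afterwards.
 */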
1905static bool
1906prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
1907    bool leakcheck) {
1908	cassert(config_prof);
1909	assert(tsd_reentrancy_level_get(tsd) == 0);
1910
1911	prof_tdata_t * tdata = prof_tdata_get(tsd, true);
1912	if (tdata == NULL) {
1913		return true;
1914	}
1915
1916	pre_reentrancy(tsd, NULL);
1917	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1918
1919	prof_gctx_tree_t gctxs;
1920	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1921	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1922	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1923	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1924	    &prof_gctx_merge_iter_arg, &gctxs);
1925	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
1926	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
1927	    &prof_gctx_dump_iter_arg, &gctxs);
1928	prof_gctx_finish(tsd, &gctxs);
1929
1930	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1931	post_reentrancy(tsd);
1932
1933	if (err) {
1934		return true;
1935	}
1936
1937	if (leakcheck) {
1938		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1939		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
1940	}
1941	return false;
1942}
1943
1944#ifdef JEMALLOC_JET
1945void
1946prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
1947    uint64_t *accumbytes) {
1948	tsd_t *tsd;
1949	prof_tdata_t *tdata;
1950	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1951	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1952	prof_gctx_tree_t gctxs;
1953
1954	tsd = tsd_fetch();
1955	tdata = prof_tdata_get(tsd, false);
1956	if (tdata == NULL) {
1957		if (curobjs != NULL) {
1958			*curobjs = 0;
1959		}
1960		if (curbytes != NULL) {
1961			*curbytes = 0;
1962		}
1963		if (accumobjs != NULL) {
1964			*accumobjs = 0;
1965		}
1966		if (accumbytes != NULL) {
1967			*accumbytes = 0;
1968		}
1969		return;
1970	}
1971
1972	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1973	    &prof_gctx_merge_iter_arg, &gctxs);
1974	prof_gctx_finish(tsd, &gctxs);
1975
1976	if (curobjs != NULL) {
1977		*curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
1978	}
1979	if (curbytes != NULL) {
1980		*curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
1981	}
1982	if (accumobjs != NULL) {
1983		*accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
1984	}
1985	if (accumbytes != NULL) {
1986		*accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
1987	}
1988}
1989#endif
1990
1991#define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1992#define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
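/*
 * Dump files are named "<prefix>.<pid>.<seq>.<v><vseq>.heap", where <v> is
 * 'f' (final), 'i' (interval), 'm' (manual) or 'u' (gdump), and <vseq> is
 * omitted for the final dump.  With the default "jeprof" prefix this yields
 * names such as "jeprof.12345.0.f.heap" (illustrative pid/sequence values).
 */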
1993static void
1994prof_dump_filename(char *filename, char v, uint64_t vseq) {
1995	cassert(config_prof);
1996
1997	if (vseq != VSEQ_INVALID) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
1999		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2000		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
2001		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
2002	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
2004		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2005		    "%s.%d.%"FMTu64".%c.heap",
2006		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
2007	}
2008	prof_dump_seq++;
2009}
2010
2011static void
2012prof_fdump(void) {
2013	tsd_t *tsd;
2014	char filename[DUMP_FILENAME_BUFSIZE];
2015
2016	cassert(config_prof);
2017	assert(opt_prof_final);
2018	assert(opt_prof_prefix[0] != '\0');
2019
2020	if (!prof_booted) {
2021		return;
2022	}
2023	tsd = tsd_fetch();
2024	assert(tsd_reentrancy_level_get(tsd) == 0);
2025
2026	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2027	prof_dump_filename(filename, 'f', VSEQ_INVALID);
2028	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2029	prof_dump(tsd, false, filename, opt_prof_leak);
2030}
2031
2032bool
2033prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
2034	cassert(config_prof);
2035
2036#ifndef JEMALLOC_ATOMIC_U64
2037	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
2038	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
2039		return true;
2040	}
2041	prof_accum->accumbytes = 0;
2042#else
2043	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
2044#endif
2045	return false;
2046}
2047
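/*
 * Interval-triggered dump ('i' sequence).  Bails out if profiling is not
 * booted or not active, if the thread is in a reentrant state, or if no
 * tdata is available; when dumps are currently enqueued (tdata->enq), the
 * request is deferred by setting enq_idump instead.
 */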
2048void
2049prof_idump(tsdn_t *tsdn) {
2050	tsd_t *tsd;
2051	prof_tdata_t *tdata;
2052
2053	cassert(config_prof);
2054
2055	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2056		return;
2057	}
2058	tsd = tsdn_tsd(tsdn);
2059	if (tsd_reentrancy_level_get(tsd) > 0) {
2060		return;
2061	}
2062
2063	tdata = prof_tdata_get(tsd, false);
2064	if (tdata == NULL) {
2065		return;
2066	}
2067	if (tdata->enq) {
2068		tdata->enq_idump = true;
2069		return;
2070	}
2071
2072	if (opt_prof_prefix[0] != '\0') {
2073		char filename[PATH_MAX + 1];
2074		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2075		prof_dump_filename(filename, 'i', prof_dump_iseq);
2076		prof_dump_iseq++;
2077		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2078		prof_dump(tsd, false, filename, false);
2079	}
2080}
2081
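/*
 * Manual dump, typically reached via the "prof.dump" mallctl.  A NULL
 * filename auto-generates one using the 'm' sequence, and unlike the
 * automatic dump paths, write errors are propagated back to the caller.
 * A hedged usage sketch from application code:
 *
 *   const char *fname = "myapp.heap";
 *   mallctl("prof.dump", NULL, NULL, (void *)&fname, sizeof(fname));
 */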
2082bool
2083prof_mdump(tsd_t *tsd, const char *filename) {
2084	cassert(config_prof);
2085	assert(tsd_reentrancy_level_get(tsd) == 0);
2086
2087	if (!opt_prof || !prof_booted) {
2088		return true;
2089	}
2090	char filename_buf[DUMP_FILENAME_BUFSIZE];
2091	if (filename == NULL) {
2092		/* No filename specified, so automatically generate one. */
2093		if (opt_prof_prefix[0] == '\0') {
2094			return true;
2095		}
2096		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2097		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
2098		prof_dump_mseq++;
2099		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2100		filename = filename_buf;
2101	}
2102	return prof_dump(tsd, true, filename, false);
2103}
2104
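/*
 * Growth-triggered dump ('u' sequence), invoked when the total virtual
 * memory high-water mark is exceeded (the opt.prof_gdump feature).  Uses the
 * same deferral mechanism as prof_idump(), via tdata->enq_gdump.
 */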
2105void
2106prof_gdump(tsdn_t *tsdn) {
2107	tsd_t *tsd;
2108	prof_tdata_t *tdata;
2109
2110	cassert(config_prof);
2111
2112	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2113		return;
2114	}
2115	tsd = tsdn_tsd(tsdn);
2116	if (tsd_reentrancy_level_get(tsd) > 0) {
2117		return;
2118	}
2119
2120	tdata = prof_tdata_get(tsd, false);
2121	if (tdata == NULL) {
2122		return;
2123	}
2124	if (tdata->enq) {
2125		tdata->enq_gdump = true;
2126		return;
2127	}
2128
2129	if (opt_prof_prefix[0] != '\0') {
2130		char filename[DUMP_FILENAME_BUFSIZE];
2131		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
2132		prof_dump_filename(filename, 'u', prof_dump_useq);
2133		prof_dump_useq++;
2134		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
2135		prof_dump(tsd, false, filename, false);
2136	}
2137}
2138
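/*
 * Hash and comparison callbacks for the cuckoo hash tables: prof_bt_* key
 * the bt2gctx and per-tdata bt2tctx tables directly by backtrace, while the
 * *_node_* variants below key the logging tables (log_bt_node_set and
 * log_thr_node_set).  Backtraces hash over the raw return-address vector and
 * compare by length plus memcmp.
 */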
2139static void
2140prof_bt_hash(const void *key, size_t r_hash[2]) {
2141	prof_bt_t *bt = (prof_bt_t *)key;
2142
2143	cassert(config_prof);
2144
2145	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
2146}
2147
2148static bool
2149prof_bt_keycomp(const void *k1, const void *k2) {
2150	const prof_bt_t *bt1 = (prof_bt_t *)k1;
2151	const prof_bt_t *bt2 = (prof_bt_t *)k2;
2152
2153	cassert(config_prof);
2154
2155	if (bt1->len != bt2->len) {
2156		return false;
2157	}
2158	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
2159}
2160
2161static void
2162prof_bt_node_hash(const void *key, size_t r_hash[2]) {
2163	const prof_bt_node_t *bt_node = (prof_bt_node_t *)key;
2164	prof_bt_hash((void *)(&bt_node->bt), r_hash);
2165}
2166
2167static bool
2168prof_bt_node_keycomp(const void *k1, const void *k2) {
2169	const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
2170	const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
2171	return prof_bt_keycomp((void *)(&bt_node1->bt),
2172	    (void *)(&bt_node2->bt));
2173}
2174
2175static void
2176prof_thr_node_hash(const void *key, size_t r_hash[2]) {
2177	const prof_thr_node_t *thr_node = (prof_thr_node_t *)key;
2178	hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash);
2179}
2180
2181static bool
2182prof_thr_node_keycomp(const void *k1, const void *k2) {
2183	const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1;
2184	const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2;
2185	return thr_node1->thr_uid == thr_node2->thr_uid;
2186}
2187
2188static uint64_t
2189prof_thr_uid_alloc(tsdn_t *tsdn) {
2190	uint64_t thr_uid;
2191
2192	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
2193	thr_uid = next_thr_uid;
2194	next_thr_uid++;
2195	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
2196
2197	return thr_uid;
2198}
2199
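/*
 * Allocate and register per-thread profiling state: an internal allocation
 * from arena 0, a bt2tctx table for this thread's sampled backtraces, a PRNG
 * seeded from the tdata's own address for choosing sample intervals, and an
 * entry in the global tdatas tree.
 */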
2200static prof_tdata_t *
2201prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
2202    char *thread_name, bool active) {
2203	prof_tdata_t *tdata;
2204
2205	cassert(config_prof);
2206
2207	/* Initialize an empty cache for this thread. */
2208	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
2209	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
2210	    arena_get(TSDN_NULL, 0, true), true);
2211	if (tdata == NULL) {
2212		return NULL;
2213	}
2214
2215	tdata->lock = prof_tdata_mutex_choose(thr_uid);
2216	tdata->thr_uid = thr_uid;
2217	tdata->thr_discrim = thr_discrim;
2218	tdata->thread_name = thread_name;
2219	tdata->attached = true;
2220	tdata->expired = false;
2221	tdata->tctx_uid_next = 0;
2222
2223	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
2224	    prof_bt_keycomp)) {
2225		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2226		return NULL;
2227	}
2228
2229	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
2230	prof_sample_threshold_update(tdata);
2231
2232	tdata->enq = false;
2233	tdata->enq_idump = false;
2234	tdata->enq_gdump = false;
2235
2236	tdata->dumping = false;
2237	tdata->active = active;
2238
2239	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2240	tdata_tree_insert(&tdatas, tdata);
2241	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2242
2243	return tdata;
2244}
2245
2246prof_tdata_t *
2247prof_tdata_init(tsd_t *tsd) {
2248	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
2249	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
2250}
2251
2252static bool
2253prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
2254	if (tdata->attached && !even_if_attached) {
2255		return false;
2256	}
2257	if (ckh_count(&tdata->bt2tctx) != 0) {
2258		return false;
2259	}
2260	return true;
2261}
2262
2263static bool
2264prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
2265    bool even_if_attached) {
2266	malloc_mutex_assert_owner(tsdn, tdata->lock);
2267
2268	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
2269}
2270
2271static void
2272prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
2273    bool even_if_attached) {
2274	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
2275
2276	tdata_tree_remove(&tdatas, tdata);
2277
2278	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
2279
2280	if (tdata->thread_name != NULL) {
2281		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2282		    true);
2283	}
2284	ckh_delete(tsd, &tdata->bt2tctx);
2285	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2286}
2287
2288static void
2289prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
2290	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2291	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
2292	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2293}
2294
2295static void
2296prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
2297	bool destroy_tdata;
2298
2299	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
2300	if (tdata->attached) {
2301		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
2302		    true);
2303		/*
2304		 * Only detach if !destroy_tdata, because detaching would allow
2305		 * another thread to win the race to destroy tdata.
2306		 */
2307		if (!destroy_tdata) {
2308			tdata->attached = false;
2309		}
2310		tsd_prof_tdata_set(tsd, NULL);
2311	} else {
2312		destroy_tdata = false;
2313	}
2314	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
2315	if (destroy_tdata) {
2316		prof_tdata_destroy(tsd, tdata, true);
2317	}
2318}
2319
2320prof_tdata_t *
2321prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
2322	uint64_t thr_uid = tdata->thr_uid;
2323	uint64_t thr_discrim = tdata->thr_discrim + 1;
2324	char *thread_name = (tdata->thread_name != NULL) ?
2325	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
2326	bool active = tdata->active;
2327
2328	prof_tdata_detach(tsd, tdata);
2329	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
2330	    active);
2331}
2332
2333static bool
2334prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
2335	bool destroy_tdata;
2336
2337	malloc_mutex_lock(tsdn, tdata->lock);
2338	if (!tdata->expired) {
2339		tdata->expired = true;
2340		destroy_tdata = tdata->attached ? false :
2341		    prof_tdata_should_destroy(tsdn, tdata, false);
2342	} else {
2343		destroy_tdata = false;
2344	}
2345	malloc_mutex_unlock(tsdn, tdata->lock);
2346
2347	return destroy_tdata;
2348}
2349
2350static prof_tdata_t *
2351prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
2352    void *arg) {
2353	tsdn_t *tsdn = (tsdn_t *)arg;
2354
2355	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
2356}
2357
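/*
 * Reset the sample rate and discard accumulated per-thread state, typically
 * via the "prof.reset" mallctl.  Every tdata in the tree is marked expired;
 * those that are detached and hold no counters are destroyed immediately.
 */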
2358void
2359prof_reset(tsd_t *tsd, size_t lg_sample) {
2360	prof_tdata_t *next;
2361
2362	assert(lg_sample < (sizeof(uint64_t) << 3));
2363
2364	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
2365	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2366
2367	lg_prof_sample = lg_sample;
2368
2369	next = NULL;
2370	do {
2371		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
2372		    prof_tdata_reset_iter, (void *)tsd);
2373		if (to_destroy != NULL) {
2374			next = tdata_tree_next(&tdatas, to_destroy);
2375			prof_tdata_destroy_locked(tsd, to_destroy, false);
2376		} else {
2377			next = NULL;
2378		}
2379	} while (next != NULL);
2380
2381	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2382	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
2383}
2384
2385void
2386prof_tdata_cleanup(tsd_t *tsd) {
2387	prof_tdata_t *tdata;
2388
2389	if (!config_prof) {
2390		return;
2391	}
2392
2393	tdata = tsd_prof_tdata_get(tsd);
2394	if (tdata != NULL) {
2395		prof_tdata_detach(tsd, tdata);
2396	}
2397}
2398
2399bool
2400prof_active_get(tsdn_t *tsdn) {
2401	bool prof_active_current;
2402
2403	malloc_mutex_lock(tsdn, &prof_active_mtx);
2404	prof_active_current = prof_active;
2405	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2406	return prof_active_current;
2407}
2408
2409bool
2410prof_active_set(tsdn_t *tsdn, bool active) {
2411	bool prof_active_old;
2412
2413	malloc_mutex_lock(tsdn, &prof_active_mtx);
2414	prof_active_old = prof_active;
2415	prof_active = active;
2416	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2417	return prof_active_old;
2418}
2419
2420#ifdef JEMALLOC_JET
2421size_t
2422prof_log_bt_count(void) {
2423	size_t cnt = 0;
2424	prof_bt_node_t *node = log_bt_first;
2425	while (node != NULL) {
2426		cnt++;
2427		node = node->next;
2428	}
2429	return cnt;
2430}
2431
2432size_t
2433prof_log_alloc_count(void) {
2434	size_t cnt = 0;
2435	prof_alloc_node_t *node = log_alloc_first;
2436	while (node != NULL) {
2437		cnt++;
2438		node = node->next;
2439	}
2440	return cnt;
2441}
2442
2443size_t
2444prof_log_thr_count(void) {
2445	size_t cnt = 0;
2446	prof_thr_node_t *node = log_thr_first;
2447	while (node != NULL) {
2448		cnt++;
2449		node = node->next;
2450	}
2451	return cnt;
2452}
2453
2454bool
2455prof_log_is_logging(void) {
2456	return prof_logging_state == prof_logging_state_started;
2457}
2458
2459bool
2460prof_log_rep_check(void) {
2461	if (prof_logging_state == prof_logging_state_stopped
2462	    && log_tables_initialized) {
2463		return true;
2464	}
2465
2466	if (log_bt_last != NULL && log_bt_last->next != NULL) {
2467		return true;
2468	}
2469	if (log_thr_last != NULL && log_thr_last->next != NULL) {
2470		return true;
2471	}
2472	if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
2473		return true;
2474	}
2475
2476	size_t bt_count = prof_log_bt_count();
2477	size_t thr_count = prof_log_thr_count();
2478	size_t alloc_count = prof_log_alloc_count();
2479
2480
2481	if (prof_logging_state == prof_logging_state_stopped) {
		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
2483			return true;
2484		}
2485	}
2486
2487	prof_alloc_node_t *node = log_alloc_first;
2488	while (node != NULL) {
2489		if (node->alloc_bt_ind >= bt_count) {
2490			return true;
2491		}
2492		if (node->free_bt_ind >= bt_count) {
2493			return true;
2494		}
2495		if (node->alloc_thr_ind >= thr_count) {
2496			return true;
2497		}
2498		if (node->free_thr_ind >= thr_count) {
2499			return true;
2500		}
2501		if (node->alloc_time_ns > node->free_time_ns) {
2502			return true;
2503		}
2504		node = node->next;
2505	}
2506
2507	return false;
2508}
2509
2510void
2511prof_log_dummy_set(bool new_value) {
2512	prof_log_dummy = new_value;
2513}
2514#endif
2515
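/*
 * Begin logging allocations to a JSON file.  With a NULL filename the log is
 * named "<prefix>.<pid>.<seq>.json"; otherwise the caller's path is copied
 * verbatim (and rejected if it does not fit the buffer).  Normally enabled
 * at startup via opt.prof_log; recent releases also expose a
 * "prof.log_start" mallctl, roughly (sketch; NULL selects the default name):
 *
 *   const char *log_name = NULL;
 *   mallctl("prof.log_start", NULL, NULL, (void *)&log_name,
 *       sizeof(log_name));
 */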
2516bool
2517prof_log_start(tsdn_t *tsdn, const char *filename) {
2518	if (!opt_prof || !prof_booted) {
2519		return true;
2520	}
2521
2522	bool ret = false;
2523	size_t buf_size = PATH_MAX + 1;
2524
2525	malloc_mutex_lock(tsdn, &log_mtx);
2526
2527	if (prof_logging_state != prof_logging_state_stopped) {
2528		ret = true;
2529	} else if (filename == NULL) {
2530		/* Make default name. */
2531		malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json",
2532		    opt_prof_prefix, prof_getpid(), log_seq);
2533		log_seq++;
2534		prof_logging_state = prof_logging_state_started;
2535	} else if (strlen(filename) >= buf_size) {
2536		ret = true;
2537	} else {
2538		strcpy(log_filename, filename);
2539		prof_logging_state = prof_logging_state_started;
2540	}
2541
2542	if (!ret) {
2543		nstime_update(&log_start_timestamp);
2544	}
2545
2546	malloc_mutex_unlock(tsdn, &log_mtx);
2547
2548	return ret;
2549}
2550
2551/* Used as an atexit function to stop logging on exit. */
2552static void
2553prof_log_stop_final(void) {
2554	tsd_t *tsd = tsd_fetch();
2555	prof_log_stop(tsd_tsdn(tsd));
2556}
2557
2558struct prof_emitter_cb_arg_s {
2559	int fd;
2560	ssize_t ret;
2561};
2562
2563static void
2564prof_emitter_write_cb(void *opaque, const char *to_write) {
2565	struct prof_emitter_cb_arg_s *arg =
2566	    (struct prof_emitter_cb_arg_s *)opaque;
2567	size_t bytes = strlen(to_write);
2568#ifdef JEMALLOC_JET
2569	if (prof_log_dummy) {
2570		return;
2571	}
2572#endif
2573	arg->ret = write(arg->fd, (void *)to_write, bytes);
2574}
2575
2576/*
 * Each prof_log_emit_* function below walks its linked list, emitting every
 * node into the JSON output and deallocating it along the way.
2579 */
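/*
 * The finished log is a single JSON object of roughly this shape (sketch):
 *
 *   {
 *     "info": { "duration": ..., "version": ..., "lg_sample_rate": ...,
 *               "pid": ... },
 *     "threads": [ { "thr_uid": ..., "thr_name": ... }, ... ],
 *     "stack_traces": [ [ "0x...", ... ], ... ],
 *     "allocations": [ { "alloc_thread": ..., "free_thread": ...,
 *                        "alloc_trace": ..., "free_trace": ...,
 *                        "alloc_timestamp": ..., "free_timestamp": ...,
 *                        "usize": ... }, ... ]
 *   }
 */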
2580static void
2581prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
2582	emitter_json_array_kv_begin(emitter, "threads");
2583	prof_thr_node_t *thr_node = log_thr_first;
2584	prof_thr_node_t *thr_old_node;
2585	while (thr_node != NULL) {
2586		emitter_json_object_begin(emitter);
2587
2588		emitter_json_kv(emitter, "thr_uid", emitter_type_uint64,
2589		    &thr_node->thr_uid);
2590
2591		char *thr_name = thr_node->name;
2592
2593		emitter_json_kv(emitter, "thr_name", emitter_type_string,
2594		    &thr_name);
2595
2596		emitter_json_object_end(emitter);
2597		thr_old_node = thr_node;
2598		thr_node = thr_node->next;
2599		idalloc(tsd, thr_old_node);
2600	}
2601	emitter_json_array_end(emitter);
2602}
2603
2604static void
2605prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
2606	emitter_json_array_kv_begin(emitter, "stack_traces");
2607	prof_bt_node_t *bt_node = log_bt_first;
2608	prof_bt_node_t *bt_old_node;
	/*
	 * Size the buffer to print one pointer: two hex digits per byte, two
	 * more for the "0x" prefix, and one for the terminating '\0'.
	 */
2613	char buf[2 * sizeof(intptr_t) + 3];
2614	size_t buf_sz = sizeof(buf);
2615	while (bt_node != NULL) {
2616		emitter_json_array_begin(emitter);
2617		size_t i;
2618		for (i = 0; i < bt_node->bt.len; i++) {
2619			malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]);
2620			char *trace_str = buf;
2621			emitter_json_value(emitter, emitter_type_string,
2622			    &trace_str);
2623		}
2624		emitter_json_array_end(emitter);
2625
2626		bt_old_node = bt_node;
2627		bt_node = bt_node->next;
2628		idalloc(tsd, bt_old_node);
2629	}
2630	emitter_json_array_end(emitter);
2631}
2632
2633static void
2634prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
2635	emitter_json_array_kv_begin(emitter, "allocations");
2636	prof_alloc_node_t *alloc_node = log_alloc_first;
2637	prof_alloc_node_t *alloc_old_node;
2638	while (alloc_node != NULL) {
2639		emitter_json_object_begin(emitter);
2640
2641		emitter_json_kv(emitter, "alloc_thread", emitter_type_size,
2642		    &alloc_node->alloc_thr_ind);
2643
2644		emitter_json_kv(emitter, "free_thread", emitter_type_size,
2645		    &alloc_node->free_thr_ind);
2646
2647		emitter_json_kv(emitter, "alloc_trace", emitter_type_size,
2648		    &alloc_node->alloc_bt_ind);
2649
2650		emitter_json_kv(emitter, "free_trace", emitter_type_size,
2651		    &alloc_node->free_bt_ind);
2652
2653		emitter_json_kv(emitter, "alloc_timestamp",
2654		    emitter_type_uint64, &alloc_node->alloc_time_ns);
2655
2656		emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
2657		    &alloc_node->free_time_ns);
2658
2659		emitter_json_kv(emitter, "usize", emitter_type_uint64,
2660		    &alloc_node->usize);
2661
2662		emitter_json_object_end(emitter);
2663
2664		alloc_old_node = alloc_node;
2665		alloc_node = alloc_node->next;
2666		idalloc(tsd, alloc_old_node);
2667	}
2668	emitter_json_array_end(emitter);
2669}
2670
2671static void
2672prof_log_emit_metadata(emitter_t *emitter) {
2673	emitter_json_object_kv_begin(emitter, "info");
2674
2675	nstime_t now = NSTIME_ZERO_INITIALIZER;
2676
2677	nstime_update(&now);
2678	uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp);
2679	emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);
2680
2681	char *vers = JEMALLOC_VERSION;
2682	emitter_json_kv(emitter, "version",
2683	    emitter_type_string, &vers);
2684
2685	emitter_json_kv(emitter, "lg_sample_rate",
2686	    emitter_type_int, &lg_prof_sample);
2687
2688	int pid = prof_getpid();
2689	emitter_json_kv(emitter, "pid", emitter_type_int, &pid);
2690
2691	emitter_json_object_end(emitter);
2692}
2693
2694
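/*
 * Flush the in-memory log to disk.  The state is flipped to "dumping" so
 * concurrent start/stop/log attempts fail fast, the JSON document is emitted
 * (metadata, threads, stack traces, allocations, freeing each node along the
 * way), the lookup tables and list heads are reset, and the state returns to
 * "stopped".
 */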
2695bool
2696prof_log_stop(tsdn_t *tsdn) {
2697	if (!opt_prof || !prof_booted) {
2698		return true;
2699	}
2700
2701	tsd_t *tsd = tsdn_tsd(tsdn);
2702	malloc_mutex_lock(tsdn, &log_mtx);
2703
2704	if (prof_logging_state != prof_logging_state_started) {
2705		malloc_mutex_unlock(tsdn, &log_mtx);
2706		return true;
2707	}
2708
2709	/*
2710	 * Set the state to dumping. We'll set it to stopped when we're done.
2711	 * Since other threads won't be able to start/stop/log when the state is
	 * dumping, we don't have to hold the lock for the whole operation.
2713	 */
2714	prof_logging_state = prof_logging_state_dumping;
2715	malloc_mutex_unlock(tsdn, &log_mtx);
2716
2717
2718	emitter_t emitter;
2719
2720	/* Create a file. */
2721
2722	int fd;
2723#ifdef JEMALLOC_JET
2724	if (prof_log_dummy) {
2725		fd = 0;
2726	} else {
2727		fd = creat(log_filename, 0644);
2728	}
2729#else
2730	fd = creat(log_filename, 0644);
2731#endif
2732
2733	if (fd == -1) {
		malloc_printf("<jemalloc>: creat() for log file \"%s\" "
		    "failed with %d\n", log_filename, errno);
2736		if (opt_abort) {
2737			abort();
2738		}
2739		return true;
2740	}
2741
2742	/* Emit to json. */
2743	struct prof_emitter_cb_arg_s arg;
2744	arg.fd = fd;
2745	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
2746	    (void *)(&arg));
2747
2748	emitter_begin(&emitter);
2749	prof_log_emit_metadata(&emitter);
2750	prof_log_emit_threads(tsd, &emitter);
2751	prof_log_emit_traces(tsd, &emitter);
2752	prof_log_emit_allocs(tsd, &emitter);
2753	emitter_end(&emitter);
2754
2755	/* Reset global state. */
2756	if (log_tables_initialized) {
2757		ckh_delete(tsd, &log_bt_node_set);
2758		ckh_delete(tsd, &log_thr_node_set);
2759	}
2760	log_tables_initialized = false;
2761	log_bt_index = 0;
2762	log_thr_index = 0;
2763	log_bt_first = NULL;
2764	log_bt_last = NULL;
2765	log_thr_first = NULL;
2766	log_thr_last = NULL;
2767	log_alloc_first = NULL;
2768	log_alloc_last = NULL;
2769
2770	malloc_mutex_lock(tsdn, &log_mtx);
2771	prof_logging_state = prof_logging_state_stopped;
2772	malloc_mutex_unlock(tsdn, &log_mtx);
2773
2774#ifdef JEMALLOC_JET
2775	if (prof_log_dummy) {
2776		return false;
2777	}
2778#endif
2779	return close(fd);
2780}
2781
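/*
 * Per-thread name accessors (backing the "thread.prof.name" mallctl).  The
 * getter returns "" when no name is set; the setter accepts only printable
 * and blank characters and stores an internally allocated copy.
 */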
2782const char *
2783prof_thread_name_get(tsd_t *tsd) {
2784	prof_tdata_t *tdata;
2785
2786	tdata = prof_tdata_get(tsd, true);
2787	if (tdata == NULL) {
2788		return "";
2789	}
2790	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2791}
2792
2793static char *
2794prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
2795	char *ret;
2796	size_t size;
2797
2798	if (thread_name == NULL) {
2799		return NULL;
2800	}
2801
2802	size = strlen(thread_name) + 1;
2803	if (size == 1) {
2804		return "";
2805	}
2806
2807	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
2808	    arena_get(TSDN_NULL, 0, true), true);
2809	if (ret == NULL) {
2810		return NULL;
2811	}
2812	memcpy(ret, thread_name, size);
2813	return ret;
2814}
2815
2816int
2817prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
2818	prof_tdata_t *tdata;
2819	unsigned i;
2820	char *s;
2821
2822	tdata = prof_tdata_get(tsd, true);
2823	if (tdata == NULL) {
2824		return EAGAIN;
2825	}
2826
2827	/* Validate input. */
2828	if (thread_name == NULL) {
2829		return EFAULT;
2830	}
2831	for (i = 0; thread_name[i] != '\0'; i++) {
2832		char c = thread_name[i];
2833		if (!isgraph(c) && !isblank(c)) {
2834			return EFAULT;
2835		}
2836	}
2837
2838	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2839	if (s == NULL) {
2840		return EAGAIN;
2841	}
2842
2843	if (tdata->thread_name != NULL) {
2844		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2845		    true);
2846		tdata->thread_name = NULL;
2847	}
2848	if (strlen(s) > 0) {
2849		tdata->thread_name = s;
2850	}
2851	return 0;
2852}
2853
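/* Per-thread activity flag, exposed as the "thread.prof.active" mallctl. */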
2854bool
2855prof_thread_active_get(tsd_t *tsd) {
2856	prof_tdata_t *tdata;
2857
2858	tdata = prof_tdata_get(tsd, true);
2859	if (tdata == NULL) {
2860		return false;
2861	}
2862	return tdata->active;
2863}
2864
2865bool
2866prof_thread_active_set(tsd_t *tsd, bool active) {
2867	prof_tdata_t *tdata;
2868
2869	tdata = prof_tdata_get(tsd, true);
2870	if (tdata == NULL) {
2871		return true;
2872	}
2873	tdata->active = active;
2874	return false;
2875}
2876
2877bool
2878prof_thread_active_init_get(tsdn_t *tsdn) {
2879	bool active_init;
2880
2881	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2882	active_init = prof_thread_active_init;
2883	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2884	return active_init;
2885}
2886
2887bool
2888prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
2889	bool active_init_old;
2890
2891	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2892	active_init_old = prof_thread_active_init;
2893	prof_thread_active_init = active_init;
2894	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2895	return active_init_old;
2896}
2897
2898bool
2899prof_gdump_get(tsdn_t *tsdn) {
2900	bool prof_gdump_current;
2901
2902	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2903	prof_gdump_current = prof_gdump_val;
2904	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2905	return prof_gdump_current;
2906}
2907
2908bool
2909prof_gdump_set(tsdn_t *tsdn, bool gdump) {
2910	bool prof_gdump_old;
2911
2912	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2913	prof_gdump_old = prof_gdump_val;
2914	prof_gdump_val = gdump;
2915	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2916	return prof_gdump_old;
2917}
2918
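/*
 * Bootstrapping happens in three stages: prof_boot0() installs the
 * compile-time default dump prefix, prof_boot1() finalizes opt_prof and the
 * dump interval before any arenas are created, and prof_boot2() sets up the
 * mutexes, hash tables and lock arrays and registers the atexit() hooks for
 * the final dump and for flushing the allocation log.
 */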
2919void
2920prof_boot0(void) {
2921	cassert(config_prof);
2922
2923	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
2924	    sizeof(PROF_PREFIX_DEFAULT));
2925}
2926
2927void
2928prof_boot1(void) {
2929	cassert(config_prof);
2930
2931	/*
2932	 * opt_prof must be in its final state before any arenas are
2933	 * initialized, so this function must be executed early.
2934	 */
2935
2936	if (opt_prof_leak && !opt_prof) {
2937		/*
2938		 * Enable opt_prof, but in such a way that profiles are never
2939		 * automatically dumped.
2940		 */
2941		opt_prof = true;
2942		opt_prof_gdump = false;
2943	} else if (opt_prof) {
2944		if (opt_lg_prof_interval >= 0) {
2945			prof_interval = (((uint64_t)1U) <<
2946			    opt_lg_prof_interval);
2947		}
2948	}
2949}
2950
2951bool
2952prof_boot2(tsd_t *tsd) {
2953	cassert(config_prof);
2954
2955	if (opt_prof) {
2956		unsigned i;
2957
2958		lg_prof_sample = opt_lg_prof_sample;
2959
2960		prof_active = opt_prof_active;
2961		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
2962		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
2963			return true;
2964		}
2965
2966		prof_gdump_val = opt_prof_gdump;
2967		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
2968		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
2969			return true;
2970		}
2971
2972		prof_thread_active_init = opt_prof_thread_active_init;
2973		if (malloc_mutex_init(&prof_thread_active_init_mtx,
2974		    "prof_thread_active_init",
2975		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
2976		    malloc_mutex_rank_exclusive)) {
2977			return true;
2978		}
2979
2980		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
2981		    prof_bt_keycomp)) {
2982			return true;
2983		}
2984		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
2985		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
2986			return true;
2987		}
2988
2989		tdata_tree_new(&tdatas);
2990		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
2991		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
2992			return true;
2993		}
2994
2995		next_thr_uid = 0;
2996		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
2997		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
2998			return true;
2999		}
3000
3001		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
3002		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
3003			return true;
3004		}
3005		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
3006		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
3007			return true;
3008		}
3009
3010		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
3011		    atexit(prof_fdump) != 0) {
3012			malloc_write("<jemalloc>: Error in atexit()\n");
3013			if (opt_abort) {
3014				abort();
3015			}
3016		}
3017
3018		if (opt_prof_log) {
3019			prof_log_start(tsd_tsdn(tsd), NULL);
3020		}
3021
3022		if (atexit(prof_log_stop_final) != 0) {
3023			malloc_write("<jemalloc>: Error in atexit() "
3024				     "for logging\n");
3025			if (opt_abort) {
3026				abort();
3027			}
3028		}
3029
3030		if (malloc_mutex_init(&log_mtx, "prof_log",
3031		    WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
3032			return true;
3033		}
3034
3035		if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
3036		    prof_bt_node_hash, prof_bt_node_keycomp)) {
3037			return true;
3038		}
3039
3040		if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
3041		    prof_thr_node_hash, prof_thr_node_keycomp)) {
3042			return true;
3043		}
3044
3045		log_tables_initialized = true;
3046
3047		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3048		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
3049		    CACHELINE);
3050		if (gctx_locks == NULL) {
3051			return true;
3052		}
3053		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3054			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
3055			    WITNESS_RANK_PROF_GCTX,
3056			    malloc_mutex_rank_exclusive)) {
3057				return true;
3058			}
3059		}
3060
3061		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3062		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
3063		    CACHELINE);
3064		if (tdata_locks == NULL) {
3065			return true;
3066		}
3067		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3068			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
3069			    WITNESS_RANK_PROF_TDATA,
3070			    malloc_mutex_rank_exclusive)) {
3071				return true;
3072			}
3073		}
3074#ifdef JEMALLOC_PROF_LIBGCC
3075		/*
3076		 * Cause the backtracing machinery to allocate its internal
3077		 * state before enabling profiling.
3078		 */
3079		_Unwind_Backtrace(prof_unwind_init_callback, NULL);
3080#endif
3081	}
3082	prof_booted = true;
3083
3084	return false;
3085}
3086
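/*
 * Fork handling: the two prefork hooks acquire every profiling mutex in a
 * fixed order so none is held mid-operation across fork(), and the postfork
 * hooks release them in reverse order in both the parent and the child.
 */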
3087void
3088prof_prefork0(tsdn_t *tsdn) {
3089	if (config_prof && opt_prof) {
3090		unsigned i;
3091
3092		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
3093		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
3094		malloc_mutex_prefork(tsdn, &tdatas_mtx);
3095		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3096			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
3097		}
3098		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3099			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
3100		}
3101	}
3102}
3103
3104void
3105prof_prefork1(tsdn_t *tsdn) {
3106	if (config_prof && opt_prof) {
3107		malloc_mutex_prefork(tsdn, &prof_active_mtx);
3108		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
3109		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
3110		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
3111		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
3112	}
3113}
3114
3115void
3116prof_postfork_parent(tsdn_t *tsdn) {
3117	if (config_prof && opt_prof) {
3118		unsigned i;
3119
3120		malloc_mutex_postfork_parent(tsdn,
3121		    &prof_thread_active_init_mtx);
3122		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
3123		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
3124		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
3125		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
3126		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3127			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
3128		}
3129		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3130			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
3131		}
3132		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
3133		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
3134		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
3135	}
3136}
3137
3138void
3139prof_postfork_child(tsdn_t *tsdn) {
3140	if (config_prof && opt_prof) {
3141		unsigned i;
3142
3143		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
3144		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
3145		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
3146		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
3147		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
3148		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3149			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
3150		}
3151		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3152			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
3153		}
3154		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
3155		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
3156		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
3157	}
3158}
3159
3160/******************************************************************************/
3161