prof.c (261071) prof.c (263974)
1#define JEMALLOC_PROF_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3/******************************************************************************/
4
5#ifdef JEMALLOC_PROF_LIBUNWIND
6#define UNW_LOCAL_ONLY
7#include <libunwind.h>
8#endif
9
10#ifdef JEMALLOC_PROF_LIBGCC
11#include <unwind.h>
12#endif
13
14/******************************************************************************/
15/* Data. */
16
17malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
18
19bool opt_prof = false;
20bool opt_prof_active = true;
21size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
22ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
23bool opt_prof_gdump = false;
24bool opt_prof_final = true;
25bool opt_prof_leak = false;
26bool opt_prof_accum = false;
27char opt_prof_prefix[
28 /* Minimize memory bloat for non-prof builds. */
29#ifdef JEMALLOC_PROF
30 PATH_MAX +
31#endif
32 1];
33
34uint64_t prof_interval = 0;
35bool prof_promote;
36
37/*
38 * Table of mutexes that are shared among ctx's. These are leaf locks, so
39 * there is no problem with using them for more than one ctx at the same time.
40 * The primary motivation for this sharing though is that ctx's are ephemeral,
41 * and destroying mutexes causes complications for systems that allocate when
42 * creating/destroying mutexes.
43 */
44static malloc_mutex_t *ctx_locks;
45static unsigned cum_ctxs; /* Atomic counter. */
46
47/*
48 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
49 * structure that knows about all backtraces currently captured.
50 */
51static ckh_t bt2ctx;
52static malloc_mutex_t bt2ctx_mtx;
53
54static malloc_mutex_t prof_dump_seq_mtx;
55static uint64_t prof_dump_seq;
56static uint64_t prof_dump_iseq;
57static uint64_t prof_dump_mseq;
58static uint64_t prof_dump_useq;
59
60/*
61 * This buffer is rather large for stack allocation, so use a single buffer for
62 * all profile dumps.
63 */
64static malloc_mutex_t prof_dump_mtx;
65static char prof_dump_buf[
66 /* Minimize memory bloat for non-prof builds. */
67#ifdef JEMALLOC_PROF
68 PROF_DUMP_BUFSIZE
69#else
70 1
71#endif
72];
73static unsigned prof_dump_buf_end;
74static int prof_dump_fd;
75
76/* Do not dump any profiles until bootstrapping is complete. */
77static bool prof_booted = false;
78
79/******************************************************************************/
80
81void
82bt_init(prof_bt_t *bt, void **vec)
83{
84
85 cassert(config_prof);
86
87 bt->vec = vec;
88 bt->len = 0;
89}
90
91static void
92bt_destroy(prof_bt_t *bt)
93{
94
95 cassert(config_prof);
96
97 idalloc(bt);
98}
99
100static prof_bt_t *
101bt_dup(prof_bt_t *bt)
102{
103 prof_bt_t *ret;
104
105 cassert(config_prof);
106
107 /*
108 * Create a single allocation that has space for vec immediately
109 * following the prof_bt_t structure. The backtraces that get
110 * stored in the backtrace caches are copied from stack-allocated
111 * temporary variables, so size is known at creation time. Making this
112 * a contiguous object improves cache locality.
113 */
114 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
115 (bt->len * sizeof(void *)));
116 if (ret == NULL)
117 return (NULL);
118 ret->vec = (void **)((uintptr_t)ret +
119 QUANTUM_CEILING(sizeof(prof_bt_t)));
120 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
121 ret->len = bt->len;
122
123 return (ret);
124}
125
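/*
 * prof_enter()/prof_leave() bracket bt2ctx_mtx critical sections.  Interval
 * (idump) and growth (gdump) dumps requested while enqueued are deferred and
 * issued from prof_leave().
 */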
126static inline void
127prof_enter(prof_tdata_t *prof_tdata)
128{
129
130 cassert(config_prof);
131
132 assert(prof_tdata->enq == false);
133 prof_tdata->enq = true;
134
135 malloc_mutex_lock(&bt2ctx_mtx);
136}
137
138static inline void
139prof_leave(prof_tdata_t *prof_tdata)
140{
141 bool idump, gdump;
142
143 cassert(config_prof);
144
145 malloc_mutex_unlock(&bt2ctx_mtx);
146
147 assert(prof_tdata->enq);
148 prof_tdata->enq = false;
149 idump = prof_tdata->enq_idump;
150 prof_tdata->enq_idump = false;
151 gdump = prof_tdata->enq_gdump;
152 prof_tdata->enq_gdump = false;
153
154 if (idump)
155 prof_idump();
156 if (gdump)
157 prof_gdump();
158}
159
160#ifdef JEMALLOC_PROF_LIBUNWIND
161void
162prof_backtrace(prof_bt_t *bt, unsigned nignore)
163{
164 unw_context_t uc;
165 unw_cursor_t cursor;
166 unsigned i;
167 int err;
168
169 cassert(config_prof);
170 assert(bt->len == 0);
171 assert(bt->vec != NULL);
172
173 unw_getcontext(&uc);
174 unw_init_local(&cursor, &uc);
175
176 /* Throw away (nignore+1) stack frames, if that many exist. */
177 for (i = 0; i < nignore + 1; i++) {
178 err = unw_step(&cursor);
179 if (err <= 0)
180 return;
181 }
182
183 /*
184 * Iterate over stack frames until there are no more, or until no space
185 * remains in bt.
186 */
187 for (i = 0; i < PROF_BT_MAX; i++) {
188 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
189 bt->len++;
190 err = unw_step(&cursor);
191 if (err <= 0)
192 break;
193 }
194}
195#elif (defined(JEMALLOC_PROF_LIBGCC))
196static _Unwind_Reason_Code
197prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
198{
199
200 cassert(config_prof);
201
202 return (_URC_NO_REASON);
203}
204
205static _Unwind_Reason_Code
206prof_unwind_callback(struct _Unwind_Context *context, void *arg)
207{
208 prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
209
210 cassert(config_prof);
211
212 if (data->nignore > 0)
213 data->nignore--;
214 else {
215 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
216 data->bt->len++;
217 if (data->bt->len == data->max)
218 return (_URC_END_OF_STACK);
219 }
220
221 return (_URC_NO_REASON);
222}
223
224void
225prof_backtrace(prof_bt_t *bt, unsigned nignore)
226{
227 prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
228
229 cassert(config_prof);
230
231 _Unwind_Backtrace(prof_unwind_callback, &data);
232}
233#elif (defined(JEMALLOC_PROF_GCC))
234void
235prof_backtrace(prof_bt_t *bt, unsigned nignore)
236{
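	/*
	 * __builtin_frame_address() and __builtin_return_address() require
	 * constant arguments, so the stack walk is unrolled via BT_FRAME
	 * rather than written as a loop.
	 */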
237#define BT_FRAME(i) \
238 if ((i) < nignore + PROF_BT_MAX) { \
239 void *p; \
240 if (__builtin_frame_address(i) == 0) \
241 return; \
242 p = __builtin_return_address(i); \
243 if (p == NULL) \
244 return; \
245 if (i >= nignore) { \
246 bt->vec[(i) - nignore] = p; \
247 bt->len = (i) - nignore + 1; \
248 } \
249 } else \
250 return;
251
252 cassert(config_prof);
253 assert(nignore <= 3);
254
255 BT_FRAME(0)
256 BT_FRAME(1)
257 BT_FRAME(2)
258 BT_FRAME(3)
259 BT_FRAME(4)
260 BT_FRAME(5)
261 BT_FRAME(6)
262 BT_FRAME(7)
263 BT_FRAME(8)
264 BT_FRAME(9)
265
266 BT_FRAME(10)
267 BT_FRAME(11)
268 BT_FRAME(12)
269 BT_FRAME(13)
270 BT_FRAME(14)
271 BT_FRAME(15)
272 BT_FRAME(16)
273 BT_FRAME(17)
274 BT_FRAME(18)
275 BT_FRAME(19)
276
277 BT_FRAME(20)
278 BT_FRAME(21)
279 BT_FRAME(22)
280 BT_FRAME(23)
281 BT_FRAME(24)
282 BT_FRAME(25)
283 BT_FRAME(26)
284 BT_FRAME(27)
285 BT_FRAME(28)
286 BT_FRAME(29)
287
288 BT_FRAME(30)
289 BT_FRAME(31)
290 BT_FRAME(32)
291 BT_FRAME(33)
292 BT_FRAME(34)
293 BT_FRAME(35)
294 BT_FRAME(36)
295 BT_FRAME(37)
296 BT_FRAME(38)
297 BT_FRAME(39)
298
299 BT_FRAME(40)
300 BT_FRAME(41)
301 BT_FRAME(42)
302 BT_FRAME(43)
303 BT_FRAME(44)
304 BT_FRAME(45)
305 BT_FRAME(46)
306 BT_FRAME(47)
307 BT_FRAME(48)
308 BT_FRAME(49)
309
310 BT_FRAME(50)
311 BT_FRAME(51)
312 BT_FRAME(52)
313 BT_FRAME(53)
314 BT_FRAME(54)
315 BT_FRAME(55)
316 BT_FRAME(56)
317 BT_FRAME(57)
318 BT_FRAME(58)
319 BT_FRAME(59)
320
321 BT_FRAME(60)
322 BT_FRAME(61)
323 BT_FRAME(62)
324 BT_FRAME(63)
325 BT_FRAME(64)
326 BT_FRAME(65)
327 BT_FRAME(66)
328 BT_FRAME(67)
329 BT_FRAME(68)
330 BT_FRAME(69)
331
332 BT_FRAME(70)
333 BT_FRAME(71)
334 BT_FRAME(72)
335 BT_FRAME(73)
336 BT_FRAME(74)
337 BT_FRAME(75)
338 BT_FRAME(76)
339 BT_FRAME(77)
340 BT_FRAME(78)
341 BT_FRAME(79)
342
343 BT_FRAME(80)
344 BT_FRAME(81)
345 BT_FRAME(82)
346 BT_FRAME(83)
347 BT_FRAME(84)
348 BT_FRAME(85)
349 BT_FRAME(86)
350 BT_FRAME(87)
351 BT_FRAME(88)
352 BT_FRAME(89)
353
354 BT_FRAME(90)
355 BT_FRAME(91)
356 BT_FRAME(92)
357 BT_FRAME(93)
358 BT_FRAME(94)
359 BT_FRAME(95)
360 BT_FRAME(96)
361 BT_FRAME(97)
362 BT_FRAME(98)
363 BT_FRAME(99)
364
365 BT_FRAME(100)
366 BT_FRAME(101)
367 BT_FRAME(102)
368 BT_FRAME(103)
369 BT_FRAME(104)
370 BT_FRAME(105)
371 BT_FRAME(106)
372 BT_FRAME(107)
373 BT_FRAME(108)
374 BT_FRAME(109)
375
376 BT_FRAME(110)
377 BT_FRAME(111)
378 BT_FRAME(112)
379 BT_FRAME(113)
380 BT_FRAME(114)
381 BT_FRAME(115)
382 BT_FRAME(116)
383 BT_FRAME(117)
384 BT_FRAME(118)
385 BT_FRAME(119)
386
387 BT_FRAME(120)
388 BT_FRAME(121)
389 BT_FRAME(122)
390 BT_FRAME(123)
391 BT_FRAME(124)
392 BT_FRAME(125)
393 BT_FRAME(126)
394 BT_FRAME(127)
395
396 /* Extras to compensate for nignore. */
397 BT_FRAME(128)
398 BT_FRAME(129)
399 BT_FRAME(130)
400#undef BT_FRAME
401}
402#else
403void
404prof_backtrace(prof_bt_t *bt, unsigned nignore)
405{
406
407 cassert(config_prof);
408 not_reached();
409}
410#endif
411
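/* Assign one of the shared leaf locks in ctx_locks to a new ctx, round-robin. */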
412static malloc_mutex_t *
413prof_ctx_mutex_choose(void)
414{
415 unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
416
417 return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
418}
419
420static void
421prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
422{
423
424 ctx->bt = bt;
425 ctx->lock = prof_ctx_mutex_choose();
426 /*
427 * Set nlimbo to 1, in order to avoid a race condition with
428 * prof_ctx_merge()/prof_ctx_destroy().
429 */
430 ctx->nlimbo = 1;
431 ql_elm_new(ctx, dump_link);
432 memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
433 ql_new(&ctx->cnts_ql);
434}
435
436static void
437prof_ctx_destroy(prof_ctx_t *ctx)
438{
439 prof_tdata_t *prof_tdata;
440
441 cassert(config_prof);
442
443 /*
444 * Check that ctx is still unused by any thread cache before destroying
445 * it. prof_lookup() increments ctx->nlimbo in order to avoid a race
446 * condition with this function, as does prof_ctx_merge() in order to
447 * avoid a race between the main body of prof_ctx_merge() and entry
448 * into this function.
449 */
450 prof_tdata = prof_tdata_get(false);
451 assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
452 prof_enter(prof_tdata);
453 malloc_mutex_lock(ctx->lock);
454 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
455 ctx->nlimbo == 1) {
456 assert(ctx->cnt_merged.curbytes == 0);
457 assert(ctx->cnt_merged.accumobjs == 0);
458 assert(ctx->cnt_merged.accumbytes == 0);
459 /* Remove ctx from bt2ctx. */
460 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
461 not_reached();
462 prof_leave(prof_tdata);
463 /* Destroy ctx. */
464 malloc_mutex_unlock(ctx->lock);
465 bt_destroy(ctx->bt);
466 idalloc(ctx);
467 } else {
468 /*
469 * Compensate for increment in prof_ctx_merge() or
470 * prof_lookup().
471 */
472 ctx->nlimbo--;
473 malloc_mutex_unlock(ctx->lock);
474 prof_leave(prof_tdata);
475 }
476}
477
478static void
479prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
480{
481 bool destroy;
482
483 cassert(config_prof);
484
485 /* Merge cnt stats and detach from ctx. */
486 malloc_mutex_lock(ctx->lock);
487 ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
488 ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
489 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
490 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
491 ql_remove(&ctx->cnts_ql, cnt, cnts_link);
492 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
493 ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
494 /*
495 * Increment ctx->nlimbo in order to keep another thread from
496 * winning the race to destroy ctx while this one has ctx->lock
497 * dropped. Without this, it would be possible for another
498 * thread to:
499 *
500 * 1) Sample an allocation associated with ctx.
501 * 2) Deallocate the sampled object.
502 * 3) Successfully prof_ctx_destroy(ctx).
503 *
504 * The result would be that ctx no longer exists by the time
505 * this thread accesses it in prof_ctx_destroy().
506 */
507 ctx->nlimbo++;
508 destroy = true;
509 } else
510 destroy = false;
511 malloc_mutex_unlock(ctx->lock);
512 if (destroy)
513 prof_ctx_destroy(ctx);
514}
515
516static bool
517prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
518 prof_ctx_t **p_ctx, bool *p_new_ctx)
519{
520 union {
521 prof_ctx_t *p;
522 void *v;
523 } ctx;
524 union {
525 prof_bt_t *p;
526 void *v;
527 } btkey;
528 bool new_ctx;
529
530 prof_enter(prof_tdata);
531 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
532 /* bt has never been seen before. Insert it. */
533 ctx.v = imalloc(sizeof(prof_ctx_t));
534 if (ctx.v == NULL) {
535 prof_leave(prof_tdata);
536 return (true);
537 }
538 btkey.p = bt_dup(bt);
539 if (btkey.v == NULL) {
540 prof_leave(prof_tdata);
541 idalloc(ctx.v);
542 return (true);
543 }
544 prof_ctx_init(ctx.p, btkey.p);
545 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
546 /* OOM. */
547 prof_leave(prof_tdata);
548 idalloc(btkey.v);
549 idalloc(ctx.v);
550 return (true);
551 }
552 new_ctx = true;
553 } else {
554 /*
555 * Increment nlimbo, in order to avoid a race condition with
556 * prof_ctx_merge()/prof_ctx_destroy().
557 */
558 malloc_mutex_lock(ctx.p->lock);
559 ctx.p->nlimbo++;
560 malloc_mutex_unlock(ctx.p->lock);
561 new_ctx = false;
562 }
563 prof_leave(prof_tdata);
564
565 *p_btkey = btkey.v;
566 *p_ctx = ctx.p;
567 *p_new_ctx = new_ctx;
568 return (false);
569}
570
571prof_thr_cnt_t *
572prof_lookup(prof_bt_t *bt)
573{
574 union {
575 prof_thr_cnt_t *p;
576 void *v;
577 } ret;
578 prof_tdata_t *prof_tdata;
579
580 cassert(config_prof);
581
582 prof_tdata = prof_tdata_get(false);
583 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
584 return (NULL);
585
586 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
587 void *btkey;
588 prof_ctx_t *ctx;
589 bool new_ctx;
590
591 /*
592 * This thread's cache lacks bt. Look for it in the global
593 * cache.
594 */
595 if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
596 return (NULL);
597
598 /* Link a prof_thr_cnt_t into ctx for this thread. */
599 if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
600 assert(ckh_count(&prof_tdata->bt2cnt) > 0);
601 /*
602 * Flush the least recently used cnt in order to keep
603 * bt2cnt from becoming too large.
604 */
605 ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
606 assert(ret.v != NULL);
607 if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
608 NULL, NULL))
609 not_reached();
610 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
611 prof_ctx_merge(ret.p->ctx, ret.p);
612 /* ret can now be re-used. */
613 } else {
614 assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
615 /* Allocate and partially initialize a new cnt. */
616 ret.v = imalloc(sizeof(prof_thr_cnt_t));
617 if (ret.p == NULL) {
618 if (new_ctx)
619 prof_ctx_destroy(ctx);
620 return (NULL);
621 }
622 ql_elm_new(ret.p, cnts_link);
623 ql_elm_new(ret.p, lru_link);
624 }
625 /* Finish initializing ret. */
626 ret.p->ctx = ctx;
627 ret.p->epoch = 0;
628 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
629 if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
630 if (new_ctx)
631 prof_ctx_destroy(ctx);
632 idalloc(ret.v);
633 return (NULL);
634 }
635 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
636 malloc_mutex_lock(ctx->lock);
637 ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
638 ctx->nlimbo--;
639 malloc_mutex_unlock(ctx->lock);
640 } else {
641 /* Move ret to the front of the LRU. */
642 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
643 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
644 }
645
646 return (ret.p);
647}
648
649#ifdef JEMALLOC_JET
650size_t
651prof_bt_count(void)
652{
653 size_t bt_count;
654 prof_tdata_t *prof_tdata;
655
656 prof_tdata = prof_tdata_get(false);
657 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
658 return (0);
659
660 prof_enter(prof_tdata);
661 bt_count = ckh_count(&bt2ctx);
662 prof_leave(prof_tdata);
663
664 return (bt_count);
665}
666#endif
667
668#ifdef JEMALLOC_JET
669#undef prof_dump_open
670#define prof_dump_open JEMALLOC_N(prof_dump_open_impl)
671#endif
672static int
673prof_dump_open(bool propagate_err, const char *filename)
674{
675 int fd;
676
677 fd = creat(filename, 0644);
678 if (fd == -1 && propagate_err == false) {
679 malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
680 filename);
681 if (opt_abort)
682 abort();
683 }
684
685 return (fd);
686}
687#ifdef JEMALLOC_JET
688#undef prof_dump_open
689#define prof_dump_open JEMALLOC_N(prof_dump_open)
690prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
691#endif
692
693static bool
694prof_dump_flush(bool propagate_err)
695{
696 bool ret = false;
697 ssize_t err;
698
699 cassert(config_prof);
700
701 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
702 if (err == -1) {
703 if (propagate_err == false) {
704 malloc_write("<jemalloc>: write() failed during heap "
705 "profile flush\n");
706 if (opt_abort)
707 abort();
708 }
709 ret = true;
710 }
711 prof_dump_buf_end = 0;
712
713 return (ret);
714}
715
716static bool
717prof_dump_close(bool propagate_err)
718{
719 bool ret;
720
721 assert(prof_dump_fd != -1);
722 ret = prof_dump_flush(propagate_err);
723 close(prof_dump_fd);
724 prof_dump_fd = -1;
725
726 return (ret);
727}
728
729static bool
730prof_dump_write(bool propagate_err, const char *s)
731{
732 unsigned i, slen, n;
733
734 cassert(config_prof);
735
736 i = 0;
737 slen = strlen(s);
738 while (i < slen) {
739 /* Flush the buffer if it is full. */
740 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
741 if (prof_dump_flush(propagate_err) && propagate_err)
742 return (true);
743
744 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
745 /* Finish writing. */
746 n = slen - i;
747 } else {
748 /* Write as much of s as will fit. */
749 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
750 }
751 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
752 prof_dump_buf_end += n;
753 i += n;
754 }
755
756 return (false);
757}
758
759JEMALLOC_ATTR(format(printf, 2, 3))
760static bool
761prof_dump_printf(bool propagate_err, const char *format, ...)
762{
763 bool ret;
764 va_list ap;
765 char buf[PROF_PRINTF_BUFSIZE];
766
767 va_start(ap, format);
768 malloc_vsnprintf(buf, sizeof(buf), format, ap);
769 va_end(ap);
770 ret = prof_dump_write(propagate_err, buf);
771
772 return (ret);
773}
774
775static void
776prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
777 prof_ctx_list_t *ctx_ql)
778{
779 prof_thr_cnt_t *thr_cnt;
780 prof_cnt_t tcnt;
781
782 cassert(config_prof);
783
784 malloc_mutex_lock(ctx->lock);
785
786 /*
787 * Increment nlimbo so that ctx won't go away before dump.
788 * Additionally, link ctx into the dump list so that it is included in
789 * prof_dump()'s second pass.
790 */
791 ctx->nlimbo++;
792 ql_tail_insert(ctx_ql, ctx, dump_link);
793
794 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
795 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
796 volatile unsigned *epoch = &thr_cnt->epoch;
797
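		/*
		 * thr_cnt->cnts is read seqlock-style: the owning thread makes
		 * epoch odd while it updates the counters and even when it is
		 * done, so a snapshot is consistent only if epoch is even and
		 * unchanged across the copy.
		 */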
798 while (true) {
799 unsigned epoch0 = *epoch;
800
801 /* Make sure epoch is even. */
802 if (epoch0 & 1U)
803 continue;
804
805 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
806
807 /* Terminate if epoch didn't change while reading. */
808 if (*epoch == epoch0)
809 break;
810 }
811
812 ctx->cnt_summed.curobjs += tcnt.curobjs;
813 ctx->cnt_summed.curbytes += tcnt.curbytes;
814 if (opt_prof_accum) {
815 ctx->cnt_summed.accumobjs += tcnt.accumobjs;
816 ctx->cnt_summed.accumbytes += tcnt.accumbytes;
817 }
818 }
819
820 if (ctx->cnt_summed.curobjs != 0)
821 (*leak_nctx)++;
822
823 /* Add to cnt_all. */
824 cnt_all->curobjs += ctx->cnt_summed.curobjs;
825 cnt_all->curbytes += ctx->cnt_summed.curbytes;
826 if (opt_prof_accum) {
827 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
828 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
829 }
830
831 malloc_mutex_unlock(ctx->lock);
832}
833
834static bool
835prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
836{
837
838 if (opt_lg_prof_sample == 0) {
839 if (prof_dump_printf(propagate_err,
840 "heap profile: %"PRId64": %"PRId64
841 " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
842 cnt_all->curobjs, cnt_all->curbytes,
843 cnt_all->accumobjs, cnt_all->accumbytes))
844 return (true);
845 } else {
846 if (prof_dump_printf(propagate_err,
847 "heap profile: %"PRId64": %"PRId64
848 " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
849 cnt_all->curobjs, cnt_all->curbytes,
850 cnt_all->accumobjs, cnt_all->accumbytes,
851 ((uint64_t)1U << opt_lg_prof_sample)))
852 return (true);
853 }
854
855 return (false);
856}
857
858static void
859prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
860{
861
862 ctx->nlimbo--;
863 ql_remove(ctx_ql, ctx, dump_link);
864}
865
866static void
867prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
868{
869
870 malloc_mutex_lock(ctx->lock);
871 prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
872 malloc_mutex_unlock(ctx->lock);
873}
874
875static bool
876prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
877 prof_ctx_list_t *ctx_ql)
878{
879 bool ret;
880 unsigned i;
881
882 cassert(config_prof);
883
884 /*
885 * Current statistics can sum to 0 as a result of unmerged per thread
886 * statistics. Additionally, interval- and growth-triggered dumps can
887 * occur between the time a ctx is created and when its statistics are
888 * filled in. Avoid dumping any ctx that is an artifact of either
889 * implementation detail.
890 */
891 malloc_mutex_lock(ctx->lock);
892 if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
893 (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
894 assert(ctx->cnt_summed.curobjs == 0);
895 assert(ctx->cnt_summed.curbytes == 0);
896 assert(ctx->cnt_summed.accumobjs == 0);
897 assert(ctx->cnt_summed.accumbytes == 0);
898 ret = false;
899 goto label_return;
900 }
901
902 if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
903 " [%"PRIu64": %"PRIu64"] @",
904 ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
905 ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
906 ret = true;
907 goto label_return;
908 }
909
910 for (i = 0; i < bt->len; i++) {
911 if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
912 (uintptr_t)bt->vec[i])) {
913 ret = true;
914 goto label_return;
915 }
916 }
917
918 if (prof_dump_write(propagate_err, "\n")) {
919 ret = true;
920 goto label_return;
921 }
922
923 ret = false;
924label_return:
925 prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
926 malloc_mutex_unlock(ctx->lock);
927 return (ret);
928}
929
930static bool
931prof_dump_maps(bool propagate_err)
932{
933 bool ret;
934 int mfd;
935 char filename[PATH_MAX + 1];
936
937 cassert(config_prof);
Deleted (prof.c 261071):
938
939 malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
940 (int)getpid());
Added (prof.c 263974):
938#ifdef __FreeBSD__
939 malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
940#else
941 malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
942 (int)getpid());
943#endif
941 mfd = open(filename, O_RDONLY);
942 if (mfd != -1) {
943 ssize_t nread;
944
945 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
946 propagate_err) {
947 ret = true;
948 goto label_return;
949 }
950 nread = 0;
951 do {
952 prof_dump_buf_end += nread;
953 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
954 /* Make space in prof_dump_buf before read(). */
955 if (prof_dump_flush(propagate_err) &&
956 propagate_err) {
957 ret = true;
958 goto label_return;
959 }
960 }
961 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
962 PROF_DUMP_BUFSIZE - prof_dump_buf_end);
963 } while (nread > 0);
964 } else {
965 ret = true;
966 goto label_return;
967 }
968
969 ret = false;
970label_return:
971 if (mfd != -1)
972 close(mfd);
973 return (ret);
974}
975
976static void
977prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
978 const char *filename)
979{
980
981 if (cnt_all->curbytes != 0) {
982 malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
983 PRId64" object%s, %zu context%s\n",
984 cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
985 cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
986 leak_nctx, (leak_nctx != 1) ? "s" : "");
987 malloc_printf(
988 "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
989 filename);
990 }
991}
992
993static bool
994prof_dump(bool propagate_err, const char *filename, bool leakcheck)
995{
996 prof_tdata_t *prof_tdata;
997 prof_cnt_t cnt_all;
998 size_t tabind;
999 union {
1000 prof_ctx_t *p;
1001 void *v;
1002 } ctx;
1003 size_t leak_nctx;
1004 prof_ctx_list_t ctx_ql;
1005
1006 cassert(config_prof);
1007
1008 prof_tdata = prof_tdata_get(false);
1009 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1010 return (true);
1011
1012 malloc_mutex_lock(&prof_dump_mtx);
1013
1014 /* Merge per thread profile stats, and sum them in cnt_all. */
1015 memset(&cnt_all, 0, sizeof(prof_cnt_t));
1016 leak_nctx = 0;
1017 ql_new(&ctx_ql);
1018 prof_enter(prof_tdata);
1019 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
1020 prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
1021 prof_leave(prof_tdata);
1022
1023 /* Create dump file. */
1024 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
1025 goto label_open_close_error;
1026
1027 /* Dump profile header. */
1028 if (prof_dump_header(propagate_err, &cnt_all))
1029 goto label_write_error;
1030
1031 /* Dump per ctx profile stats. */
1032 while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
1033 if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
1034 goto label_write_error;
1035 }
1036
1037 /* Dump /proc/<pid>/maps if possible. */
1038 if (prof_dump_maps(propagate_err))
1039 goto label_write_error;
1040
1041 if (prof_dump_close(propagate_err))
1042 goto label_open_close_error;
1043
1044 malloc_mutex_unlock(&prof_dump_mtx);
1045
1046 if (leakcheck)
1047 prof_leakcheck(&cnt_all, leak_nctx, filename);
1048
1049 return (false);
1050label_write_error:
1051 prof_dump_close(propagate_err);
1052label_open_close_error:
1053 while ((ctx.p = ql_first(&ctx_ql)) != NULL)
1054 prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
1055 malloc_mutex_unlock(&prof_dump_mtx);
1056 return (true);
1057}
1058
1059#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
1060#define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
1061static void
1062prof_dump_filename(char *filename, char v, int64_t vseq)
1063{
1064
1065 cassert(config_prof);
1066
1067 if (vseq != VSEQ_INVALID) {
1068 /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
1069 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1070 "%s.%d.%"PRIu64".%c%"PRId64".heap",
1071 opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
1072 } else {
1073 /* "<prefix>.<pid>.<seq>.<v>.heap" */
1074 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1075 "%s.%d.%"PRIu64".%c.heap",
1076 opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
1077 }
1078 prof_dump_seq++;
1079}
1080
1081static void
1082prof_fdump(void)
1083{
1084 char filename[DUMP_FILENAME_BUFSIZE];
1085
1086 cassert(config_prof);
1087
1088 if (prof_booted == false)
1089 return;
1090
1091 if (opt_prof_final && opt_prof_prefix[0] != '\0') {
1092 malloc_mutex_lock(&prof_dump_seq_mtx);
1093 prof_dump_filename(filename, 'f', VSEQ_INVALID);
1094 malloc_mutex_unlock(&prof_dump_seq_mtx);
1095 prof_dump(false, filename, opt_prof_leak);
1096 }
1097}
1098
1099void
1100prof_idump(void)
1101{
1102 prof_tdata_t *prof_tdata;
1103 char filename[PATH_MAX + 1];
1104
1105 cassert(config_prof);
1106
1107 if (prof_booted == false)
1108 return;
1109 prof_tdata = prof_tdata_get(false);
1110 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1111 return;
1112 if (prof_tdata->enq) {
1113 prof_tdata->enq_idump = true;
1114 return;
1115 }
1116
1117 if (opt_prof_prefix[0] != '\0') {
1118 malloc_mutex_lock(&prof_dump_seq_mtx);
1119 prof_dump_filename(filename, 'i', prof_dump_iseq);
1120 prof_dump_iseq++;
1121 malloc_mutex_unlock(&prof_dump_seq_mtx);
1122 prof_dump(false, filename, false);
1123 }
1124}
1125
1126bool
1127prof_mdump(const char *filename)
1128{
1129 char filename_buf[DUMP_FILENAME_BUFSIZE];
1130
1131 cassert(config_prof);
1132
1133 if (opt_prof == false || prof_booted == false)
1134 return (true);
1135
1136 if (filename == NULL) {
1137 /* No filename specified, so automatically generate one. */
1138 if (opt_prof_prefix[0] == '\0')
1139 return (true);
1140 malloc_mutex_lock(&prof_dump_seq_mtx);
1141 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1142 prof_dump_mseq++;
1143 malloc_mutex_unlock(&prof_dump_seq_mtx);
1144 filename = filename_buf;
1145 }
1146 return (prof_dump(true, filename, false));
1147}
1148
1149void
1150prof_gdump(void)
1151{
1152 prof_tdata_t *prof_tdata;
1153 char filename[DUMP_FILENAME_BUFSIZE];
1154
1155 cassert(config_prof);
1156
1157 if (prof_booted == false)
1158 return;
1159 prof_tdata = prof_tdata_get(false);
1160 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1161 return;
1162 if (prof_tdata->enq) {
1163 prof_tdata->enq_gdump = true;
1164 return;
1165 }
1166
1167 if (opt_prof_prefix[0] != '\0') {
1168 malloc_mutex_lock(&prof_dump_seq_mtx);
1169 prof_dump_filename(filename, 'u', prof_dump_useq);
1170 prof_dump_useq++;
1171 malloc_mutex_unlock(&prof_dump_seq_mtx);
1172 prof_dump(false, filename, false);
1173 }
1174}
1175
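/*
 * Hash and key-comparison functions for the cuckoo hash tables keyed by
 * backtrace contents (the global bt2ctx table and each thread's bt2cnt).
 */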
1176static void
1177prof_bt_hash(const void *key, size_t r_hash[2])
1178{
1179 prof_bt_t *bt = (prof_bt_t *)key;
1180
1181 cassert(config_prof);
1182
1183 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1184}
1185
1186static bool
1187prof_bt_keycomp(const void *k1, const void *k2)
1188{
1189 const prof_bt_t *bt1 = (prof_bt_t *)k1;
1190 const prof_bt_t *bt2 = (prof_bt_t *)k2;
1191
1192 cassert(config_prof);
1193
1194 if (bt1->len != bt2->len)
1195 return (false);
1196 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1197}
1198
1199prof_tdata_t *
1200prof_tdata_init(void)
1201{
1202 prof_tdata_t *prof_tdata;
1203
1204 cassert(config_prof);
1205
1206 /* Initialize an empty cache for this thread. */
1207 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1208 if (prof_tdata == NULL)
1209 return (NULL);
1210
1211 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1212 prof_bt_hash, prof_bt_keycomp)) {
1213 idalloc(prof_tdata);
1214 return (NULL);
1215 }
1216 ql_new(&prof_tdata->lru_ql);
1217
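	/*
	 * Per-thread scratch vector used by prof_backtrace() to record up to
	 * PROF_BT_MAX return addresses before they are copied into bt caches.
	 */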
1218 prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
1219 if (prof_tdata->vec == NULL) {
1220 ckh_delete(&prof_tdata->bt2cnt);
1221 idalloc(prof_tdata);
1222 return (NULL);
1223 }
1224
1225 prof_tdata->prng_state = 0;
1226 prof_tdata->threshold = 0;
1227 prof_tdata->accum = 0;
1228
1229 prof_tdata->enq = false;
1230 prof_tdata->enq_idump = false;
1231 prof_tdata->enq_gdump = false;
1232
1233 prof_tdata_tsd_set(&prof_tdata);
1234
1235 return (prof_tdata);
1236}
1237
1238void
1239prof_tdata_cleanup(void *arg)
1240{
1241 prof_thr_cnt_t *cnt;
1242 prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
1243
1244 cassert(config_prof);
1245
1246 if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
1247 /*
1248 * Another destructor deallocated memory after this destructor
1249 * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
1250 * in order to receive another callback.
1251 */
1252 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1253 prof_tdata_tsd_set(&prof_tdata);
1254 } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
1255 /*
1256 * The previous time this destructor was called, we set the key
1257 * to PROF_TDATA_STATE_PURGATORY so that other destructors
1258 * wouldn't cause re-creation of the prof_tdata. This time, do
1259 * nothing, so that the destructor will not be called again.
1260 */
1261 } else if (prof_tdata != NULL) {
1262 /*
1263 * Delete the hash table. All of its contents can still be
1264 * iterated over via the LRU.
1265 */
1266 ckh_delete(&prof_tdata->bt2cnt);
1267 /*
1268 * Iteratively merge cnt's into the global stats and delete
1269 * them.
1270 */
1271 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1272 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1273 prof_ctx_merge(cnt->ctx, cnt);
1274 idalloc(cnt);
1275 }
1276 idalloc(prof_tdata->vec);
1277 idalloc(prof_tdata);
1278 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1279 prof_tdata_tsd_set(&prof_tdata);
1280 }
1281}
1282
1283void
1284prof_boot0(void)
1285{
1286
1287 cassert(config_prof);
1288
1289 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1290 sizeof(PROF_PREFIX_DEFAULT));
1291}
1292
1293void
1294prof_boot1(void)
1295{
1296
1297 cassert(config_prof);
1298
1299 /*
1300 * opt_prof and prof_promote must be in their final state before any
1301 * arenas are initialized, so this function must be executed early.
1302 */
1303
1304 if (opt_prof_leak && opt_prof == false) {
1305 /*
1306 * Enable opt_prof, but in such a way that profiles are never
1307 * automatically dumped.
1308 */
1309 opt_prof = true;
1310 opt_prof_gdump = false;
1311 } else if (opt_prof) {
1312 if (opt_lg_prof_interval >= 0) {
1313 prof_interval = (((uint64_t)1U) <<
1314 opt_lg_prof_interval);
1315 }
1316 }
1317
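	/*
	 * When true, sampled allocations smaller than a page are allocated at
	 * page size (promoted to large) so that each sampled object can be
	 * tracked individually.
	 */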
1318 prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
1319}
1320
1321bool
1322prof_boot2(void)
1323{
1324
1325 cassert(config_prof);
1326
1327 if (opt_prof) {
1328 unsigned i;
1329
1330 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1331 prof_bt_keycomp))
1332 return (true);
1333 if (malloc_mutex_init(&bt2ctx_mtx))
1334 return (true);
1335 if (prof_tdata_tsd_boot()) {
1336 malloc_write(
1337 "<jemalloc>: Error in pthread_key_create()\n");
1338 abort();
1339 }
1340
1341 if (malloc_mutex_init(&prof_dump_seq_mtx))
1342 return (true);
1343 if (malloc_mutex_init(&prof_dump_mtx))
1344 return (true);
1345
1346 if (atexit(prof_fdump) != 0) {
1347 malloc_write("<jemalloc>: Error in atexit()\n");
1348 if (opt_abort)
1349 abort();
1350 }
1351
1352 ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
1353 sizeof(malloc_mutex_t));
1354 if (ctx_locks == NULL)
1355 return (true);
1356 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
1357 if (malloc_mutex_init(&ctx_locks[i]))
1358 return (true);
1359 }
1360 }
1361
1362#ifdef JEMALLOC_PROF_LIBGCC
1363 /*
1364 * Cause the backtracing machinery to allocate its internal state
1365 * before enabling profiling.
1366 */
1367 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1368#endif
1369
1370 prof_booted = true;
1371
1372 return (false);
1373}
1374
1375void
1376prof_prefork(void)
1377{
1378
1379 if (opt_prof) {
1380 unsigned i;
1381
1382 malloc_mutex_prefork(&bt2ctx_mtx);
1383 malloc_mutex_prefork(&prof_dump_seq_mtx);
1384 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1385 malloc_mutex_prefork(&ctx_locks[i]);
1386 }
1387}
1388
1389void
1390prof_postfork_parent(void)
1391{
1392
1393 if (opt_prof) {
1394 unsigned i;
1395
1396 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1397 malloc_mutex_postfork_parent(&ctx_locks[i]);
1398 malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
1399 malloc_mutex_postfork_parent(&bt2ctx_mtx);
1400 }
1401}
1402
1403void
1404prof_postfork_child(void)
1405{
1406
1407 if (opt_prof) {
1408 unsigned i;
1409
1410 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1411 malloc_mutex_postfork_child(&ctx_locks[i]);
1412 malloc_mutex_postfork_child(&prof_dump_seq_mtx);
1413 malloc_mutex_postfork_child(&bt2ctx_mtx);
1414 }
1415}
1416
1417/******************************************************************************/
944 mfd = open(filename, O_RDONLY);
945 if (mfd != -1) {
946 ssize_t nread;
947
948 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
949 propagate_err) {
950 ret = true;
951 goto label_return;
952 }
953 nread = 0;
954 do {
955 prof_dump_buf_end += nread;
956 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
957 /* Make space in prof_dump_buf before read(). */
958 if (prof_dump_flush(propagate_err) &&
959 propagate_err) {
960 ret = true;
961 goto label_return;
962 }
963 }
964 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
965 PROF_DUMP_BUFSIZE - prof_dump_buf_end);
966 } while (nread > 0);
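		/*
		 * Any bytes still buffered here remain in prof_dump_buf; they
		 * are written out when the dump buffer is next flushed, as
		 * part of closing the dump file.
		 */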
967 } else {
968 ret = true;
969 goto label_return;
970 }
971
972 ret = false;
973label_return:
974 if (mfd != -1)
975 close(mfd);
976 return (ret);
977}
978
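/*
 * Print a leak summary if any sampled allocations are still live at dump
 * time, and point the user at pprof for per-context detail.
 */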
979static void
980prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
981 const char *filename)
982{
983
984 if (cnt_all->curbytes != 0) {
985 malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
986 PRId64" object%s, %zu context%s\n",
987 cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
988 cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
989 leak_nctx, (leak_nctx != 1) ? "s" : "");
990 malloc_printf(
991 "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
992 filename);
993 }
994}
995
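/*
 * Write a complete profile to <filename>: sum per-ctx stats into cnt_all
 * under bt2ctx_mtx (via prof_enter()), then emit the header, per-ctx stats,
 * and the memory map (/proc/<pid>/maps), all serialized by prof_dump_mtx.
 */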
996static bool
997prof_dump(bool propagate_err, const char *filename, bool leakcheck)
998{
999 prof_tdata_t *prof_tdata;
1000 prof_cnt_t cnt_all;
1001 size_t tabind;
1002 union {
1003 prof_ctx_t *p;
1004 void *v;
1005 } ctx;
1006 size_t leak_nctx;
1007 prof_ctx_list_t ctx_ql;
1008
1009 cassert(config_prof);
1010
1011 prof_tdata = prof_tdata_get(false);
1012 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1013 return (true);
1014
1015 malloc_mutex_lock(&prof_dump_mtx);
1016
 1017	/* Merge per-thread profile stats, and sum them in cnt_all. */
1018 memset(&cnt_all, 0, sizeof(prof_cnt_t));
1019 leak_nctx = 0;
1020 ql_new(&ctx_ql);
1021 prof_enter(prof_tdata);
1022 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
1023 prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
1024 prof_leave(prof_tdata);
1025
1026 /* Create dump file. */
1027 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
1028 goto label_open_close_error;
1029
1030 /* Dump profile header. */
1031 if (prof_dump_header(propagate_err, &cnt_all))
1032 goto label_write_error;
1033
1034 /* Dump per ctx profile stats. */
1035 while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
1036 if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
1037 goto label_write_error;
1038 }
1039
1040 /* Dump /proc/<pid>/maps if possible. */
1041 if (prof_dump_maps(propagate_err))
1042 goto label_write_error;
1043
1044 if (prof_dump_close(propagate_err))
1045 goto label_open_close_error;
1046
1047 malloc_mutex_unlock(&prof_dump_mtx);
1048
1049 if (leakcheck)
1050 prof_leakcheck(&cnt_all, leak_nctx, filename);
1051
1052 return (false);
1053label_write_error:
1054 prof_dump_close(propagate_err);
1055label_open_close_error:
1056 while ((ctx.p = ql_first(&ctx_ql)) != NULL)
1057 prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
1058 malloc_mutex_unlock(&prof_dump_mtx);
1059 return (true);
1060}
1061
1062#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
1063#define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
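/*
 * Illustrative results, assuming the default "jeprof" prefix and made-up
 * pid/sequence values: "jeprof.1234.0.i0.heap" for an interval dump, or
 * "jeprof.1234.3.f.heap" when vseq is VSEQ_INVALID (final dump).
 */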
1064static void
1065prof_dump_filename(char *filename, char v, uint64_t vseq)
1066{
1067
1068 cassert(config_prof);
1069
1070 if (vseq != VSEQ_INVALID) {
 1071		/* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
1072 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1073 "%s.%d.%"PRIu64".%c%"PRId64".heap",
1074 opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
1075 } else {
1076 /* "<prefix>.<pid>.<seq>.<v>.heap" */
1077 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1078 "%s.%d.%"PRIu64".%c.heap",
1079 opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
1080 }
1081 prof_dump_seq++;
1082}
1083
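/*
 * Final profile dump, registered with atexit() in prof_boot2() and only
 * performed when opt_prof_final is enabled and a dump prefix is configured.
 */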
1084static void
1085prof_fdump(void)
1086{
1087 char filename[DUMP_FILENAME_BUFSIZE];
1088
1089 cassert(config_prof);
1090
1091 if (prof_booted == false)
1092 return;
1093
1094 if (opt_prof_final && opt_prof_prefix[0] != '\0') {
1095 malloc_mutex_lock(&prof_dump_seq_mtx);
1096 prof_dump_filename(filename, 'f', VSEQ_INVALID);
1097 malloc_mutex_unlock(&prof_dump_seq_mtx);
1098 prof_dump(false, filename, opt_prof_leak);
1099 }
1100}
1101
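/*
 * Interval-triggered dump ('i' series). If the calling thread is inside a
 * prof_enter() critical section, defer the dump by setting enq_idump.
 */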
1102void
1103prof_idump(void)
1104{
1105 prof_tdata_t *prof_tdata;
1106 char filename[PATH_MAX + 1];
1107
1108 cassert(config_prof);
1109
1110 if (prof_booted == false)
1111 return;
1112 prof_tdata = prof_tdata_get(false);
1113 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1114 return;
1115 if (prof_tdata->enq) {
1116 prof_tdata->enq_idump = true;
1117 return;
1118 }
1119
1120 if (opt_prof_prefix[0] != '\0') {
1121 malloc_mutex_lock(&prof_dump_seq_mtx);
1122 prof_dump_filename(filename, 'i', prof_dump_iseq);
1123 prof_dump_iseq++;
1124 malloc_mutex_unlock(&prof_dump_seq_mtx);
1125 prof_dump(false, filename, false);
1126 }
1127}
1128
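/*
 * Explicitly requested dump ('m' series). If no filename is supplied, one is
 * generated from opt_prof_prefix; returns true on failure.
 */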
1129bool
1130prof_mdump(const char *filename)
1131{
1132 char filename_buf[DUMP_FILENAME_BUFSIZE];
1133
1134 cassert(config_prof);
1135
1136 if (opt_prof == false || prof_booted == false)
1137 return (true);
1138
1139 if (filename == NULL) {
1140 /* No filename specified, so automatically generate one. */
1141 if (opt_prof_prefix[0] == '\0')
1142 return (true);
1143 malloc_mutex_lock(&prof_dump_seq_mtx);
1144 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1145 prof_dump_mseq++;
1146 malloc_mutex_unlock(&prof_dump_seq_mtx);
1147 filename = filename_buf;
1148 }
1149 return (prof_dump(true, filename, false));
1150}
1151
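/*
 * Dump triggered when total memory usage reaches a new maximum ('u' series),
 * deferred via enq_gdump when inside a prof_enter() critical section.
 */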
1152void
1153prof_gdump(void)
1154{
1155 prof_tdata_t *prof_tdata;
1156 char filename[DUMP_FILENAME_BUFSIZE];
1157
1158 cassert(config_prof);
1159
1160 if (prof_booted == false)
1161 return;
1162 prof_tdata = prof_tdata_get(false);
1163 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1164 return;
1165 if (prof_tdata->enq) {
1166 prof_tdata->enq_gdump = true;
1167 return;
1168 }
1169
1170 if (opt_prof_prefix[0] != '\0') {
1171 malloc_mutex_lock(&prof_dump_seq_mtx);
1172 prof_dump_filename(filename, 'u', prof_dump_useq);
1173 prof_dump_useq++;
1174 malloc_mutex_unlock(&prof_dump_seq_mtx);
1175 prof_dump(false, filename, false);
1176 }
1177}
1178
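/*
 * Hash and comparison functions for the ckh tables that key on backtraces
 * (the global bt2ctx table and each thread's bt2cnt cache).
 */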
1179static void
1180prof_bt_hash(const void *key, size_t r_hash[2])
1181{
1182 prof_bt_t *bt = (prof_bt_t *)key;
1183
1184 cassert(config_prof);
1185
1186 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1187}
1188
1189static bool
1190prof_bt_keycomp(const void *k1, const void *k2)
1191{
1192 const prof_bt_t *bt1 = (prof_bt_t *)k1;
1193 const prof_bt_t *bt2 = (prof_bt_t *)k2;
1194
1195 cassert(config_prof);
1196
1197 if (bt1->len != bt2->len)
1198 return (false);
1199 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1200}
1201
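/*
 * Allocate this thread's profiling state (bt2cnt cache, LRU list, backtrace
 * vector, and sampling state) and install it in TSD.
 */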
1202prof_tdata_t *
1203prof_tdata_init(void)
1204{
1205 prof_tdata_t *prof_tdata;
1206
1207 cassert(config_prof);
1208
1209 /* Initialize an empty cache for this thread. */
1210 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1211 if (prof_tdata == NULL)
1212 return (NULL);
1213
1214 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1215 prof_bt_hash, prof_bt_keycomp)) {
1216 idalloc(prof_tdata);
1217 return (NULL);
1218 }
1219 ql_new(&prof_tdata->lru_ql);
1220
1221 prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
1222 if (prof_tdata->vec == NULL) {
1223 ckh_delete(&prof_tdata->bt2cnt);
1224 idalloc(prof_tdata);
1225 return (NULL);
1226 }
1227
1228 prof_tdata->prng_state = 0;
1229 prof_tdata->threshold = 0;
1230 prof_tdata->accum = 0;
1231
1232 prof_tdata->enq = false;
1233 prof_tdata->enq_idump = false;
1234 prof_tdata->enq_gdump = false;
1235
1236 prof_tdata_tsd_set(&prof_tdata);
1237
1238 return (prof_tdata);
1239}
1240
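/*
 * TSD destructor. The REINCARNATED/PURGATORY sentinels handle the case in
 * which other TSD destructors allocate, and therefore re-create prof_tdata,
 * after this destructor has already run.
 */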
1241void
1242prof_tdata_cleanup(void *arg)
1243{
1244 prof_thr_cnt_t *cnt;
1245 prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
1246
1247 cassert(config_prof);
1248
1249 if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
1250 /*
1251 * Another destructor deallocated memory after this destructor
1252 * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
1253 * in order to receive another callback.
1254 */
1255 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1256 prof_tdata_tsd_set(&prof_tdata);
1257 } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
1258 /*
1259 * The previous time this destructor was called, we set the key
1260 * to PROF_TDATA_STATE_PURGATORY so that other destructors
1261 * wouldn't cause re-creation of the prof_tdata. This time, do
1262 * nothing, so that the destructor will not be called again.
1263 */
1264 } else if (prof_tdata != NULL) {
1265 /*
1266 * Delete the hash table. All of its contents can still be
1267 * iterated over via the LRU.
1268 */
1269 ckh_delete(&prof_tdata->bt2cnt);
1270 /*
1271 * Iteratively merge cnt's into the global stats and delete
1272 * them.
1273 */
1274 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1275 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1276 prof_ctx_merge(cnt->ctx, cnt);
1277 idalloc(cnt);
1278 }
1279 idalloc(prof_tdata->vec);
1280 idalloc(prof_tdata);
1281 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1282 prof_tdata_tsd_set(&prof_tdata);
1283 }
1284}
1285
1286void
1287prof_boot0(void)
1288{
1289
1290 cassert(config_prof);
1291
1292 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1293 sizeof(PROF_PREFIX_DEFAULT));
1294}
1295
1296void
1297prof_boot1(void)
1298{
1299
1300 cassert(config_prof);
1301
1302 /*
1303 * opt_prof and prof_promote must be in their final state before any
1304 * arenas are initialized, so this function must be executed early.
1305 */
1306
1307 if (opt_prof_leak && opt_prof == false) {
1308 /*
1309 * Enable opt_prof, but in such a way that profiles are never
1310 * automatically dumped.
1311 */
1312 opt_prof = true;
1313 opt_prof_gdump = false;
1314 } else if (opt_prof) {
1315 if (opt_lg_prof_interval >= 0) {
1316 prof_interval = (((uint64_t)1U) <<
1317 opt_lg_prof_interval);
1318 }
1319 }
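	/*
	 * For example, opt_lg_prof_interval == 30 arms an interval dump for
	 * roughly every 2^30 bytes (1 GiB) of allocation activity.
	 */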
1320
1321 prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
1322}
1323
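/*
 * When opt_prof is enabled, create the global profiling state (bt2ctx table,
 * dump mutexes, ctx lock array, and the prof_tdata TSD key) and register the
 * atexit() final dump.
 */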
1324bool
1325prof_boot2(void)
1326{
1327
1328 cassert(config_prof);
1329
1330 if (opt_prof) {
1331 unsigned i;
1332
1333 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1334 prof_bt_keycomp))
1335 return (true);
1336 if (malloc_mutex_init(&bt2ctx_mtx))
1337 return (true);
1338 if (prof_tdata_tsd_boot()) {
1339 malloc_write(
1340 "<jemalloc>: Error in pthread_key_create()\n");
1341 abort();
1342 }
1343
1344 if (malloc_mutex_init(&prof_dump_seq_mtx))
1345 return (true);
1346 if (malloc_mutex_init(&prof_dump_mtx))
1347 return (true);
1348
1349 if (atexit(prof_fdump) != 0) {
1350 malloc_write("<jemalloc>: Error in atexit()\n");
1351 if (opt_abort)
1352 abort();
1353 }
1354
1355 ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
1356 sizeof(malloc_mutex_t));
1357 if (ctx_locks == NULL)
1358 return (true);
1359 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
1360 if (malloc_mutex_init(&ctx_locks[i]))
1361 return (true);
1362 }
1363 }
1364
1365#ifdef JEMALLOC_PROF_LIBGCC
1366 /*
1367 * Cause the backtracing machinery to allocate its internal state
1368 * before enabling profiling.
1369 */
1370 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1371#endif
1372
1373 prof_booted = true;
1374
1375 return (false);
1376}
1377
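/*
 * Fork handlers: acquire the global profiling mutexes (bt2ctx, dump sequence,
 * and the ctx lock array) before fork(), and release them in the parent and
 * child afterward, so the child never inherits a lock held by another thread.
 */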
1378void
1379prof_prefork(void)
1380{
1381
1382 if (opt_prof) {
1383 unsigned i;
1384
1385 malloc_mutex_prefork(&bt2ctx_mtx);
1386 malloc_mutex_prefork(&prof_dump_seq_mtx);
1387 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1388 malloc_mutex_prefork(&ctx_locks[i]);
1389 }
1390}
1391
1392void
1393prof_postfork_parent(void)
1394{
1395
1396 if (opt_prof) {
1397 unsigned i;
1398
1399 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1400 malloc_mutex_postfork_parent(&ctx_locks[i]);
1401 malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
1402 malloc_mutex_postfork_parent(&bt2ctx_mtx);
1403 }
1404}
1405
1406void
1407prof_postfork_child(void)
1408{
1409
1410 if (opt_prof) {
1411 unsigned i;
1412
1413 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1414 malloc_mutex_postfork_child(&ctx_locks[i]);
1415 malloc_mutex_postfork_child(&prof_dump_seq_mtx);
1416 malloc_mutex_postfork_child(&bt2ctx_mtx);
1417 }
1418}
1419
1420/******************************************************************************/