1#define JEMALLOC_PROF_C_ 2#include "jemalloc/internal/jemalloc_internal.h" 3/******************************************************************************/ 4 5#ifdef JEMALLOC_PROF_LIBUNWIND 6#define UNW_LOCAL_ONLY 7#include <libunwind.h> 8#endif 9 10#ifdef JEMALLOC_PROF_LIBGCC 11#include <unwind.h> 12#endif 13 14/******************************************************************************/ 15/* Data. */ 16 17malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) 18 19bool opt_prof = false; 20bool opt_prof_active = true; 21size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; 22ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; 23bool opt_prof_gdump = false; 24bool opt_prof_final = true; 25bool opt_prof_leak = false; 26bool opt_prof_accum = false; 27char opt_prof_prefix[ 28 /* Minimize memory bloat for non-prof builds. */ 29#ifdef JEMALLOC_PROF 30 PATH_MAX + 31#endif 32 1]; 33 34uint64_t prof_interval = 0; 35bool prof_promote; 36 37/* 38 * Table of mutexes that are shared among ctx's. These are leaf locks, so 39 * there is no problem with using them for more than one ctx at the same time. 40 * The primary motivation for this sharing though is that ctx's are ephemeral, 41 * and destroying mutexes causes complications for systems that allocate when 42 * creating/destroying mutexes. 43 */ 44static malloc_mutex_t *ctx_locks; 45static unsigned cum_ctxs; /* Atomic counter. */ 46 47/* 48 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data 49 * structure that knows about all backtraces currently captured. 50 */ 51static ckh_t bt2ctx; 52static malloc_mutex_t bt2ctx_mtx; 53 54static malloc_mutex_t prof_dump_seq_mtx; 55static uint64_t prof_dump_seq; 56static uint64_t prof_dump_iseq; 57static uint64_t prof_dump_mseq; 58static uint64_t prof_dump_useq; 59 60/* 61 * This buffer is rather large for stack allocation, so use a single buffer for 62 * all profile dumps. 63 */ 64static malloc_mutex_t prof_dump_mtx; 65static char prof_dump_buf[ 66 /* Minimize memory bloat for non-prof builds. */ 67#ifdef JEMALLOC_PROF 68 PROF_DUMP_BUFSIZE 69#else 70 1 71#endif 72]; 73static unsigned prof_dump_buf_end; 74static int prof_dump_fd; 75 76/* Do not dump any profiles until bootstrapping is complete. */ 77static bool prof_booted = false; 78 79/******************************************************************************/ 80 81void 82bt_init(prof_bt_t *bt, void **vec) 83{ 84 85 cassert(config_prof); 86 87 bt->vec = vec; 88 bt->len = 0; 89} 90 91static void 92bt_destroy(prof_bt_t *bt) 93{ 94 95 cassert(config_prof); 96 97 idalloc(bt); 98} 99 100static prof_bt_t * 101bt_dup(prof_bt_t *bt) 102{ 103 prof_bt_t *ret; 104 105 cassert(config_prof); 106 107 /* 108 * Create a single allocation that has space for vec immediately 109 * following the prof_bt_t structure. The backtraces that get 110 * stored in the backtrace caches are copied from stack-allocated 111 * temporary variables, so size is known at creation time. Making this 112 * a contiguous object improves cache locality. 113 */ 114 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + 115 (bt->len * sizeof(void *))); 116 if (ret == NULL) 117 return (NULL); 118 ret->vec = (void **)((uintptr_t)ret + 119 QUANTUM_CEILING(sizeof(prof_bt_t))); 120 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); 121 ret->len = bt->len; 122 123 return (ret); 124} 125 126static inline void 127prof_enter(prof_tdata_t *prof_tdata) 128{ 129 130 cassert(config_prof); 131 132 assert(prof_tdata->enq == false); 133 prof_tdata->enq = true; 134 135 malloc_mutex_lock(&bt2ctx_mtx); 136} 137 138static inline void 139prof_leave(prof_tdata_t *prof_tdata) 140{ 141 bool idump, gdump; 142 143 cassert(config_prof); 144 145 malloc_mutex_unlock(&bt2ctx_mtx); 146 147 assert(prof_tdata->enq); 148 prof_tdata->enq = false; 149 idump = prof_tdata->enq_idump; 150 prof_tdata->enq_idump = false; 151 gdump = prof_tdata->enq_gdump; 152 prof_tdata->enq_gdump = false; 153 154 if (idump) 155 prof_idump(); 156 if (gdump) 157 prof_gdump(); 158} 159 160#ifdef JEMALLOC_PROF_LIBUNWIND 161void 162prof_backtrace(prof_bt_t *bt, unsigned nignore) 163{ 164 unw_context_t uc; 165 unw_cursor_t cursor; 166 unsigned i; 167 int err; 168 169 cassert(config_prof); 170 assert(bt->len == 0); 171 assert(bt->vec != NULL); 172 173 unw_getcontext(&uc); 174 unw_init_local(&cursor, &uc); 175 176 /* Throw away (nignore+1) stack frames, if that many exist. */ 177 for (i = 0; i < nignore + 1; i++) { 178 err = unw_step(&cursor); 179 if (err <= 0) 180 return; 181 } 182 183 /* 184 * Iterate over stack frames until there are no more, or until no space 185 * remains in bt. 186 */ 187 for (i = 0; i < PROF_BT_MAX; i++) { 188 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); 189 bt->len++; 190 err = unw_step(&cursor); 191 if (err <= 0) 192 break; 193 } 194} 195#elif (defined(JEMALLOC_PROF_LIBGCC)) 196static _Unwind_Reason_Code 197prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) 198{ 199 200 cassert(config_prof); 201 202 return (_URC_NO_REASON); 203} 204 205static _Unwind_Reason_Code 206prof_unwind_callback(struct _Unwind_Context *context, void *arg) 207{ 208 prof_unwind_data_t *data = (prof_unwind_data_t *)arg; 209 210 cassert(config_prof); 211 212 if (data->nignore > 0) 213 data->nignore--; 214 else { 215 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); 216 data->bt->len++; 217 if (data->bt->len == data->max) 218 return (_URC_END_OF_STACK); 219 } 220 221 return (_URC_NO_REASON); 222} 223 224void 225prof_backtrace(prof_bt_t *bt, unsigned nignore) 226{ 227 prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; 228 229 cassert(config_prof); 230 231 _Unwind_Backtrace(prof_unwind_callback, &data); 232} 233#elif (defined(JEMALLOC_PROF_GCC)) 234void 235prof_backtrace(prof_bt_t *bt, unsigned nignore) 236{ 237#define BT_FRAME(i) \ 238 if ((i) < nignore + PROF_BT_MAX) { \ 239 void *p; \ 240 if (__builtin_frame_address(i) == 0) \ 241 return; \ 242 p = __builtin_return_address(i); \ 243 if (p == NULL) \ 244 return; \ 245 if (i >= nignore) { \ 246 bt->vec[(i) - nignore] = p; \ 247 bt->len = (i) - nignore + 1; \ 248 } \ 249 } else \ 250 return; 251 252 cassert(config_prof); 253 assert(nignore <= 3); 254 255 BT_FRAME(0) 256 BT_FRAME(1) 257 BT_FRAME(2) 258 BT_FRAME(3) 259 BT_FRAME(4) 260 BT_FRAME(5) 261 BT_FRAME(6) 262 BT_FRAME(7) 263 BT_FRAME(8) 264 BT_FRAME(9) 265 266 BT_FRAME(10) 267 BT_FRAME(11) 268 BT_FRAME(12) 269 BT_FRAME(13) 270 BT_FRAME(14) 271 BT_FRAME(15) 272 BT_FRAME(16) 273 BT_FRAME(17) 274 BT_FRAME(18) 275 BT_FRAME(19) 276 277 BT_FRAME(20) 278 BT_FRAME(21) 279 BT_FRAME(22) 280 BT_FRAME(23) 281 BT_FRAME(24) 282 BT_FRAME(25) 283 BT_FRAME(26) 284 BT_FRAME(27) 285 BT_FRAME(28) 286 BT_FRAME(29) 287 288 BT_FRAME(30) 289 BT_FRAME(31) 290 BT_FRAME(32) 291 BT_FRAME(33) 292 BT_FRAME(34) 293 BT_FRAME(35) 294 BT_FRAME(36) 295 BT_FRAME(37) 296 BT_FRAME(38) 297 BT_FRAME(39) 298 299 BT_FRAME(40) 300 BT_FRAME(41) 301 BT_FRAME(42) 302 BT_FRAME(43) 303 BT_FRAME(44) 304 BT_FRAME(45) 305 BT_FRAME(46) 306 BT_FRAME(47) 307 BT_FRAME(48) 308 BT_FRAME(49) 309 310 BT_FRAME(50) 311 BT_FRAME(51) 312 BT_FRAME(52) 313 BT_FRAME(53) 314 BT_FRAME(54) 315 BT_FRAME(55) 316 BT_FRAME(56) 317 BT_FRAME(57) 318 BT_FRAME(58) 319 BT_FRAME(59) 320 321 BT_FRAME(60) 322 BT_FRAME(61) 323 BT_FRAME(62) 324 BT_FRAME(63) 325 BT_FRAME(64) 326 BT_FRAME(65) 327 BT_FRAME(66) 328 BT_FRAME(67) 329 BT_FRAME(68) 330 BT_FRAME(69) 331 332 BT_FRAME(70) 333 BT_FRAME(71) 334 BT_FRAME(72) 335 BT_FRAME(73) 336 BT_FRAME(74) 337 BT_FRAME(75) 338 BT_FRAME(76) 339 BT_FRAME(77) 340 BT_FRAME(78) 341 BT_FRAME(79) 342 343 BT_FRAME(80) 344 BT_FRAME(81) 345 BT_FRAME(82) 346 BT_FRAME(83) 347 BT_FRAME(84) 348 BT_FRAME(85) 349 BT_FRAME(86) 350 BT_FRAME(87) 351 BT_FRAME(88) 352 BT_FRAME(89) 353 354 BT_FRAME(90) 355 BT_FRAME(91) 356 BT_FRAME(92) 357 BT_FRAME(93) 358 BT_FRAME(94) 359 BT_FRAME(95) 360 BT_FRAME(96) 361 BT_FRAME(97) 362 BT_FRAME(98) 363 BT_FRAME(99) 364 365 BT_FRAME(100) 366 BT_FRAME(101) 367 BT_FRAME(102) 368 BT_FRAME(103) 369 BT_FRAME(104) 370 BT_FRAME(105) 371 BT_FRAME(106) 372 BT_FRAME(107) 373 BT_FRAME(108) 374 BT_FRAME(109) 375 376 BT_FRAME(110) 377 BT_FRAME(111) 378 BT_FRAME(112) 379 BT_FRAME(113) 380 BT_FRAME(114) 381 BT_FRAME(115) 382 BT_FRAME(116) 383 BT_FRAME(117) 384 BT_FRAME(118) 385 BT_FRAME(119) 386 387 BT_FRAME(120) 388 BT_FRAME(121) 389 BT_FRAME(122) 390 BT_FRAME(123) 391 BT_FRAME(124) 392 BT_FRAME(125) 393 BT_FRAME(126) 394 BT_FRAME(127) 395 396 /* Extras to compensate for nignore. */ 397 BT_FRAME(128) 398 BT_FRAME(129) 399 BT_FRAME(130) 400#undef BT_FRAME 401} 402#else 403void 404prof_backtrace(prof_bt_t *bt, unsigned nignore) 405{ 406 407 cassert(config_prof); 408 not_reached(); 409} 410#endif 411 412static malloc_mutex_t * 413prof_ctx_mutex_choose(void) 414{ 415 unsigned nctxs = atomic_add_u(&cum_ctxs, 1); 416 417 return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); 418} 419 420static void 421prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) 422{ 423 424 ctx->bt = bt; 425 ctx->lock = prof_ctx_mutex_choose(); 426 /* 427 * Set nlimbo to 1, in order to avoid a race condition with 428 * prof_ctx_merge()/prof_ctx_destroy(). 429 */ 430 ctx->nlimbo = 1; 431 ql_elm_new(ctx, dump_link); 432 memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); 433 ql_new(&ctx->cnts_ql); 434} 435 436static void 437prof_ctx_destroy(prof_ctx_t *ctx) 438{ 439 prof_tdata_t *prof_tdata; 440 441 cassert(config_prof); 442 443 /* 444 * Check that ctx is still unused by any thread cache before destroying 445 * it. prof_lookup() increments ctx->nlimbo in order to avoid a race 446 * condition with this function, as does prof_ctx_merge() in order to 447 * avoid a race between the main body of prof_ctx_merge() and entry 448 * into this function. 449 */ 450 prof_tdata = prof_tdata_get(false); 451 assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); 452 prof_enter(prof_tdata); 453 malloc_mutex_lock(ctx->lock); 454 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && 455 ctx->nlimbo == 1) { 456 assert(ctx->cnt_merged.curbytes == 0); 457 assert(ctx->cnt_merged.accumobjs == 0); 458 assert(ctx->cnt_merged.accumbytes == 0); 459 /* Remove ctx from bt2ctx. */ 460 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) 461 not_reached(); 462 prof_leave(prof_tdata); 463 /* Destroy ctx. */ 464 malloc_mutex_unlock(ctx->lock); 465 bt_destroy(ctx->bt); 466 idalloc(ctx); 467 } else { 468 /* 469 * Compensate for increment in prof_ctx_merge() or 470 * prof_lookup(). 471 */ 472 ctx->nlimbo--; 473 malloc_mutex_unlock(ctx->lock); 474 prof_leave(prof_tdata); 475 } 476} 477 478static void 479prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) 480{ 481 bool destroy; 482 483 cassert(config_prof); 484 485 /* Merge cnt stats and detach from ctx. */ 486 malloc_mutex_lock(ctx->lock); 487 ctx->cnt_merged.curobjs += cnt->cnts.curobjs; 488 ctx->cnt_merged.curbytes += cnt->cnts.curbytes; 489 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; 490 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; 491 ql_remove(&ctx->cnts_ql, cnt, cnts_link); 492 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && 493 ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { 494 /* 495 * Increment ctx->nlimbo in order to keep another thread from 496 * winning the race to destroy ctx while this one has ctx->lock 497 * dropped. Without this, it would be possible for another 498 * thread to: 499 * 500 * 1) Sample an allocation associated with ctx. 501 * 2) Deallocate the sampled object. 502 * 3) Successfully prof_ctx_destroy(ctx). 503 * 504 * The result would be that ctx no longer exists by the time 505 * this thread accesses it in prof_ctx_destroy(). 506 */ 507 ctx->nlimbo++; 508 destroy = true; 509 } else 510 destroy = false; 511 malloc_mutex_unlock(ctx->lock); 512 if (destroy) 513 prof_ctx_destroy(ctx); 514} 515 516static bool 517prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, 518 prof_ctx_t **p_ctx, bool *p_new_ctx) 519{ 520 union { 521 prof_ctx_t *p; 522 void *v; 523 } ctx; 524 union { 525 prof_bt_t *p; 526 void *v; 527 } btkey; 528 bool new_ctx; 529 530 prof_enter(prof_tdata); 531 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { 532 /* bt has never been seen before. Insert it. */ 533 ctx.v = imalloc(sizeof(prof_ctx_t)); 534 if (ctx.v == NULL) { 535 prof_leave(prof_tdata); 536 return (true); 537 } 538 btkey.p = bt_dup(bt); 539 if (btkey.v == NULL) { 540 prof_leave(prof_tdata); 541 idalloc(ctx.v); 542 return (true); 543 } 544 prof_ctx_init(ctx.p, btkey.p); 545 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { 546 /* OOM. */ 547 prof_leave(prof_tdata); 548 idalloc(btkey.v); 549 idalloc(ctx.v); 550 return (true); 551 } 552 new_ctx = true; 553 } else { 554 /* 555 * Increment nlimbo, in order to avoid a race condition with 556 * prof_ctx_merge()/prof_ctx_destroy(). 557 */ 558 malloc_mutex_lock(ctx.p->lock); 559 ctx.p->nlimbo++; 560 malloc_mutex_unlock(ctx.p->lock); 561 new_ctx = false; 562 } 563 prof_leave(prof_tdata); 564 565 *p_btkey = btkey.v; 566 *p_ctx = ctx.p; 567 *p_new_ctx = new_ctx; 568 return (false); 569} 570 571prof_thr_cnt_t * 572prof_lookup(prof_bt_t *bt) 573{ 574 union { 575 prof_thr_cnt_t *p; 576 void *v; 577 } ret; 578 prof_tdata_t *prof_tdata; 579 580 cassert(config_prof); 581 582 prof_tdata = prof_tdata_get(false); 583 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 584 return (NULL); 585 586 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { 587 void *btkey; 588 prof_ctx_t *ctx; 589 bool new_ctx; 590 591 /* 592 * This thread's cache lacks bt. Look for it in the global 593 * cache. 594 */ 595 if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) 596 return (NULL); 597 598 /* Link a prof_thd_cnt_t into ctx for this thread. */ 599 if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { 600 assert(ckh_count(&prof_tdata->bt2cnt) > 0); 601 /* 602 * Flush the least recently used cnt in order to keep 603 * bt2cnt from becoming too large. 604 */ 605 ret.p = ql_last(&prof_tdata->lru_ql, lru_link); 606 assert(ret.v != NULL); 607 if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, 608 NULL, NULL)) 609 not_reached(); 610 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); 611 prof_ctx_merge(ret.p->ctx, ret.p); 612 /* ret can now be re-used. */ 613 } else { 614 assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); 615 /* Allocate and partially initialize a new cnt. */ 616 ret.v = imalloc(sizeof(prof_thr_cnt_t)); 617 if (ret.p == NULL) { 618 if (new_ctx) 619 prof_ctx_destroy(ctx); 620 return (NULL); 621 } 622 ql_elm_new(ret.p, cnts_link); 623 ql_elm_new(ret.p, lru_link); 624 } 625 /* Finish initializing ret. */ 626 ret.p->ctx = ctx; 627 ret.p->epoch = 0; 628 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); 629 if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { 630 if (new_ctx) 631 prof_ctx_destroy(ctx); 632 idalloc(ret.v); 633 return (NULL); 634 } 635 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); 636 malloc_mutex_lock(ctx->lock); 637 ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); 638 ctx->nlimbo--; 639 malloc_mutex_unlock(ctx->lock); 640 } else { 641 /* Move ret to the front of the LRU. */ 642 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); 643 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); 644 } 645 646 return (ret.p); 647} 648 649#ifdef JEMALLOC_JET 650size_t 651prof_bt_count(void) 652{ 653 size_t bt_count; 654 prof_tdata_t *prof_tdata; 655 656 prof_tdata = prof_tdata_get(false); 657 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 658 return (0); 659 660 prof_enter(prof_tdata); 661 bt_count = ckh_count(&bt2ctx); 662 prof_leave(prof_tdata); 663 664 return (bt_count); 665} 666#endif 667 668#ifdef JEMALLOC_JET 669#undef prof_dump_open 670#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) 671#endif 672static int 673prof_dump_open(bool propagate_err, const char *filename) 674{ 675 int fd; 676 677 fd = creat(filename, 0644); 678 if (fd == -1 && propagate_err == false) { 679 malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", 680 filename); 681 if (opt_abort) 682 abort(); 683 } 684 685 return (fd); 686} 687#ifdef JEMALLOC_JET 688#undef prof_dump_open 689#define prof_dump_open JEMALLOC_N(prof_dump_open) 690prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); 691#endif 692 693static bool 694prof_dump_flush(bool propagate_err) 695{ 696 bool ret = false; 697 ssize_t err; 698 699 cassert(config_prof); 700 701 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); 702 if (err == -1) { 703 if (propagate_err == false) { 704 malloc_write("<jemalloc>: write() failed during heap " 705 "profile flush\n"); 706 if (opt_abort) 707 abort(); 708 } 709 ret = true; 710 } 711 prof_dump_buf_end = 0; 712 713 return (ret); 714} 715 716static bool 717prof_dump_close(bool propagate_err) 718{ 719 bool ret; 720 721 assert(prof_dump_fd != -1); 722 ret = prof_dump_flush(propagate_err); 723 close(prof_dump_fd); 724 prof_dump_fd = -1; 725 726 return (ret); 727} 728 729static bool 730prof_dump_write(bool propagate_err, const char *s) 731{ 732 unsigned i, slen, n; 733 734 cassert(config_prof); 735 736 i = 0; 737 slen = strlen(s); 738 while (i < slen) { 739 /* Flush the buffer if it is full. */ 740 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) 741 if (prof_dump_flush(propagate_err) && propagate_err) 742 return (true); 743 744 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { 745 /* Finish writing. */ 746 n = slen - i; 747 } else { 748 /* Write as much of s as will fit. */ 749 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; 750 } 751 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); 752 prof_dump_buf_end += n; 753 i += n; 754 } 755 756 return (false); 757} 758 759JEMALLOC_ATTR(format(printf, 2, 3)) 760static bool 761prof_dump_printf(bool propagate_err, const char *format, ...) 762{ 763 bool ret; 764 va_list ap; 765 char buf[PROF_PRINTF_BUFSIZE]; 766 767 va_start(ap, format); 768 malloc_vsnprintf(buf, sizeof(buf), format, ap); 769 va_end(ap); 770 ret = prof_dump_write(propagate_err, buf); 771 772 return (ret); 773} 774 775static void 776prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, 777 prof_ctx_list_t *ctx_ql) 778{ 779 prof_thr_cnt_t *thr_cnt; 780 prof_cnt_t tcnt; 781 782 cassert(config_prof); 783 784 malloc_mutex_lock(ctx->lock); 785 786 /* 787 * Increment nlimbo so that ctx won't go away before dump. 788 * Additionally, link ctx into the dump list so that it is included in 789 * prof_dump()'s second pass. 790 */ 791 ctx->nlimbo++; 792 ql_tail_insert(ctx_ql, ctx, dump_link); 793 794 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); 795 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { 796 volatile unsigned *epoch = &thr_cnt->epoch; 797 798 while (true) { 799 unsigned epoch0 = *epoch; 800 801 /* Make sure epoch is even. */ 802 if (epoch0 & 1U) 803 continue; 804 805 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); 806 807 /* Terminate if epoch didn't change while reading. */ 808 if (*epoch == epoch0) 809 break; 810 } 811 812 ctx->cnt_summed.curobjs += tcnt.curobjs; 813 ctx->cnt_summed.curbytes += tcnt.curbytes; 814 if (opt_prof_accum) { 815 ctx->cnt_summed.accumobjs += tcnt.accumobjs; 816 ctx->cnt_summed.accumbytes += tcnt.accumbytes; 817 } 818 } 819 820 if (ctx->cnt_summed.curobjs != 0) 821 (*leak_nctx)++; 822 823 /* Add to cnt_all. */ 824 cnt_all->curobjs += ctx->cnt_summed.curobjs; 825 cnt_all->curbytes += ctx->cnt_summed.curbytes; 826 if (opt_prof_accum) { 827 cnt_all->accumobjs += ctx->cnt_summed.accumobjs; 828 cnt_all->accumbytes += ctx->cnt_summed.accumbytes; 829 } 830 831 malloc_mutex_unlock(ctx->lock); 832} 833 834static bool 835prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) 836{ 837 838 if (opt_lg_prof_sample == 0) { 839 if (prof_dump_printf(propagate_err, 840 "heap profile: %"PRId64": %"PRId64 841 " [%"PRIu64": %"PRIu64"] @ heapprofile\n", 842 cnt_all->curobjs, cnt_all->curbytes, 843 cnt_all->accumobjs, cnt_all->accumbytes)) 844 return (true); 845 } else { 846 if (prof_dump_printf(propagate_err, 847 "heap profile: %"PRId64": %"PRId64 848 " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", 849 cnt_all->curobjs, cnt_all->curbytes, 850 cnt_all->accumobjs, cnt_all->accumbytes, 851 ((uint64_t)1U << opt_lg_prof_sample))) 852 return (true); 853 } 854 855 return (false); 856} 857 858static void 859prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) 860{ 861 862 ctx->nlimbo--; 863 ql_remove(ctx_ql, ctx, dump_link); 864} 865 866static void 867prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) 868{ 869 870 malloc_mutex_lock(ctx->lock); 871 prof_dump_ctx_cleanup_locked(ctx, ctx_ql); 872 malloc_mutex_unlock(ctx->lock); 873} 874 875static bool 876prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, 877 prof_ctx_list_t *ctx_ql) 878{ 879 bool ret; 880 unsigned i; 881 882 cassert(config_prof); 883 884 /* 885 * Current statistics can sum to 0 as a result of unmerged per thread 886 * statistics. Additionally, interval- and growth-triggered dumps can 887 * occur between the time a ctx is created and when its statistics are 888 * filled in. Avoid dumping any ctx that is an artifact of either 889 * implementation detail. 890 */ 891 malloc_mutex_lock(ctx->lock); 892 if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || 893 (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { 894 assert(ctx->cnt_summed.curobjs == 0); 895 assert(ctx->cnt_summed.curbytes == 0); 896 assert(ctx->cnt_summed.accumobjs == 0); 897 assert(ctx->cnt_summed.accumbytes == 0); 898 ret = false; 899 goto label_return; 900 } 901 902 if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 903 " [%"PRIu64": %"PRIu64"] @", 904 ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, 905 ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { 906 ret = true; 907 goto label_return; 908 } 909 910 for (i = 0; i < bt->len; i++) { 911 if (prof_dump_printf(propagate_err, " %#"PRIxPTR, 912 (uintptr_t)bt->vec[i])) { 913 ret = true; 914 goto label_return; 915 } 916 } 917 918 if (prof_dump_write(propagate_err, "\n")) { 919 ret = true; 920 goto label_return; 921 } 922 923 ret = false; 924label_return: 925 prof_dump_ctx_cleanup_locked(ctx, ctx_ql); 926 malloc_mutex_unlock(ctx->lock); 927 return (ret); 928} 929 930static bool 931prof_dump_maps(bool propagate_err) 932{ 933 bool ret; 934 int mfd; 935 char filename[PATH_MAX + 1]; 936 937 cassert(config_prof);
| 1#define JEMALLOC_PROF_C_ 2#include "jemalloc/internal/jemalloc_internal.h" 3/******************************************************************************/ 4 5#ifdef JEMALLOC_PROF_LIBUNWIND 6#define UNW_LOCAL_ONLY 7#include <libunwind.h> 8#endif 9 10#ifdef JEMALLOC_PROF_LIBGCC 11#include <unwind.h> 12#endif 13 14/******************************************************************************/ 15/* Data. */ 16 17malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) 18 19bool opt_prof = false; 20bool opt_prof_active = true; 21size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; 22ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; 23bool opt_prof_gdump = false; 24bool opt_prof_final = true; 25bool opt_prof_leak = false; 26bool opt_prof_accum = false; 27char opt_prof_prefix[ 28 /* Minimize memory bloat for non-prof builds. */ 29#ifdef JEMALLOC_PROF 30 PATH_MAX + 31#endif 32 1]; 33 34uint64_t prof_interval = 0; 35bool prof_promote; 36 37/* 38 * Table of mutexes that are shared among ctx's. These are leaf locks, so 39 * there is no problem with using them for more than one ctx at the same time. 40 * The primary motivation for this sharing though is that ctx's are ephemeral, 41 * and destroying mutexes causes complications for systems that allocate when 42 * creating/destroying mutexes. 43 */ 44static malloc_mutex_t *ctx_locks; 45static unsigned cum_ctxs; /* Atomic counter. */ 46 47/* 48 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data 49 * structure that knows about all backtraces currently captured. 50 */ 51static ckh_t bt2ctx; 52static malloc_mutex_t bt2ctx_mtx; 53 54static malloc_mutex_t prof_dump_seq_mtx; 55static uint64_t prof_dump_seq; 56static uint64_t prof_dump_iseq; 57static uint64_t prof_dump_mseq; 58static uint64_t prof_dump_useq; 59 60/* 61 * This buffer is rather large for stack allocation, so use a single buffer for 62 * all profile dumps. 63 */ 64static malloc_mutex_t prof_dump_mtx; 65static char prof_dump_buf[ 66 /* Minimize memory bloat for non-prof builds. */ 67#ifdef JEMALLOC_PROF 68 PROF_DUMP_BUFSIZE 69#else 70 1 71#endif 72]; 73static unsigned prof_dump_buf_end; 74static int prof_dump_fd; 75 76/* Do not dump any profiles until bootstrapping is complete. */ 77static bool prof_booted = false; 78 79/******************************************************************************/ 80 81void 82bt_init(prof_bt_t *bt, void **vec) 83{ 84 85 cassert(config_prof); 86 87 bt->vec = vec; 88 bt->len = 0; 89} 90 91static void 92bt_destroy(prof_bt_t *bt) 93{ 94 95 cassert(config_prof); 96 97 idalloc(bt); 98} 99 100static prof_bt_t * 101bt_dup(prof_bt_t *bt) 102{ 103 prof_bt_t *ret; 104 105 cassert(config_prof); 106 107 /* 108 * Create a single allocation that has space for vec immediately 109 * following the prof_bt_t structure. The backtraces that get 110 * stored in the backtrace caches are copied from stack-allocated 111 * temporary variables, so size is known at creation time. Making this 112 * a contiguous object improves cache locality. 113 */ 114 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + 115 (bt->len * sizeof(void *))); 116 if (ret == NULL) 117 return (NULL); 118 ret->vec = (void **)((uintptr_t)ret + 119 QUANTUM_CEILING(sizeof(prof_bt_t))); 120 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); 121 ret->len = bt->len; 122 123 return (ret); 124} 125 126static inline void 127prof_enter(prof_tdata_t *prof_tdata) 128{ 129 130 cassert(config_prof); 131 132 assert(prof_tdata->enq == false); 133 prof_tdata->enq = true; 134 135 malloc_mutex_lock(&bt2ctx_mtx); 136} 137 138static inline void 139prof_leave(prof_tdata_t *prof_tdata) 140{ 141 bool idump, gdump; 142 143 cassert(config_prof); 144 145 malloc_mutex_unlock(&bt2ctx_mtx); 146 147 assert(prof_tdata->enq); 148 prof_tdata->enq = false; 149 idump = prof_tdata->enq_idump; 150 prof_tdata->enq_idump = false; 151 gdump = prof_tdata->enq_gdump; 152 prof_tdata->enq_gdump = false; 153 154 if (idump) 155 prof_idump(); 156 if (gdump) 157 prof_gdump(); 158} 159 160#ifdef JEMALLOC_PROF_LIBUNWIND 161void 162prof_backtrace(prof_bt_t *bt, unsigned nignore) 163{ 164 unw_context_t uc; 165 unw_cursor_t cursor; 166 unsigned i; 167 int err; 168 169 cassert(config_prof); 170 assert(bt->len == 0); 171 assert(bt->vec != NULL); 172 173 unw_getcontext(&uc); 174 unw_init_local(&cursor, &uc); 175 176 /* Throw away (nignore+1) stack frames, if that many exist. */ 177 for (i = 0; i < nignore + 1; i++) { 178 err = unw_step(&cursor); 179 if (err <= 0) 180 return; 181 } 182 183 /* 184 * Iterate over stack frames until there are no more, or until no space 185 * remains in bt. 186 */ 187 for (i = 0; i < PROF_BT_MAX; i++) { 188 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); 189 bt->len++; 190 err = unw_step(&cursor); 191 if (err <= 0) 192 break; 193 } 194} 195#elif (defined(JEMALLOC_PROF_LIBGCC)) 196static _Unwind_Reason_Code 197prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) 198{ 199 200 cassert(config_prof); 201 202 return (_URC_NO_REASON); 203} 204 205static _Unwind_Reason_Code 206prof_unwind_callback(struct _Unwind_Context *context, void *arg) 207{ 208 prof_unwind_data_t *data = (prof_unwind_data_t *)arg; 209 210 cassert(config_prof); 211 212 if (data->nignore > 0) 213 data->nignore--; 214 else { 215 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); 216 data->bt->len++; 217 if (data->bt->len == data->max) 218 return (_URC_END_OF_STACK); 219 } 220 221 return (_URC_NO_REASON); 222} 223 224void 225prof_backtrace(prof_bt_t *bt, unsigned nignore) 226{ 227 prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; 228 229 cassert(config_prof); 230 231 _Unwind_Backtrace(prof_unwind_callback, &data); 232} 233#elif (defined(JEMALLOC_PROF_GCC)) 234void 235prof_backtrace(prof_bt_t *bt, unsigned nignore) 236{ 237#define BT_FRAME(i) \ 238 if ((i) < nignore + PROF_BT_MAX) { \ 239 void *p; \ 240 if (__builtin_frame_address(i) == 0) \ 241 return; \ 242 p = __builtin_return_address(i); \ 243 if (p == NULL) \ 244 return; \ 245 if (i >= nignore) { \ 246 bt->vec[(i) - nignore] = p; \ 247 bt->len = (i) - nignore + 1; \ 248 } \ 249 } else \ 250 return; 251 252 cassert(config_prof); 253 assert(nignore <= 3); 254 255 BT_FRAME(0) 256 BT_FRAME(1) 257 BT_FRAME(2) 258 BT_FRAME(3) 259 BT_FRAME(4) 260 BT_FRAME(5) 261 BT_FRAME(6) 262 BT_FRAME(7) 263 BT_FRAME(8) 264 BT_FRAME(9) 265 266 BT_FRAME(10) 267 BT_FRAME(11) 268 BT_FRAME(12) 269 BT_FRAME(13) 270 BT_FRAME(14) 271 BT_FRAME(15) 272 BT_FRAME(16) 273 BT_FRAME(17) 274 BT_FRAME(18) 275 BT_FRAME(19) 276 277 BT_FRAME(20) 278 BT_FRAME(21) 279 BT_FRAME(22) 280 BT_FRAME(23) 281 BT_FRAME(24) 282 BT_FRAME(25) 283 BT_FRAME(26) 284 BT_FRAME(27) 285 BT_FRAME(28) 286 BT_FRAME(29) 287 288 BT_FRAME(30) 289 BT_FRAME(31) 290 BT_FRAME(32) 291 BT_FRAME(33) 292 BT_FRAME(34) 293 BT_FRAME(35) 294 BT_FRAME(36) 295 BT_FRAME(37) 296 BT_FRAME(38) 297 BT_FRAME(39) 298 299 BT_FRAME(40) 300 BT_FRAME(41) 301 BT_FRAME(42) 302 BT_FRAME(43) 303 BT_FRAME(44) 304 BT_FRAME(45) 305 BT_FRAME(46) 306 BT_FRAME(47) 307 BT_FRAME(48) 308 BT_FRAME(49) 309 310 BT_FRAME(50) 311 BT_FRAME(51) 312 BT_FRAME(52) 313 BT_FRAME(53) 314 BT_FRAME(54) 315 BT_FRAME(55) 316 BT_FRAME(56) 317 BT_FRAME(57) 318 BT_FRAME(58) 319 BT_FRAME(59) 320 321 BT_FRAME(60) 322 BT_FRAME(61) 323 BT_FRAME(62) 324 BT_FRAME(63) 325 BT_FRAME(64) 326 BT_FRAME(65) 327 BT_FRAME(66) 328 BT_FRAME(67) 329 BT_FRAME(68) 330 BT_FRAME(69) 331 332 BT_FRAME(70) 333 BT_FRAME(71) 334 BT_FRAME(72) 335 BT_FRAME(73) 336 BT_FRAME(74) 337 BT_FRAME(75) 338 BT_FRAME(76) 339 BT_FRAME(77) 340 BT_FRAME(78) 341 BT_FRAME(79) 342 343 BT_FRAME(80) 344 BT_FRAME(81) 345 BT_FRAME(82) 346 BT_FRAME(83) 347 BT_FRAME(84) 348 BT_FRAME(85) 349 BT_FRAME(86) 350 BT_FRAME(87) 351 BT_FRAME(88) 352 BT_FRAME(89) 353 354 BT_FRAME(90) 355 BT_FRAME(91) 356 BT_FRAME(92) 357 BT_FRAME(93) 358 BT_FRAME(94) 359 BT_FRAME(95) 360 BT_FRAME(96) 361 BT_FRAME(97) 362 BT_FRAME(98) 363 BT_FRAME(99) 364 365 BT_FRAME(100) 366 BT_FRAME(101) 367 BT_FRAME(102) 368 BT_FRAME(103) 369 BT_FRAME(104) 370 BT_FRAME(105) 371 BT_FRAME(106) 372 BT_FRAME(107) 373 BT_FRAME(108) 374 BT_FRAME(109) 375 376 BT_FRAME(110) 377 BT_FRAME(111) 378 BT_FRAME(112) 379 BT_FRAME(113) 380 BT_FRAME(114) 381 BT_FRAME(115) 382 BT_FRAME(116) 383 BT_FRAME(117) 384 BT_FRAME(118) 385 BT_FRAME(119) 386 387 BT_FRAME(120) 388 BT_FRAME(121) 389 BT_FRAME(122) 390 BT_FRAME(123) 391 BT_FRAME(124) 392 BT_FRAME(125) 393 BT_FRAME(126) 394 BT_FRAME(127) 395 396 /* Extras to compensate for nignore. */ 397 BT_FRAME(128) 398 BT_FRAME(129) 399 BT_FRAME(130) 400#undef BT_FRAME 401} 402#else 403void 404prof_backtrace(prof_bt_t *bt, unsigned nignore) 405{ 406 407 cassert(config_prof); 408 not_reached(); 409} 410#endif 411 412static malloc_mutex_t * 413prof_ctx_mutex_choose(void) 414{ 415 unsigned nctxs = atomic_add_u(&cum_ctxs, 1); 416 417 return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); 418} 419 420static void 421prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) 422{ 423 424 ctx->bt = bt; 425 ctx->lock = prof_ctx_mutex_choose(); 426 /* 427 * Set nlimbo to 1, in order to avoid a race condition with 428 * prof_ctx_merge()/prof_ctx_destroy(). 429 */ 430 ctx->nlimbo = 1; 431 ql_elm_new(ctx, dump_link); 432 memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); 433 ql_new(&ctx->cnts_ql); 434} 435 436static void 437prof_ctx_destroy(prof_ctx_t *ctx) 438{ 439 prof_tdata_t *prof_tdata; 440 441 cassert(config_prof); 442 443 /* 444 * Check that ctx is still unused by any thread cache before destroying 445 * it. prof_lookup() increments ctx->nlimbo in order to avoid a race 446 * condition with this function, as does prof_ctx_merge() in order to 447 * avoid a race between the main body of prof_ctx_merge() and entry 448 * into this function. 449 */ 450 prof_tdata = prof_tdata_get(false); 451 assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); 452 prof_enter(prof_tdata); 453 malloc_mutex_lock(ctx->lock); 454 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && 455 ctx->nlimbo == 1) { 456 assert(ctx->cnt_merged.curbytes == 0); 457 assert(ctx->cnt_merged.accumobjs == 0); 458 assert(ctx->cnt_merged.accumbytes == 0); 459 /* Remove ctx from bt2ctx. */ 460 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) 461 not_reached(); 462 prof_leave(prof_tdata); 463 /* Destroy ctx. */ 464 malloc_mutex_unlock(ctx->lock); 465 bt_destroy(ctx->bt); 466 idalloc(ctx); 467 } else { 468 /* 469 * Compensate for increment in prof_ctx_merge() or 470 * prof_lookup(). 471 */ 472 ctx->nlimbo--; 473 malloc_mutex_unlock(ctx->lock); 474 prof_leave(prof_tdata); 475 } 476} 477 478static void 479prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) 480{ 481 bool destroy; 482 483 cassert(config_prof); 484 485 /* Merge cnt stats and detach from ctx. */ 486 malloc_mutex_lock(ctx->lock); 487 ctx->cnt_merged.curobjs += cnt->cnts.curobjs; 488 ctx->cnt_merged.curbytes += cnt->cnts.curbytes; 489 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; 490 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; 491 ql_remove(&ctx->cnts_ql, cnt, cnts_link); 492 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && 493 ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { 494 /* 495 * Increment ctx->nlimbo in order to keep another thread from 496 * winning the race to destroy ctx while this one has ctx->lock 497 * dropped. Without this, it would be possible for another 498 * thread to: 499 * 500 * 1) Sample an allocation associated with ctx. 501 * 2) Deallocate the sampled object. 502 * 3) Successfully prof_ctx_destroy(ctx). 503 * 504 * The result would be that ctx no longer exists by the time 505 * this thread accesses it in prof_ctx_destroy(). 506 */ 507 ctx->nlimbo++; 508 destroy = true; 509 } else 510 destroy = false; 511 malloc_mutex_unlock(ctx->lock); 512 if (destroy) 513 prof_ctx_destroy(ctx); 514} 515 516static bool 517prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, 518 prof_ctx_t **p_ctx, bool *p_new_ctx) 519{ 520 union { 521 prof_ctx_t *p; 522 void *v; 523 } ctx; 524 union { 525 prof_bt_t *p; 526 void *v; 527 } btkey; 528 bool new_ctx; 529 530 prof_enter(prof_tdata); 531 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { 532 /* bt has never been seen before. Insert it. */ 533 ctx.v = imalloc(sizeof(prof_ctx_t)); 534 if (ctx.v == NULL) { 535 prof_leave(prof_tdata); 536 return (true); 537 } 538 btkey.p = bt_dup(bt); 539 if (btkey.v == NULL) { 540 prof_leave(prof_tdata); 541 idalloc(ctx.v); 542 return (true); 543 } 544 prof_ctx_init(ctx.p, btkey.p); 545 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { 546 /* OOM. */ 547 prof_leave(prof_tdata); 548 idalloc(btkey.v); 549 idalloc(ctx.v); 550 return (true); 551 } 552 new_ctx = true; 553 } else { 554 /* 555 * Increment nlimbo, in order to avoid a race condition with 556 * prof_ctx_merge()/prof_ctx_destroy(). 557 */ 558 malloc_mutex_lock(ctx.p->lock); 559 ctx.p->nlimbo++; 560 malloc_mutex_unlock(ctx.p->lock); 561 new_ctx = false; 562 } 563 prof_leave(prof_tdata); 564 565 *p_btkey = btkey.v; 566 *p_ctx = ctx.p; 567 *p_new_ctx = new_ctx; 568 return (false); 569} 570 571prof_thr_cnt_t * 572prof_lookup(prof_bt_t *bt) 573{ 574 union { 575 prof_thr_cnt_t *p; 576 void *v; 577 } ret; 578 prof_tdata_t *prof_tdata; 579 580 cassert(config_prof); 581 582 prof_tdata = prof_tdata_get(false); 583 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 584 return (NULL); 585 586 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { 587 void *btkey; 588 prof_ctx_t *ctx; 589 bool new_ctx; 590 591 /* 592 * This thread's cache lacks bt. Look for it in the global 593 * cache. 594 */ 595 if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) 596 return (NULL); 597 598 /* Link a prof_thd_cnt_t into ctx for this thread. */ 599 if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { 600 assert(ckh_count(&prof_tdata->bt2cnt) > 0); 601 /* 602 * Flush the least recently used cnt in order to keep 603 * bt2cnt from becoming too large. 604 */ 605 ret.p = ql_last(&prof_tdata->lru_ql, lru_link); 606 assert(ret.v != NULL); 607 if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, 608 NULL, NULL)) 609 not_reached(); 610 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); 611 prof_ctx_merge(ret.p->ctx, ret.p); 612 /* ret can now be re-used. */ 613 } else { 614 assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); 615 /* Allocate and partially initialize a new cnt. */ 616 ret.v = imalloc(sizeof(prof_thr_cnt_t)); 617 if (ret.p == NULL) { 618 if (new_ctx) 619 prof_ctx_destroy(ctx); 620 return (NULL); 621 } 622 ql_elm_new(ret.p, cnts_link); 623 ql_elm_new(ret.p, lru_link); 624 } 625 /* Finish initializing ret. */ 626 ret.p->ctx = ctx; 627 ret.p->epoch = 0; 628 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); 629 if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { 630 if (new_ctx) 631 prof_ctx_destroy(ctx); 632 idalloc(ret.v); 633 return (NULL); 634 } 635 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); 636 malloc_mutex_lock(ctx->lock); 637 ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); 638 ctx->nlimbo--; 639 malloc_mutex_unlock(ctx->lock); 640 } else { 641 /* Move ret to the front of the LRU. */ 642 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); 643 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); 644 } 645 646 return (ret.p); 647} 648 649#ifdef JEMALLOC_JET 650size_t 651prof_bt_count(void) 652{ 653 size_t bt_count; 654 prof_tdata_t *prof_tdata; 655 656 prof_tdata = prof_tdata_get(false); 657 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 658 return (0); 659 660 prof_enter(prof_tdata); 661 bt_count = ckh_count(&bt2ctx); 662 prof_leave(prof_tdata); 663 664 return (bt_count); 665} 666#endif 667 668#ifdef JEMALLOC_JET 669#undef prof_dump_open 670#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) 671#endif 672static int 673prof_dump_open(bool propagate_err, const char *filename) 674{ 675 int fd; 676 677 fd = creat(filename, 0644); 678 if (fd == -1 && propagate_err == false) { 679 malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", 680 filename); 681 if (opt_abort) 682 abort(); 683 } 684 685 return (fd); 686} 687#ifdef JEMALLOC_JET 688#undef prof_dump_open 689#define prof_dump_open JEMALLOC_N(prof_dump_open) 690prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); 691#endif 692 693static bool 694prof_dump_flush(bool propagate_err) 695{ 696 bool ret = false; 697 ssize_t err; 698 699 cassert(config_prof); 700 701 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); 702 if (err == -1) { 703 if (propagate_err == false) { 704 malloc_write("<jemalloc>: write() failed during heap " 705 "profile flush\n"); 706 if (opt_abort) 707 abort(); 708 } 709 ret = true; 710 } 711 prof_dump_buf_end = 0; 712 713 return (ret); 714} 715 716static bool 717prof_dump_close(bool propagate_err) 718{ 719 bool ret; 720 721 assert(prof_dump_fd != -1); 722 ret = prof_dump_flush(propagate_err); 723 close(prof_dump_fd); 724 prof_dump_fd = -1; 725 726 return (ret); 727} 728 729static bool 730prof_dump_write(bool propagate_err, const char *s) 731{ 732 unsigned i, slen, n; 733 734 cassert(config_prof); 735 736 i = 0; 737 slen = strlen(s); 738 while (i < slen) { 739 /* Flush the buffer if it is full. */ 740 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) 741 if (prof_dump_flush(propagate_err) && propagate_err) 742 return (true); 743 744 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { 745 /* Finish writing. */ 746 n = slen - i; 747 } else { 748 /* Write as much of s as will fit. */ 749 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; 750 } 751 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); 752 prof_dump_buf_end += n; 753 i += n; 754 } 755 756 return (false); 757} 758 759JEMALLOC_ATTR(format(printf, 2, 3)) 760static bool 761prof_dump_printf(bool propagate_err, const char *format, ...) 762{ 763 bool ret; 764 va_list ap; 765 char buf[PROF_PRINTF_BUFSIZE]; 766 767 va_start(ap, format); 768 malloc_vsnprintf(buf, sizeof(buf), format, ap); 769 va_end(ap); 770 ret = prof_dump_write(propagate_err, buf); 771 772 return (ret); 773} 774 775static void 776prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, 777 prof_ctx_list_t *ctx_ql) 778{ 779 prof_thr_cnt_t *thr_cnt; 780 prof_cnt_t tcnt; 781 782 cassert(config_prof); 783 784 malloc_mutex_lock(ctx->lock); 785 786 /* 787 * Increment nlimbo so that ctx won't go away before dump. 788 * Additionally, link ctx into the dump list so that it is included in 789 * prof_dump()'s second pass. 790 */ 791 ctx->nlimbo++; 792 ql_tail_insert(ctx_ql, ctx, dump_link); 793 794 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); 795 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { 796 volatile unsigned *epoch = &thr_cnt->epoch; 797 798 while (true) { 799 unsigned epoch0 = *epoch; 800 801 /* Make sure epoch is even. */ 802 if (epoch0 & 1U) 803 continue; 804 805 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); 806 807 /* Terminate if epoch didn't change while reading. */ 808 if (*epoch == epoch0) 809 break; 810 } 811 812 ctx->cnt_summed.curobjs += tcnt.curobjs; 813 ctx->cnt_summed.curbytes += tcnt.curbytes; 814 if (opt_prof_accum) { 815 ctx->cnt_summed.accumobjs += tcnt.accumobjs; 816 ctx->cnt_summed.accumbytes += tcnt.accumbytes; 817 } 818 } 819 820 if (ctx->cnt_summed.curobjs != 0) 821 (*leak_nctx)++; 822 823 /* Add to cnt_all. */ 824 cnt_all->curobjs += ctx->cnt_summed.curobjs; 825 cnt_all->curbytes += ctx->cnt_summed.curbytes; 826 if (opt_prof_accum) { 827 cnt_all->accumobjs += ctx->cnt_summed.accumobjs; 828 cnt_all->accumbytes += ctx->cnt_summed.accumbytes; 829 } 830 831 malloc_mutex_unlock(ctx->lock); 832} 833 834static bool 835prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) 836{ 837 838 if (opt_lg_prof_sample == 0) { 839 if (prof_dump_printf(propagate_err, 840 "heap profile: %"PRId64": %"PRId64 841 " [%"PRIu64": %"PRIu64"] @ heapprofile\n", 842 cnt_all->curobjs, cnt_all->curbytes, 843 cnt_all->accumobjs, cnt_all->accumbytes)) 844 return (true); 845 } else { 846 if (prof_dump_printf(propagate_err, 847 "heap profile: %"PRId64": %"PRId64 848 " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", 849 cnt_all->curobjs, cnt_all->curbytes, 850 cnt_all->accumobjs, cnt_all->accumbytes, 851 ((uint64_t)1U << opt_lg_prof_sample))) 852 return (true); 853 } 854 855 return (false); 856} 857 858static void 859prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) 860{ 861 862 ctx->nlimbo--; 863 ql_remove(ctx_ql, ctx, dump_link); 864} 865 866static void 867prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) 868{ 869 870 malloc_mutex_lock(ctx->lock); 871 prof_dump_ctx_cleanup_locked(ctx, ctx_ql); 872 malloc_mutex_unlock(ctx->lock); 873} 874 875static bool 876prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, 877 prof_ctx_list_t *ctx_ql) 878{ 879 bool ret; 880 unsigned i; 881 882 cassert(config_prof); 883 884 /* 885 * Current statistics can sum to 0 as a result of unmerged per thread 886 * statistics. Additionally, interval- and growth-triggered dumps can 887 * occur between the time a ctx is created and when its statistics are 888 * filled in. Avoid dumping any ctx that is an artifact of either 889 * implementation detail. 890 */ 891 malloc_mutex_lock(ctx->lock); 892 if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || 893 (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { 894 assert(ctx->cnt_summed.curobjs == 0); 895 assert(ctx->cnt_summed.curbytes == 0); 896 assert(ctx->cnt_summed.accumobjs == 0); 897 assert(ctx->cnt_summed.accumbytes == 0); 898 ret = false; 899 goto label_return; 900 } 901 902 if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 903 " [%"PRIu64": %"PRIu64"] @", 904 ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, 905 ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { 906 ret = true; 907 goto label_return; 908 } 909 910 for (i = 0; i < bt->len; i++) { 911 if (prof_dump_printf(propagate_err, " %#"PRIxPTR, 912 (uintptr_t)bt->vec[i])) { 913 ret = true; 914 goto label_return; 915 } 916 } 917 918 if (prof_dump_write(propagate_err, "\n")) { 919 ret = true; 920 goto label_return; 921 } 922 923 ret = false; 924label_return: 925 prof_dump_ctx_cleanup_locked(ctx, ctx_ql); 926 malloc_mutex_unlock(ctx->lock); 927 return (ret); 928} 929 930static bool 931prof_dump_maps(bool propagate_err) 932{ 933 bool ret; 934 int mfd; 935 char filename[PATH_MAX + 1]; 936 937 cassert(config_prof);
|
941 mfd = open(filename, O_RDONLY); 942 if (mfd != -1) { 943 ssize_t nread; 944 945 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && 946 propagate_err) { 947 ret = true; 948 goto label_return; 949 } 950 nread = 0; 951 do { 952 prof_dump_buf_end += nread; 953 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 954 /* Make space in prof_dump_buf before read(). */ 955 if (prof_dump_flush(propagate_err) && 956 propagate_err) { 957 ret = true; 958 goto label_return; 959 } 960 } 961 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], 962 PROF_DUMP_BUFSIZE - prof_dump_buf_end); 963 } while (nread > 0); 964 } else { 965 ret = true; 966 goto label_return; 967 } 968 969 ret = false; 970label_return: 971 if (mfd != -1) 972 close(mfd); 973 return (ret); 974} 975 976static void 977prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, 978 const char *filename) 979{ 980 981 if (cnt_all->curbytes != 0) { 982 malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" 983 PRId64" object%s, %zu context%s\n", 984 cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", 985 cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", 986 leak_nctx, (leak_nctx != 1) ? "s" : ""); 987 malloc_printf( 988 "<jemalloc>: Run pprof on \"%s\" for leak detail\n", 989 filename); 990 } 991} 992 993static bool 994prof_dump(bool propagate_err, const char *filename, bool leakcheck) 995{ 996 prof_tdata_t *prof_tdata; 997 prof_cnt_t cnt_all; 998 size_t tabind; 999 union { 1000 prof_ctx_t *p; 1001 void *v; 1002 } ctx; 1003 size_t leak_nctx; 1004 prof_ctx_list_t ctx_ql; 1005 1006 cassert(config_prof); 1007 1008 prof_tdata = prof_tdata_get(false); 1009 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1010 return (true); 1011 1012 malloc_mutex_lock(&prof_dump_mtx); 1013 1014 /* Merge per thread profile stats, and sum them in cnt_all. */ 1015 memset(&cnt_all, 0, sizeof(prof_cnt_t)); 1016 leak_nctx = 0; 1017 ql_new(&ctx_ql); 1018 prof_enter(prof_tdata); 1019 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) 1020 prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); 1021 prof_leave(prof_tdata); 1022 1023 /* Create dump file. */ 1024 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) 1025 goto label_open_close_error; 1026 1027 /* Dump profile header. */ 1028 if (prof_dump_header(propagate_err, &cnt_all)) 1029 goto label_write_error; 1030 1031 /* Dump per ctx profile stats. */ 1032 while ((ctx.p = ql_first(&ctx_ql)) != NULL) { 1033 if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) 1034 goto label_write_error; 1035 } 1036 1037 /* Dump /proc/<pid>/maps if possible. */ 1038 if (prof_dump_maps(propagate_err)) 1039 goto label_write_error; 1040 1041 if (prof_dump_close(propagate_err)) 1042 goto label_open_close_error; 1043 1044 malloc_mutex_unlock(&prof_dump_mtx); 1045 1046 if (leakcheck) 1047 prof_leakcheck(&cnt_all, leak_nctx, filename); 1048 1049 return (false); 1050label_write_error: 1051 prof_dump_close(propagate_err); 1052label_open_close_error: 1053 while ((ctx.p = ql_first(&ctx_ql)) != NULL) 1054 prof_dump_ctx_cleanup(ctx.p, &ctx_ql); 1055 malloc_mutex_unlock(&prof_dump_mtx); 1056 return (true); 1057} 1058 1059#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) 1060#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) 1061static void 1062prof_dump_filename(char *filename, char v, int64_t vseq) 1063{ 1064 1065 cassert(config_prof); 1066 1067 if (vseq != VSEQ_INVALID) { 1068 /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ 1069 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1070 "%s.%d.%"PRIu64".%c%"PRId64".heap", 1071 opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); 1072 } else { 1073 /* "<prefix>.<pid>.<seq>.<v>.heap" */ 1074 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1075 "%s.%d.%"PRIu64".%c.heap", 1076 opt_prof_prefix, (int)getpid(), prof_dump_seq, v); 1077 } 1078 prof_dump_seq++; 1079} 1080 1081static void 1082prof_fdump(void) 1083{ 1084 char filename[DUMP_FILENAME_BUFSIZE]; 1085 1086 cassert(config_prof); 1087 1088 if (prof_booted == false) 1089 return; 1090 1091 if (opt_prof_final && opt_prof_prefix[0] != '\0') { 1092 malloc_mutex_lock(&prof_dump_seq_mtx); 1093 prof_dump_filename(filename, 'f', VSEQ_INVALID); 1094 malloc_mutex_unlock(&prof_dump_seq_mtx); 1095 prof_dump(false, filename, opt_prof_leak); 1096 } 1097} 1098 1099void 1100prof_idump(void) 1101{ 1102 prof_tdata_t *prof_tdata; 1103 char filename[PATH_MAX + 1]; 1104 1105 cassert(config_prof); 1106 1107 if (prof_booted == false) 1108 return; 1109 prof_tdata = prof_tdata_get(false); 1110 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1111 return; 1112 if (prof_tdata->enq) { 1113 prof_tdata->enq_idump = true; 1114 return; 1115 } 1116 1117 if (opt_prof_prefix[0] != '\0') { 1118 malloc_mutex_lock(&prof_dump_seq_mtx); 1119 prof_dump_filename(filename, 'i', prof_dump_iseq); 1120 prof_dump_iseq++; 1121 malloc_mutex_unlock(&prof_dump_seq_mtx); 1122 prof_dump(false, filename, false); 1123 } 1124} 1125 1126bool 1127prof_mdump(const char *filename) 1128{ 1129 char filename_buf[DUMP_FILENAME_BUFSIZE]; 1130 1131 cassert(config_prof); 1132 1133 if (opt_prof == false || prof_booted == false) 1134 return (true); 1135 1136 if (filename == NULL) { 1137 /* No filename specified, so automatically generate one. */ 1138 if (opt_prof_prefix[0] == '\0') 1139 return (true); 1140 malloc_mutex_lock(&prof_dump_seq_mtx); 1141 prof_dump_filename(filename_buf, 'm', prof_dump_mseq); 1142 prof_dump_mseq++; 1143 malloc_mutex_unlock(&prof_dump_seq_mtx); 1144 filename = filename_buf; 1145 } 1146 return (prof_dump(true, filename, false)); 1147} 1148 1149void 1150prof_gdump(void) 1151{ 1152 prof_tdata_t *prof_tdata; 1153 char filename[DUMP_FILENAME_BUFSIZE]; 1154 1155 cassert(config_prof); 1156 1157 if (prof_booted == false) 1158 return; 1159 prof_tdata = prof_tdata_get(false); 1160 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1161 return; 1162 if (prof_tdata->enq) { 1163 prof_tdata->enq_gdump = true; 1164 return; 1165 } 1166 1167 if (opt_prof_prefix[0] != '\0') { 1168 malloc_mutex_lock(&prof_dump_seq_mtx); 1169 prof_dump_filename(filename, 'u', prof_dump_useq); 1170 prof_dump_useq++; 1171 malloc_mutex_unlock(&prof_dump_seq_mtx); 1172 prof_dump(false, filename, false); 1173 } 1174} 1175 1176static void 1177prof_bt_hash(const void *key, size_t r_hash[2]) 1178{ 1179 prof_bt_t *bt = (prof_bt_t *)key; 1180 1181 cassert(config_prof); 1182 1183 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); 1184} 1185 1186static bool 1187prof_bt_keycomp(const void *k1, const void *k2) 1188{ 1189 const prof_bt_t *bt1 = (prof_bt_t *)k1; 1190 const prof_bt_t *bt2 = (prof_bt_t *)k2; 1191 1192 cassert(config_prof); 1193 1194 if (bt1->len != bt2->len) 1195 return (false); 1196 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); 1197} 1198 1199prof_tdata_t * 1200prof_tdata_init(void) 1201{ 1202 prof_tdata_t *prof_tdata; 1203 1204 cassert(config_prof); 1205 1206 /* Initialize an empty cache for this thread. */ 1207 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); 1208 if (prof_tdata == NULL) 1209 return (NULL); 1210 1211 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, 1212 prof_bt_hash, prof_bt_keycomp)) { 1213 idalloc(prof_tdata); 1214 return (NULL); 1215 } 1216 ql_new(&prof_tdata->lru_ql); 1217 1218 prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); 1219 if (prof_tdata->vec == NULL) { 1220 ckh_delete(&prof_tdata->bt2cnt); 1221 idalloc(prof_tdata); 1222 return (NULL); 1223 } 1224 1225 prof_tdata->prng_state = 0; 1226 prof_tdata->threshold = 0; 1227 prof_tdata->accum = 0; 1228 1229 prof_tdata->enq = false; 1230 prof_tdata->enq_idump = false; 1231 prof_tdata->enq_gdump = false; 1232 1233 prof_tdata_tsd_set(&prof_tdata); 1234 1235 return (prof_tdata); 1236} 1237 1238void 1239prof_tdata_cleanup(void *arg) 1240{ 1241 prof_thr_cnt_t *cnt; 1242 prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; 1243 1244 cassert(config_prof); 1245 1246 if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { 1247 /* 1248 * Another destructor deallocated memory after this destructor 1249 * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY 1250 * in order to receive another callback. 1251 */ 1252 prof_tdata = PROF_TDATA_STATE_PURGATORY; 1253 prof_tdata_tsd_set(&prof_tdata); 1254 } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { 1255 /* 1256 * The previous time this destructor was called, we set the key 1257 * to PROF_TDATA_STATE_PURGATORY so that other destructors 1258 * wouldn't cause re-creation of the prof_tdata. This time, do 1259 * nothing, so that the destructor will not be called again. 1260 */ 1261 } else if (prof_tdata != NULL) { 1262 /* 1263 * Delete the hash table. All of its contents can still be 1264 * iterated over via the LRU. 1265 */ 1266 ckh_delete(&prof_tdata->bt2cnt); 1267 /* 1268 * Iteratively merge cnt's into the global stats and delete 1269 * them. 1270 */ 1271 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { 1272 ql_remove(&prof_tdata->lru_ql, cnt, lru_link); 1273 prof_ctx_merge(cnt->ctx, cnt); 1274 idalloc(cnt); 1275 } 1276 idalloc(prof_tdata->vec); 1277 idalloc(prof_tdata); 1278 prof_tdata = PROF_TDATA_STATE_PURGATORY; 1279 prof_tdata_tsd_set(&prof_tdata); 1280 } 1281} 1282 1283void 1284prof_boot0(void) 1285{ 1286 1287 cassert(config_prof); 1288 1289 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, 1290 sizeof(PROF_PREFIX_DEFAULT)); 1291} 1292 1293void 1294prof_boot1(void) 1295{ 1296 1297 cassert(config_prof); 1298 1299 /* 1300 * opt_prof and prof_promote must be in their final state before any 1301 * arenas are initialized, so this function must be executed early. 1302 */ 1303 1304 if (opt_prof_leak && opt_prof == false) { 1305 /* 1306 * Enable opt_prof, but in such a way that profiles are never 1307 * automatically dumped. 1308 */ 1309 opt_prof = true; 1310 opt_prof_gdump = false; 1311 } else if (opt_prof) { 1312 if (opt_lg_prof_interval >= 0) { 1313 prof_interval = (((uint64_t)1U) << 1314 opt_lg_prof_interval); 1315 } 1316 } 1317 1318 prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); 1319} 1320 1321bool 1322prof_boot2(void) 1323{ 1324 1325 cassert(config_prof); 1326 1327 if (opt_prof) { 1328 unsigned i; 1329 1330 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, 1331 prof_bt_keycomp)) 1332 return (true); 1333 if (malloc_mutex_init(&bt2ctx_mtx)) 1334 return (true); 1335 if (prof_tdata_tsd_boot()) { 1336 malloc_write( 1337 "<jemalloc>: Error in pthread_key_create()\n"); 1338 abort(); 1339 } 1340 1341 if (malloc_mutex_init(&prof_dump_seq_mtx)) 1342 return (true); 1343 if (malloc_mutex_init(&prof_dump_mtx)) 1344 return (true); 1345 1346 if (atexit(prof_fdump) != 0) { 1347 malloc_write("<jemalloc>: Error in atexit()\n"); 1348 if (opt_abort) 1349 abort(); 1350 } 1351 1352 ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * 1353 sizeof(malloc_mutex_t)); 1354 if (ctx_locks == NULL) 1355 return (true); 1356 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 1357 if (malloc_mutex_init(&ctx_locks[i])) 1358 return (true); 1359 } 1360 } 1361 1362#ifdef JEMALLOC_PROF_LIBGCC 1363 /* 1364 * Cause the backtracing machinery to allocate its internal state 1365 * before enabling profiling. 1366 */ 1367 _Unwind_Backtrace(prof_unwind_init_callback, NULL); 1368#endif 1369 1370 prof_booted = true; 1371 1372 return (false); 1373} 1374 1375void 1376prof_prefork(void) 1377{ 1378 1379 if (opt_prof) { 1380 unsigned i; 1381 1382 malloc_mutex_prefork(&bt2ctx_mtx); 1383 malloc_mutex_prefork(&prof_dump_seq_mtx); 1384 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1385 malloc_mutex_prefork(&ctx_locks[i]); 1386 } 1387} 1388 1389void 1390prof_postfork_parent(void) 1391{ 1392 1393 if (opt_prof) { 1394 unsigned i; 1395 1396 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1397 malloc_mutex_postfork_parent(&ctx_locks[i]); 1398 malloc_mutex_postfork_parent(&prof_dump_seq_mtx); 1399 malloc_mutex_postfork_parent(&bt2ctx_mtx); 1400 } 1401} 1402 1403void 1404prof_postfork_child(void) 1405{ 1406 1407 if (opt_prof) { 1408 unsigned i; 1409 1410 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1411 malloc_mutex_postfork_child(&ctx_locks[i]); 1412 malloc_mutex_postfork_child(&prof_dump_seq_mtx); 1413 malloc_mutex_postfork_child(&bt2ctx_mtx); 1414 } 1415} 1416 1417/******************************************************************************/
| 944 mfd = open(filename, O_RDONLY); 945 if (mfd != -1) { 946 ssize_t nread; 947 948 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && 949 propagate_err) { 950 ret = true; 951 goto label_return; 952 } 953 nread = 0; 954 do { 955 prof_dump_buf_end += nread; 956 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 957 /* Make space in prof_dump_buf before read(). */ 958 if (prof_dump_flush(propagate_err) && 959 propagate_err) { 960 ret = true; 961 goto label_return; 962 } 963 } 964 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], 965 PROF_DUMP_BUFSIZE - prof_dump_buf_end); 966 } while (nread > 0); 967 } else { 968 ret = true; 969 goto label_return; 970 } 971 972 ret = false; 973label_return: 974 if (mfd != -1) 975 close(mfd); 976 return (ret); 977} 978 979static void 980prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, 981 const char *filename) 982{ 983 984 if (cnt_all->curbytes != 0) { 985 malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" 986 PRId64" object%s, %zu context%s\n", 987 cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", 988 cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", 989 leak_nctx, (leak_nctx != 1) ? "s" : ""); 990 malloc_printf( 991 "<jemalloc>: Run pprof on \"%s\" for leak detail\n", 992 filename); 993 } 994} 995 996static bool 997prof_dump(bool propagate_err, const char *filename, bool leakcheck) 998{ 999 prof_tdata_t *prof_tdata; 1000 prof_cnt_t cnt_all; 1001 size_t tabind; 1002 union { 1003 prof_ctx_t *p; 1004 void *v; 1005 } ctx; 1006 size_t leak_nctx; 1007 prof_ctx_list_t ctx_ql; 1008 1009 cassert(config_prof); 1010 1011 prof_tdata = prof_tdata_get(false); 1012 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1013 return (true); 1014 1015 malloc_mutex_lock(&prof_dump_mtx); 1016 1017 /* Merge per thread profile stats, and sum them in cnt_all. */ 1018 memset(&cnt_all, 0, sizeof(prof_cnt_t)); 1019 leak_nctx = 0; 1020 ql_new(&ctx_ql); 1021 prof_enter(prof_tdata); 1022 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) 1023 prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); 1024 prof_leave(prof_tdata); 1025 1026 /* Create dump file. */ 1027 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) 1028 goto label_open_close_error; 1029 1030 /* Dump profile header. */ 1031 if (prof_dump_header(propagate_err, &cnt_all)) 1032 goto label_write_error; 1033 1034 /* Dump per ctx profile stats. */ 1035 while ((ctx.p = ql_first(&ctx_ql)) != NULL) { 1036 if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) 1037 goto label_write_error; 1038 } 1039 1040 /* Dump /proc/<pid>/maps if possible. */ 1041 if (prof_dump_maps(propagate_err)) 1042 goto label_write_error; 1043 1044 if (prof_dump_close(propagate_err)) 1045 goto label_open_close_error; 1046 1047 malloc_mutex_unlock(&prof_dump_mtx); 1048 1049 if (leakcheck) 1050 prof_leakcheck(&cnt_all, leak_nctx, filename); 1051 1052 return (false); 1053label_write_error: 1054 prof_dump_close(propagate_err); 1055label_open_close_error: 1056 while ((ctx.p = ql_first(&ctx_ql)) != NULL) 1057 prof_dump_ctx_cleanup(ctx.p, &ctx_ql); 1058 malloc_mutex_unlock(&prof_dump_mtx); 1059 return (true); 1060} 1061 1062#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) 1063#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) 1064static void 1065prof_dump_filename(char *filename, char v, int64_t vseq) 1066{ 1067 1068 cassert(config_prof); 1069 1070 if (vseq != VSEQ_INVALID) { 1071 /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ 1072 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1073 "%s.%d.%"PRIu64".%c%"PRId64".heap", 1074 opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); 1075 } else { 1076 /* "<prefix>.<pid>.<seq>.<v>.heap" */ 1077 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1078 "%s.%d.%"PRIu64".%c.heap", 1079 opt_prof_prefix, (int)getpid(), prof_dump_seq, v); 1080 } 1081 prof_dump_seq++; 1082} 1083 1084static void 1085prof_fdump(void) 1086{ 1087 char filename[DUMP_FILENAME_BUFSIZE]; 1088 1089 cassert(config_prof); 1090 1091 if (prof_booted == false) 1092 return; 1093 1094 if (opt_prof_final && opt_prof_prefix[0] != '\0') { 1095 malloc_mutex_lock(&prof_dump_seq_mtx); 1096 prof_dump_filename(filename, 'f', VSEQ_INVALID); 1097 malloc_mutex_unlock(&prof_dump_seq_mtx); 1098 prof_dump(false, filename, opt_prof_leak); 1099 } 1100} 1101 1102void 1103prof_idump(void) 1104{ 1105 prof_tdata_t *prof_tdata; 1106 char filename[PATH_MAX + 1]; 1107 1108 cassert(config_prof); 1109 1110 if (prof_booted == false) 1111 return; 1112 prof_tdata = prof_tdata_get(false); 1113 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1114 return; 1115 if (prof_tdata->enq) { 1116 prof_tdata->enq_idump = true; 1117 return; 1118 } 1119 1120 if (opt_prof_prefix[0] != '\0') { 1121 malloc_mutex_lock(&prof_dump_seq_mtx); 1122 prof_dump_filename(filename, 'i', prof_dump_iseq); 1123 prof_dump_iseq++; 1124 malloc_mutex_unlock(&prof_dump_seq_mtx); 1125 prof_dump(false, filename, false); 1126 } 1127} 1128 1129bool 1130prof_mdump(const char *filename) 1131{ 1132 char filename_buf[DUMP_FILENAME_BUFSIZE]; 1133 1134 cassert(config_prof); 1135 1136 if (opt_prof == false || prof_booted == false) 1137 return (true); 1138 1139 if (filename == NULL) { 1140 /* No filename specified, so automatically generate one. */ 1141 if (opt_prof_prefix[0] == '\0') 1142 return (true); 1143 malloc_mutex_lock(&prof_dump_seq_mtx); 1144 prof_dump_filename(filename_buf, 'm', prof_dump_mseq); 1145 prof_dump_mseq++; 1146 malloc_mutex_unlock(&prof_dump_seq_mtx); 1147 filename = filename_buf; 1148 } 1149 return (prof_dump(true, filename, false)); 1150} 1151 1152void 1153prof_gdump(void) 1154{ 1155 prof_tdata_t *prof_tdata; 1156 char filename[DUMP_FILENAME_BUFSIZE]; 1157 1158 cassert(config_prof); 1159 1160 if (prof_booted == false) 1161 return; 1162 prof_tdata = prof_tdata_get(false); 1163 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 1164 return; 1165 if (prof_tdata->enq) { 1166 prof_tdata->enq_gdump = true; 1167 return; 1168 } 1169 1170 if (opt_prof_prefix[0] != '\0') { 1171 malloc_mutex_lock(&prof_dump_seq_mtx); 1172 prof_dump_filename(filename, 'u', prof_dump_useq); 1173 prof_dump_useq++; 1174 malloc_mutex_unlock(&prof_dump_seq_mtx); 1175 prof_dump(false, filename, false); 1176 } 1177} 1178 1179static void 1180prof_bt_hash(const void *key, size_t r_hash[2]) 1181{ 1182 prof_bt_t *bt = (prof_bt_t *)key; 1183 1184 cassert(config_prof); 1185 1186 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); 1187} 1188 1189static bool 1190prof_bt_keycomp(const void *k1, const void *k2) 1191{ 1192 const prof_bt_t *bt1 = (prof_bt_t *)k1; 1193 const prof_bt_t *bt2 = (prof_bt_t *)k2; 1194 1195 cassert(config_prof); 1196 1197 if (bt1->len != bt2->len) 1198 return (false); 1199 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); 1200} 1201 1202prof_tdata_t * 1203prof_tdata_init(void) 1204{ 1205 prof_tdata_t *prof_tdata; 1206 1207 cassert(config_prof); 1208 1209 /* Initialize an empty cache for this thread. */ 1210 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); 1211 if (prof_tdata == NULL) 1212 return (NULL); 1213 1214 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, 1215 prof_bt_hash, prof_bt_keycomp)) { 1216 idalloc(prof_tdata); 1217 return (NULL); 1218 } 1219 ql_new(&prof_tdata->lru_ql); 1220 1221 prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); 1222 if (prof_tdata->vec == NULL) { 1223 ckh_delete(&prof_tdata->bt2cnt); 1224 idalloc(prof_tdata); 1225 return (NULL); 1226 } 1227 1228 prof_tdata->prng_state = 0; 1229 prof_tdata->threshold = 0; 1230 prof_tdata->accum = 0; 1231 1232 prof_tdata->enq = false; 1233 prof_tdata->enq_idump = false; 1234 prof_tdata->enq_gdump = false; 1235 1236 prof_tdata_tsd_set(&prof_tdata); 1237 1238 return (prof_tdata); 1239} 1240 1241void 1242prof_tdata_cleanup(void *arg) 1243{ 1244 prof_thr_cnt_t *cnt; 1245 prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; 1246 1247 cassert(config_prof); 1248 1249 if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { 1250 /* 1251 * Another destructor deallocated memory after this destructor 1252 * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY 1253 * in order to receive another callback. 1254 */ 1255 prof_tdata = PROF_TDATA_STATE_PURGATORY; 1256 prof_tdata_tsd_set(&prof_tdata); 1257 } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { 1258 /* 1259 * The previous time this destructor was called, we set the key 1260 * to PROF_TDATA_STATE_PURGATORY so that other destructors 1261 * wouldn't cause re-creation of the prof_tdata. This time, do 1262 * nothing, so that the destructor will not be called again. 1263 */ 1264 } else if (prof_tdata != NULL) { 1265 /* 1266 * Delete the hash table. All of its contents can still be 1267 * iterated over via the LRU. 1268 */ 1269 ckh_delete(&prof_tdata->bt2cnt); 1270 /* 1271 * Iteratively merge cnt's into the global stats and delete 1272 * them. 1273 */ 1274 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { 1275 ql_remove(&prof_tdata->lru_ql, cnt, lru_link); 1276 prof_ctx_merge(cnt->ctx, cnt); 1277 idalloc(cnt); 1278 } 1279 idalloc(prof_tdata->vec); 1280 idalloc(prof_tdata); 1281 prof_tdata = PROF_TDATA_STATE_PURGATORY; 1282 prof_tdata_tsd_set(&prof_tdata); 1283 } 1284} 1285 1286void 1287prof_boot0(void) 1288{ 1289 1290 cassert(config_prof); 1291 1292 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, 1293 sizeof(PROF_PREFIX_DEFAULT)); 1294} 1295 1296void 1297prof_boot1(void) 1298{ 1299 1300 cassert(config_prof); 1301 1302 /* 1303 * opt_prof and prof_promote must be in their final state before any 1304 * arenas are initialized, so this function must be executed early. 1305 */ 1306 1307 if (opt_prof_leak && opt_prof == false) { 1308 /* 1309 * Enable opt_prof, but in such a way that profiles are never 1310 * automatically dumped. 1311 */ 1312 opt_prof = true; 1313 opt_prof_gdump = false; 1314 } else if (opt_prof) { 1315 if (opt_lg_prof_interval >= 0) { 1316 prof_interval = (((uint64_t)1U) << 1317 opt_lg_prof_interval); 1318 } 1319 } 1320 1321 prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); 1322} 1323 1324bool 1325prof_boot2(void) 1326{ 1327 1328 cassert(config_prof); 1329 1330 if (opt_prof) { 1331 unsigned i; 1332 1333 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, 1334 prof_bt_keycomp)) 1335 return (true); 1336 if (malloc_mutex_init(&bt2ctx_mtx)) 1337 return (true); 1338 if (prof_tdata_tsd_boot()) { 1339 malloc_write( 1340 "<jemalloc>: Error in pthread_key_create()\n"); 1341 abort(); 1342 } 1343 1344 if (malloc_mutex_init(&prof_dump_seq_mtx)) 1345 return (true); 1346 if (malloc_mutex_init(&prof_dump_mtx)) 1347 return (true); 1348 1349 if (atexit(prof_fdump) != 0) { 1350 malloc_write("<jemalloc>: Error in atexit()\n"); 1351 if (opt_abort) 1352 abort(); 1353 } 1354 1355 ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * 1356 sizeof(malloc_mutex_t)); 1357 if (ctx_locks == NULL) 1358 return (true); 1359 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 1360 if (malloc_mutex_init(&ctx_locks[i])) 1361 return (true); 1362 } 1363 } 1364 1365#ifdef JEMALLOC_PROF_LIBGCC 1366 /* 1367 * Cause the backtracing machinery to allocate its internal state 1368 * before enabling profiling. 1369 */ 1370 _Unwind_Backtrace(prof_unwind_init_callback, NULL); 1371#endif 1372 1373 prof_booted = true; 1374 1375 return (false); 1376} 1377 1378void 1379prof_prefork(void) 1380{ 1381 1382 if (opt_prof) { 1383 unsigned i; 1384 1385 malloc_mutex_prefork(&bt2ctx_mtx); 1386 malloc_mutex_prefork(&prof_dump_seq_mtx); 1387 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1388 malloc_mutex_prefork(&ctx_locks[i]); 1389 } 1390} 1391 1392void 1393prof_postfork_parent(void) 1394{ 1395 1396 if (opt_prof) { 1397 unsigned i; 1398 1399 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1400 malloc_mutex_postfork_parent(&ctx_locks[i]); 1401 malloc_mutex_postfork_parent(&prof_dump_seq_mtx); 1402 malloc_mutex_postfork_parent(&bt2ctx_mtx); 1403 } 1404} 1405 1406void 1407prof_postfork_child(void) 1408{ 1409 1410 if (opt_prof) { 1411 unsigned i; 1412 1413 for (i = 0; i < PROF_NCTX_LOCKS; i++) 1414 malloc_mutex_postfork_child(&ctx_locks[i]); 1415 malloc_mutex_postfork_child(&prof_dump_seq_mtx); 1416 malloc_mutex_postfork_child(&bt2ctx_mtx); 1417 } 1418} 1419 1420/******************************************************************************/
|