prof.h revision 262521
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
# define PROF_PREFIX_DEFAULT "jeprof"
#else
# define PROF_PREFIX_DEFAULT ""
#endif
#define LG_PROF_SAMPLE_DEFAULT   19
#define LG_PROF_INTERVAL_DEFAULT -1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define PROF_BT_MAX		128

/* Maximum number of backtraces to store in each per-thread LRU cache. */
#define PROF_TCMAX		1024

/* Initial hash table size. */
#define PROF_CKH_MINITEMS	64

/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUFSIZE	65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE	128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NCTX_LOCKS		1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};
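/*
 * Concrete instance of the above: if thread A allocates an object (A's
 * curobjs: 0 -> 1) and thread B later frees it (B's curobjs: 0 -> -1), each
 * thread updated its own prof_thr_cnt_t, and the totals only balance once
 * both counters are merged into the shared prof_ctx_t.
 */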
struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
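/*
 * A minimal sketch of the reader side of the epoch protocol described above;
 * the real reader lives in prof.c, so the variable names here are
 * illustrative only:
 *
 *	prof_cnt_t tcnt;
 *	unsigned epoch0;
 *	do {
 *		epoch0 = cnt->epoch;	// Snapshot the epoch.
 *		tcnt = cnt->cnts;	// Speculatively copy the counters.
 *	} while ((epoch0 & 1U) != 0 || cnt->epoch != epoch0);
 *
 * The copy is discarded and retried if the snapshot began during a write
 * (odd epoch) or if a write completed while the counters were being read.
 */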
struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per-thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 *   - Dumping a heap profile that includes this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;

	/* Linkage for list of contexts to be dumped. */
	ql_elm(prof_ctx_t)	dump_link;
};
typedef ql_head(prof_ctx_t) prof_ctx_list_t;

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but
	 * only the owning thread ever writes them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
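/*
 * Worked example (assuming prof_interval == 2^opt_lg_prof_interval, per the
 * lg(prof_interval) mapping above): with lg_prof_interval:30, each arena
 * dumps once per 2^30 bytes (1 GiB) it allocates.  Dumps therefore average
 * one per GiB allocated, but with 4 arenas the gap between consecutive dumps
 * can approach 4 GiB if allocation is unevenly distributed across arenas.
 */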
/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get(true);				\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_accum to reach     */\
		/* prof_tdata->threshold.  However, delay updating    */\
		/* these variables until prof_{m,re}alloc(), because  */\
		/* we don't know for sure that the allocation will    */\
		/* succeed.                                           */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
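/*
 * Rough caller sketch for PROF_ALLOC_PREP() (hypothetical names; the real
 * callers live in the allocation fast paths):
 *
 *	size_t usize = s2u(size);
 *	prof_thr_cnt_t *cnt;
 *	void *p;
 *
 *	PROF_ALLOC_PREP(1, usize, cnt);	// Ignore this stack frame.
 *	if (cnt == NULL)
 *		return (NULL);		// tdata initialization failed.
 *	p = allocate(usize);		// Hypothetical allocator call.
 *	if (p == NULL)
 *		return (NULL);
 *	prof_malloc(p, usize, cnt);	// Commit the counter updates.
 *	return (p);
 *
 * On completion, cnt is NULL on OOM, (prof_thr_cnt_t *)1U when this
 * allocation is not sampled, or a real counter object when a backtrace was
 * captured.
 */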
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(bool create);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if (create && prof_tdata == NULL)
		prof_tdata = prof_tdata_init();

	return (prof_tdata);
}
JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	/*
	 * The body of this function is compiled out unless heap profiling is
	 * enabled, so that it is possible to compile jemalloc with floating
	 * point support completely disabled.  Avoiding floating point code is
	 * important on memory-constrained systems, but it also enables a
	 * workaround for versions of glibc that don't properly save/restore
	 * floating point registers during dynamic lazy symbol loading (which
	 * internally calls into whatever malloc implementation happens to be
	 * integrated into the application).  Note that some compilers (e.g.
	 * gcc 4.8) may use floating point registers for fast memory moves, so
	 * jemalloc must be compiled with such optimizations disabled (e.g.
	 * -mno-sse) in order for the workaround to be complete.
	 */
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}
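/*
 * Worked example for the formula above: with the default opt_lg_prof_sample
 * of 19 (LG_PROF_SAMPLE_DEFAULT), p = 2^-19, so thresholds are geometrically
 * distributed with mean 2^19 bytes (512 KiB).  r is a 53-bit pseudo-random
 * integer, so u = r/2^53 is uniform in [0, 1), and floor(log(u)/log(1-p)) + 1
 * gives the number of bytes to allocate before taking the next sample.
 */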
JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, usize, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}
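/*
 * Numeric illustration of the overflow guard above: with threshold == 2^19
 * and accum == 2^19 - 100, a 512-byte allocation samples because
 * 512 >= threshold - accum == 100.  The equivalent test
 * "accum + size >= threshold" could wrap around on unsigned addition, which
 * is why the subtraction form is used both here and in PROF_ALLOC_PREP().
 */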
529234370Sjasone */ 530234370Sjasone malloc_mutex_lock(old_ctx->lock); 531234370Sjasone old_ctx->cnt_merged.curobjs--; 532261071Sjasone old_ctx->cnt_merged.curbytes -= old_usize; 533234370Sjasone malloc_mutex_unlock(old_ctx->lock); 534234370Sjasone told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; 535234370Sjasone } 536234370Sjasone } else 537234370Sjasone told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; 538234370Sjasone 539234370Sjasone if ((uintptr_t)told_cnt > (uintptr_t)1U) 540234370Sjasone told_cnt->epoch++; 541234370Sjasone if ((uintptr_t)cnt > (uintptr_t)1U) { 542261071Sjasone prof_ctx_set(ptr, usize, cnt->ctx); 543234370Sjasone cnt->epoch++; 544242844Sjasone } else if (ptr != NULL) 545261071Sjasone prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); 546234370Sjasone /*********/ 547234370Sjasone mb_write(); 548234370Sjasone /*********/ 549234370Sjasone if ((uintptr_t)told_cnt > (uintptr_t)1U) { 550234370Sjasone told_cnt->cnts.curobjs--; 551261071Sjasone told_cnt->cnts.curbytes -= old_usize; 552234370Sjasone } 553234370Sjasone if ((uintptr_t)cnt > (uintptr_t)1U) { 554234370Sjasone cnt->cnts.curobjs++; 555261071Sjasone cnt->cnts.curbytes += usize; 556234370Sjasone if (opt_prof_accum) { 557234370Sjasone cnt->cnts.accumobjs++; 558261071Sjasone cnt->cnts.accumbytes += usize; 559234370Sjasone } 560234370Sjasone } 561234370Sjasone /*********/ 562234370Sjasone mb_write(); 563234370Sjasone /*********/ 564234370Sjasone if ((uintptr_t)told_cnt > (uintptr_t)1U) 565234370Sjasone told_cnt->epoch++; 566234370Sjasone if ((uintptr_t)cnt > (uintptr_t)1U) 567234370Sjasone cnt->epoch++; 568234370Sjasone /*********/ 569234370Sjasone mb_write(); /* Not strictly necessary. */ 570234370Sjasone} 571234370Sjasone 572234370SjasoneJEMALLOC_INLINE void 573234370Sjasoneprof_free(const void *ptr, size_t size) 574234370Sjasone{ 575234370Sjasone prof_ctx_t *ctx = prof_ctx_get(ptr); 576234370Sjasone 577234370Sjasone cassert(config_prof); 578234370Sjasone 579234370Sjasone if ((uintptr_t)ctx > (uintptr_t)1) { 580235238Sjasone prof_thr_cnt_t *tcnt; 581234370Sjasone assert(size == isalloc(ptr, true)); 582235238Sjasone tcnt = prof_lookup(ctx->bt); 583234370Sjasone 584234370Sjasone if (tcnt != NULL) { 585234370Sjasone tcnt->epoch++; 586234370Sjasone /*********/ 587234370Sjasone mb_write(); 588234370Sjasone /*********/ 589234370Sjasone tcnt->cnts.curobjs--; 590234370Sjasone tcnt->cnts.curbytes -= size; 591234370Sjasone /*********/ 592234370Sjasone mb_write(); 593234370Sjasone /*********/ 594234370Sjasone tcnt->epoch++; 595234370Sjasone /*********/ 596234370Sjasone mb_write(); 597234370Sjasone /*********/ 598234370Sjasone } else { 599234370Sjasone /* 600234370Sjasone * OOM during free() cannot be propagated, so operate 601234370Sjasone * directly on cnt->ctx->cnt_merged. 602234370Sjasone */ 603234370Sjasone malloc_mutex_lock(ctx->lock); 604234370Sjasone ctx->cnt_merged.curobjs--; 605234370Sjasone ctx->cnt_merged.curbytes -= size; 606234370Sjasone malloc_mutex_unlock(ctx->lock); 607234370Sjasone } 608234370Sjasone } 609234370Sjasone} 610234370Sjasone#endif 611234370Sjasone 612234370Sjasone#endif /* JEMALLOC_H_INLINES */ 613234370Sjasone/******************************************************************************/ 614