/* prof.h (revision 262521) */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per-thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information
 * needed for cleanup during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
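
/*
 * Illustrative sketch, not part of the header proper: because the sentinels
 * above compare as small integers, callers can reject NULL and both sentinel
 * states with a single range check before touching the pointer, e.g.
 *
 *	prof_tdata_t *prof_tdata = prof_tdata_get(false);
 *	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		return (true);
 *
 * This is the pattern used by PROF_ALLOC_PREP() and
 * prof_sample_accum_update() later in this file.
 */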

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
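
/*
 * Minimal reader sketch for the epoch protocol described above (illustrative
 * only; the actual readers live in prof.c).  read_barrier() is a placeholder
 * for whatever load fence the platform provides, not a jemalloc API.  The
 * reader retries until it observes the same even epoch before and after
 * copying the counters:
 *
 *	prof_cnt_t snap;
 *	unsigned e0, e1;
 *	do {
 *		e0 = cnt->epoch;
 *		read_barrier();
 *		snap = cnt->cnts;
 *		read_barrier();
 *		e1 = cnt->epoch;
 *	} while (e0 != e1 || (e0 & 1U) != 0);
 */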

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per-thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 *   - Dumping a heap profile that includes this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;

	/* Linkage for list of contexts to be dumped. */
	ql_elm(prof_ctx_t)	dump_link;
};
typedef ql_head(prof_ctx_t) prof_ctx_list_t;

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
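
/*
 * Worked example for the comment above (illustrative numbers): with
 * opt_lg_prof_interval == 30, prof_interval is 2^30 bytes (1 GiB), so dumps
 * occur on average once per GiB allocated.  Because each arena tracks its own
 * allocation volume independently, up to roughly narenas GiB may be allocated
 * between two consecutive dumps in the worst case.
 */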

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get(true);				\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_accum to reach     */\
		/* prof_tdata->threshold.  However, delay updating    */\
		/* these variables until prof_{m,re}alloc(), because  */\
		/* we don't know for sure that the allocation will    */\
		/* succeed.                                           */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
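
/*
 * Sketch of the intended call sequence (hypothetical caller; the real callers
 * are the allocation paths in jemalloc.c):
 *
 *	prof_thr_cnt_t *cnt;
 *	size_t usize = s2u(size);
 *	PROF_ALLOC_PREP(1, usize, cnt);
 *	if (cnt == NULL)
 *		return (NULL);	(internal OOM while creating prof_tdata)
 *	p = <allocate usize bytes>;
 *	if (p != NULL)
 *		prof_malloc(p, usize, cnt);
 *
 * ret is NULL on internal failure, (prof_thr_cnt_t *)1U when this allocation
 * should not be sampled, and a real counter pointer when a backtrace was
 * captured.
 */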

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(bool create);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if (create && prof_tdata == NULL)
		prof_tdata = prof_tdata_init();

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	/*
	 * The body of this function is compiled out unless heap profiling is
	 * enabled, so that it is possible to compile jemalloc with floating
	 * point support completely disabled.  Avoiding floating point code is
	 * important on memory-constrained systems, but it also enables a
	 * workaround for versions of glibc that don't properly save/restore
	 * floating point registers during dynamic lazy symbol loading (which
	 * internally calls into whatever malloc implementation happens to be
	 * integrated into the application).  Note that some compilers (e.g.
	 * gcc 4.8) may use floating point registers for fast memory moves, so
	 * jemalloc must be compiled with such optimizations disabled (e.g.
	 * -mno-sse) in order for the workaround to be complete.
	 */
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}
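
/*
 * Numeric sanity check for the formula above: with the default
 * opt_lg_prof_sample of 19 (LG_PROF_SAMPLE_DEFAULT), p = 2^-19, so the
 * threshold is geometrically distributed with mean 2^19 = 524288 bytes; on
 * average one sample is taken per 512 KiB allocated.  When
 * opt_lg_prof_sample == 0 the sampling interval is 1 and callers bypass this
 * function entirely (see PROF_ALLOC_PREP() and prof_sample_accum_update()).
 */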

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, usize, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}
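
/*
 * Worked trace of the accumulation logic above (made-up numbers): suppose
 * threshold == 1000 and accum == 900.  A request of size 50 only advances
 * accum to 950 and returns true (do not sample).  A request of size 300
 * satisfies 300 >= 1000 - 900, so accum becomes 900 - (1000 - 300) = 200,
 * a new threshold is drawn, and false is returned (sample this allocation).
 * The while loop handles requests large enough to span several thresholds.
 */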

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(usize)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the usize passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, usize, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += usize;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += usize;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(usize)) {
				/*
				 * Don't sample.  The usize passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual usize was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_usize;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, usize, cnt->ctx);
		cnt->epoch++;
	} else if (ptr != NULL)
		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_usize;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += usize;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += usize;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/