/* prof.h, revision 261071 */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL encode state information that is used
 * during cleanup at thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
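
/*
 * Illustrative sketch (an editorial example, not part of the API): callers
 * distinguish the sentinel states above from a usable pointer by comparing
 * against PROF_TDATA_STATE_MAX, as PROF_ALLOC_PREP() and
 * prof_sample_accum_update() do later in this header:
 *
 *	prof_tdata_t *prof_tdata = prof_tdata_get(false);
 *	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
 *		(NULL, REINCARNATED, or PURGATORY: no usable per thread state)
 *	}
 */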

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};
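
/*
 * Editorial example of the negative-counter case described above: if thread A
 * allocates an object (A's curobjs becomes 1) and thread B later frees it,
 * the decrement lands in B's prof_thr_cnt_t (B's curobjs becomes -1).
 * Summing every prof_thr_cnt_t linked into the shared prof_ctx_t still yields
 * the correct total of 0.
 */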

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch does not change between the start
	 * and the end of its read of the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
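
/*
 * Illustrative sketch of the reader side of the epoch protocol described in
 * prof_thr_cnt_s (an editorial example; the real consumer of these counters
 * lives in prof.c).  Given a prof_thr_cnt_t *cnt, a consistent snapshot can
 * be taken by retrying until the same even epoch is observed before and
 * after copying the counters (memory barriers around the copy are elided
 * here):
 *
 *	prof_cnt_t snap;
 *	unsigned e0, e1;
 *	do {
 *		e0 = cnt->epoch;
 *		snap = cnt->cnts;
 *		e1 = cnt->epoch;
 *	} while (e0 != e1 || (e0 & 1U) != 0);
 */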

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 *   - Dumping a heap profile that includes this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;

	/* Linkage for list of contexts to be dumped. */
	ql_elm(prof_ctx_t)	dump_link;
};
typedef ql_head(prof_ctx_t) prof_ctx_list_t;

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
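
/*
 * Editorial example for opt_prof_active (assuming the "prof.active" mallctl
 * that jemalloc exposes for this flag): an application can toggle sampling at
 * run time without restarting:
 *
 *	bool active = false;
 *	mallctl("prof.active", NULL, NULL, &active, sizeof(active));
 */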
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the maximum interval
 * is approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
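
/*
 * Editorial example (using the standard MALLOC_CONF option names that back
 * these variables): a setting such as
 *
 *	MALLOC_CONF="prof:true,lg_prof_sample:19,lg_prof_interval:30"
 *
 * enables profiling, samples on average once per 2^19 bytes allocated, and
 * triggers an interval dump roughly every 2^30 bytes (1 GiB) of allocation.
 */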

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
#endif
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get(true);				\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_accum to reach     */\
		/* prof_tdata->threshold.  However, delay updating    */\
		/* these variables until prof_{m,re}alloc(), because  */\
		/* we don't know for sure that the allocation will    */\
		/* succeed.                                           */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
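
/*
 * Illustrative sketch of the intended calling sequence (an editorial example;
 * "allocate" stands in for whatever allocation path the caller uses and is
 * not a function declared here):
 *
 *	size_t usize = s2u(size);
 *	prof_thr_cnt_t *cnt;
 *	PROF_ALLOC_PREP(1, usize, cnt);
 *	if (cnt == NULL)
 *		return (NULL);			(no usable prof_tdata)
 *	void *ret = allocate(usize);
 *	if (ret == NULL)
 *		return (NULL);
 *	prof_malloc(ret, usize, cnt);		(commits the counter updates)
 *	return (ret);
 */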

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(bool create);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if (create && prof_tdata == NULL)
		prof_tdata = prof_tdata_init();

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	/*
	 * The body of this function is compiled out unless heap profiling is
	 * enabled, so that it is possible to compile jemalloc with floating
	 * point support completely disabled.  Avoiding floating point code is
	 * important on memory-constrained systems, but it also enables a
	 * workaround for versions of glibc that don't properly save/restore
	 * floating point registers during dynamic lazy symbol loading (which
	 * internally calls into whatever malloc implementation happens to be
	 * integrated into the application).  Note that some compilers (e.g.
	 * gcc 4.8) may use floating point registers for fast memory moves, so
	 * jemalloc must be compiled with such optimizations disabled (e.g.
	 * -mno-sse) in order for the workaround to be complete.
	 */
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}
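
/*
 * Worked example (editorial): with the default opt_lg_prof_sample of 19,
 * p = 2^-19, so the threshold computed above is a geometric random variable
 * with mean 1/p = 2^19 = 524288 bytes; on average one allocation is sampled
 * per 512 KiB allocated.
 */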

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}
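
/*
 * Editorial note on the test above: CHUNK_ADDR2BASE() rounds ptr down to its
 * chunk base.  A huge allocation is itself chunk-aligned, so only in that
 * case does the base equal ptr; any other pointer lands inside an arena
 * chunk, i.e. it is a region.
 */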

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, usize, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}
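
/*
 * Example trace (hypothetical numbers): with threshold = 100 and accum = 90,
 * an allocation of size 15 satisfies 15 >= 100 - 90, so the sample is taken
 * (false is returned); accum becomes 90 - (100 - 15) = 5 and a new threshold
 * is drawn.  A later allocation of size 20 against, say, threshold = 200
 * merely advances accum to 25 and returns true (no sample).
 */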

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(usize)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the usize passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, usize, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += usize;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += usize;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(usize == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(usize)) {
				/*
				 * Don't sample.  The usize passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual usize was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_usize;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, usize, cnt->ctx);
		cnt->epoch++;
	} else if (ptr != NULL)
		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_usize;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += usize;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += usize;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/