prof.h revision 242844
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
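
/*
 * Illustrative example (editorial sketch, not original text): a raw prof_tdata
 * TSD value refers to live per thread state only if it compares greater than
 * PROF_TDATA_STATE_MAX, e.g.:
 *
 *	if ((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX)
 *		... prof_tdata points to live per thread state ...
 *
 * prof_tdata_get() and PROF_ALLOC_PREP() below perform this check.
 */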

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
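
/*
 * Illustrative sketch (editorial addition, compiled out): one way a reader can
 * take a consistent snapshot of a prof_thr_cnt_t's counters under the epoch
 * protocol described above.  The function and variable names are invented for
 * exposition; the loop simply waits for an even epoch and rereads until the
 * epoch is unchanged across the copy.
 */
#if 0
JEMALLOC_INLINE prof_cnt_t
prof_thr_cnt_snapshot(prof_thr_cnt_t *cnt)
{
	volatile unsigned *epoch = &cnt->epoch;
	prof_cnt_t snapshot;

	while (true) {
		unsigned epoch0 = *epoch;

		/* An odd epoch means a write is in progress; retry. */
		if (epoch0 & 1U)
			continue;

		memcpy(&snapshot, &cnt->cnts, sizeof(prof_cnt_t));

		/* The copy is consistent if the epoch did not change. */
		if (*epoch == epoch0)
			break;
	}
	return (snapshot);
}
#endif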

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
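
/*
 * Worked example (editorial illustration, hypothetical numbers): with
 * opt_lg_prof_interval = 30, prof_interval is 2^30 bytes (1 GiB).  Running
 * with four arenas, interval-triggered dumps average one per GiB allocated,
 * but consecutive dumps can be separated by up to roughly
 * prof_interval * narenas = 4 GiB of allocation.
 */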

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);
void	prof_prefork(void);
void	prof_postfork_parent(void);
void	prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get();					\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_accum to reach     */\
		/* prof_tdata->threshold.  However, delay updating    */\
		/* these variables until prof_{m,re}alloc(), because  */\
		/* we don't know for sure that the allocation will    */\
		/* succeed.                                           */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
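
/*
 * Illustrative sketch (editorial addition, compiled out): how an allocation
 * path might combine PROF_ALLOC_PREP() with prof_malloc().  The function name,
 * the imalloc() call, and nignore == 0 are stand-ins; a real caller passes the
 * number of stack frames to skip and uses its own internal allocator.
 */
#if 0
JEMALLOC_INLINE void *
prof_malloc_example(size_t size)
{
	size_t usize = s2u(size);	/* PROF_ALLOC_PREP() requires a usable size. */
	prof_thr_cnt_t *cnt;
	void *p;

	PROF_ALLOC_PREP(0, usize, cnt);
	if (cnt == NULL)
		return (NULL);		/* prof_tdata_init() failed. */
	p = imalloc(usize);		/* Stand-in for the internal allocator. */
	if (p == NULL)
		return (NULL);
	prof_malloc(p, usize, cnt);	/* Attribute (or skip) this allocation. */
	return (p);
}
#endif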

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(void);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
		if (prof_tdata == NULL)
			prof_tdata = prof_tdata_init();
	}

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
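
/*
 * Worked example (editorial illustration): with the default
 * opt_lg_prof_sample = 19, p = 2^-19, so the thresholds computed above are
 * geometrically distributed with mean 2^19 = 524288, i.e. on average one
 * sampled allocation per 512 KiB of allocated bytes.
 */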

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(size == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample.  The size passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else if (ptr != NULL)
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
