/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
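
/*
 * Example: callers distinguish a usable prof_tdata_t pointer from one of the
 * state-encoding values above with an unsigned comparison, as
 * prof_sample_accum_update() does:
 *
 *	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
 *		tdata = NULL;
 */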

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
	/* Thread data for thread that performed the allocation. */
	prof_tdata_t		*tdata;

	/*
	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
	 * defunct during teardown.
	 */
	uint64_t		thr_uid;
	uint64_t		thr_discrim;

	/* Profiling counters, protected by tdata->lock. */
	prof_cnt_t		cnts;

	/* Associated global context. */
	prof_gctx_t		*gctx;

	/*
	 * UID that distinguishes multiple tctx's created by the same thread,
	 * but coexisting in gctx->tctxs.  There are two ways that such
	 * coexistence can occur:
	 * - A dumper thread can cause a tctx to be retained in the purgatory
	 *   state.
	 * - Although a single "producer" thread must create all tctx's which
	 *   share the same thr_uid, multiple "consumers" can each concurrently
	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
	 *   threshold can be hit again before the first consumer finishes
	 *   executing prof_tctx_destroy().
	 */
	uint64_t		tctx_uid;

	/* Linkage into gctx's tctxs. */
	rb_node(prof_tctx_t)	tctx_link;

	/*
	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
	 * sample vs destroy race.
	 */
	bool			prepared;

	/* Current dump-related state, protected by gctx->lock. */
	prof_tctx_state_t	state;

	/*
	 * Copy of cnts snapshotted during early dump phase, protected by
	 * dump_mtx.
	 */
	prof_cnt_t		dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
	/* Protects nlimbo, cnt_summed, and tctxs. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this gctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing this gctx.
	 *   - Initializing per thread counters associated with this gctx.
	 *   - Preparing to destroy this gctx.
	 *   - Dumping a heap profile that includes this gctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * gctx.
	 */
	unsigned		nlimbo;

	/*
	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	prof_tctx_tree_t	tctxs;

	/* Linkage for tree of contexts to be dumped. */
	rb_node(prof_gctx_t)	dump_link;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Associated backtrace. */
	prof_bt_t		bt;

	/* Backtrace vector, variable size, referred to by bt. */
	void			*vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;

struct prof_tdata_s {
	malloc_mutex_t		*lock;

	/* Monotonically increasing unique thread identifier. */
	uint64_t		thr_uid;

	/*
	 * Monotonically increasing discriminator among tdata structures
	 * associated with the same thr_uid.
	 */
	uint64_t		thr_discrim;

	/* Included in heap profile dumps if non-NULL. */
	char			*thread_name;

	bool			attached;
	bool			expired;

	rb_node(prof_tdata_t)	tdata_link;

	/*
	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
	 * necessary when incrementing this field, because only one thread ever
	 * does so.
	 */
	uint64_t		tctx_uid_next;

	/*
	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
	 * backtraces for which it has non-zero allocation/deallocation counters
	 * associated with thread-specific prof_tctx_t objects.  Other threads
	 * may write to prof_tctx_t contents when freeing associated objects.
	 */
	ckh_t			bt2tctx;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		bytes_until_sample;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;

	/*
	 * Set to true during an early dump phase for tdata's which are
	 * currently being dumped.  New threads' tdata's have this initialized
	 * to false so that they aren't accidentally included in later dump
	 * phases.
	 */
	bool			dumping;

	/*
	 * True if profiling is active for this tdata's thread
	 * (thread.prof.active mallctl).
	 */
	bool			active;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			*vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;             /* Profiling enabled. */
extern bool	opt_prof_active;      /* Initial value for prof_active. */
extern bool	opt_prof_thread_active_init; /* Initial thread.prof.active. */
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
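/* Filename prefix for profile dumps; defaults to PROF_PREFIX_DEFAULT. */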
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool	prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
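
/*
 * Illustration: opt_lg_prof_interval is lg(prof_interval), so a setting of 30
 * yields prof_interval == 2^30 bytes, i.e. roughly one dump per GiB allocated
 * in an arena; the default of -1 leaves interval-triggered dumping disabled.
 */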

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;
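
/*
 * Illustration: the mean number of allocated bytes between samples is
 * 2^lg_prof_sample, so the default of LG_PROF_SAMPLE_DEFAULT (19) samples on
 * average once per 512 KiB allocated.
 */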

void	prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void	prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt);
prof_tctx_t	*prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t	prof_tdata_count(void);
size_t	prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void	prof_idump(tsdn_t *tsdn);
bool	prof_mdump(tsd_t *tsd, const char *filename);
void	prof_gdump(tsdn_t *tsdn);
prof_tdata_t	*prof_tdata_init(tsdn_t *tsdn);
prof_tdata_t	*prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void	prof_reset(tsdn_t *tsdn, size_t lg_sample);
void	prof_tdata_cleanup(tsd_t *tsd);
bool	prof_active_get(tsdn_t *tsdn);
bool	prof_active_set(tsdn_t *tsdn, bool active);
const char	*prof_thread_name_get(tsd_t *tsd);
int	prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool	prof_thread_active_get(tsd_t *tsd);
bool	prof_thread_active_set(tsd_t *tsd, bool active);
bool	prof_thread_active_init_get(tsdn_t *tsdn);
bool	prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool	prof_gdump_get(tsdn_t *tsdn);
bool	prof_gdump_set(tsdn_t *tsdn, bool active);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(tsdn_t *tsdn);
void	prof_prefork0(tsdn_t *tsdn);
void	prof_prefork1(tsdn_t *tsdn);
void	prof_postfork_parent(tsdn_t *tsdn);
void	prof_postfork_child(tsdn_t *tsdn);
void	prof_sample_threshold_update(prof_tdata_t *tdata);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool	prof_active_get_unlocked(void);
bool	prof_gdump_get_unlocked(void);
prof_tdata_t	*prof_tdata_get(tsd_t *tsd, bool create);
prof_tctx_t	*prof_tctx_get(tsdn_t *tsdn, const void *ptr);
void	prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
    const void *old_ptr, prof_tctx_t *old_tctx);
bool	prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out);
prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
void	prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void	prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void	prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

	/*
	 * Even if opt_prof is true, sampling can be temporarily disabled by
	 * setting prof_active to false.  No locking is used when reading
	 * prof_active in the fast path, so there are no guarantees regarding
	 * how long it will take for all threads to notice state changes.
	 */
	return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

	/*
	 * No locking is used when reading prof_gdump_val in the fast path, so
	 * there are no guarantees regarding how long it will take for all
	 * threads to notice state changes.
	 */
	return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd_tsdn(tsd));
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(tsdn_t *tsdn, const void *ptr)
{

	cassert(config_prof);
	assert(ptr != NULL);

	return (arena_prof_tctx_get(tsdn, ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_set(tsdn, ptr, usize, tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);

	arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
}

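/*
 * Returns true if this allocation event should not be sampled: there is no
 * usable tdata, usize does not exhaust bytes_until_sample, or (once the
 * threshold is crossed) per-thread profiling is inactive.  Returns false when
 * a sample should be taken.
 */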
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
		tdata = NULL;

	if (tdata_out != NULL)
		*tdata_out = tdata;

	if (unlikely(tdata == NULL))
		return (true);

	if (likely(tdata->bytes_until_sample >= usize)) {
		if (update)
			tdata->bytes_until_sample -= usize;
		return (true);
	} else {
		/* Compute new sample threshold. */
		if (update)
			prof_sample_threshold_update(tdata);
		return (!tdata->active);
	}
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
	    &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(usize == isalloc(tsdn, ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
	else
		prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
	bool sampled, old_sampled;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

	if (prof_active && !updated && ptr != NULL) {
		assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
			/*
			 * Don't sample.  The usize passed to prof_alloc_prep()
			 * was larger than what actually got allocated, so a
			 * backtrace was captured for this allocation, even
			 * though its actual usize was insufficient to cross the
			 * sample threshold.
			 */
			prof_alloc_rollback(tsd, tctx, true);
			tctx = (prof_tctx_t *)(uintptr_t)1U;
		}
	}

	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

	if (unlikely(sampled))
		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
	else
		prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);

	if (unlikely(old_sampled))
		prof_free_sampled_object(tsd, old_usize, old_tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);

	cassert(config_prof);
	assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));

	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
		prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
