#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;
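/*
 * For reference (illustrative only; not used in this file): background threads
 * are normally enabled either at startup, e.g.
 * MALLOC_CONF="background_thread:true", or at runtime via the mallctl
 * interface, e.g.:
 *
 *	bool enable = true;
 *	mallctl("background_thread", NULL, NULL, &enable, sizeof(enable));
 *
 * opt_max_background_threads caps how many threads may be created.
 */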

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/* False if the necessary runtime support is unavailable. */
bool can_enable_background_thread;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
	if (!isthreaded) {
		isthreaded = true;
	}
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
	pthread_create_wrapper_init();

	return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
	background_thread_wakeup_time_set(tsdn, info, 0);
	info->npages_to_purge_new = 0;
	if (config_stats) {
		info->tot_n_runs = 0;
		nstime_init(&info->tot_sleep_time, 0);
	}
}

static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

	return (ret != 0);
#else
	return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimum sleep interval: 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

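/*
 * Estimate how many pages would become purgeable if the decay epoch advanced
 * by `interval` more steps and no new dirty pages arrived: the difference
 * between the current smoothstep-weighted page limit and the limit after the
 * backlog has shifted by `interval` entries.  h_steps[] is the fixed-point
 * smoothstep table (SMOOTHSTEP_BFP fractional bits).
 */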
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}

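/*
 * Pick a sleep interval for one decay state (dirty or muzzy): sleep
 * indefinitely if decay is disabled or nothing is pending, use the maximum
 * interval when few pages are dirty, and otherwise search for the smallest
 * number of decay epochs after which roughly
 * BACKGROUND_THREAD_NPAGES_THRESHOLD pages would become purgeable.
 */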
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use the minimum interval if the decay mutex is contended. */
		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
	}

	uint64_t interval;
	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager (decay time 0) or disabled. */
		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
		goto label_done;
	}

	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);
	size_t npages = extents_npages_get(extents);
	if (npages == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
			goto label_done;
		}
	}
	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Use max interval. */
		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
		goto label_done;
	}

	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
	size_t ub = SMOOTHSTEP_NSTEPS;
	/* At least 2 intervals, to ensure the next epoch deadline is reached. */
	lb = (lb < 2) ? 2 : lb;
	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
	    (lb + 2 > ub)) {
		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
		goto label_done;
	}

	assert(lb + 2 <= ub);
	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * lb;
		goto label_done;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		interval = decay_interval_ns * ub;
		goto label_done;
	}

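	/*
	 * Binary search over [lb, ub] epochs for the smallest interval whose
	 * projected purge count crosses BACKGROUND_THREAD_NPAGES_THRESHOLD;
	 * the bounded iteration count is asserted below.
	 */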
	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
	    && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
	}
	interval = decay_interval_ns * (ub + lb) / 2;
label_done:
	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
	malloc_mutex_unlock(tsdn, &decay->mtx);

	return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
	uint64_t i1, i2;
	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
	    &arena->extents_dirty);
	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		return i1;
	}
	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
	    &arena->extents_muzzy);

	return i1 < i2 ? i1 : i2;
}

static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
	if (config_stats) {
		info->tot_n_runs++;
	}
	info->npages_to_purge_new = 0;

	struct timeval tv;
	/* timedwait() uses the condvar's clock (realtime by default). */
	gettimeofday(&tv, NULL);
	nstime_t before_sleep;
	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

	int ret;
	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
		assert(background_thread_indefinite_sleep(info));
		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
		assert(ret == 0);
	} else {
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/* The wakeup time uses the malloc clock, which may differ from tv. */
		nstime_t next_wakeup;
		nstime_init(&next_wakeup, 0);
		nstime_update(&next_wakeup);
		nstime_iadd(&next_wakeup, interval);
		assert(nstime_ns(&next_wakeup) <
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
		background_thread_wakeup_time_set(tsdn, info,
		    nstime_ns(&next_wakeup));

		nstime_t ts_wakeup;
		nstime_copy(&ts_wakeup, &before_sleep);
		nstime_iadd(&ts_wakeup, interval);
		struct timespec ts;
		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

		assert(!background_thread_indefinite_sleep(info));
		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
		assert(ret == ETIMEDOUT || ret == 0);
		background_thread_wakeup_time_set(tsdn, info,
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	}
	if (config_stats) {
		gettimeofday(&tv, NULL);
		nstime_t after_sleep;
		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
			nstime_subtract(&after_sleep, &before_sleep);
			nstime_add(&info->tot_sleep_time, &after_sleep);
		}
	}
}

static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
	if (unlikely(info->state == background_thread_paused)) {
		malloc_mutex_unlock(tsdn, &info->mtx);
		/* Block on the global lock until the state update completes. */
		malloc_mutex_lock(tsdn, &background_thread_lock);
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		malloc_mutex_lock(tsdn, &info->mtx);
		return true;
	}

	return false;
}

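/*
 * One iteration of a background thread's work loop: decay the arenas assigned
 * to this thread (those whose index is congruent to `ind` modulo
 * max_background_threads), compute the shortest purge interval among them,
 * and sleep for that long.
 */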
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
	unsigned narenas = narenas_total_get();

	for (unsigned i = ind; i < narenas; i += max_background_threads) {
		arena_t *arena = arena_get(tsdn, i, false);
		if (!arena) {
			continue;
		}
		arena_decay(tsdn, arena, true, false);
		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
			/* Min interval will be used. */
			continue;
		}
		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
		    arena);
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
		if (min_interval > interval) {
			min_interval = interval;
		}
	}
	background_thread_sleep(tsdn, info, min_interval);
}

static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
	if (info == &background_thread_info[0]) {
		malloc_mutex_assert_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	} else {
		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	}

	pre_reentrancy(tsd, NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	bool has_thread;
	assert(info->state != background_thread_paused);
	if (info->state == background_thread_started) {
		has_thread = true;
		info->state = background_thread_stopped;
		pthread_cond_signal(&info->cond);
	} else {
		has_thread = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

	if (!has_thread) {
		post_reentrancy(tsd);
		return false;
	}
	void *ret;
	if (pthread_join(info->thread, &ret)) {
		post_reentrancy(tsd);
		return true;
	}
	assert(ret == NULL);
	n_background_threads--;
	post_reentrancy(tsd);

	return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
	/*
	 * Block all signals during thread creation so that the new thread
	 * inherits a mask with every signal blocked, i.e. it never runs
	 * signal handlers.
	 */
	sigset_t set;
	sigfillset(&set);
	sigset_t oldset;
	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
	if (mask_err != 0) {
		return mask_err;
	}
	int create_err = pthread_create_wrapper(thread, attr, start_routine,
	    arg);
	/*
	 * Restore the signal mask.  Failing to restore it here would change
	 * the program's signal handling behavior.
	 */
	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
	if (restore_err != 0) {
		malloc_printf("<jemalloc>: background thread creation "
		    "failed (%d), and signal mask restoration failed "
		    "(%d)\n", create_err, restore_err);
		if (opt_abort) {
			abort();
		}
	}
	return create_err;
}

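/*
 * Called by thread 0 with background_thread_info[0].mtx held: launch any
 * background threads whose info has been marked started but whose pthread has
 * not been created yet.  At most one creation attempt is made per call;
 * returning true tells the caller to restart its loop because the mutex was
 * dropped along the way.
 */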
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
	bool ret = false;
	if (likely(*n_created == n_background_threads)) {
		return ret;
	}

	tsdn_t *tsdn = tsd_tsdn(tsd);
	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
	for (unsigned i = 1; i < max_background_threads; i++) {
		if (created_threads[i]) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		/*
		 * If the thread is in the background_thread_paused state
		 * (e.g. because of arena reset), delay the creation.
		 */
		bool create = (info->state == background_thread_started);
		malloc_mutex_unlock(tsdn, &info->mtx);
		if (!create) {
			continue;
		}

		pre_reentrancy(tsd, NULL);
		int err = background_thread_create_signals_masked(&info->thread,
		    NULL, background_thread_entry, (void *)(uintptr_t)i);
		post_reentrancy(tsd);

		if (err == 0) {
			(*n_created)++;
			created_threads[i] = true;
		} else {
			malloc_printf("<jemalloc>: background thread "
			    "creation failed (%d)\n", err);
			if (opt_abort) {
				abort();
			}
		}
		/* Return true to restart the caller's loop, since we unlocked. */
		ret = true;
		break;
	}
	malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

	return ret;
}

static void
background_thread0_work(tsd_t *tsd) {
	/* Thread 0 is also responsible for launching and terminating threads. */
	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
	unsigned i;
	for (i = 1; i < max_background_threads; i++) {
		created_threads[i] = false;
	}
	/* Start working, and create more threads when asked. */
	unsigned n_created = 1;
	while (background_thread_info[0].state != background_thread_stopped) {
		if (background_thread_pause_check(tsd_tsdn(tsd),
		    &background_thread_info[0])) {
			continue;
		}
		if (check_background_thread_creation(tsd, &n_created,
		    (bool *)&created_threads)) {
			continue;
		}
		background_work_sleep_once(tsd_tsdn(tsd),
		    &background_thread_info[0], 0);
	}

	/*
	 * Shut down other threads at exit.  Note that the ctl thread is
	 * holding the global background_thread mutex and waiting for us.
	 */
	assert(!background_thread_enabled());
	for (i = 1; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		assert(info->state != background_thread_paused);
		if (created_threads[i]) {
			background_threads_disable_single(tsd, info);
		} else {
			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
			if (info->state != background_thread_stopped) {
				/* The thread was not created. */
				assert(info->state ==
				    background_thread_started);
				n_background_threads--;
				info->state = background_thread_stopped;
			}
			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		}
	}
	background_thread_info[0].state = background_thread_stopped;
	assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
	background_thread_info_t *info = &background_thread_info[ind];

	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	if (ind == 0) {
		background_thread0_work(tsd);
	} else {
		while (info->state != background_thread_stopped) {
			if (background_thread_pause_check(tsd_tsdn(tsd),
			    info)) {
				continue;
			}
			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
		}
	}
	assert(info->state == background_thread_stopped);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
	assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#endif
	if (opt_percpu_arena != percpu_arena_disabled) {
		set_current_thread_affinity((int)thread_ind);
	}
	/*
	 * Start periodic background work.  We use internal tsd, which avoids
	 * side effects such as triggering new arena creation (which would in
	 * turn trigger creation of another background thread).
	 */
	background_work(tsd_internal_fetch(), thread_ind);
	assert(pthread_equal(pthread_self(),
	    background_thread_info[thread_ind].thread));

	return NULL;
}

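/*
 * Mark `info` as started and bump the global thread count.  The caller must
 * hold background_thread_lock; the pthread itself is created separately.
 */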
static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
	info->state = background_thread_started;
	background_thread_info_init(tsd_tsdn(tsd), info);
	n_background_threads++;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
	assert(have_background_thread);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* We create at most max_background_threads threads. */
	size_t thread_ind = arena_ind % max_background_threads;
	background_thread_info_t *info = &background_thread_info[thread_ind];

	bool need_new_thread;
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	need_new_thread = background_thread_enabled() &&
	    (info->state == background_thread_stopped);
	if (need_new_thread) {
		background_thread_init(tsd, info);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
	if (!need_new_thread) {
		return false;
	}
	if (arena_ind != 0) {
		/* Threads are created asynchronously by thread 0. */
		background_thread_info_t *t0 = &background_thread_info[0];
		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
		assert(t0->state == background_thread_started);
		pthread_cond_signal(&t0->cond);
		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

		return false;
	}

	pre_reentrancy(tsd, NULL);
	/*
	 * To avoid complications (besides reentrancy), create internal
	 * background threads with the underlying pthread_create.
	 */
	int err = background_thread_create_signals_masked(&info->thread, NULL,
	    background_thread_entry, (void *)thread_ind);
	post_reentrancy(tsd);

	if (err != 0) {
		malloc_printf("<jemalloc>: arena 0 background thread creation "
		    "failed (%d)\n", err);
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		info->state = background_thread_stopped;
		n_background_threads--;
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

		return true;
	}

	return false;
}

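/*
 * Enable background threads: mark the info slots that existing arenas map to
 * as started, then create thread 0, which in turn launches the remaining
 * threads (see check_background_thread_creation()).
 */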
bool
background_threads_enable(tsd_t *tsd) {
	assert(n_background_threads == 0);
	assert(background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	VARIABLE_ARRAY(bool, marked, max_background_threads);
	unsigned i, nmarked;
	for (i = 0; i < max_background_threads; i++) {
		marked[i] = false;
	}
	nmarked = 0;
	/* Thread 0 is always required; it is created last. */
	marked[0] = true;
	/* Mark the threads that thread 0 will need to create. */
	unsigned n = narenas_total_get();
	for (i = 1; i < n; i++) {
		if (marked[i % max_background_threads] ||
		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[
		    i % max_background_threads];
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		assert(info->state == background_thread_stopped);
		background_thread_init(tsd, info);
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		marked[i % max_background_threads] = true;
		if (++nmarked == max_background_threads) {
			break;
		}
	}

	return background_thread_create(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
	assert(!background_thread_enabled());
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* Thread 0 will be responsible for terminating other threads. */
	if (background_threads_disable_single(tsd,
	    &background_thread_info[0])) {
		return true;
	}
	assert(n_background_threads == 0);

	return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
	background_thread_info_t *info = arena_background_thread_info_get(
	    arena);
	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
		/*
		 * Background thread may hold the mutex for a long period of
		 * time.  We'd like to avoid the variance on application
		 * threads.  So keep this non-blocking, and leave the work to a
		 * future epoch.
		 */
		return;
	}

	if (info->state != background_thread_started) {
		goto label_done;
	}
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		goto label_done;
	}

	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is currently eager or disabled. */
		goto label_done_unlock2;
	}
	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);

	nstime_t diff;
	nstime_init(&diff, background_thread_wakeup_time_get(info));
	if (nstime_compare(&diff, &decay->epoch) <= 0) {
		goto label_done_unlock2;
	}
	nstime_subtract(&diff, &decay->epoch);
	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		goto label_done_unlock2;
	}

	if (npages_new > 0) {
		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
		/*
		 * Compute how many new pages we would need to purge by the next
		 * wakeup, which is used to determine if we should signal the
		 * background thread.
		 */
		uint64_t npurge_new;
		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
			npurge_new = npages_new;
		} else {
			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
			assert(h_steps_max >=
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new = npages_new * (h_steps_max -
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new >>= SMOOTHSTEP_BFP;
		}
		info->npages_to_purge_new += npurge_new;
	}

	bool should_signal;
	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		should_signal = true;
	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
	    (extents_npages_get(&arena->extents_dirty) > 0 ||
	    extents_npages_get(&arena->extents_muzzy) > 0 ||
	    info->npages_to_purge_new > 0)) {
		should_signal = true;
	} else {
		should_signal = false;
	}

	if (should_signal) {
		info->npages_to_purge_new = 0;
		pthread_cond_signal(&info->cond);
	}
label_done_unlock2:
	malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
	malloc_mutex_unlock(tsdn, &info->mtx);
}

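/*
 * Fork handling: the global lock is acquired first (prefork0) and the
 * per-thread mutexes afterwards (prefork1); postfork releases them in the
 * reverse order.  The child additionally resets all background thread state,
 * since the threads themselves do not survive fork().
 */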
void
background_thread_prefork0(tsdn_t *tsdn) {
	malloc_mutex_prefork(tsdn, &background_thread_lock);
	background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
	}
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_parent(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
	for (unsigned i = 0; i < max_background_threads; i++) {
		malloc_mutex_postfork_child(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
	if (!background_thread_enabled_at_fork) {
		return;
	}

	/* Clear background_thread state (reset to disabled for child). */
	malloc_mutex_lock(tsdn, &background_thread_lock);
	n_background_threads = 0;
	background_thread_enabled_set(tsdn, false);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		int ret = pthread_cond_init(&info->cond, NULL);
		assert(ret == 0);
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);
}

bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
	assert(config_stats);
	malloc_mutex_lock(tsdn, &background_thread_lock);
	if (!background_thread_enabled()) {
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		return true;
	}

	stats->num_threads = n_background_threads;
	uint64_t num_runs = 0;
	nstime_init(&stats->run_interval, 0);
	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		if (info->state != background_thread_stopped) {
			num_runs += info->tot_n_runs;
			nstime_add(&stats->run_interval, &info->tot_sleep_time);
		}
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	stats->num_runs = num_runs;
	if (num_runs > 0) {
		nstime_idivide(&stats->run_interval, num_runs);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);

	return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

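/*
 * Resolve the real pthread_create via dlsym(RTLD_NEXT, ...) so the wrapper
 * above can forward to it.  A lookup failure disables the background thread
 * feature, and aborts when lazy locking or opt.background_thread requires it.
 */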
static bool
pthread_create_fptr_init(void) {
	if (pthread_create_fptr != NULL) {
		return false;
	}
	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
	if (pthread_create_fptr == NULL) {
		can_enable_background_thread = false;
		if (config_lazy_lock || opt_background_thread) {
			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
			    "\"pthread_create\")\n");
			abort();
		}
	} else {
		can_enable_background_thread = true;
	}

	return false;
}

/*
 * When lazy lock is enabled, we need to make sure isthreaded is set before
 * taking any background_thread locks.  This is called early from ctl (rather
 * than waiting for a pthread_create call to trigger it) because the mutex is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	pthread_create_fptr_init();
	pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
	if (!have_background_thread && opt_background_thread) {
		malloc_printf("<jemalloc>: option background_thread currently "
		    "supports pthread only\n");
		return true;
	}
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	if ((config_lazy_lock || opt_background_thread) &&
	    pthread_create_fptr_init()) {
		return true;
	}
#endif
	return false;
}

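/*
 * boot1 runs once arenas exist: size the thread pool (defaulting to the
 * number of CPUs when opt.max_background_threads was left at its limit), then
 * allocate and initialize the per-thread info array, mutexes and condvars.
 */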
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
	assert(have_background_thread);
	assert(narenas_total_get() > 0);

	if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
	    ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
		opt_max_background_threads = ncpus;
	}
	max_background_threads = opt_max_background_threads;

	background_thread_enabled_set(tsdn, opt_background_thread);
	if (malloc_mutex_init(&background_thread_lock,
	    "background_thread_global",
	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}

	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
	    b0get(), opt_max_background_threads *
	    sizeof(background_thread_info_t), CACHELINE);
	if (background_thread_info == NULL) {
		return true;
	}

	for (unsigned i = 0; i < max_background_threads; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		/*
		 * The per-thread mutexes share a rank (address-ordered)
		 * because thread 0 may hold several of them at once.
		 */
		if (malloc_mutex_init(&info->mtx, "background_thread",
		    WITNESS_RANK_BACKGROUND_THREAD,
		    malloc_mutex_address_ordered)) {
			return true;
		}
		if (pthread_cond_init(&info->cond, NULL)) {
			return true;
		}
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
#endif

	return false;
}
