#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

unsigned		nhbins;
size_t			tcache_maxclass;

tcaches_t		*tcaches;

/* Index of first element within tcaches that has never been used. */
static unsigned		tcaches_past;

/* Head of singly linked list tracking available tcaches elements. */
static tcaches_t	*tcaches_avail;

/******************************************************************************/

size_t
tcache_salloc(tsdn_t *tsdn, const void *ptr)
{
	return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr));
}

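/*
 * Incremental GC: each invocation operates on a single cache bin (cycling
 * through bins via next_gc_bin), flushes part of the objects that have sat
 * unused since the previous pass, and adapts the bin's fill count based on
 * the observed low water mark.
 */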
void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
{
	szind_t binind = tcache->next_gc_bin;
	tcache_bin_t *tbin = &tcache->tbins[binind];
	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];

	if (tbin->low_water > 0) {
		/*
		 * Flush (ceiling) 3/4 of the objects below the low water mark.
		 */
		if (binind < NBINS) {
			tcache_bin_flush_small(tsd, tcache, tbin, binind,
			    tbin->ncached - tbin->low_water + (tbin->low_water
			    >> 2));
		} else {
			tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
			    - tbin->low_water + (tbin->low_water >> 2), tcache);
		}
		/*
		 * Reduce fill count by 2X.  Limit lg_fill_div such that the
		 * fill count is always at least 1.
		 */
		if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
			tbin->lg_fill_div++;
	} else if (tbin->low_water < 0) {
		/*
		 * Increase fill count by 2X.  Make sure lg_fill_div stays
		 * greater than 0.
		 */
		if (tbin->lg_fill_div > 1)
			tbin->lg_fill_div--;
	}
	tbin->low_water = tbin->ncached;

	tcache->next_gc_bin++;
	if (tcache->next_gc_bin == nhbins)
		tcache->next_gc_bin = 0;
}

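/*
 * Slow path for small allocation: the cache bin was empty, so refill it from
 * the arena and retry the cached-allocation fast path.
 */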
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
    tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
{
	void *ret;

	arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
	    tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin, tcache_success);

	return (ret);
}

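/*
 * Flush objects from a small cache bin back to their owning arena bins until
 * only rem cached objects remain.  Each pass locks the bin belonging to the
 * arena that owns the first object to be flushed; objects owned by other
 * arenas are deferred to later passes.
 */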
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
    szind_t binind, unsigned rem)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1));
		arena_t *bin_arena = extent_arena_get(extent);
		arena_bin_t *bin = &bin_arena->bins[binind];

		if (config_prof && bin_arena == arena) {
			if (arena_prof_accum(tsd_tsdn(tsd), arena,
			    tcache->prof_accumbytes))
				prof_idump(tsd_tsdn(tsd));
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		if (config_stats && bin_arena == arena) {
			assert(!merged_stats);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);

			extent = iealloc(tsd_tsdn(tsd), ptr);
			if (extent_arena_get(extent) == bin_arena) {
				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
				    bin_arena, extent, ptr);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &arena->bins[binind];
		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

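/*
 * Flush objects from a large cache bin until only rem cached objects remain.
 * Analogous to tcache_bin_flush_small(), except that each pass takes the
 * owning arena's lock (rather than a bin lock) and deallocation goes through
 * the large-object path.
 */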
void
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
    unsigned rem, tcache_t *tcache)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1));
		arena_t *locked_arena = extent_arena_get(extent);
		UNUSED bool idump;

		if (config_prof)
			idump = false;
		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock);
		if ((config_prof || config_stats) && locked_arena == arena) {
			if (config_prof) {
				idump = arena_prof_accum_locked(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);
			extent = iealloc(tsd_tsdn(tsd), ptr);
			if (extent_arena_get(extent) == locked_arena) {
				large_dalloc_junked_locked(tsd_tsdn(tsd),
				    extent);
			} else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock);
		if (config_prof && idump)
			prof_idump(tsd_tsdn(tsd));
		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
		    ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

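/*
 * Stats-only bookkeeping: each arena tracks the tcaches associated with it so
 * that tcache statistics can be located and merged back into the arena (see
 * tcache_arena_dissociate()).
 */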
static void
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{
	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

static void
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{
	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		if (config_debug) {
			bool in_ql = false;
			tcache_t *iter;
			ql_foreach(iter, &arena->tcache_ql, link) {
				if (iter == tcache) {
					in_ql = true;
					break;
				}
			}
			assert(in_ql);
		}
		ql_remove(&arena->tcache_ql, tcache, link);
		tcache_stats_merge(tsdn, tcache, arena);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

void
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena,
    arena_t *newarena)
{
	tcache_arena_dissociate(tsdn, tcache, oldarena);
	tcache_arena_associate(tsdn, tcache, newarena);
}

tcache_t *
tcache_get_hard(tsd_t *tsd)
{
	arena_t *arena;

	if (!tcache_enabled_get()) {
		if (tsd_nominal(tsd))
			tcache_enabled_set(false); /* Memoize. */
		return (NULL);
	}
	arena = arena_choose(tsd, NULL);
	if (unlikely(arena == NULL))
		return (NULL);
	return (tcache_create(tsd_tsdn(tsd), arena));
}

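/*
 * A tcache is a single allocation: the tcache_t header (including the
 * tcache_bin_t array) followed by each bin's pointer stack, padded and
 * aligned to CACHELINE to avoid false cacheline sharing.
 */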
tcache_t *
tcache_create(tsdn_t *tsdn, arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/* Avoid false cacheline sharing. */
	size = sa2u(size, CACHELINE);

	tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true,
	    arena_get(TSDN_NULL, 0, true));
	if (tcache == NULL)
		return (NULL);

	tcache_arena_associate(tsdn, tcache, arena);

	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);

	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
		/*
		 * avail points past the available space.  Allocations will
		 * access the slots toward higher addresses (for the benefit of
		 * prefetch).
		 */
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
	}

	return (tcache);
}

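/*
 * Flush all cached objects, merge any remaining stats and profiling counters
 * back into the arena, and release the tcache's memory.
 */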
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache)
{
	arena_t *arena;
	unsigned i;

	arena = arena_choose(tsd, NULL);
	tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena);

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);

		if (config_stats) {
			assert(tbin->tstats.nrequests == 0);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);

		if (config_stats) {
			assert(tbin->tstats.nrequests == 0);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0 &&
	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes))
		prof_idump(tsd_tsdn(tsd));

	idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tcache), tcache, NULL,
	    true, true);
}

void
tcache_cleanup(tsd_t *tsd)
{
	tcache_t *tcache;

	if (!config_tcache)
		return;

	if ((tcache = tsd_tcache_get(tsd)) != NULL) {
		tcache_destroy(tsd, tcache);
		tsd_tcache_set(tsd, NULL);
	}
}

void
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	cassert(config_stats);

	malloc_mutex_assert_owner(tsdn, &arena->lock);

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(tsdn, &bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(tsdn, &bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

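/*
 * Create an explicitly managed tcache.  Such tcaches live in the global
 * tcaches array and are referenced by index; free elements are recycled
 * through the tcaches_avail list before never-used slots at tcaches_past and
 * beyond are consumed.
 */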
bool
tcaches_create(tsd_t *tsd, unsigned *r_ind)
{
	arena_t *arena;
	tcache_t *tcache;
	tcaches_t *elm;

	if (tcaches == NULL) {
		tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *)
		    * (MALLOCX_TCACHE_MAX+1), CACHELINE);
		if (tcaches == NULL)
			return (true);
	}

	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
		return (true);
	arena = arena_ichoose(tsd, NULL);
	if (unlikely(arena == NULL))
		return (true);
	tcache = tcache_create(tsd_tsdn(tsd), arena);
	if (tcache == NULL)
		return (true);

	if (tcaches_avail != NULL) {
		elm = tcaches_avail;
		tcaches_avail = tcaches_avail->next;
		elm->tcache = tcache;
		*r_ind = (unsigned)(elm - tcaches);
	} else {
		elm = &tcaches[tcaches_past];
		elm->tcache = tcache;
		*r_ind = tcaches_past;
		tcaches_past++;
	}

	return (false);
}

static void
tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm)
{
	if (elm->tcache == NULL)
		return;
	tcache_destroy(tsd, elm->tcache);
	elm->tcache = NULL;
}

void
tcaches_flush(tsd_t *tsd, unsigned ind)
{
	tcaches_elm_flush(tsd, &tcaches[ind]);
}

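/*
 * Destroy the tcache behind an explicit tcache index and return its element
 * to the tcaches_avail free list for reuse by tcaches_create().
 */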
void
tcaches_destroy(tsd_t *tsd, unsigned ind)
{
	tcaches_t *elm = &tcaches[ind];
	tcaches_elm_flush(tsd, elm);
	elm->next = tcaches_avail;
	tcaches_avail = elm;
}

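/*
 * Compute tcache_maxclass and nhbins from opt_lg_tcache_max, then size each
 * cache bin's ncached_max, accumulating the total per-tcache stack space in
 * stack_nelms.
 */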
bool
tcache_boot(tsdn_t *tsdn)
{
	unsigned i;

	/* If necessary, clamp opt_lg_tcache_max. */
	if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
	    SMALL_MAXCLASS)
		tcache_maxclass = SMALL_MAXCLASS;
	else
		tcache_maxclass = (ZU(1) << opt_lg_tcache_max);

	nhbins = size2index(tcache_maxclass) + 1;

	/* Initialize tcache_bin_info. */
	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
	    * sizeof(tcache_bin_info_t), CACHELINE);
	if (tcache_bin_info == NULL)
		return (true);
	stack_nelms = 0;
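	/*
	 * For each small bin, cache up to twice arena_bin_info[i].nregs
	 * objects, clamped to [TCACHE_NSLOTS_SMALL_MIN,
	 * TCACHE_NSLOTS_SMALL_MAX].
	 */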
	for (i = 0; i < NBINS; i++) {
		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MIN;
		} else if ((arena_bin_info[i].nregs << 1) <=
		    TCACHE_NSLOTS_SMALL_MAX) {
			tcache_bin_info[i].ncached_max =
			    (arena_bin_info[i].nregs << 1);
		} else {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MAX;
		}
		stack_nelms += tcache_bin_info[i].ncached_max;
	}
	for (; i < nhbins; i++) {
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
		stack_nelms += tcache_bin_info[i].ncached_max;
	}

	return (false);
}