chunk.c revision 242844
#define	JEMALLOC_CHUNK_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

const char	*opt_dss = DSS_DEFAULT;
size_t		opt_lg_chunk = LG_CHUNK_DEFAULT;

malloc_mutex_t	chunks_mtx;
chunk_stats_t	stats_chunks;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why there are two trees with the same contents.
 */
static extent_tree_t	chunks_szad_mmap;
static extent_tree_t	chunks_ad_mmap;
static extent_tree_t	chunks_szad_dss;
static extent_tree_t	chunks_ad_dss;

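/*
 * Radix tree mapping chunk addresses to themselves.  It is only maintained
 * when config_ivsalloc is enabled, in which case it allows lookups to
 * determine whether a pointer refers to memory returned by this allocator.
 */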
rtree_t		*chunks_rtree;

/* Various chunk-related settings. */
size_t		chunksize;
size_t		chunksize_mask; /* (chunksize - 1). */
size_t		chunk_npages;
size_t		map_bias;
size_t		arena_maxclass; /* Max size class for arenas. */

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	*chunk_recycle(extent_tree_t *chunks_szad,
    extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base,
    bool *zero);
static void	chunk_record(extent_tree_t *chunks_szad,
    extent_tree_t *chunks_ad, void *chunk, size_t size);

/******************************************************************************/

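/*
 * Attempt to satisfy a chunk allocation request from extents previously
 * recorded in the given szad/ad trees.  The smallest recorded extent that can
 * hold an aligned region of the requested size is split: any leading and
 * trailing space is reinserted as smaller extents, and the aligned middle
 * portion is returned.
 */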
static void *
chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
    size_t alignment, bool base, bool *zero)
{
	void *ret;
	extent_node_t *node;
	extent_node_t key;
	size_t alloc_size, leadsize, trailsize;
	bool zeroed;

	if (base) {
		/*
		 * This function may need to call base_node_{,de}alloc(), but
		 * the current chunk allocation request is on behalf of the
		 * base allocator.  Avoid deadlock (and if that weren't an
		 * issue, potential for infinite recursion) by returning NULL.
		 */
		return (NULL);
	}

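	/*
	 * Search for enough space that an aligned region of the requested
	 * size is guaranteed to fit: since size and alignment are both
	 * multiples of chunksize, at most (alignment - chunksize) bytes of
	 * leading space can be wasted before an alignment boundary.
	 */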
	alloc_size = size + alignment - chunksize;
	/* Beware size_t wrap-around. */
	if (alloc_size < size)
		return (NULL);
	key.addr = NULL;
	key.size = alloc_size;
	malloc_mutex_lock(&chunks_mtx);
	node = extent_tree_szad_nsearch(chunks_szad, &key);
	if (node == NULL) {
		malloc_mutex_unlock(&chunks_mtx);
		return (NULL);
	}
	leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
	    (uintptr_t)node->addr;
	assert(node->size >= leadsize + size);
	trailsize = node->size - leadsize - size;
	ret = (void *)((uintptr_t)node->addr + leadsize);
	/* Remove node from the tree. */
	extent_tree_szad_remove(chunks_szad, node);
	extent_tree_ad_remove(chunks_ad, node);
	if (leadsize != 0) {
		/* Insert the leading space as a smaller chunk. */
		node->size = leadsize;
		extent_tree_szad_insert(chunks_szad, node);
		extent_tree_ad_insert(chunks_ad, node);
		node = NULL;
	}
	if (trailsize != 0) {
		/* Insert the trailing space as a smaller chunk. */
		if (node == NULL) {
			/*
			 * An additional node is required, but
			 * base_node_alloc() can cause a new base chunk to be
			 * allocated.  Drop chunks_mtx in order to avoid
			 * deadlock, and if node allocation fails, deallocate
			 * the result before returning an error.
			 */
			malloc_mutex_unlock(&chunks_mtx);
			node = base_node_alloc();
			if (node == NULL) {
				chunk_dealloc(ret, size, true);
				return (NULL);
			}
			malloc_mutex_lock(&chunks_mtx);
		}
		node->addr = (void *)((uintptr_t)(ret) + size);
		node->size = trailsize;
		extent_tree_szad_insert(chunks_szad, node);
		extent_tree_ad_insert(chunks_ad, node);
		node = NULL;
	}
	malloc_mutex_unlock(&chunks_mtx);

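	/*
	 * If the recycled extent was an exact fit, the removed node still
	 * records whether its memory is known to be zeroed; in that case a
	 * caller's zero request can be satisfied without touching the pages.
	 * Otherwise, zero the memory here if the caller requested it.
	 */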
	zeroed = false;
	if (node != NULL) {
		if (node->zeroed) {
			zeroed = true;
			*zero = true;
		}
		base_node_dealloc(node);
	}
	if (zeroed == false && *zero) {
		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
		memset(ret, 0, size);
	}
	return (ret);
}

/*
 * If the caller specifies (*zero == false), it is still possible to receive
 * zeroed memory, in which case *zero is toggled to true.  arena_chunk_alloc()
 * takes advantage of this to avoid demanding zeroed chunks, but taking
 * advantage of them if they are returned.
 */
void *
chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
    dss_prec_t dss_prec)
{
	void *ret;

	assert(size != 0);
	assert((size & chunksize_mask) == 0);
	assert(alignment != 0);
	assert((alignment & chunksize_mask) == 0);

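	/*
	 * Try each allocation strategy in precedence order, preferring to
	 * recycle previously discarded chunks before asking the system for
	 * new ones.  dss_prec determines whether the DSS (sbrk) is tried
	 * before mmap, after it, or not at all.
	 */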
	/* "primary" dss. */
	if (config_dss && dss_prec == dss_prec_primary) {
		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
		    alignment, base, zero)) != NULL)
			goto label_return;
		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
			goto label_return;
	}
	/* mmap. */
	if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
	    alignment, base, zero)) != NULL)
		goto label_return;
	if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
		goto label_return;
	/* "secondary" dss. */
	if (config_dss && dss_prec == dss_prec_secondary) {
		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
		    alignment, base, zero)) != NULL)
			goto label_return;
		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
			goto label_return;
	}

	/* All strategies for allocation failed. */
	ret = NULL;
label_return:
	if (config_ivsalloc && base == false && ret != NULL) {
		if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
			chunk_dealloc(ret, size, true);
			return (NULL);
		}
	}
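	/*
	 * Update chunk statistics under chunks_mtx, and note whether this
	 * allocation pushed the number of live chunks to a new high-water
	 * mark; if so, and heap profiling is configured with opt_prof_gdump,
	 * trigger a profile dump outside the critical section.
	 */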
	if ((config_stats || config_prof) && ret != NULL) {
		bool gdump;
		malloc_mutex_lock(&chunks_mtx);
		if (config_stats)
			stats_chunks.nchunks += (size / chunksize);
		stats_chunks.curchunks += (size / chunksize);
		if (stats_chunks.curchunks > stats_chunks.highchunks) {
			stats_chunks.highchunks = stats_chunks.curchunks;
			if (config_prof)
				gdump = true;
		} else if (config_prof)
			gdump = false;
		malloc_mutex_unlock(&chunks_mtx);
		if (config_prof && opt_prof && opt_prof_gdump && gdump)
			prof_gdump();
	}
	if (config_debug && *zero && ret != NULL) {
		size_t i;
		size_t *p = (size_t *)(uintptr_t)ret;

		VALGRIND_MAKE_MEM_DEFINED(ret, size);
		for (i = 0; i < size / sizeof(size_t); i++)
			assert(p[i] == 0);
	}
	assert(CHUNK_ADDR2BASE(ret) == ret);
	return (ret);
}

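/*
 * Record a chunk that is being discarded, so that it can be recycled by
 * future calls to chunk_recycle().  The chunk's pages are purged, and the
 * extent is coalesced with adjacent recorded extents where possible; the
 * zeroed flag tracks whether the resulting extent is still known to be
 * zero-filled.
 */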
static void
chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
    size_t size)
{
	bool unzeroed;
	extent_node_t *xnode, *node, *prev, key;

	unzeroed = pages_purge(chunk, size);

	/*
	 * Allocate a node before acquiring chunks_mtx even though it might not
	 * be needed, because base_node_alloc() may cause a new base chunk to
	 * be allocated, which could cause deadlock if chunks_mtx were already
	 * held.
	 */
	xnode = base_node_alloc();

	malloc_mutex_lock(&chunks_mtx);
	key.addr = (void *)((uintptr_t)chunk + size);
	node = extent_tree_ad_nsearch(chunks_ad, &key);
	/* Try to coalesce forward. */
	if (node != NULL && node->addr == key.addr) {
		/*
		 * Coalesce chunk with the following address range.  This does
		 * not change the position within chunks_ad, so only
		 * remove/insert from/into chunks_szad.
		 */
		extent_tree_szad_remove(chunks_szad, node);
		node->addr = chunk;
		node->size += size;
		node->zeroed = (node->zeroed && (unzeroed == false));
		extent_tree_szad_insert(chunks_szad, node);
		if (xnode != NULL)
			base_node_dealloc(xnode);
	} else {
		/* Coalescing forward failed, so insert a new node. */
		if (xnode == NULL) {
			/*
			 * base_node_alloc() failed, which is an exceedingly
			 * unlikely failure.  Leak chunk; its pages have
			 * already been purged, so this is only a virtual
			 * memory leak.
			 */
			malloc_mutex_unlock(&chunks_mtx);
			return;
		}
		node = xnode;
		node->addr = chunk;
		node->size = size;
		node->zeroed = (unzeroed == false);
		extent_tree_ad_insert(chunks_ad, node);
		extent_tree_szad_insert(chunks_szad, node);
	}

	/* Try to coalesce backward. */
	prev = extent_tree_ad_prev(chunks_ad, node);
	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
	    chunk) {
		/*
		 * Coalesce chunk with the previous address range.  This does
		 * not change the position within chunks_ad, so only
		 * remove/insert node from/into chunks_szad.
		 */
		extent_tree_szad_remove(chunks_szad, prev);
		extent_tree_ad_remove(chunks_ad, prev);

		extent_tree_szad_remove(chunks_szad, node);
		node->addr = prev->addr;
		node->size += prev->size;
		node->zeroed = (node->zeroed && prev->zeroed);
		extent_tree_szad_insert(chunks_szad, node);

		base_node_dealloc(prev);
	}
	malloc_mutex_unlock(&chunks_mtx);
}

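/*
 * Return a chunk's address space to the recycling trees and/or the operating
 * system.  DSS memory cannot be unmapped, so chunks that lie within the DSS
 * are always recorded for later reuse.  Mapped chunks are handed to
 * chunk_dealloc_mmap(), and are recorded only if it declines to unmap them.
 */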
void
chunk_unmap(void *chunk, size_t size)
{
	assert(chunk != NULL);
	assert(CHUNK_ADDR2BASE(chunk) == chunk);
	assert(size != 0);
	assert((size & chunksize_mask) == 0);

	if (config_dss && chunk_in_dss(chunk))
		chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
	else if (chunk_dealloc_mmap(chunk, size))
		chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
}

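/*
 * Remove a chunk from allocator bookkeeping (the ivsalloc rtree and chunk
 * statistics); when unmap is true, also release its address space via
 * chunk_unmap().
 */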
void
chunk_dealloc(void *chunk, size_t size, bool unmap)
{

	assert(chunk != NULL);
	assert(CHUNK_ADDR2BASE(chunk) == chunk);
	assert(size != 0);
	assert((size & chunksize_mask) == 0);

	if (config_ivsalloc)
		rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
	if (config_stats || config_prof) {
		malloc_mutex_lock(&chunks_mtx);
		assert(stats_chunks.curchunks >= (size / chunksize));
		stats_chunks.curchunks -= (size / chunksize);
		malloc_mutex_unlock(&chunks_mtx);
	}

	if (unmap)
		chunk_unmap(chunk, size);
}

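/*
 * One-time initialization of chunk management state, performed during
 * allocator bootstrap: derive chunksize-related constants from opt_lg_chunk,
 * and initialize the mutex, statistics, recycling trees, and (if configured)
 * the DSS machinery and the ivsalloc rtree.
 */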
bool
chunk_boot(void)
{

	/* Set variables according to the value of opt_lg_chunk. */
	chunksize = (ZU(1) << opt_lg_chunk);
	assert(chunksize >= PAGE);
	chunksize_mask = chunksize - 1;
	chunk_npages = (chunksize >> LG_PAGE);

	if (config_stats || config_prof) {
		if (malloc_mutex_init(&chunks_mtx))
			return (true);
		memset(&stats_chunks, 0, sizeof(chunk_stats_t));
	}
	if (config_dss && chunk_dss_boot())
		return (true);
	extent_tree_szad_new(&chunks_szad_mmap);
	extent_tree_ad_new(&chunks_ad_mmap);
	extent_tree_szad_new(&chunks_szad_dss);
	extent_tree_ad_new(&chunks_ad_dss);
	if (config_ivsalloc) {
		chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
		    opt_lg_chunk);
		if (chunks_rtree == NULL)
			return (true);
	}

	return (false);
}

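/*
 * Fork handling: chunk_prefork() acquires chunk-related locks so that no
 * other thread holds them across fork(), and the postfork functions release
 * them again in the parent and restore them to a usable state in the child.
 */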
void
chunk_prefork(void)
{

	malloc_mutex_lock(&chunks_mtx);
	if (config_ivsalloc)
		rtree_prefork(chunks_rtree);
	chunk_dss_prefork();
}

void
chunk_postfork_parent(void)
{

	chunk_dss_postfork_parent();
	if (config_ivsalloc)
		rtree_postfork_parent(chunks_rtree);
	malloc_mutex_postfork_parent(&chunks_mtx);
}

void
chunk_postfork_child(void)
{

	chunk_dss_postfork_child();
	if (config_ivsalloc)
		rtree_postfork_child(chunks_rtree);
	malloc_mutex_postfork_child(&chunks_mtx);
}