1#define	JEMALLOC_PAGES_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3
4#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
5#include <sys/sysctl.h>
6#endif
7
8/******************************************************************************/
9/* Data. */
10
11#if !defined(_WIN32) && !defined(__Fuchsia__)
12#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
13#  define PAGES_PROT_DECOMMIT (PROT_NONE)
14static int	mmap_flags;
15#endif
16static bool	os_overcommits;
17
18/******************************************************************************/
19
20#ifdef __Fuchsia__
21
22#include <threads.h>
23
24#include <zircon/process.h>
25#include <zircon/status.h>
26#include <zircon/syscalls.h>
27
28// Reserve a terabyte of address space for heap allocations.
29#define VMAR_SIZE (1ull << 40)
30
31#define MMAP_VMO_NAME "jemalloc-heap"
32
33// malloc wants to manage both address space and memory mapped within
34// chunks of address space. To maintain claims to address space we
35// must use our own vmar.
36static uintptr_t pages_base;
37static zx_handle_t pages_vmar;
38static zx_handle_t pages_vmo;
39
40// Protect reservations to the pages_vmar.
41static mtx_t vmar_lock;
42
43static void* fuchsia_pages_map(void* start, size_t len) {
44	if (len >= PTRDIFF_MAX) {
45		return NULL;
46	}
47
48	// round up to page size
49	len = (len + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
50
51	mtx_lock(&vmar_lock);
52
53	// If we are given a base address, then jemalloc's internal
54	// bookkeeping expects to be able to extend an allocation at
55	// that bit of the address space, and so we just directly
56	// compute an offset. If we are not, ask for a new random
57	// region from the pages_vmar.
58
59	// TODO(kulakowski) Extending a region might fail. Investigate
60	// whether it is worthwhile teaching jemalloc about vmars and
61	// vmos at the extent.c or arena.c layer.
62	size_t offset;
63	if (start != NULL) {
64		uintptr_t addr = (uintptr_t)start;
65		if (addr < pages_base)
66			abort();
67		offset = addr - pages_base;
68	} else {
69		// TODO(kulakowski) Use ZX-942 instead of having to
70		// allocate and destroy under a lock.
71		zx_handle_t subvmar;
72		uintptr_t subvmar_base;
73		zx_status_t status = _zx_vmar_allocate(pages_vmar,
74		    ZX_VM_CAN_MAP_READ | ZX_VM_CAN_MAP_WRITE,
75			  0u, len, &subvmar, &subvmar_base);
76		if (status != ZX_OK)
77			abort();
78		_zx_vmar_destroy(subvmar);
79		_zx_handle_close(subvmar);
80		offset = subvmar_base - pages_base;
81	}
82
83	uintptr_t ptr = 0;
84	zx_vm_option_t zx_options = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE |
85	    ZX_VM_SPECIFIC;
86	zx_status_t status = _zx_vmar_map(pages_vmar, zx_options, offset, pages_vmo,
87	    offset, len, &ptr);
88	if (status != ZX_OK) {
89		ptr = 0u;
90	}
91
92	mtx_unlock(&vmar_lock);
93	return (void*)ptr;
94}
95
96static zx_status_t fuchsia_pages_free(void* addr, size_t size) {
97	uintptr_t ptr = (uintptr_t)addr;
98	return _zx_vmar_unmap(pages_vmar, ptr, size);
99}
100
/*
 * Shrink the mapping [addr, addr+alloc_size) down to the subrange
 * [ret, ret+size) by unmapping the excess on either side.
 */
static void* fuchsia_pages_trim(void* ret, void* addr, size_t size,
    size_t alloc_size, size_t leadsize) {
	/* Drop any slack before the returned region... */
	if (leadsize != 0)
		pages_unmap(addr, leadsize);

	/* ...and any slack after it. */
	size_t trailsize = alloc_size - leadsize - size;
	if (trailsize != 0)
		pages_unmap((void *)((uintptr_t)ret + size), trailsize);

	return (ret);
}
111
112#endif
113
/*
 * Map size bytes of anonymous memory, preferably at addr. Returns the
 * mapped address (== addr when addr was requested) or NULL on failure.
 * *commit is forced to true when the OS overcommits.
 */
void *
pages_map(void *addr, size_t size, bool *commit)
{
	void *ret;

	assert(size != 0);

	/* Under overcommit, all mappings are reported as committed. */
	if (os_overcommits)
		*commit = true;

#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#elif __Fuchsia__
	/* Maps out of the vmar/vmo reserved in pages_boot(). */
	ret = fuchsia_pages_map(addr, size);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED)
		ret = NULL;
	else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	/* Either failure, or success honoring the caller's address hint. */
	assert(ret == NULL || (addr == NULL && ret != addr)
	    || (addr != NULL && ret == addr));
	return (ret);
}
159
/*
 * Unmap [addr, addr+size). On failure, print a diagnostic and abort when
 * opt_abort is set.
 */
void
pages_unmap(void *addr, size_t size)
{
	/*
	 * Each platform branch below ends in a dangling "if (<failed>)"
	 * that is completed by the shared error-reporting block following
	 * the #endif.
	 */
#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#elif __Fuchsia__
	zx_status_t status = fuchsia_pages_free(addr, size);
	if (status != ZX_OK)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		/* Fuchsia reports a zx_status_t string; others use errno. */
#if __Fuchsia__
		const char* buf = _zx_status_get_string(status);
#else
		char buf[BUFERROR_BUF];
		buferror(get_errno(), buf, sizeof(buf));
#endif

		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		              "VirtualFree"
#elif __Fuchsia__
		              "unmapping jemalloc heap pages"
#else
		              "munmap"
#endif
		              "(): %s\n", buf);
		if (opt_abort)
			abort();
	}
}
192
/*
 * Given a mapping [addr, addr+alloc_size), keep only the subrange of
 * "size" bytes starting leadsize bytes in; unmap the excess before and
 * after. Returns the retained address, or NULL on failure.
 */
void *
pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit)
{
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	{
		/*
		 * Windows cannot release part of a VirtualAlloc'd region, so
		 * release the whole mapping and re-map just the target
		 * subrange at the same address.
		 */
		void *new_addr;

		pages_unmap(addr, alloc_size);
		new_addr = pages_map(ret, size, commit);
		if (new_addr == ret)
			return (ret);
		if (new_addr)
			pages_unmap(new_addr, size);
		return (NULL);
	}
#elif __Fuchsia__
	return fuchsia_pages_trim(ret, addr, size, alloc_size, leadsize);
#else
	{
		size_t trailsize = alloc_size - leadsize - size;

		/* munmap supports partial unmapping, so trim in place. */
		if (leadsize != 0)
			pages_unmap(addr, leadsize);
		if (trailsize != 0)
			pages_unmap((void *)((uintptr_t)ret + size), trailsize);
		return (ret);
	}
#endif
}
226
/*
 * Commit (commit == true) or decommit (false) the pages in
 * [addr, addr+size). Returns true on failure, false on success
 * (jemalloc's error convention).
 */
static bool
pages_commit_impl(void *addr, size_t size, bool commit)
{
	/*
	 * Under overcommit, explicit (de)commit is not performed; report
	 * failure so callers keep treating the pages as committed.
	 */
	if (os_overcommits)
		return (true);

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#elif __Fuchsia__
	/* Fuchsia sets os_overcommits = true in pages_boot(). */
	not_reached();
#else
	{
		/*
		 * Re-mmap the same range with the desired protection;
		 * MAP_FIXED is safe because the range is already ours.
		 */
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED)
			return (true);
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			pages_unmap(result, size);
			return (true);
		}
		return (false);
	}
#endif
}
257
/* Commit the pages in [addr, addr+size); returns true on failure. */
bool
pages_commit(void *addr, size_t size)
{
	return pages_commit_impl(addr, size, true);
}
263
/* Decommit the pages in [addr, addr+size); returns true on failure. */
bool
pages_decommit(void *addr, size_t size)
{
	return pages_commit_impl(addr, size, false);
}
269
/*
 * Lazily purge [addr, addr+size): hint to the OS that the contents are
 * no longer needed, without forcing immediate reclamation. Returns true
 * when lazy purging is unsupported, false otherwise.
 */
bool
pages_purge_lazy(void *addr, size_t size)
{
	if (!pages_can_purge_lazy)
		return (true);

#ifdef _WIN32
	/* MEM_RESET marks the range as discardable; result is advisory. */
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	madvise(addr, size, MADV_FREE);
#else
	not_reached();
#endif
	return (false);
}
285
/*
 * Forcibly purge [addr, addr+size) so the pages are discarded now.
 * Returns true on failure or when forced purging is unsupported.
 */
bool
pages_purge_forced(void *addr, size_t size)
{
	if (!pages_can_purge_forced)
		return (true);

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}
298
/*
 * Request transparent huge pages for the hugepage-aligned range
 * [addr, addr+size). Returns true on failure or when THP support was
 * not configured.
 */
bool
pages_huge(void *addr, size_t size)
{
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	assert(HUGEPAGE_CEILING(size) == size);

#ifdef JEMALLOC_THP
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return (true);
#endif
}
311
/*
 * Opt the hugepage-aligned range [addr, addr+size) out of transparent
 * huge pages. Returns true on failure; without THP support there is
 * nothing to undo, so it trivially succeeds (false).
 */
bool
pages_nohuge(void *addr, size_t size)
{
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	assert(HUGEPAGE_CEILING(size) == size);

#ifdef JEMALLOC_THP
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return (false);
#endif
}
324
325#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
/*
 * Probe the "vm.overcommit" sysctl (BSD). Returns true when the OS
 * overcommits memory, false otherwise or on error.
 */
static bool
os_overcommits_sysctl(void)
{
	int value;
	size_t len = sizeof(value);

	if (sysctlbyname("vm.overcommit", &value, &len, NULL, 0) != 0)
		return (false); /* Error. */

	/* Overcommit is in effect iff the low two mode bits are clear. */
	return ((value & 0x3) == 0);
}
338#endif
339
340#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
341/*
342 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
343 * reentry during bootstrapping if another library has interposed system call
344 * wrappers.
345 */
/*
 * Probe /proc/sys/vm/overcommit_memory (Linux). Returns true when the
 * kernel overcommits (modes 0 and 1), false for mode 2 or on any error.
 */
static bool
os_overcommits_proc(void)
{
	int fd;
	char buf[1];
	ssize_t nread;

	/* Raw syscalls avoid interposed libc wrappers during bootstrap. */
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
#endif
	if (fd == -1)
		return (false); /* Error. */

	/* A single leading character is enough to identify the mode. */
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
	nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
	nread = read(fd, &buf, sizeof(buf));
#endif

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1)
		return (false); /* Error. */
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
383#endif
384
/*
 * One-time initialization for this module: set up mmap flags, reserve
 * the Fuchsia heap vmar/vmo, and detect whether the OS overcommits.
 * Aborts on any Fuchsia setup failure.
 */
void
pages_boot(void)
{
#if !defined(_WIN32) && !defined(__Fuchsia__)
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#if defined(__Fuchsia__)
	/*
	 * Reserve VMAR_SIZE bytes of address space for the heap, back it
	 * with a single vmo of the same size, and name the vmo so it is
	 * identifiable in diagnostics.
	 */
	zx_vm_option_t vmar_flags = ZX_VM_CAN_MAP_SPECIFIC | ZX_VM_CAN_MAP_READ |
	    ZX_VM_CAN_MAP_WRITE;
	zx_status_t status = _zx_vmar_allocate(_zx_vmar_root_self(), vmar_flags, 0,
	                        VMAR_SIZE, &pages_vmar, &pages_base);
	if (status != ZX_OK)
		abort();
	status = _zx_vmo_create(VMAR_SIZE, 0, &pages_vmo);
	if (status != ZX_OK)
		abort();
	status = _zx_object_set_property(pages_vmo, ZX_PROP_NAME, MMAP_VMO_NAME,
	    strlen(MMAP_VMO_NAME));
	if (status != ZX_OK)
		abort();
#endif

	/*
	 * Determine overcommit behavior: Fuchsia is treated as always
	 * overcommitting; BSDs use sysctl; Linux reads procfs (and then
	 * avoids reserving swap via MAP_NORESERVE when available).
	 */
#if defined(__Fuchsia__)
	os_overcommits = true;
#elif defined(JEMALLOC_SYSCTL_VM_OVERCOMMIT)
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits)
		mmap_flags |= MAP_NORESERVE;
#  endif
#else
	os_overcommits = false;
#endif
}
422