1#define JEMALLOC_PAGES_C_
2#include "jemalloc/internal/jemalloc_preamble.h"
3
4#include "jemalloc/internal/pages.h"
5
6#include "jemalloc/internal/jemalloc_internal_includes.h"
7
8#include "jemalloc/internal/assert.h"
9#include "jemalloc/internal/malloc_io.h"
10
11#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
12#include <sys/sysctl.h>
13#ifdef __FreeBSD__
14#include <vm/vm_param.h>
15#endif
16#endif
17#ifdef MAP_ALIGNED
18#include <sys/bitops.h>	/* NetBSD */
19#endif
20
21/******************************************************************************/
22/* Data. */
23
/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t	os_page;

#ifndef _WIN32
/* mmap() protection for committed vs. decommitted (inaccessible) ranges. */
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
/* Baseline mmap() flags; initialized in pages_boot(). */
static int	mmap_flags;
#endif
/* Whether the OS overcommits memory; set once in pages_boot(). */
static bool	os_overcommits;

/* Human-readable names for thp_mode_t values, indexed by mode. */
const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
/* Requested THP mode (opt.thp), and the system mode detected at boot. */
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);
53
54/******************************************************************************/
55
56static void *
57os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
58	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
59	assert(ALIGNMENT_CEILING(size, os_page) == size);
60	assert(size != 0);
61
62	if (os_overcommits) {
63		*commit = true;
64	}
65
66	void *ret;
67#ifdef _WIN32
68	/*
69	 * If VirtualAlloc can't allocate at the given address when one is
70	 * given, it fails and returns NULL.
71	 */
72	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
73	    PAGE_READWRITE);
74#else
75	/*
76	 * We don't use MAP_FIXED here, because it can cause the *replacement*
77	 * of existing mappings, and we only want to create new mappings.
78	 */
79	{
80		int flags = mmap_flags;
81#ifdef MAP_ALIGNED
82		if (alignment > os_page || PAGE > os_page) {
83			int a = ilog2(MAX(alignment, PAGE));
84			flags |= MAP_ALIGNED(a);
85		}
86#endif
87		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
88
89		ret = mmap(addr, size, prot, flags, -1, 0);
90	}
91	assert(ret != NULL);
92
93	if (ret == MAP_FAILED) {
94		ret = NULL;
95	} else if (addr != NULL && ret != addr) {
96		/*
97		 * We succeeded in mapping memory, but not in the right place.
98		 */
99		os_pages_unmap(ret, size);
100		ret = NULL;
101	}
102#endif
103	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
104	    ret == addr));
105	return ret;
106}
107
/*
 * Shrink an over-sized mapping to the size-byte subrange that begins
 * leadsize bytes into it.  Returns the trimmed base, or NULL (Windows only)
 * if the remap race is lost.
 */
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *base = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	/*
	 * Windows cannot split a mapping, so release the whole thing and
	 * race to re-map just the aligned region.
	 */
	os_pages_unmap(addr, alloc_size);
	void *remap = os_pages_map(base, size, PAGE, commit);
	if (remap == base) {
		return base;
	}
	if (remap != NULL) {
		/* Lost the race; got memory at the wrong place. */
		os_pages_unmap(remap, size);
	}
	return NULL;
#else
	/* POSIX: unmap the excess head and tail in place. */
	size_t trailsize = alloc_size - leadsize - size;
	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)base + size), trailsize);
	}
	return base;
#endif
}
136
137static void
138os_pages_unmap(void *addr, size_t size) {
139	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
140	assert(ALIGNMENT_CEILING(size, os_page) == size);
141
142#ifdef _WIN32
143	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
144#else
145	if (munmap(addr, size) == -1)
146#endif
147	{
148		char buf[BUFERROR_BUF];
149
150		buferror(get_errno(), buf, sizeof(buf));
151		malloc_printf("<jemalloc>: Error in "
152#ifdef _WIN32
153		    "VirtualFree"
154#else
155		    "munmap"
156#endif
157		    "(): %s\n", buf);
158		if (opt_abort) {
159			abort();
160		}
161	}
162}
163
164static void *
165pages_map_slow(size_t size, size_t alignment, bool *commit) {
166	size_t alloc_size = size + alignment - os_page;
167	/* Beware size_t wrap-around. */
168	if (alloc_size < size) {
169		return NULL;
170	}
171
172	void *ret;
173	do {
174		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
175		if (pages == NULL) {
176			return NULL;
177		}
178		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
179		    - (uintptr_t)pages;
180		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
181	} while (ret == NULL);
182
183	assert(ret != NULL);
184	assert(PAGE_ADDR2BASE(ret) == ret);
185	return ret;
186}
187
188void *
189pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
190	assert(alignment >= PAGE);
191	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
192
193	/*
194	 * Ideally, there would be a way to specify alignment to mmap() (like
195	 * NetBSD has), but in the absence of such a feature, we have to work
196	 * hard to efficiently create aligned mappings.  The reliable, but
197	 * slow method is to create a mapping that is over-sized, then trim the
198	 * excess.  However, that always results in one or two calls to
199	 * os_pages_unmap(), and it can leave holes in the process's virtual
200	 * memory map if memory grows downward.
201	 *
202	 * Optimistically try mapping precisely the right amount before falling
203	 * back to the slow method, with the expectation that the optimistic
204	 * approach works most of the time.
205	 */
206
207	void *ret = os_pages_map(addr, size, os_page, commit);
208	if (ret == NULL || ret == addr) {
209		return ret;
210	}
211	assert(addr == NULL);
212	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
213		os_pages_unmap(ret, size);
214		return pages_map_slow(size, alignment, commit);
215	}
216
217	assert(PAGE_ADDR2BASE(ret) == ret);
218	return ret;
219}
220
/* Public unmap entry point; validates page alignment then delegates. */
void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}
228
229static bool
230pages_commit_impl(void *addr, size_t size, bool commit) {
231	assert(PAGE_ADDR2BASE(addr) == addr);
232	assert(PAGE_CEILING(size) == size);
233
234	if (os_overcommits) {
235		return true;
236	}
237
238#ifdef _WIN32
239	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
240	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
241#else
242	{
243		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
244		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
245		    -1, 0);
246		if (result == MAP_FAILED) {
247			return true;
248		}
249		if (result != addr) {
250			/*
251			 * We succeeded in mapping memory, but not in the right
252			 * place.
253			 */
254			os_pages_unmap(result, size);
255			return true;
256		}
257		return false;
258	}
259#endif
260}
261
/* Make pages readable/writable.  Returns true on failure. */
bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}
266
/* Make pages inaccessible, releasing backing store.  True on failure. */
bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
271
272bool
273pages_purge_lazy(void *addr, size_t size) {
274	assert(PAGE_ADDR2BASE(addr) == addr);
275	assert(PAGE_CEILING(size) == size);
276
277	if (!pages_can_purge_lazy) {
278		return true;
279	}
280	if (!pages_can_purge_lazy_runtime) {
281		/*
282		 * Built with lazy purge enabled, but detected it was not
283		 * supported on the current system.
284		 */
285		return true;
286	}
287
288#ifdef _WIN32
289	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
290	return false;
291#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
292	return (madvise(addr, size,
293#  ifdef MADV_FREE
294	    MADV_FREE
295#  else
296	    JEMALLOC_MADV_FREE
297#  endif
298	    ) != 0);
299#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
300    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
301	return (madvise(addr, size, MADV_DONTNEED) != 0);
302#else
303	not_reached();
304#endif
305}
306
307bool
308pages_purge_forced(void *addr, size_t size) {
309	assert(PAGE_ADDR2BASE(addr) == addr);
310	assert(PAGE_CEILING(size) == size);
311
312	if (!pages_can_purge_forced) {
313		return true;
314	}
315
316#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
317    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
318	return (madvise(addr, size, MADV_DONTNEED) != 0);
319#elif defined(JEMALLOC_MAPS_COALESCE)
320	/* Try to overlay a new demand-zeroed mapping. */
321	return pages_commit(addr, size);
322#else
323	not_reached();
324#endif
325}
326
/*
 * Request transparent huge pages for the range.  aligned indicates whether
 * the caller guarantees hugepage alignment.  Returns true on failure.
 */
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	int err = madvise(addr, size, MADV_HUGEPAGE);
	return (err != 0);
#else
	return true;
#endif
}
339
/* Hugepage-aligned variant of the THP-enable request. */
bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}
344
/* THP-enable request for ranges without hugepage alignment guarantees. */
static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}
349
/*
 * Opt the range out of transparent huge pages.  aligned indicates whether
 * the caller guarantees hugepage alignment.  Returns true on failure.
 */
static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	int err = madvise(addr, size, MADV_NOHUGEPAGE);
	return (err != 0);
#else
	/* Without MADV_HUGEPAGE support THP is never applied: success. */
	return false;
#endif
}
363
/* Hugepage-aligned variant of the THP opt-out request. */
bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}
368
/* THP opt-out request for ranges without hugepage alignment guarantees. */
static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}
373
/* Exclude the range from core dumps where supported.  True on failure. */
bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	int err = madvise(addr, size, MADV_DONTDUMP);
	return (err != 0);
#else
	/* Nothing to exclude; trivially successful. */
	return false;
#endif
}
384
/* Re-include the range in core dumps where supported.  True on failure. */
bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	int err = madvise(addr, size, MADV_DODUMP);
	return (err != 0);
#else
	/* Never excluded in the first place; trivially successful. */
	return false;
#endif
}
395
396
397static size_t
398os_page_detect(void) {
399#ifdef _WIN32
400	SYSTEM_INFO si;
401	GetSystemInfo(&si);
402	return si.dwPageSize;
403#elif defined(__FreeBSD__)
404	return getpagesize();
405#else
406	long result = sysconf(_SC_PAGESIZE);
407	if (result == -1) {
408		return LG_PAGE;
409	}
410	return (size_t)result;
411#endif
412}
413
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
/*
 * Query the BSD vm.overcommit sysctl.  Returns true if the kernel will
 * overcommit memory; false when the policy forbids it or the query fails.
 */
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz = sizeof(vm_overcommit);

#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	/* FreeBSD exposes a stable MIB; skip the name lookup. */
	int mib[2] = {CTL_VM, VM_OVERCOMMIT};

	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	/* The low two bits, when set, disable overcommit. */
	return ((vm_overcommit & 0x3) == 0);
}
#endif
438
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

	/* Open /proc/sys/vm/overcommit_memory, close-on-exec if possible. */
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY | O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat, AT_FDCWD,
	    "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_openat, AT_FDCWD,
	    "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#else
#  if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif
505
506void
507pages_set_thp_state (void *ptr, size_t size) {
508	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
509		return;
510	}
511	assert(opt_thp != thp_mode_not_supported &&
512	    init_system_thp_mode != thp_mode_not_supported);
513
514	if (opt_thp == thp_mode_always
515	    && init_system_thp_mode != thp_mode_never) {
516		assert(init_system_thp_mode == thp_mode_default);
517		pages_huge_unaligned(ptr, size);
518	} else if (opt_thp == thp_mode_never) {
519		assert(init_system_thp_mode == thp_mode_default ||
520		    init_system_thp_mode == thp_mode_always);
521		pages_nohuge_unaligned(ptr, size);
522	}
523}
524
525static void
526init_thp_state(void) {
527	if (!have_madvise_huge) {
528		if (metadata_thp_enabled() && opt_abort) {
529			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
530			abort();
531		}
532		goto label_error;
533	}
534
535	static const char sys_state_madvise[] = "always [madvise] never\n";
536	static const char sys_state_always[] = "[always] madvise never\n";
537	static const char sys_state_never[] = "always madvise [never]\n";
538	char buf[sizeof(sys_state_madvise)];
539
540#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
541	int fd = (int)syscall(SYS_open,
542	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
543#else
544	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
545#endif
546	if (fd == -1) {
547		goto label_error;
548	}
549
550	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
551#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
552	syscall(SYS_close, fd);
553#else
554	close(fd);
555#endif
556
557	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
558		init_system_thp_mode = thp_mode_default;
559	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
560		init_system_thp_mode = thp_mode_always;
561	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
562		init_system_thp_mode = thp_mode_never;
563	} else {
564		goto label_error;
565	}
566	return;
567label_error:
568	opt_thp = init_system_thp_mode = thp_mode_not_supported;
569}
570
571bool
572pages_boot(void) {
573	os_page = os_page_detect();
574	if (os_page > PAGE) {
575		malloc_write("<jemalloc>: Unsupported system page size\n");
576		if (opt_abort) {
577			abort();
578		}
579		return true;
580	}
581
582#ifndef _WIN32
583	mmap_flags = MAP_PRIVATE | MAP_ANON;
584#endif
585
586#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
587	os_overcommits = os_overcommits_sysctl();
588#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
589	os_overcommits = os_overcommits_proc();
590#  ifdef MAP_NORESERVE
591	if (os_overcommits) {
592		mmap_flags |= MAP_NORESERVE;
593	}
594#  endif
595#elif defined(__NetBSD__)
596	os_overcommits = true;
597#else
598	os_overcommits = false;
599#endif
600
601	init_thp_state();
602
603	/* Detect lazy purge runtime support. */
604	if (pages_can_purge_lazy) {
605		bool committed = false;
606		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
607		if (madv_free_page == NULL) {
608			return true;
609		}
610		assert(pages_can_purge_lazy_runtime);
611		if (pages_purge_lazy(madv_free_page, PAGE)) {
612			pages_can_purge_lazy_runtime = false;
613		}
614		os_pages_unmap(madv_free_page, PAGE);
615	}
616
617	return false;
618}
619