#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif
#ifdef MAP_ALIGNED
#include <sys/bitops.h>	/* NetBSD */
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
# define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/
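/*
 * os_pages_map() obtains a fresh mapping of exactly size bytes directly from
 * the OS (VirtualAlloc() on Windows, mmap() elsewhere).  When the system
 * overcommits, the mapping is always treated as committed.  It returns NULL on
 * failure, and also when a specific addr was requested but the OS placed the
 * mapping somewhere else.
 */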
static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int flags = mmap_flags;
#ifdef MAP_ALIGNED
		if (alignment > os_page || PAGE > os_page) {
			int a = ilog2(MAX(alignment, PAGE));
			flags |= MAP_ALIGNED(a);
		}
#endif
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}
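/*
 * pages_map_slow() over-allocates by (alignment - os_page) bytes and trims the
 * excess, so that an alignment-aligned range of size bytes is guaranteed to
 * fit.  Illustrative numbers: with os_page = 4K, size = 16K, and
 * alignment = 2M, alloc_size = 16K + 2M - 4K; because the OS returns an
 * os_page-aligned mapping, the gap to the next 2M boundary (leadsize) is at
 * most 2M - 4K, which always leaves at least 16K beyond it.
 */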
static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
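/*
 * Purging conventions: as elsewhere in this file, the purge functions return
 * true on failure.  Lazy purging (e.g. MADV_FREE) only marks pages as
 * reclaimable, so their old contents may remain visible until the kernel
 * actually takes them back; forced purging (MADV_DONTNEED where it is known
 * to zero) guarantees that the range reads back as zeros afterwards.
 */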
bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Fall back to the compile-time page size. */
		return PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY | O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#  if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat, AT_FDCWD,
	    "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = (int)syscall(SYS_openat, AT_FDCWD,
	    "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#else
#  if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#  else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#  endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif
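/*
 * pages_set_thp_state() applies the opt_thp setting to a newly created mapping
 * when it differs from the system-wide THP mode recorded at boot, issuing
 * MADV_HUGEPAGE or MADV_NOHUGEPAGE (via the unaligned helpers above) so that
 * the range follows the requested policy.
 */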
void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}
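/*
 * init_thp_state() records the kernel's transparent huge page mode by parsing
 * /sys/kernel/mm/transparent_hugepage/enabled.  If MADV_HUGEPAGE is not
 * available, or the file cannot be read or recognized, THP is marked as not
 * supported and opt_thp is forced to match.
 */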
static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/* Guard against a failed read before using nread as a length. */
	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#elif defined(__NetBSD__)
	os_overcommits = true;
#else
	os_overcommits = false;
#endif

	init_thp_state();

	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}
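/*
 * Usage sketch (illustrative only; not part of jemalloc): after pages_boot()
 * succeeds, a caller obtains an aligned mapping with pages_map(), commits it
 * if it was not committed at map time, and later purges and unmaps it:
 *
 *	bool commit = false;
 *	void *p = pages_map(NULL, 4 * PAGE, PAGE, &commit);
 *	if (p != NULL) {
 *		if (!commit && pages_commit(p, 4 * PAGE)) {
 *			pages_unmap(p, 4 * PAGE);	// Commit failed.
 *		} else {
 *			// ... use the pages ...
 *			pages_purge_lazy(p, 4 * PAGE);
 *			pages_unmap(p, 4 * PAGE);
 *		}
 *	}
 */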