283#endif 284 285/* sizeof(int) == (1 << SIZEOF_INT_2POW). */ 286#ifndef SIZEOF_INT_2POW 287# define SIZEOF_INT_2POW 2 288#endif 289 290/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ 291#if (!defined(PIC) && !defined(NO_TLS)) 292# define NO_TLS 293#endif 294 295/* 296 * Size and alignment of memory chunks that are allocated by the OS's virtual 297 * memory system. 298 * 299 * chunksize limits: 300 * 301 * 2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28 302 */ 303#define CHUNK_2POW_DEFAULT 21 304#define CHUNK_2POW_MAX 28 305 306/* 307 * Maximum size of L1 cache line. This is used to avoid cache line aliasing, 308 * so over-estimates are okay (up to a point), but under-estimates will 309 * negatively affect performance. 310 */ 311#define CACHELINE_2POW 6 312#define CACHELINE ((size_t)(1 << CACHELINE_2POW)) 313 314/* 315 * Maximum size class that is a multiple of the quantum, but not (necessarily) 316 * a power of 2. Above this size, allocations are rounded up to the nearest 317 * power of 2. 318 */ 319#define SMALL_MAX_2POW_DEFAULT 9 320#define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT) 321 322/* 323 * Minimum number of regions that must fit into a run that serves quantum-size 324 * bin allocations. 325 * 326 * Note that if this is set too low, space will be wasted if there are size 327 * classes that are small enough that RUN_MIN_REGS regions don't fill a page. 328 * If this is set too high, then the overhead of searching through the bitmap 329 * that tracks region usage will become excessive. 330 */ 331#define RUN_MIN_REGS_2POW 10 332#define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW) 333 334/* 335 * Maximum number of pages for a run that is used for bin allocations. 336 * 337 * Note that if this is set too low, then fragmentation for the largest bin 338 * size classes will be high. If this is set too high, then even small 339 * programs will often have to allocate more than two chunks early on. 340 */ 341#define RUN_MAX_PAGES_2POW 4 342#define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW) 343 344/******************************************************************************/ 345 346/* 347 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because 348 * they require malloc()ed memory. 349 */ 350typedef struct { 351 spinlock_t lock; 352} malloc_mutex_t; 353 354/* Set to true once the allocator has been initialized. */ 355static bool malloc_initialized = false; 356 357/* Used to avoid initialization races. */ 358static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER}; 359 360/******************************************************************************/ 361/* 362 * Statistics data structures. 363 */ 364 365#ifdef MALLOC_STATS 366 367typedef struct malloc_bin_stats_s malloc_bin_stats_t; 368struct malloc_bin_stats_s { 369 /* 370 * Number of allocation requests that corresponded to the size of this 371 * bin. 372 */ 373 uint64_t nrequests; 374 375 /* Total number of runs created for this bin's size class. */ 376 uint64_t nruns; 377 378 /* 379 * Total number of run promotions/demotions for this bin's size class. 380 */ 381 uint64_t npromote; 382 uint64_t ndemote; 383 384 /* High-water mark for this bin. */ 385 unsigned long highruns; 386 387 /* Current number of runs in this bin. */ 388 unsigned long curruns; 389}; 390 391typedef struct arena_stats_s arena_stats_t; 392struct arena_stats_s { 393 /* Total byte count of allocated memory, not including overhead. */ 394 size_t allocated; 395 396 /* Number of times each function was called. */ 397 uint64_t nmalloc; 398 uint64_t ndalloc; 399 uint64_t nmadvise; 400 401 /* Number of large allocation requests. */ 402 uint64_t large_nrequests; 403}; 404 405typedef struct chunk_stats_s chunk_stats_t; 406struct chunk_stats_s { 407 /* Number of chunks that were allocated. */ 408 uint64_t nchunks; 409 410 /* High-water mark for number of chunks allocated. */ 411 unsigned long highchunks; 412 413 /* 414 * Current number of chunks allocated. This value isn't maintained for 415 * any other purpose, so keep track of it in order to be able to set 416 * highchunks. 417 */ 418 unsigned long curchunks; 419}; 420 421#endif /* #ifdef MALLOC_STATS */ 422 423/******************************************************************************/ 424/* 425 * Chunk data structures. 426 */ 427 428/* Tree of chunks. */ 429typedef struct chunk_node_s chunk_node_t; 430struct chunk_node_s { 431 /* Linkage for the chunk tree. */ 432 RB_ENTRY(chunk_node_s) link; 433 434 /* 435 * Pointer to the chunk that this tree node is responsible for. In some 436 * (but certainly not all) cases, this data structure is placed at the 437 * beginning of the corresponding chunk, so this field may point to this 438 * node. 439 */ 440 void *chunk; 441 442 /* Total chunk size. */ 443 size_t size; 444}; 445typedef struct chunk_tree_s chunk_tree_t; 446RB_HEAD(chunk_tree_s, chunk_node_s); 447 448/******************************************************************************/ 449/* 450 * Arena data structures. 451 */ 452 453typedef struct arena_s arena_t; 454typedef struct arena_bin_s arena_bin_t; 455 456typedef struct arena_chunk_map_s arena_chunk_map_t; 457struct arena_chunk_map_s { 458 bool free:1; 459 bool large:1; 460 unsigned npages:15; /* Limiting factor for CHUNK_2POW_MAX. */ 461 unsigned pos:15; 462}; 463 464/* Arena chunk header. */ 465typedef struct arena_chunk_s arena_chunk_t; 466struct arena_chunk_s { 467 /* Arena that owns the chunk. */ 468 arena_t *arena; 469 470 /* Linkage for the arena's chunk tree. */ 471 RB_ENTRY(arena_chunk_s) link; 472 473 /* 474 * Number of pages in use. This is maintained in order to make 475 * detection of empty chunks fast. 476 */ 477 uint32_t pages_used; 478 479 /* 480 * Array of counters that keeps track of how many free runs of each 481 * size are available in this chunk. This table is sized at compile 482 * time, which is wasteful. However, due to unrelated rounding, this 483 * doesn't actually waste any otherwise useful space. 484 * 485 * index == 2^n pages 486 * 487 * index | npages 488 * ------+------- 489 * 0 | 1 490 * 1 | 2 491 * 2 | 4 492 * 3 | 8 493 * : 494 */ 495 uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */]; 496 497 /* Map of pages within chunk that keeps track of free/large/small. */ 498 arena_chunk_map_t map[1]; /* Dynamically sized. */ 499}; 500typedef struct arena_chunk_tree_s arena_chunk_tree_t; 501RB_HEAD(arena_chunk_tree_s, arena_chunk_s); 502 503typedef struct arena_run_s arena_run_t; 504struct arena_run_s { 505 /* Linkage for run rings. */ 506 qr(arena_run_t) link; 507 508#ifdef MALLOC_DEBUG 509 uint32_t magic; 510# define ARENA_RUN_MAGIC 0x384adf93 511#endif 512 513 /* Bin this run is associated with. */ 514 arena_bin_t *bin; 515 516 /* Bitmask of in-use regions (0: in use, 1: free). */ 517#define REGS_MASK_NELMS \ 518 (1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2)) 519 unsigned regs_mask[REGS_MASK_NELMS]; 520 521 /* Index of first element that might have a free region. */ 522 unsigned regs_minelm; 523 524 /* Number of free regions in run. */ 525 unsigned nfree; 526 527 /* 528 * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_25, 529 * RUN_Q50, RUN_Q75, RUN_Q100}. 530 */ 531#define RUN_QINIT 0 532#define RUN_Q0 1 533#define RUN_Q25 2 534#define RUN_Q50 3 535#define RUN_Q75 4 536#define RUN_Q100 5 537 unsigned quartile; 538 539 /* 540 * Limits on the number of free regions for the fullness quartile this 541 * run is currently in. If nfree goes outside these limits, the run 542 * is moved to a different fullness quartile. 543 */ 544 unsigned free_max; 545 unsigned free_min; 546}; 547 548/* Used for run ring headers, where the run isn't actually used. */ 549typedef struct arena_run_link_s arena_run_link_t; 550struct arena_run_link_s { 551 /* Linkage for run rings. */ 552 qr(arena_run_t) link; 553}; 554 555struct arena_bin_s { 556 /* 557 * Current run being used to service allocations of this bin's size 558 * class. 559 */ 560 arena_run_t *runcur; 561 562 /* 563 * Links into rings of runs, of various fullnesses (names indicate 564 * approximate lower bounds). A new run conceptually starts off in 565 * runsinit, and it isn't inserted into the runs0 ring until it 566 * reaches 25% full (hysteresis mechanism). For the run to be moved 567 * again, it must become either empty or 50% full. Thus, each ring 568 * contains runs that are within 50% above the advertised fullness for 569 * the ring. This provides a low-overhead mechanism for segregating 570 * runs into approximate fullness classes. 571 * 572 * Conceptually, there is a runs100 that contains completely full runs. 573 * Since we don't need to search for these runs though, no runs100 ring 574 * is actually maintained. 575 * 576 * These rings are useful when looking for an existing run to use when 577 * runcur is no longer usable. We look for usable runs in the 578 * following order: 579 * 580 * 1) runs50 581 * 2) runs25 582 * 3) runs0 583 * 4) runs75 584 * 585 * runs75 isn't a good place to look, because it contains runs that may 586 * be nearly completely full. Still, we look there as a last resort in 587 * order to avoid allocating a new run if at all possible. 588 */ 589 /* arena_run_link_t runsinit; 0% <= fullness < 25% */ 590 arena_run_link_t runs0; /* 0% < fullness < 50% */ 591 arena_run_link_t runs25; /* 25% < fullness < 75% */ 592 arena_run_link_t runs50; /* 50% < fullness < 100% */ 593 arena_run_link_t runs75; /* 75% < fullness < 100% */ 594 /* arena_run_link_t runs100; fullness == 100% */ 595 596 /* Size of regions in a run for this bin's size class. */ 597 size_t reg_size; 598 599 /* Total size of a run for this bin's size class. */ 600 size_t run_size; 601 602 /* Total number of regions in a run for this bin's size class. */ 603 uint32_t nregs; 604 605 /* Offset of first region in a run for this bin's size class. */ 606 uint32_t reg0_offset; 607 608#ifdef MALLOC_STATS 609 /* Bin statistics. */ 610 malloc_bin_stats_t stats; 611#endif 612}; 613 614struct arena_s { 615#ifdef MALLOC_DEBUG 616 uint32_t magic; 617# define ARENA_MAGIC 0x947d3d24 618#endif 619 620 /* All operations on this arena require that mtx be locked. */ 621 malloc_mutex_t mtx; 622 623#ifdef MALLOC_STATS 624 arena_stats_t stats; 625#endif 626 627 /* 628 * Tree of chunks this arena manages. 629 */ 630 arena_chunk_tree_t chunks; 631 632 /* 633 * bins is used to store rings of free regions of the following sizes, 634 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS. 635 * 636 * bins[i] | size | 637 * --------+------+ 638 * 0 | 2 | 639 * 1 | 4 | 640 * 2 | 8 | 641 * --------+------+ 642 * 3 | 16 | 643 * 4 | 32 | 644 * 5 | 48 | 645 * 6 | 64 | 646 * : : 647 * : : 648 * 33 | 496 | 649 * 34 | 512 | 650 * --------+------+ 651 * 35 | 1024 | 652 * 36 | 2048 | 653 * --------+------+ 654 */ 655 arena_bin_t bins[1]; /* Dynamically sized. */ 656}; 657 658/******************************************************************************/ 659/* 660 * Data. 661 */ 662 663/* Number of CPUs. */ 664static unsigned ncpus; 665 666/* VM page size. */ 667static unsigned pagesize; 668static unsigned pagesize_2pow; 669 670/* Various bin-related settings. */ 671static size_t bin_maxclass; /* Max size class for bins. */ 672static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */ 673static unsigned nqbins; /* Number of quantum-spaced bins. */ 674static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */ 675static size_t small_min; 676static size_t small_max; 677static unsigned tiny_min_2pow; 678 679/* Various quantum-related settings. */ 680static size_t quantum; 681static size_t quantum_mask; /* (quantum - 1). */ 682 683/* Various chunk-related settings. */ 684static size_t chunk_size; 685static size_t chunk_size_mask; /* (chunk_size - 1). */ 686static size_t arena_maxclass; /* Max size class for arenas. */ 687static unsigned arena_chunk_maplen; 688 689/********/ 690/* 691 * Chunks. 692 */ 693 694/* Protects chunk-related data structures. */ 695static malloc_mutex_t chunks_mtx; 696 697/* Tree of chunks that are stand-alone huge allocations. */ 698static chunk_tree_t huge; 699 700#ifdef USE_BRK 701/* 702 * Try to use brk for chunk-size allocations, due to address space constraints. 703 */ 704/* Result of first sbrk(0) call. */ 705static void *brk_base; 706/* Current end of brk, or ((void *)-1) if brk is exhausted. */ 707static void *brk_prev; 708/* Current upper limit on brk addresses. */ 709static void *brk_max; 710#endif 711 712#ifdef MALLOC_STATS 713/* 714 * Byte counters for allocated/total space used by the chunks in the huge 715 * allocations tree. 716 */ 717static uint64_t huge_nmalloc; 718static uint64_t huge_ndalloc; 719static size_t huge_allocated; 720#endif 721 722/* 723 * Tree of chunks that were previously allocated. This is used when allocating 724 * chunks, in an attempt to re-use address space. 725 */ 726static chunk_tree_t old_chunks; 727 728/****************************/ 729/* 730 * base (internal allocation). 731 */ 732 733/* 734 * Current chunk that is being used for internal memory allocations. This 735 * chunk is carved up in cacheline-size quanta, so that there is no chance of 736 * false cache line sharing. 737 */ 738static void *base_chunk; 739static void *base_next_addr; 740static void *base_past_addr; /* Addr immediately past base_chunk. */ 741static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */ 742static malloc_mutex_t base_mtx; 743#ifdef MALLOC_STATS 744static uint64_t base_total; 745#endif 746 747/********/ 748/* 749 * Arenas. 750 */ 751 752/* 753 * Arenas that are used to service external requests. Not all elements of the 754 * arenas array are necessarily used; arenas are created lazily as needed. 755 */ 756static arena_t **arenas; 757static unsigned narenas; 758#ifndef NO_TLS 759static unsigned next_arena; 760#endif 761static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */ 762 763#ifndef NO_TLS 764/* 765 * Map of pthread_self() --> arenas[???], used for selecting an arena to use 766 * for allocations. 767 */ 768static __thread arena_t *arenas_map; 769#endif 770 771#ifdef MALLOC_STATS 772/* Chunk statistics. */ 773static chunk_stats_t stats_chunks; 774#endif 775 776/*******************************/ 777/* 778 * Runtime configuration options. 779 */ 780const char *_malloc_options; 781 782#ifndef NO_MALLOC_EXTRAS 783static bool opt_abort = true; 784static bool opt_junk = true; 785#else 786static bool opt_abort = false; 787static bool opt_junk = false; 788#endif 789static bool opt_hint = false; 790static bool opt_print_stats = false; 791static size_t opt_quantum_2pow = QUANTUM_2POW_MIN; 792static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; 793static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT; 794static bool opt_utrace = false; 795static bool opt_sysv = false; 796static bool opt_xmalloc = false; 797static bool opt_zero = false; 798static int32_t opt_narenas_lshift = 0; 799 800typedef struct { 801 void *p; 802 size_t s; 803 void *r; 804} malloc_utrace_t; 805 806#define UTRACE(a, b, c) \ 807 if (opt_utrace) { \ 808 malloc_utrace_t ut = {a, b, c}; \ 809 utrace(&ut, sizeof(ut)); \ 810 } 811 812/******************************************************************************/ 813/* 814 * Begin function prototypes for non-inline static functions. 815 */ 816 817static void malloc_mutex_init(malloc_mutex_t *a_mutex); 818static void wrtmessage(const char *p1, const char *p2, const char *p3, 819 const char *p4); 820static void malloc_printf(const char *format, ...); 821static void *base_alloc(size_t size); 822static chunk_node_t *base_chunk_node_alloc(void); 823static void base_chunk_node_dealloc(chunk_node_t *node); 824#ifdef MALLOC_STATS 825static void stats_print(arena_t *arena); 826#endif 827static void *pages_map(void *addr, size_t size); 828static void pages_unmap(void *addr, size_t size); 829static void *chunk_alloc(size_t size); 830static void chunk_dealloc(void *chunk, size_t size); 831#ifndef NO_TLS 832static arena_t *choose_arena_hard(void); 833#endif 834static void arena_run_split(arena_t *arena, arena_run_t *run, bool large, 835 size_t size); 836static arena_chunk_t *arena_chunk_alloc(arena_t *arena); 837static void arena_chunk_dealloc(arena_chunk_t *chunk); 838static void arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, 839 arena_run_t *run); 840static void arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, 841 arena_run_t *run); 842static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size); 843static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size); 844static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); 845static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); 846static void *arena_malloc(arena_t *arena, size_t size); 847static size_t arena_salloc(const void *ptr); 848static void *arena_ralloc(void *ptr, size_t size, size_t oldsize); 849static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); 850static bool arena_new(arena_t *arena); 851static arena_t *arenas_extend(unsigned ind); 852static void *huge_malloc(size_t size); 853static void *huge_ralloc(void *ptr, size_t size, size_t oldsize); 854static void huge_dalloc(void *ptr); 855static void *imalloc(size_t size); 856static void *ipalloc(size_t alignment, size_t size); 857static void *icalloc(size_t size); 858static size_t isalloc(const void *ptr); 859static void *iralloc(void *ptr, size_t size); 860static void idalloc(void *ptr); 861static void malloc_print_stats(void); 862static bool malloc_init_hard(void); 863 864/* 865 * End function prototypes. 866 */ 867/******************************************************************************/ 868/* 869 * Begin mutex. 870 */ 871 872static void 873malloc_mutex_init(malloc_mutex_t *a_mutex) 874{ 875 static const spinlock_t lock = _SPINLOCK_INITIALIZER; 876 877 a_mutex->lock = lock; 878} 879 880static inline void 881malloc_mutex_lock(malloc_mutex_t *a_mutex) 882{ 883 884 if (__isthreaded) 885 _SPINLOCK(&a_mutex->lock); 886} 887 888static inline void 889malloc_mutex_unlock(malloc_mutex_t *a_mutex) 890{ 891 892 if (__isthreaded) 893 _SPINUNLOCK(&a_mutex->lock); 894} 895 896/* 897 * End mutex. 898 */ 899/******************************************************************************/ 900/* 901 * Begin Utility functions/macros. 902 */ 903 904/* Return the chunk address for allocation address a. */ 905#define CHUNK_ADDR2BASE(a) \ 906 ((void *)((uintptr_t)(a) & ~chunk_size_mask)) 907 908/* Return the chunk offset of address a. */ 909#define CHUNK_ADDR2OFFSET(a) \ 910 ((size_t)((uintptr_t)(a) & chunk_size_mask)) 911 912/* Return the smallest chunk multiple that is >= s. */ 913#define CHUNK_CEILING(s) \ 914 (((s) + chunk_size_mask) & ~chunk_size_mask) 915 916/* Return the smallest cacheline multiple that is >= s. */ 917#define CACHELINE_CEILING(s) \ 918 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1)) 919 920/* Return the smallest quantum multiple that is >= a. */ 921#define QUANTUM_CEILING(a) \ 922 (((a) + quantum_mask) & ~quantum_mask) 923 924/* Compute the smallest power of 2 that is >= x. */ 925static inline size_t 926pow2_ceil(size_t x) 927{ 928 929 x--; 930 x |= x >> 1; 931 x |= x >> 2; 932 x |= x >> 4; 933 x |= x >> 8; 934 x |= x >> 16; 935#if (SIZEOF_PTR == 8) 936 x |= x >> 32; 937#endif 938 x++; 939 return (x); 940} 941 942static void 943wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4) 944{ 945 946 _write(STDERR_FILENO, p1, strlen(p1)); 947 _write(STDERR_FILENO, p2, strlen(p2)); 948 _write(STDERR_FILENO, p3, strlen(p3)); 949 _write(STDERR_FILENO, p4, strlen(p4)); 950} 951 952void (*_malloc_message)(const char *p1, const char *p2, const char *p3, 953 const char *p4) = wrtmessage; 954 955/* 956 * Print to stderr in such a way as to (hopefully) avoid memory allocation. 957 */ 958static void 959malloc_printf(const char *format, ...) 960{ 961 char buf[4096]; 962 va_list ap; 963 964 va_start(ap, format); 965 vsnprintf(buf, sizeof(buf), format, ap); 966 va_end(ap); 967 _malloc_message(buf, "", "", ""); 968} 969 970/******************************************************************************/ 971 972static void * 973base_alloc(size_t size) 974{ 975 void *ret; 976 size_t csize; 977 978 /* Round size up to nearest multiple of the cacheline size. */ 979 csize = CACHELINE_CEILING(size); 980 981 malloc_mutex_lock(&base_mtx); 982 983 /* Make sure there's enough space for the allocation. */ 984 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { 985 void *tchunk; 986 987 assert(csize <= chunk_size); 988 989 tchunk = chunk_alloc(chunk_size); 990 if (tchunk == NULL) { 991 ret = NULL; 992 goto RETURN; 993 } 994 base_chunk = tchunk; 995 base_next_addr = (void *)base_chunk; 996 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size); 997#ifdef MALLOC_STATS 998 base_total += chunk_size; 999#endif 1000 } 1001 1002 /* Allocate. */ 1003 ret = base_next_addr; 1004 base_next_addr = (void *)((uintptr_t)base_next_addr + csize); 1005 1006RETURN: 1007 malloc_mutex_unlock(&base_mtx); 1008 return (ret); 1009} 1010 1011static chunk_node_t * 1012base_chunk_node_alloc(void) 1013{ 1014 chunk_node_t *ret; 1015 1016 malloc_mutex_lock(&base_mtx); 1017 if (base_chunk_nodes != NULL) { 1018 ret = base_chunk_nodes; 1019 base_chunk_nodes = *(chunk_node_t **)ret; 1020 malloc_mutex_unlock(&base_mtx); 1021 } else { 1022 malloc_mutex_unlock(&base_mtx); 1023 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t)); 1024 } 1025 1026 return (ret); 1027} 1028 1029static void 1030base_chunk_node_dealloc(chunk_node_t *node) 1031{ 1032 1033 malloc_mutex_lock(&base_mtx); 1034 *(chunk_node_t **)node = base_chunk_nodes; 1035 base_chunk_nodes = node; 1036 malloc_mutex_unlock(&base_mtx); 1037} 1038 1039/******************************************************************************/ 1040 1041#ifdef MALLOC_STATS 1042static void 1043stats_print(arena_t *arena) 1044{ 1045 unsigned i; 1046 int gap_start; 1047 1048 malloc_printf("allocated: %zu\n", arena->stats.allocated); 1049 1050 malloc_printf("calls:\n"); 1051 malloc_printf(" %12s %12s %12s\n", "nmalloc","ndalloc", "nmadvise"); 1052 malloc_printf(" %12llu %12llu %12llu\n", 1053 arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise); 1054 1055 malloc_printf("large requests: %llu\n", arena->stats.large_nrequests); 1056 1057 malloc_printf("bins:\n"); 1058 malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n", 1059 "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns", 1060 "hiruns", "curruns", "npromo", "ndemo"); 1061 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) { 1062 if (arena->bins[i].stats.nrequests == 0) { 1063 if (gap_start == -1) 1064 gap_start = i; 1065 } else { 1066 if (gap_start != -1) { 1067 if (i > gap_start + 1) { 1068 /* Gap of more than one size class. */ 1069 malloc_printf("[%u..%u]\n", 1070 gap_start, i - 1); 1071 } else { 1072 /* Gap of one size class. */ 1073 malloc_printf("[%u]\n", gap_start); 1074 } 1075 gap_start = -1; 1076 } 1077 malloc_printf( 1078 "%13u %1s %4u %5u %6u %9llu %5llu" 1079 " %6lu %7lu %6llu %6llu\n", 1080 i, 1081 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S", 1082 arena->bins[i].reg_size, 1083 arena->bins[i].nregs, 1084 arena->bins[i].run_size, 1085 arena->bins[i].stats.nrequests, 1086 arena->bins[i].stats.nruns, 1087 arena->bins[i].stats.highruns, 1088 arena->bins[i].stats.curruns, 1089 arena->bins[i].stats.npromote, 1090 arena->bins[i].stats.ndemote); 1091 } 1092 } 1093 if (gap_start != -1) { 1094 if (i > gap_start + 1) { 1095 /* Gap of more than one size class. */ 1096 malloc_printf("[%u..%u]\n", gap_start, i - 1); 1097 } else { 1098 /* Gap of one size class. */ 1099 malloc_printf("[%u]\n", gap_start); 1100 } 1101 } 1102} 1103#endif 1104 1105/* 1106 * End Utility functions/macros. 1107 */ 1108/******************************************************************************/ 1109/* 1110 * Begin chunk management functions. 1111 */ 1112 1113static inline int 1114chunk_comp(chunk_node_t *a, chunk_node_t *b) 1115{ 1116 1117 assert(a != NULL); 1118 assert(b != NULL); 1119 1120 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk) 1121 return (-1); 1122 else if (a->chunk == b->chunk) 1123 return (0); 1124 else 1125 return (1); 1126} 1127 1128/* Generate red-black tree code for chunks. */ 1129RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp); 1130 1131static void * 1132pages_map(void *addr, size_t size) 1133{ 1134 void *ret; 1135 1136 /* 1137 * We don't use MAP_FIXED here, because it can cause the *replacement* 1138 * of existing mappings, and we only want to create new mappings. 1139 */ 1140 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 1141 -1, 0); 1142 assert(ret != NULL); 1143 1144 if (ret == MAP_FAILED) 1145 ret = NULL; 1146 else if (addr != NULL && ret != addr) { 1147 /* 1148 * We succeeded in mapping memory, but not in the right place. 1149 */ 1150 if (munmap(ret, size) == -1) { 1151 char buf[STRERROR_BUF]; 1152 1153 strerror_r(errno, buf, sizeof(buf)); 1154 malloc_printf("%s: (malloc) Error in munmap(): %s\n", 1155 _getprogname(), buf); 1156 if (opt_abort) 1157 abort(); 1158 } 1159 ret = NULL; 1160 } 1161 1162 assert(ret == NULL || (addr == NULL && ret != addr) 1163 || (addr != NULL && ret == addr)); 1164 return (ret); 1165} 1166 1167static void 1168pages_unmap(void *addr, size_t size) 1169{ 1170 1171 if (munmap(addr, size) == -1) { 1172 char buf[STRERROR_BUF]; 1173 1174 strerror_r(errno, buf, sizeof(buf)); 1175 malloc_printf("%s: (malloc) Error in munmap(): %s\n", 1176 _getprogname(), buf); 1177 if (opt_abort) 1178 abort(); 1179 } 1180} 1181 1182static void * 1183chunk_alloc(size_t size) 1184{ 1185 void *ret, *chunk; 1186 chunk_node_t *tchunk, *delchunk; 1187 1188 assert(size != 0); 1189 assert(size % chunk_size == 0); 1190 1191 malloc_mutex_lock(&chunks_mtx); 1192 1193 if (size == chunk_size) { 1194 /* 1195 * Check for address ranges that were previously chunks and try 1196 * to use them. 1197 */ 1198 1199 tchunk = RB_MIN(chunk_tree_s, &old_chunks); 1200 while (tchunk != NULL) { 1201 /* Found an address range. Try to recycle it. */ 1202 1203 chunk = tchunk->chunk; 1204 delchunk = tchunk; 1205 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk); 1206 1207 /* Remove delchunk from the tree. */ 1208 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk); 1209 base_chunk_node_dealloc(delchunk); 1210 1211#ifdef USE_BRK 1212 if ((uintptr_t)chunk >= (uintptr_t)brk_base 1213 && (uintptr_t)chunk < (uintptr_t)brk_max) { 1214 /* Re-use a previously freed brk chunk. */ 1215 ret = chunk; 1216 goto RETURN; 1217 } 1218#endif 1219 if ((ret = pages_map(chunk, size)) != NULL) { 1220 /* Success. */ 1221 goto RETURN; 1222 } 1223 } 1224 } 1225 1226#ifdef USE_BRK 1227 /* 1228 * Try to create allocations in brk, in order to make full use of 1229 * limited address space. 1230 */ 1231 if (brk_prev != (void *)-1) { 1232 void *brk_cur; 1233 intptr_t incr; 1234 1235 /* 1236 * The loop is necessary to recover from races with other 1237 * threads that are using brk for something other than malloc. 1238 */ 1239 do { 1240 /* Get the current end of brk. */ 1241 brk_cur = sbrk(0); 1242 1243 /* 1244 * Calculate how much padding is necessary to 1245 * chunk-align the end of brk. 1246 */ 1247 incr = (intptr_t)size 1248 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur); 1249 if (incr == size) { 1250 ret = brk_cur; 1251 } else { 1252 ret = (void *)(intptr_t)brk_cur + incr; 1253 incr += size; 1254 } 1255 1256 brk_prev = sbrk(incr); 1257 if (brk_prev == brk_cur) { 1258 /* Success. */ 1259 brk_max = (void *)(intptr_t)ret + size; 1260 goto RETURN; 1261 } 1262 } while (brk_prev != (void *)-1); 1263 } 1264#endif 1265 1266 /* 1267 * Try to over-allocate, but allow the OS to place the allocation 1268 * anywhere. Beware of size_t wrap-around. 1269 */ 1270 if (size + chunk_size > size) { 1271 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) { 1272 size_t offset = CHUNK_ADDR2OFFSET(ret); 1273 1274 /* 1275 * Success. Clean up unneeded leading/trailing space. 1276 */ 1277 if (offset != 0) { 1278 /* Leading space. */ 1279 pages_unmap(ret, chunk_size - offset); 1280 1281 ret = (void *)((uintptr_t)ret + (chunk_size - 1282 offset)); 1283 1284 /* Trailing space. */ 1285 pages_unmap((void *)((uintptr_t)ret + size), 1286 offset); 1287 } else { 1288 /* Trailing space only. */ 1289 pages_unmap((void *)((uintptr_t)ret + size), 1290 chunk_size); 1291 } 1292 goto RETURN; 1293 } 1294 } 1295 1296 /* All strategies for allocation failed. */ 1297 ret = NULL; 1298RETURN: 1299#ifdef MALLOC_STATS 1300 if (ret != NULL) { 1301 stats_chunks.nchunks += (size / chunk_size); 1302 stats_chunks.curchunks += (size / chunk_size); 1303 } 1304 if (stats_chunks.curchunks > stats_chunks.highchunks) 1305 stats_chunks.highchunks = stats_chunks.curchunks; 1306#endif 1307 malloc_mutex_unlock(&chunks_mtx); 1308 1309 assert(CHUNK_ADDR2BASE(ret) == ret); 1310 return (ret); 1311} 1312 1313static void 1314chunk_dealloc(void *chunk, size_t size) 1315{ 1316 size_t offset; 1317 chunk_node_t key; 1318 chunk_node_t *node; 1319 1320 assert(chunk != NULL); 1321 assert(CHUNK_ADDR2BASE(chunk) == chunk); 1322 assert(size != 0); 1323 assert(size % chunk_size == 0); 1324 1325 malloc_mutex_lock(&chunks_mtx); 1326 1327#ifdef USE_BRK 1328 if ((uintptr_t)chunk >= (uintptr_t)brk_base 1329 && (uintptr_t)chunk < (uintptr_t)brk_max) { 1330 void *brk_cur; 1331 1332 /* Get the current end of brk. */ 1333 brk_cur = sbrk(0); 1334 1335 /* 1336 * Try to shrink the data segment if this chunk is at the end 1337 * of the data segment. The sbrk() call here is subject to a 1338 * race condition with threads that use brk(2) or sbrk(2) 1339 * directly, but the alternative would be to leak memory for 1340 * the sake of poorly designed multi-threaded programs. 1341 */ 1342 if (brk_cur == brk_max 1343 && (void *)(uintptr_t)chunk + size == brk_max 1344 && sbrk(-(intptr_t)size) == brk_max) { 1345 if (brk_prev == brk_max) { 1346 /* Success. */ 1347 brk_prev = (void *)(intptr_t)brk_max 1348 - (intptr_t)size; 1349 brk_max = brk_prev; 1350 } 1351 goto RETURN; 1352 } else 1353 madvise(chunk, size, MADV_FREE); 1354 } else 1355#endif 1356 pages_unmap(chunk, size); 1357 1358 /* 1359 * Iteratively create records of each chunk-sized memory region that 1360 * 'chunk' is comprised of, so that the address range can be recycled 1361 * if memory usage increases later on. 1362 */ 1363 for (offset = 0; offset < size; offset += chunk_size) { 1364 /* 1365 * It is possible for chunk to overlap existing entries in 1366 * old_chunks if it is a huge allocation, so take care to not 1367 * leak tree nodes. 1368 */ 1369 key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset); 1370 if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) { 1371 node = base_chunk_node_alloc(); 1372 if (node == NULL) 1373 break; 1374 1375 node->chunk = key.chunk; 1376 node->size = chunk_size; 1377 RB_INSERT(chunk_tree_s, &old_chunks, node); 1378 } 1379 } 1380 1381#ifdef USE_BRK 1382RETURN: 1383#endif 1384#ifdef MALLOC_STATS 1385 stats_chunks.curchunks -= (size / chunk_size); 1386#endif 1387 malloc_mutex_unlock(&chunks_mtx); 1388} 1389 1390/* 1391 * End chunk management functions. 1392 */ 1393/******************************************************************************/ 1394/* 1395 * Begin arena. 1396 */ 1397 1398/* 1399 * Choose an arena based on a per-thread value (fast-path code, calls slow-path 1400 * code if necessary. 1401 */ 1402static inline arena_t * 1403choose_arena(void) 1404{ 1405 arena_t *ret; 1406 1407 /* 1408 * We can only use TLS if this is a PIC library, since for the static 1409 * library version, libc's malloc is used by TLS allocation, which 1410 * introduces a bootstrapping issue. 1411 */ 1412#ifndef NO_TLS 1413 if (__isthreaded == false) { 1414 /* 1415 * Avoid the overhead of TLS for single-threaded operation. If the 1416 * app switches to threaded mode, the initial thread may end up 1417 * being assigned to some other arena, but this one-time switch 1418 * shouldn't cause significant issues. 1419 * */ 1420 return (arenas[0]); 1421 } 1422 1423 ret = arenas_map; 1424 if (ret == NULL) 1425 ret = choose_arena_hard(); 1426#else 1427 if (__isthreaded) { 1428 unsigned long ind; 1429 1430 /* 1431 * Hash _pthread_self() to one of the arenas. There is a prime 1432 * number of arenas, so this has a reasonable chance of 1433 * working. Even so, the hashing can be easily thwarted by 1434 * inconvenient _pthread_self() values. Without specific 1435 * knowledge of how _pthread_self() calculates values, we can't 1436 * easily do much better than this. 1437 */ 1438 ind = (unsigned long) _pthread_self() % narenas; 1439 1440 /* 1441 * Optimistially assume that arenas[ind] has been initialized. 1442 * At worst, we find out that some other thread has already 1443 * done so, after acquiring the lock in preparation. Note that 1444 * this lazy locking also has the effect of lazily forcing 1445 * cache coherency; without the lock acquisition, there's no 1446 * guarantee that modification of arenas[ind] by another thread 1447 * would be seen on this CPU for an arbitrary amount of time. 1448 * 1449 * In general, this approach to modifying a synchronized value 1450 * isn't a good idea, but in this case we only ever modify the 1451 * value once, so things work out well. 1452 */ 1453 ret = arenas[ind]; 1454 if (ret == NULL) { 1455 /* 1456 * Avoid races with another thread that may have already 1457 * initialized arenas[ind]. 1458 */ 1459 malloc_mutex_lock(&arenas_mtx); 1460 if (arenas[ind] == NULL) 1461 ret = arenas_extend((unsigned)ind); 1462 else 1463 ret = arenas[ind]; 1464 malloc_mutex_unlock(&arenas_mtx); 1465 } 1466 } else 1467 ret = arenas[0]; 1468#endif 1469 1470 assert(ret != NULL); 1471 return (ret); 1472} 1473 1474#ifndef NO_TLS 1475/* 1476 * Choose an arena based on a per-thread value (slow-path code only, called 1477 * only by choose_arena()). 1478 */ 1479static arena_t * 1480choose_arena_hard(void) 1481{ 1482 arena_t *ret; 1483 1484 assert(__isthreaded); 1485 1486 /* Assign one of the arenas to this thread, in a round-robin fashion. */ 1487 malloc_mutex_lock(&arenas_mtx); 1488 ret = arenas[next_arena]; 1489 if (ret == NULL) 1490 ret = arenas_extend(next_arena); 1491 if (ret == NULL) { 1492 /* 1493 * Make sure that this function never returns NULL, so that 1494 * choose_arena() doesn't have to check for a NULL return 1495 * value. 1496 */ 1497 ret = arenas[0]; 1498 } 1499 next_arena = (next_arena + 1) % narenas; 1500 malloc_mutex_unlock(&arenas_mtx); 1501 arenas_map = ret; 1502 1503 return (ret); 1504} 1505#endif 1506 1507static inline int 1508arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b) 1509{ 1510 1511 assert(a != NULL); 1512 assert(b != NULL); 1513 1514 if ((uintptr_t)a < (uintptr_t)b) 1515 return (-1); 1516 else if (a == b) 1517 return (0); 1518 else 1519 return (1); 1520} 1521 1522/* Generate red-black tree code for arena chunks. */ 1523RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp); 1524 1525static inline void * 1526arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) 1527{ 1528 void *ret; 1529 unsigned i, mask, bit, regind; 1530 1531 assert(run->magic == ARENA_RUN_MAGIC); 1532 1533 for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) { 1534 mask = run->regs_mask[i]; 1535 if (mask != 0) { 1536 /* Usable allocation found. */ 1537 bit = ffs(mask) - 1; 1538 1539 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit); 1540 ret = (void *)&((char *)run)[bin->reg0_offset 1541 + (bin->reg_size * regind)]; 1542 1543 /* Clear bit. */ 1544 mask ^= (1 << bit); 1545 run->regs_mask[i] = mask; 1546 1547 return (ret); 1548 } else { 1549 /* 1550 * Make a note that nothing before this element 1551 * contains a free region. 1552 */ 1553 run->regs_minelm = i + 1; 1554 } 1555 } 1556 /* Not reached. */ 1557 assert(0); 1558 return (NULL); 1559} 1560 1561static inline void 1562arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size) 1563{ 1564 /* 1565 * To divide by a number D that is not a power of two we multiply 1566 * by (2^21 / D) and then right shift by 21 positions. 1567 * 1568 * X / D 1569 * 1570 * becomes 1571 * 1572 * (size_invs[(D >> QUANTUM_2POW_MIN) - 3] * D) >> SIZE_INV_SHIFT 1573 */ 1574#define SIZE_INV_SHIFT 21 1575#define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1) 1576 static const unsigned size_invs[] = { 1577 SIZE_INV(3), 1578 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), 1579 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), 1580 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), 1581 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), 1582 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), 1583 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), 1584 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) 1585#if (QUANTUM_2POW_MIN < 4) 1586 , 1587 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35), 1588 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39), 1589 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43), 1590 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47), 1591 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51), 1592 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55), 1593 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59), 1594 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63) 1595#endif 1596 }; 1597 unsigned diff, regind, elm, bit; 1598 1599 assert(run->magic == ARENA_RUN_MAGIC); 1600 assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3 1601 >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN)); 1602 1603 /* 1604 * Avoid doing division with a variable divisor if possible. Using 1605 * actual division here can reduce allocator throughput by over 20%! 1606 */ 1607 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); 1608 if ((size & (size - 1)) == 0) { 1609 /* 1610 * log2_table allows fast division of a power of two in the 1611 * [1..128] range. 1612 * 1613 * (x / divisor) becomes (x >> log2_table[divisor - 1]). 1614 */ 1615 static const unsigned char log2_table[] = { 1616 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 1617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 1620 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1621 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1622 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 1624 }; 1625 1626 if (size <= 128) 1627 regind = (diff >> log2_table[size - 1]); 1628 else if (size <= 32768) 1629 regind = diff >> (8 + log2_table[(size >> 8) - 1]); 1630 else { 1631 /* 1632 * The page size is too large for us to use the lookup 1633 * table. Use real division. 1634 */ 1635 regind = diff / size; 1636 } 1637 } else if (size <= ((sizeof(size_invs) / sizeof(unsigned)) 1638 << QUANTUM_2POW_MIN) + 2) { 1639 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff; 1640 regind >>= SIZE_INV_SHIFT; 1641 } else { 1642 /* 1643 * size_invs isn't large enough to handle this size class, so 1644 * calculate regind using actual division. This only happens 1645 * if the user increases small_max via the 'S' runtime 1646 * configuration option. 1647 */ 1648 regind = diff / size; 1649 }; 1650 assert(regind == diff / size); 1651 assert(regind < bin->nregs); 1652 1653 elm = regind >> (SIZEOF_INT_2POW + 3); 1654 if (elm < run->regs_minelm) 1655 run->regs_minelm = elm; 1656 bit = regind - (elm << (SIZEOF_INT_2POW + 3)); 1657 assert((run->regs_mask[elm] & (1 << bit)) == 0); 1658 run->regs_mask[elm] |= (1 << bit); 1659#undef SIZE_INV 1660#undef SIZE_INV_SHIFT 1661} 1662 1663static void 1664arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size) 1665{ 1666 arena_chunk_t *chunk; 1667 unsigned run_ind, map_offset, total_pages, need_pages; 1668 unsigned i, log2_run_pages, run_pages; 1669 1670 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); 1671 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) 1672 >> pagesize_2pow); 1673 assert(chunk->map[run_ind].free); 1674 total_pages = chunk->map[run_ind].npages; 1675 need_pages = (size >> pagesize_2pow); 1676 1677 assert(chunk->map[run_ind].free); 1678 assert(chunk->map[run_ind].large == false); 1679 assert(chunk->map[run_ind].npages == total_pages); 1680 1681 /* Split enough pages from the front of run to fit allocation size. */ 1682 map_offset = run_ind; 1683 for (i = 0; i < need_pages; i++) { 1684 chunk->map[map_offset + i].free = false; 1685 chunk->map[map_offset + i].large = large; 1686 chunk->map[map_offset + i].npages = need_pages; 1687 chunk->map[map_offset + i].pos = i; 1688 } 1689 1690 /* Update map for trailing pages. */ 1691 map_offset += need_pages; 1692 while (map_offset < run_ind + total_pages) { 1693 log2_run_pages = ffs(map_offset) - 1; 1694 run_pages = (1 << log2_run_pages); 1695 1696 chunk->map[map_offset].free = true; 1697 chunk->map[map_offset].large = false; 1698 chunk->map[map_offset].npages = run_pages; 1699 1700 chunk->nfree_runs[log2_run_pages]++; 1701 1702 map_offset += run_pages; 1703 } 1704 1705 chunk->pages_used += (size >> pagesize_2pow); 1706} 1707 1708static arena_chunk_t * 1709arena_chunk_alloc(arena_t *arena) 1710{ 1711 arena_chunk_t *chunk; 1712 unsigned i, j, header_npages, pow2_header_npages, map_offset; 1713 unsigned log2_run_pages, run_pages; 1714 size_t header_size; 1715 1716 chunk = (arena_chunk_t *)chunk_alloc(chunk_size); 1717 if (chunk == NULL) 1718 return (NULL); 1719 1720 chunk->arena = arena; 1721 1722 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk); 1723 1724 /* 1725 * Claim that no pages are in use, since the header is merely overhead. 1726 */ 1727 chunk->pages_used = 0; 1728 1729 memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs)); 1730 1731 header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen] 1732 - (uintptr_t)chunk); 1733 if (header_size % pagesize != 0) { 1734 /* Round up to the nearest page boundary. */ 1735 header_size += pagesize - (header_size % pagesize); 1736 } 1737 1738 header_npages = header_size >> pagesize_2pow; 1739 pow2_header_npages = pow2_ceil(header_npages); 1740 1741 /* 1742 * Iteratively mark runs as in use, until we've spoken for the entire 1743 * header. 1744 */ 1745 map_offset = 0; 1746 for (i = 0; header_npages > 0; i++) { 1747 if ((pow2_header_npages >> i) <= header_npages) { 1748 for (j = 0; j < (pow2_header_npages >> i); j++) { 1749 chunk->map[map_offset + j].free = false; 1750 chunk->map[map_offset + j].large = false; 1751 chunk->map[map_offset + j].npages = 1752 (pow2_header_npages >> i); 1753 chunk->map[map_offset + j].pos = j; 1754 } 1755 header_npages -= (pow2_header_npages >> i); 1756 map_offset += (pow2_header_npages >> i); 1757 } 1758 } 1759 1760 /* 1761 * Finish initializing map. The chunk header takes up some space at 1762 * the beginning of the chunk, which we just took care of by 1763 * "allocating" the leading pages. 1764 */ 1765 while (map_offset < (chunk_size >> pagesize_2pow)) { 1766 log2_run_pages = ffs(map_offset) - 1; 1767 run_pages = (1 << log2_run_pages); 1768 1769 chunk->map[map_offset].free = true; 1770 chunk->map[map_offset].large = false; 1771 chunk->map[map_offset].npages = run_pages; 1772 1773 chunk->nfree_runs[log2_run_pages]++; 1774 1775 map_offset += run_pages; 1776 } 1777 1778 return (chunk); 1779} 1780 1781static void 1782arena_chunk_dealloc(arena_chunk_t *chunk) 1783{ 1784 1785 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk); 1786 1787 chunk_dealloc((void *)chunk, chunk_size); 1788} 1789 1790static void 1791arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run) 1792{ 1793 1794 assert(bin == run->bin); 1795 1796 /* Promote. */ 1797 assert(run->free_min > run->nfree); 1798 assert(run->quartile < RUN_Q100); 1799 run->quartile++; 1800#ifdef MALLOC_STATS 1801 bin->stats.npromote++; 1802#endif 1803 1804 /* Re-file run. */ 1805 switch (run->quartile) { 1806 case RUN_QINIT: 1807 assert(0); 1808 break; 1809 case RUN_Q0: 1810 qr_before_insert((arena_run_t *)&bin->runs0, run, link); 1811 run->free_max = bin->nregs - 1; 1812 run->free_min = (bin->nregs >> 1) + 1; 1813 assert(run->nfree <= run->free_max); 1814 assert(run->nfree >= run->free_min); 1815 break; 1816 case RUN_Q25: 1817 qr_remove(run, link); 1818 qr_before_insert((arena_run_t *)&bin->runs25, run, 1819 link); 1820 run->free_max = ((bin->nregs >> 2) * 3) - 1; 1821 run->free_min = (bin->nregs >> 2) + 1; 1822 assert(run->nfree <= run->free_max); 1823 assert(run->nfree >= run->free_min); 1824 break; 1825 case RUN_Q50: 1826 qr_remove(run, link); 1827 qr_before_insert((arena_run_t *)&bin->runs50, run, 1828 link); 1829 run->free_max = (bin->nregs >> 1) - 1; 1830 run->free_min = 1; 1831 assert(run->nfree <= run->free_max); 1832 assert(run->nfree >= run->free_min); 1833 break; 1834 case RUN_Q75: 1835 /* 1836 * Skip RUN_Q75 during promotion from RUN_Q50. 1837 * Separate handling of RUN_Q75 and RUN_Q100 allows us 1838 * to keep completely full runs in RUN_Q100, thus 1839 * guaranteeing that runs in RUN_Q75 are only mostly 1840 * full. This provides a method for avoiding a linear 1841 * search for non-full runs, which avoids some 1842 * pathological edge cases. 1843 */ 1844 run->quartile++; 1845 /* Fall through. */ 1846 case RUN_Q100: 1847 qr_remove(run, link); 1848 assert(bin->runcur == run); 1849 bin->runcur = NULL; 1850 run->free_max = 0; 1851 run->free_min = 0; 1852 assert(run->nfree <= run->free_max); 1853 assert(run->nfree >= run->free_min); 1854 break; 1855 default: 1856 assert(0); 1857 break; 1858 } 1859} 1860 1861static void 1862arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run) 1863{ 1864 1865 assert(bin == run->bin); 1866 1867 /* Demote. */ 1868 assert(run->free_max < run->nfree); 1869 assert(run->quartile > RUN_QINIT); 1870 run->quartile--; 1871#ifdef MALLOC_STATS 1872 bin->stats.ndemote++; 1873#endif 1874 1875 /* Re-file run. */ 1876 switch (run->quartile) { 1877 case RUN_QINIT: 1878 qr_remove(run, link); 1879#ifdef MALLOC_STATS 1880 bin->stats.curruns--; 1881#endif 1882 if (bin->runcur == run) 1883 bin->runcur = NULL; 1884#ifdef MALLOC_DEBUG 1885 run->magic = 0; 1886#endif 1887 arena_run_dalloc(arena, run, bin->run_size); 1888 break; 1889 case RUN_Q0: 1890 qr_remove(run, link); 1891 qr_before_insert((arena_run_t *)&bin->runs0, run, link); 1892 run->free_max = bin->nregs - 1; 1893 run->free_min = (bin->nregs >> 1) + 1; 1894 assert(run->nfree <= run->free_max); 1895 assert(run->nfree >= run->free_min); 1896 break; 1897 case RUN_Q25: 1898 qr_remove(run, link); 1899 qr_before_insert((arena_run_t *)&bin->runs25, run, 1900 link); 1901 run->free_max = ((bin->nregs >> 2) * 3) - 1; 1902 run->free_min = (bin->nregs >> 2) + 1; 1903 assert(run->nfree <= run->free_max); 1904 assert(run->nfree >= run->free_min); 1905 break; 1906 case RUN_Q50: 1907 qr_remove(run, link); 1908 qr_before_insert((arena_run_t *)&bin->runs50, run, 1909 link); 1910 run->free_max = (bin->nregs >> 1) - 1; 1911 run->free_min = 1; 1912 assert(run->nfree <= run->free_max); 1913 assert(run->nfree >= run->free_min); 1914 break; 1915 case RUN_Q75: 1916 qr_before_insert((arena_run_t *)&bin->runs75, run, 1917 link); 1918 run->free_max = (bin->nregs >> 2) - 1; 1919 run->free_min = 1; 1920 assert(run->nfree <= run->free_max); 1921 assert(run->nfree >= run->free_min); 1922 break; 1923 case RUN_Q100: 1924 default: 1925 assert(0); 1926 break; 1927 } 1928} 1929 1930static arena_run_t * 1931arena_run_alloc(arena_t *arena, bool large, size_t size) 1932{ 1933 arena_run_t *run; 1934 unsigned min_ind, i, j; 1935 arena_chunk_t *chunk; 1936#ifndef NDEBUG 1937 int rep = 0; 1938#endif 1939 1940 assert(size <= arena_maxclass); 1941 1942AGAIN: 1943#ifndef NDEBUG 1944 rep++; 1945 assert(rep <= 2); 1946#endif 1947 1948 /* 1949 * Search through arena's chunks in address order for a run that is 1950 * large enough. Look for a precise fit, but do not pass up a chunk 1951 * that has a run which is large enough to split. 1952 */ 1953 min_ind = ffs(size >> pagesize_2pow) - 1; 1954 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) { 1955 for (i = min_ind; 1956 i < (opt_chunk_2pow - pagesize_2pow); 1957 i++) { 1958 if (chunk->nfree_runs[i] > 0) { 1959 arena_chunk_map_t *map = chunk->map; 1960 1961 /* Scan chunk's map for free run. */ 1962 for (j = 0; 1963 j < arena_chunk_maplen; 1964 j += map[j].npages) { 1965 if (map[j].free 1966 && map[j].npages == (1 << i)) 1967 {/*<----------------------------*/ 1968 run = (arena_run_t *)&((char *)chunk)[j 1969 << pagesize_2pow]; 1970 1971 assert(chunk->nfree_runs[i] > 0); 1972 chunk->nfree_runs[i]--; 1973 1974 /* Update page map. */ 1975 arena_run_split(arena, run, large, size); 1976 1977 return (run); 1978 }/*---------------------------->*/ 1979 } 1980 /* Not reached. */ 1981 assert(0); 1982 } 1983 } 1984 } 1985 1986 /* No usable runs. Allocate a new chunk, then try again. */ 1987 if (arena_chunk_alloc(arena) == NULL) 1988 return (NULL); 1989 goto AGAIN; 1990} 1991 1992static void 1993arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size) 1994{ 1995 arena_chunk_t *chunk; 1996 unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages; 1997 1998 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); 1999 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) 2000 >> pagesize_2pow); 2001 run_pages = (size >> pagesize_2pow); 2002 log2_run_pages = ffs(run_pages) - 1; 2003 assert(run_pages > 0); 2004 2005 /* Subtract pages from count of pages used in chunk. */ 2006 chunk->pages_used -= run_pages; 2007 2008 /* Mark run as deallocated. */ 2009 chunk->map[run_ind].free = true; 2010 chunk->map[run_ind].large = false; 2011 chunk->map[run_ind].npages = run_pages; 2012 2013 /* 2014 * Tell the kernel that we don't need the data in this run, but only if 2015 * requested via runtime configuration. 2016 */ 2017 if (opt_hint) { 2018 madvise(run, size, MADV_FREE); 2019#ifdef MALLOC_STATS 2020 arena->stats.nmadvise += (size >> pagesize_2pow); 2021#endif 2022 } 2023 2024 /* 2025 * Iteratively coalesce with buddies. Conceptually, the buddy scheme 2026 * induces a tree on the set of pages. If we know the number of pages 2027 * in the subtree rooted at the current node, we can quickly determine 2028 * whether a run is the left or right buddy, and then calculate the 2029 * buddy's index. 2030 */ 2031 for (; 2032 (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen; 2033 log2_run_pages++) { 2034 if (((run_ind >> log2_run_pages) & 1) == 0) { 2035 /* Current run precedes its buddy. */ 2036 buddy_ind = run_ind + run_pages; 2037 base_run_ind = run_ind; 2038 } else { 2039 /* Current run follows its buddy. */ 2040 buddy_ind = run_ind - run_pages; 2041 base_run_ind = buddy_ind; 2042 } 2043 2044 if (chunk->map[buddy_ind].free == false 2045 || chunk->map[buddy_ind].npages != run_pages) 2046 break; 2047 2048 assert(chunk->nfree_runs[log2_run_pages] > 0); 2049 chunk->nfree_runs[log2_run_pages]--; 2050 2051 /* Coalesce. */ 2052 chunk->map[base_run_ind].npages = (run_pages << 1); 2053 2054 /* Update run_ind to be the beginning of the coalesced run. */ 2055 run_ind = base_run_ind; 2056 } 2057 2058 chunk->nfree_runs[log2_run_pages]++; 2059 2060 /* Free pages, to the extent possible. */ 2061 if (chunk->pages_used == 0) { 2062 /* This chunk is completely unused now, so deallocate it. */ 2063 arena_chunk_dealloc(chunk); 2064 } 2065} 2066 2067static arena_run_t * 2068arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) 2069{ 2070 arena_run_t *run; 2071 unsigned i, remainder; 2072 2073 /* Look for a usable run. */ 2074 if ((run = qr_next((arena_run_t *)&bin->runs50, link)) 2075 != (arena_run_t *)&bin->runs50 2076 || (run = qr_next((arena_run_t *)&bin->runs25, link)) 2077 != (arena_run_t *)&bin->runs25 2078 || (run = qr_next((arena_run_t *)&bin->runs0, link)) 2079 != (arena_run_t *)&bin->runs0 2080 || (run = qr_next((arena_run_t *)&bin->runs75, link)) 2081 != (arena_run_t *)&bin->runs75) { 2082 /* run is guaranteed to have available space. */ 2083 qr_remove(run, link); 2084 return (run); 2085 } 2086 /* No existing runs have any space available. */ 2087 2088 /* Allocate a new run. */ 2089 run = arena_run_alloc(arena, false, bin->run_size); 2090 if (run == NULL) 2091 return (NULL); 2092 2093 /* Initialize run internals. */ 2094 qr_new(run, link); 2095 run->bin = bin; 2096 2097 for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++) 2098 run->regs_mask[i] = UINT_MAX; 2099 remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3)); 2100 if (remainder != 0) { 2101 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3)) 2102 - remainder)); 2103 i++; 2104 } 2105 for (; i < REGS_MASK_NELMS; i++) 2106 run->regs_mask[i] = 0; 2107 2108 run->regs_minelm = 0; 2109 2110 run->nfree = bin->nregs; 2111 run->quartile = RUN_QINIT; 2112 run->free_max = bin->nregs; 2113 run->free_min = ((bin->nregs >> 2) * 3) + 1; 2114#ifdef MALLOC_DEBUG 2115 run->magic = ARENA_RUN_MAGIC; 2116#endif 2117 2118#ifdef MALLOC_STATS 2119 bin->stats.nruns++; 2120 bin->stats.curruns++; 2121 if (bin->stats.curruns > bin->stats.highruns) 2122 bin->stats.highruns = bin->stats.curruns; 2123#endif 2124 return (run); 2125} 2126 2127/* bin->runcur must have space available before this function is called. */ 2128static inline void * 2129arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run) 2130{ 2131 void *ret; 2132 2133 assert(run->magic == ARENA_RUN_MAGIC); 2134 assert(run->nfree > 0); 2135 2136 ret = arena_run_reg_alloc(run, bin); 2137 assert(ret != NULL); 2138 run->nfree--; 2139 if (run->nfree < run->free_min) { 2140 /* Promote run to higher fullness quartile. */ 2141 arena_bin_run_promote(arena, bin, run); 2142 } 2143 2144 return (ret); 2145} 2146 2147/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */ 2148static void * 2149arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) 2150{ 2151 2152 assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100); 2153 2154 bin->runcur = arena_bin_nonfull_run_get(arena, bin); 2155 if (bin->runcur == NULL) 2156 return (NULL); 2157 assert(bin->runcur->magic == ARENA_RUN_MAGIC); 2158 assert(bin->runcur->nfree > 0); 2159 2160 return (arena_bin_malloc_easy(arena, bin, bin->runcur)); 2161} 2162 2163static void * 2164arena_malloc(arena_t *arena, size_t size) 2165{ 2166 void *ret; 2167 2168 assert(arena != NULL); 2169 assert(arena->magic == ARENA_MAGIC); 2170 assert(size != 0); 2171 assert(QUANTUM_CEILING(size) <= arena_maxclass); 2172 2173 if (size <= bin_maxclass) { 2174 arena_bin_t *bin; 2175 arena_run_t *run; 2176 2177 /* Small allocation. */ 2178 2179 if (size < small_min) { 2180 /* Tiny. */ 2181 size = pow2_ceil(size); 2182 bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))]; 2183#if (!defined(NDEBUG) || defined(MALLOC_STATS)) 2184 /* 2185 * Bin calculation is always correct, but we may need 2186 * to fix size for the purposes of assertions and/or 2187 * stats accuracy. 2188 */ 2189 if (size < (1 << tiny_min_2pow)) 2190 size = (1 << tiny_min_2pow); 2191#endif 2192 } else if (size <= small_max) { 2193 /* Quantum-spaced. */ 2194 size = QUANTUM_CEILING(size); 2195 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow) 2196 - 1]; 2197 } else { 2198 /* Sub-page. */ 2199 size = pow2_ceil(size); 2200 bin = &arena->bins[ntbins + nqbins 2201 + (ffs(size >> opt_small_max_2pow) - 2)]; 2202 } 2203 assert(size == bin->reg_size); 2204 2205 malloc_mutex_lock(&arena->mtx); 2206 if ((run = bin->runcur) != NULL) 2207 ret = arena_bin_malloc_easy(arena, bin, run); 2208 else 2209 ret = arena_bin_malloc_hard(arena, bin); 2210 2211#ifdef MALLOC_STATS 2212 bin->stats.nrequests++; 2213#endif 2214 } else { 2215 /* Medium allocation. */ 2216 size = pow2_ceil(size); 2217 malloc_mutex_lock(&arena->mtx); 2218 ret = (void *)arena_run_alloc(arena, true, size); 2219#ifdef MALLOC_STATS 2220 arena->stats.large_nrequests++; 2221#endif 2222 } 2223 2224#ifdef MALLOC_STATS 2225 arena->stats.nmalloc++; 2226 if (ret != NULL) 2227 arena->stats.allocated += size; 2228#endif 2229 2230 malloc_mutex_unlock(&arena->mtx); 2231 2232 if (opt_junk && ret != NULL) 2233 memset(ret, 0xa5, size); 2234 else if (opt_zero && ret != NULL) 2235 memset(ret, 0, size); 2236 return (ret); 2237} 2238 2239/* Return the size of the allocation pointed to by ptr. */ 2240static size_t 2241arena_salloc(const void *ptr) 2242{ 2243 size_t ret; 2244 arena_chunk_t *chunk; 2245 uint32_t pageind; 2246 arena_chunk_map_t mapelm; 2247 2248 assert(ptr != NULL); 2249 assert(CHUNK_ADDR2BASE(ptr) != ptr); 2250 2251 /* 2252 * No arena data structures that we query here can change in a way that 2253 * affects this function, so we don't need to lock. 2254 */ 2255 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); 2256 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); 2257 mapelm = chunk->map[pageind]; 2258 assert(mapelm.free == false); 2259 if (mapelm.large == false) { 2260 arena_run_t *run; 2261 2262 pageind -= mapelm.pos; 2263 2264 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow]; 2265 assert(run->magic == ARENA_RUN_MAGIC); 2266 ret = run->bin->reg_size; 2267 } else 2268 ret = mapelm.npages << pagesize_2pow; 2269 2270 return (ret); 2271} 2272 2273static void * 2274arena_ralloc(void *ptr, size_t size, size_t oldsize) 2275{ 2276 void *ret; 2277 2278 /* Avoid moving the allocation if the size class would not change. */ 2279 if (size < small_min) { 2280 if (oldsize < small_min && 2281 ffs(pow2_ceil(size) >> (tiny_min_2pow + 1)) 2282 == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1))) 2283 goto IN_PLACE; 2284 } else if (size <= small_max) { 2285 if (oldsize >= small_min && oldsize <= small_max && 2286 (QUANTUM_CEILING(size) >> opt_quantum_2pow) 2287 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow)) 2288 goto IN_PLACE; 2289 } else { 2290 if (oldsize > small_max && pow2_ceil(size) == oldsize) 2291 goto IN_PLACE; 2292 } 2293 2294 /* 2295 * If we get here, then size and oldsize are different enough that we 2296 * need to use a different size class. In that case, fall back to 2297 * allocating new space and copying. 2298 */ 2299 ret = arena_malloc(choose_arena(), size); 2300 if (ret == NULL) 2301 return (NULL); 2302 2303 if (size < oldsize) 2304 memcpy(ret, ptr, size); 2305 else 2306 memcpy(ret, ptr, oldsize); 2307 idalloc(ptr); 2308 return (ret); 2309IN_PLACE: 2310 if (opt_junk && size < oldsize) 2311 memset(&((char *)ptr)[size], 0x5a, oldsize - size); 2312 else if (opt_zero && size > oldsize) 2313 memset(&((char *)ptr)[size], 0, size - oldsize); 2314 return (ptr); 2315} 2316 2317static void 2318arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) 2319{ 2320 unsigned pageind; 2321 arena_chunk_map_t mapelm; 2322 size_t size; 2323 2324 assert(arena != NULL); 2325 assert(arena->magic == ARENA_MAGIC); 2326 assert(chunk->arena == arena); 2327 assert(ptr != NULL); 2328 assert(CHUNK_ADDR2BASE(ptr) != ptr); 2329 2330 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); 2331 mapelm = chunk->map[pageind]; 2332 assert(mapelm.free == false); 2333 if (mapelm.large == false) { 2334 arena_run_t *run; 2335 arena_bin_t *bin; 2336 2337 /* Small allocation. */ 2338 2339 pageind -= mapelm.pos; 2340 2341 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow]; 2342 assert(run->magic == ARENA_RUN_MAGIC); 2343 bin = run->bin; 2344 size = bin->reg_size; 2345 2346 if (opt_junk) 2347 memset(ptr, 0x5a, size); 2348 2349 malloc_mutex_lock(&arena->mtx); 2350 arena_run_reg_dalloc(run, bin, ptr, size); 2351 run->nfree++; 2352 if (run->nfree > run->free_max) { 2353 /* Demote run to lower fullness quartile. */ 2354 arena_bin_run_demote(arena, bin, run); 2355 } 2356 } else { 2357 /* Medium allocation. */ 2358 2359 size = mapelm.npages << pagesize_2pow; 2360 assert((((uintptr_t)ptr) & (size - 1)) == 0); 2361 2362 if (opt_junk) 2363 memset(ptr, 0x5a, size); 2364 2365 malloc_mutex_lock(&arena->mtx); 2366 arena_run_dalloc(arena, (arena_run_t *)ptr, size); 2367 } 2368 2369#ifdef MALLOC_STATS 2370 arena->stats.allocated -= size; 2371#endif 2372 2373 malloc_mutex_unlock(&arena->mtx); 2374} 2375 2376static bool 2377arena_new(arena_t *arena) 2378{ 2379 unsigned i; 2380 arena_bin_t *bin; 2381 size_t pow2_size, run_size; 2382 2383 malloc_mutex_init(&arena->mtx); 2384 2385#ifdef MALLOC_STATS 2386 memset(&arena->stats, 0, sizeof(arena_stats_t)); 2387#endif 2388 2389 /* Initialize chunks. */ 2390 RB_INIT(&arena->chunks); 2391 2392 /* Initialize bins. */ 2393 2394 /* (2^n)-spaced tiny bins. */ 2395 for (i = 0; i < ntbins; i++) { 2396 bin = &arena->bins[i]; 2397 bin->runcur = NULL; 2398 qr_new((arena_run_t *)&bin->runs0, link); 2399 qr_new((arena_run_t *)&bin->runs25, link); 2400 qr_new((arena_run_t *)&bin->runs50, link); 2401 qr_new((arena_run_t *)&bin->runs75, link); 2402 2403 bin->reg_size = (1 << (tiny_min_2pow + i)); 2404 2405 /* 2406 * Calculate how large of a run to allocate. Make sure that at 2407 * least RUN_MIN_REGS regions fit in the run. 2408 */ 2409 run_size = bin->reg_size << RUN_MIN_REGS_2POW; 2410 if (run_size < pagesize) 2411 run_size = pagesize; 2412 if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) 2413 run_size = (pagesize << RUN_MAX_PAGES_2POW); 2414 if (run_size > arena_maxclass) 2415 run_size = arena_maxclass; 2416 bin->run_size = run_size; 2417 2418 assert(run_size >= sizeof(arena_run_t)); 2419 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; 2420 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) { 2421 /* Take care not to overflow regs_mask. */ 2422 bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3); 2423 } 2424 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); 2425 2426#ifdef MALLOC_STATS 2427 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); 2428#endif 2429 } 2430 2431 /* Quantum-spaced bins. */ 2432 for (; i < ntbins + nqbins; i++) { 2433 bin = &arena->bins[i]; 2434 bin->runcur = NULL; 2435 qr_new((arena_run_t *)&bin->runs0, link); 2436 qr_new((arena_run_t *)&bin->runs25, link); 2437 qr_new((arena_run_t *)&bin->runs50, link); 2438 qr_new((arena_run_t *)&bin->runs75, link); 2439 2440 bin->reg_size = quantum * (i - ntbins + 1); 2441 2442 /* 2443 * Calculate how large of a run to allocate. Make sure that at 2444 * least RUN_MIN_REGS regions fit in the run. 2445 */ 2446 pow2_size = pow2_ceil(quantum * (i - ntbins + 1)); 2447 run_size = (pow2_size << RUN_MIN_REGS_2POW); 2448 if (run_size < pagesize) 2449 run_size = pagesize; 2450 if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) 2451 run_size = (pagesize << RUN_MAX_PAGES_2POW); 2452 if (run_size > arena_maxclass) 2453 run_size = arena_maxclass; 2454 bin->run_size = run_size; 2455 2456 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; 2457 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3)); 2458 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); 2459 2460#ifdef MALLOC_STATS 2461 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); 2462#endif 2463 } 2464 2465 /* (2^n)-spaced sub-page bins. */ 2466 for (; i < ntbins + nqbins + nsbins; i++) { 2467 bin = &arena->bins[i]; 2468 bin->runcur = NULL; 2469 qr_new((arena_run_t *)&bin->runs0, link); 2470 qr_new((arena_run_t *)&bin->runs25, link); 2471 qr_new((arena_run_t *)&bin->runs50, link); 2472 qr_new((arena_run_t *)&bin->runs75, link); 2473 2474 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1)); 2475 2476 /* 2477 * Calculate how large of a run to allocate. Make sure that at 2478 * least RUN_MIN_REGS regions fit in the run. 2479 */ 2480 run_size = bin->reg_size << RUN_MIN_REGS_2POW; 2481 if (run_size < pagesize) 2482 run_size = pagesize; 2483 if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) 2484 run_size = (pagesize << RUN_MAX_PAGES_2POW); 2485 if (run_size > arena_maxclass) 2486 run_size = arena_maxclass; 2487 bin->run_size = run_size; 2488 2489 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; 2490 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3)); 2491 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); 2492 2493#ifdef MALLOC_STATS 2494 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); 2495#endif 2496 } 2497 2498#ifdef MALLOC_DEBUG 2499 arena->magic = ARENA_MAGIC; 2500#endif 2501 2502 return (false); 2503} 2504 2505/* Create a new arena and insert it into the arenas array at index ind. */ 2506static arena_t * 2507arenas_extend(unsigned ind) 2508{ 2509 arena_t *ret; 2510 2511 /* Allocate enough space for trailing bins. */ 2512 ret = (arena_t *)base_alloc(sizeof(arena_t) 2513 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1))); 2514 if (ret != NULL && arena_new(ret) == false) { 2515 arenas[ind] = ret; 2516 return (ret); 2517 } 2518 /* Only reached if there is an OOM error. */ 2519 2520 /* 2521 * OOM here is quite inconvenient to propagate, since dealing with it 2522 * would require a check for failure in the fast path. Instead, punt 2523 * by using arenas[0]. In practice, this is an extremely unlikely 2524 * failure. 2525 */ 2526 malloc_printf("%s: (malloc) Error initializing arena\n", 2527 _getprogname()); 2528 if (opt_abort) 2529 abort(); 2530 2531 return (arenas[0]); 2532} 2533 2534/* 2535 * End arena. 2536 */ 2537/******************************************************************************/ 2538/* 2539 * Begin general internal functions. 2540 */ 2541 2542static void * 2543huge_malloc(size_t size) 2544{ 2545 void *ret; 2546 size_t csize; 2547 chunk_node_t *node; 2548 2549 /* Allocate one or more contiguous chunks for this request. */ 2550 2551 csize = CHUNK_CEILING(size); 2552 if (csize == 0) { 2553 /* size is large enough to cause size_t wrap-around. */ 2554 return (NULL); 2555 } 2556 2557 /* Allocate a chunk node with which to track the chunk. */ 2558 node = base_chunk_node_alloc(); 2559 if (node == NULL) 2560 return (NULL); 2561 2562 ret = chunk_alloc(csize); 2563 if (ret == NULL) { 2564 base_chunk_node_dealloc(node); 2565 return (NULL); 2566 } 2567 2568 /* Insert node into huge. */ 2569 node->chunk = ret; 2570 node->size = csize; 2571 2572 malloc_mutex_lock(&chunks_mtx); 2573 RB_INSERT(chunk_tree_s, &huge, node); 2574#ifdef MALLOC_STATS 2575 huge_nmalloc++; 2576 huge_allocated += csize; 2577#endif 2578 malloc_mutex_unlock(&chunks_mtx); 2579 2580 if (opt_junk && ret != NULL) 2581 memset(ret, 0xa5, csize); 2582 else if (opt_zero && ret != NULL) 2583 memset(ret, 0, csize); 2584 2585 return (ret); 2586} 2587 2588static void * 2589huge_ralloc(void *ptr, size_t size, size_t oldsize) 2590{ 2591 void *ret; 2592 2593 /* Avoid moving the allocation if the size class would not change. */ 2594 if (oldsize > arena_maxclass && 2595 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) 2596 return (ptr); 2597 2598 /* 2599 * If we get here, then size and oldsize are different enough that we 2600 * need to use a different size class. In that case, fall back to 2601 * allocating new space and copying. 2602 */ 2603 ret = huge_malloc(size); 2604 if (ret == NULL) 2605 return (NULL); 2606 2607 if (CHUNK_ADDR2BASE(ptr) == ptr) { 2608 /* The old allocation is a chunk. */ 2609 if (size < oldsize) 2610 memcpy(ret, ptr, size); 2611 else 2612 memcpy(ret, ptr, oldsize); 2613 } else { 2614 /* The old allocation is a region. */ 2615 assert(oldsize < size); 2616 memcpy(ret, ptr, oldsize); 2617 } 2618 idalloc(ptr); 2619 return (ret); 2620} 2621 2622static void 2623huge_dalloc(void *ptr) 2624{ 2625 chunk_node_t key; 2626 chunk_node_t *node; 2627 2628 malloc_mutex_lock(&chunks_mtx); 2629 2630 /* Extract from tree of huge allocations. */ 2631 key.chunk = ptr; 2632 node = RB_FIND(chunk_tree_s, &huge, &key); 2633 assert(node != NULL); 2634 assert(node->chunk == ptr); 2635 RB_REMOVE(chunk_tree_s, &huge, node); 2636 2637#ifdef MALLOC_STATS 2638 /* Update counters. */ 2639 huge_ndalloc++; 2640 huge_allocated -= node->size; 2641#endif 2642 2643 malloc_mutex_unlock(&chunks_mtx); 2644 2645 /* Unmap chunk. */ 2646#ifdef USE_BRK 2647 if (opt_junk) 2648 memset(node->chunk, 0x5a, node->size); 2649#endif 2650 chunk_dealloc(node->chunk, node->size); 2651 2652 base_chunk_node_dealloc(node); 2653} 2654 2655static void * 2656imalloc(size_t size) 2657{ 2658 void *ret; 2659 2660 assert(size != 0); 2661 2662 if (size <= arena_maxclass) 2663 ret = arena_malloc(choose_arena(), size); 2664 else 2665 ret = huge_malloc(size); 2666 2667 return (ret); 2668} 2669 2670static void * 2671ipalloc(size_t alignment, size_t size) 2672{ 2673 void *ret; 2674 size_t alloc_size; 2675 2676 /* 2677 * Take advantage of the fact that for each size class, every object is 2678 * aligned at the smallest power of two that is non-zero in the base 2679 * two representation of the size. For example: 2680 * 2681 * Size | Base 2 | Minimum alignment 2682 * -----+----------+------------------ 2683 * 96 | 1100000 | 32 2684 * 144 | 10100000 | 32 2685 * 192 | 11000000 | 64 2686 * 2687 * Depending on runtime settings, it is possible that arena_malloc() 2688 * will further round up to a power of two, but that never causes 2689 * correctness issues. 2690 */ 2691 alloc_size = (size + (alignment - 1)) & (-alignment); 2692 if (alloc_size < size) { 2693 /* size_t overflow. */ 2694 return (NULL); 2695 } 2696 2697 if (alloc_size <= arena_maxclass) 2698 ret = arena_malloc(choose_arena(), alloc_size); 2699 else { 2700 if (alignment <= chunk_size) 2701 ret = huge_malloc(size); 2702 else { 2703 size_t chunksize, offset; 2704 chunk_node_t *node; 2705 2706 /* 2707 * This allocation requires alignment that is even 2708 * larger than chunk alignment. This means that 2709 * huge_malloc() isn't good enough. 2710 * 2711 * Allocate almost twice as many chunks as are demanded 2712 * by the size or alignment, in order to assure the 2713 * alignment can be achieved, then unmap leading and 2714 * trailing chunks. 2715 */ 2716 2717 chunksize = CHUNK_CEILING(size); 2718 2719 if (size >= alignment) 2720 alloc_size = chunksize + alignment - chunk_size; 2721 else 2722 alloc_size = (alignment << 1) - chunk_size; 2723 2724 /* 2725 * Allocate a chunk node with which to track the chunk. 2726 */ 2727 node = base_chunk_node_alloc(); 2728 if (node == NULL) 2729 return (NULL); 2730 2731 ret = chunk_alloc(alloc_size); 2732 if (ret == NULL) { 2733 base_chunk_node_dealloc(node); 2734 return (NULL); 2735 } 2736 2737 offset = (uintptr_t)ret & (alignment - 1); 2738 assert(offset % chunk_size == 0); 2739 assert(offset < alloc_size); 2740 if (offset == 0) { 2741 /* Trim trailing space. */ 2742 chunk_dealloc((void *)((uintptr_t)ret 2743 + chunksize), alloc_size - chunksize); 2744 } else { 2745 size_t trailsize; 2746 2747 /* Trim leading space. */ 2748 chunk_dealloc(ret, alignment - offset); 2749 2750 ret = (void *)((uintptr_t)ret + (alignment 2751 - offset)); 2752 2753 trailsize = alloc_size - (alignment - offset) 2754 - chunksize; 2755 if (trailsize != 0) { 2756 /* Trim trailing space. */ 2757 assert(trailsize < alloc_size); 2758 chunk_dealloc((void *)((uintptr_t)ret 2759 + chunksize), trailsize); 2760 } 2761 } 2762 2763 /* Insert node into huge. */ 2764 node->chunk = ret; 2765 node->size = chunksize; 2766 2767 malloc_mutex_lock(&chunks_mtx); 2768 RB_INSERT(chunk_tree_s, &huge, node); 2769#ifdef MALLOC_STATS 2770 huge_allocated += size; 2771#endif 2772 malloc_mutex_unlock(&chunks_mtx); 2773 2774 if (opt_junk) 2775 memset(ret, 0xa5, chunksize); 2776 else if (opt_zero) 2777 memset(ret, 0, chunksize); 2778 } 2779 } 2780 2781 assert(((uintptr_t)ret & (alignment - 1)) == 0); 2782 return (ret); 2783} 2784 2785static void * 2786icalloc(size_t size) 2787{ 2788 void *ret; 2789 2790 if (size <= arena_maxclass) { 2791 ret = arena_malloc(choose_arena(), size); 2792 if (ret == NULL) 2793 return (NULL); 2794 memset(ret, 0, size); 2795 } else { 2796 /* 2797 * The virtual memory system provides zero-filled pages, so 2798 * there is no need to do so manually, unless opt_junk is 2799 * enabled, in which case huge_malloc() fills huge allocations 2800 * with junk. 2801 */ 2802 ret = huge_malloc(size); 2803 if (ret == NULL) 2804 return (NULL); 2805 2806 if (opt_junk) 2807 memset(ret, 0, size); 2808#ifdef USE_BRK 2809 else if ((uintptr_t)ret >= (uintptr_t)brk_base 2810 && (uintptr_t)ret < (uintptr_t)brk_max) { 2811 /* 2812 * This may be a re-used brk chunk. Therefore, zero 2813 * the memory. 2814 */ 2815 memset(ret, 0, size); 2816 } 2817#endif 2818 } 2819 2820 return (ret); 2821} 2822 2823static size_t 2824isalloc(const void *ptr) 2825{ 2826 size_t ret; 2827 arena_chunk_t *chunk; 2828 2829 assert(ptr != NULL); 2830 2831 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); 2832 if (chunk != ptr) { 2833 /* Region. */ 2834 assert(chunk->arena->magic == ARENA_MAGIC); 2835 2836 ret = arena_salloc(ptr); 2837 } else { 2838 chunk_node_t *node, key; 2839 2840 /* Chunk (huge allocation). */ 2841 2842 malloc_mutex_lock(&chunks_mtx); 2843 2844 /* Extract from tree of huge allocations. */ 2845 key.chunk = (void *)ptr; 2846 node = RB_FIND(chunk_tree_s, &huge, &key); 2847 assert(node != NULL); 2848 2849 ret = node->size; 2850 2851 malloc_mutex_unlock(&chunks_mtx); 2852 } 2853 2854 return (ret); 2855} 2856 2857static void * 2858iralloc(void *ptr, size_t size) 2859{ 2860 void *ret; 2861 size_t oldsize; 2862 2863 assert(ptr != NULL); 2864 assert(size != 0); 2865 2866 oldsize = isalloc(ptr); 2867 2868 if (size <= arena_maxclass) 2869 ret = arena_ralloc(ptr, size, oldsize); 2870 else 2871 ret = huge_ralloc(ptr, size, oldsize); 2872 2873 return (ret); 2874} 2875 2876static void 2877idalloc(void *ptr) 2878{ 2879 arena_chunk_t *chunk; 2880 2881 assert(ptr != NULL); 2882 2883 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); 2884 if (chunk != ptr) { 2885 /* Region. */ 2886#ifdef MALLOC_STATS 2887 malloc_mutex_lock(&chunk->arena->mtx); 2888 chunk->arena->stats.ndalloc++; 2889 malloc_mutex_unlock(&chunk->arena->mtx); 2890#endif 2891 arena_dalloc(chunk->arena, chunk, ptr); 2892 } else 2893 huge_dalloc(ptr); 2894} 2895 2896static void 2897malloc_print_stats(void) 2898{ 2899 2900 if (opt_print_stats) { 2901 malloc_printf("___ Begin malloc statistics ___\n"); 2902 malloc_printf("Number of CPUs: %u\n", ncpus); 2903 malloc_printf("Number of arenas: %u\n", narenas); 2904 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size, 2905 opt_chunk_2pow); 2906 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum, 2907 opt_quantum_2pow); 2908 malloc_printf("Max small size: %zu\n", small_max); 2909 malloc_printf("Pointer size: %u\n", sizeof(void *)); 2910 malloc_printf("Assertions %s\n", 2911#ifdef NDEBUG 2912 "disabled" 2913#else 2914 "enabled" 2915#endif 2916 ); 2917 2918#ifdef MALLOC_STATS 2919 2920 { 2921 size_t allocated, total; 2922 unsigned i; 2923 arena_t *arena; 2924 2925 /* Calculate and print allocated/total stats. */ 2926 2927 /* arenas. */ 2928 for (i = 0, allocated = 0; i < narenas; i++) { 2929 if (arenas[i] != NULL) { 2930 malloc_mutex_lock(&arenas[i]->mtx); 2931 allocated += arenas[i]->stats.allocated; 2932 malloc_mutex_unlock(&arenas[i]->mtx); 2933 } 2934 } 2935 2936 /* huge. */ 2937 malloc_mutex_lock(&chunks_mtx); 2938 allocated += huge_allocated; 2939 total = stats_chunks.curchunks * chunk_size; 2940 malloc_mutex_unlock(&chunks_mtx); 2941 2942 malloc_printf("Allocated: %zu, space used: %zu\n", 2943 allocated, total); 2944 2945 /* Print chunk stats. */ 2946 { 2947 chunk_stats_t chunks_stats; 2948 2949 malloc_mutex_lock(&chunks_mtx); 2950 chunks_stats = stats_chunks; 2951 malloc_mutex_unlock(&chunks_mtx); 2952 2953 malloc_printf("\nchunks:\n"); 2954 malloc_printf(" %13s%13s%13s\n", "nchunks", 2955 "highchunks", "curchunks"); 2956 malloc_printf(" %13llu%13lu%13lu\n", 2957 chunks_stats.nchunks, 2958 chunks_stats.highchunks, 2959 chunks_stats.curchunks); 2960 } 2961 2962 /* Print chunk stats. */ 2963 malloc_printf("\nhuge:\n"); 2964 malloc_printf("%12s %12s %12s\n", 2965 "nmalloc", "ndalloc", "allocated"); 2966 malloc_printf("%12llu %12llu %12zu\n", 2967 huge_nmalloc, huge_ndalloc, huge_allocated); 2968 2969 /* Print stats for each arena. */ 2970 for (i = 0; i < narenas; i++) { 2971 arena = arenas[i]; 2972 if (arena != NULL) { 2973 malloc_printf( 2974 "\narenas[%u] statistics:\n", i); 2975 malloc_mutex_lock(&arena->mtx); 2976 stats_print(arena); 2977 malloc_mutex_unlock(&arena->mtx); 2978 } 2979 } 2980 } 2981#endif /* #ifdef MALLOC_STATS */ 2982 malloc_printf("--- End malloc statistics ---\n"); 2983 } 2984} 2985 2986/* 2987 * FreeBSD's pthreads implementation calls malloc(3), so the malloc 2988 * implementation has to take pains to avoid infinite recursion during 2989 * initialization. 2990 */ 2991static inline bool 2992malloc_init(void) 2993{ 2994 2995 if (malloc_initialized == false) 2996 return (malloc_init_hard()); 2997 2998 return (false); 2999} 3000 3001static bool 3002malloc_init_hard(void) 3003{ 3004 unsigned i, j; 3005 int linklen; 3006 char buf[PATH_MAX + 1]; 3007 const char *opts; 3008 3009 malloc_mutex_lock(&init_lock); 3010 if (malloc_initialized) { 3011 /* 3012 * Another thread initialized the allocator before this one 3013 * acquired init_lock. 3014 */ 3015 malloc_mutex_unlock(&init_lock); 3016 return (false); 3017 } 3018 3019 /* Get number of CPUs. */ 3020 { 3021 int mib[2]; 3022 size_t len; 3023 3024 mib[0] = CTL_HW; 3025 mib[1] = HW_NCPU; 3026 len = sizeof(ncpus); 3027 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) { 3028 /* Error. */ 3029 ncpus = 1; 3030 } 3031 } 3032 3033 /* Get page size. */ 3034 { 3035 long result; 3036 3037 result = sysconf(_SC_PAGESIZE); 3038 assert(result != -1); 3039 pagesize = (unsigned) result; 3040 3041 /* 3042 * We assume that pagesize is a power of 2 when calculating 3043 * pagesize_2pow. 3044 */ 3045 assert(((result - 1) & result) == 0); 3046 pagesize_2pow = ffs(result) - 1; 3047 } 3048 3049 for (i = 0; i < 3; i++) { 3050 /* Get runtime configuration. */ 3051 switch (i) { 3052 case 0: 3053 if ((linklen = readlink("/etc/malloc.conf", buf, 3054 sizeof(buf) - 1)) != -1) { 3055 /* 3056 * Use the contents of the "/etc/malloc.conf" 3057 * symbolic link's name. 3058 */ 3059 buf[linklen] = '\0'; 3060 opts = buf; 3061 } else { 3062 /* No configuration specified. */ 3063 buf[0] = '\0'; 3064 opts = buf; 3065 } 3066 break; 3067 case 1: 3068 if (issetugid() == 0 && (opts = 3069 getenv("MALLOC_OPTIONS")) != NULL) { 3070 /* 3071 * Do nothing; opts is already initialized to 3072 * the value of the MALLOC_OPTIONS environment 3073 * variable. 3074 */ 3075 } else { 3076 /* No configuration specified. */ 3077 buf[0] = '\0'; 3078 opts = buf; 3079 } 3080 break; 3081 case 2: 3082 if (_malloc_options != NULL) { 3083 /* 3084 * Use options that were compiled into the program. 3085 */ 3086 opts = _malloc_options; 3087 } else { 3088 /* No configuration specified. */ 3089 buf[0] = '\0'; 3090 opts = buf; 3091 } 3092 break; 3093 default: 3094 /* NOTREACHED */ 3095 assert(false); 3096 } 3097 3098 for (j = 0; opts[j] != '\0'; j++) { 3099 switch (opts[j]) { 3100 case 'a': 3101 opt_abort = false; 3102 break; 3103 case 'A': 3104 opt_abort = true; 3105 break; 3106 case 'h': 3107 opt_hint = false; 3108 break; 3109 case 'H': 3110 opt_hint = true; 3111 break; 3112 case 'j': 3113 opt_junk = false; 3114 break; 3115 case 'J': 3116 opt_junk = true; 3117 break; 3118 case 'k': 3119 /* 3120 * Run fullness quartile limits don't have 3121 * enough resolution if there are too few 3122 * regions for the largest bin size classes. 3123 */ 3124 if (opt_chunk_2pow > pagesize_2pow + 4) 3125 opt_chunk_2pow--; 3126 break; 3127 case 'K': 3128 if (opt_chunk_2pow < CHUNK_2POW_MAX) 3129 opt_chunk_2pow++; 3130 break; 3131 case 'n': 3132 opt_narenas_lshift--; 3133 break; 3134 case 'N': 3135 opt_narenas_lshift++; 3136 break; 3137 case 'p': 3138 opt_print_stats = false; 3139 break; 3140 case 'P': 3141 opt_print_stats = true; 3142 break; 3143 case 'q': 3144 if (opt_quantum_2pow > QUANTUM_2POW_MIN) 3145 opt_quantum_2pow--; 3146 break; 3147 case 'Q': 3148 if (opt_quantum_2pow < pagesize_2pow - 1) 3149 opt_quantum_2pow++; 3150 break; 3151 case 's': 3152 if (opt_small_max_2pow > QUANTUM_2POW_MIN) 3153 opt_small_max_2pow--; 3154 break; 3155 case 'S': 3156 if (opt_small_max_2pow < pagesize_2pow - 1) 3157 opt_small_max_2pow++; 3158 break; 3159 case 'u': 3160 opt_utrace = false; 3161 break; 3162 case 'U': 3163 opt_utrace = true; 3164 break; 3165 case 'v': 3166 opt_sysv = false; 3167 break; 3168 case 'V': 3169 opt_sysv = true; 3170 break; 3171 case 'x': 3172 opt_xmalloc = false; 3173 break; 3174 case 'X': 3175 opt_xmalloc = true; 3176 break; 3177 case 'z': 3178 opt_zero = false; 3179 break; 3180 case 'Z': 3181 opt_zero = true; 3182 break; 3183 default: 3184 malloc_printf("%s: (malloc) Unsupported" 3185 " character in malloc options: '%c'\n", 3186 _getprogname(), opts[j]); 3187 } 3188 } 3189 } 3190 3191 /* Take care to call atexit() only once. */ 3192 if (opt_print_stats) { 3193 /* Print statistics at exit. */ 3194 atexit(malloc_print_stats); 3195 } 3196 3197 /* Set variables according to the value of opt_small_max_2pow. */ 3198 if (opt_small_max_2pow < opt_quantum_2pow) 3199 opt_small_max_2pow = opt_quantum_2pow; 3200 small_max = (1 << opt_small_max_2pow); 3201 3202 /* Set bin-related variables. */ 3203 bin_maxclass = (pagesize >> 1); 3204 if (pagesize_2pow > RUN_MIN_REGS_2POW + 1) 3205 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1); 3206 else 3207 tiny_min_2pow = 1; 3208 assert(opt_quantum_2pow >= tiny_min_2pow); 3209 ntbins = opt_quantum_2pow - tiny_min_2pow; 3210 assert(ntbins <= opt_quantum_2pow); 3211 nqbins = (small_max >> opt_quantum_2pow); 3212 nsbins = pagesize_2pow - opt_small_max_2pow - 1; 3213 3214 /* Set variables according to the value of opt_quantum_2pow. */ 3215 quantum = (1 << opt_quantum_2pow); 3216 quantum_mask = quantum - 1; 3217 if (ntbins > 0) 3218 small_min = (quantum >> 1) + 1; 3219 else 3220 small_min = 1; 3221 assert(small_min <= quantum); 3222 3223 /* Set variables according to the value of opt_chunk_2pow. */ 3224 chunk_size = (1 << opt_chunk_2pow); 3225 chunk_size_mask = chunk_size - 1; 3226 arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow)); 3227 arena_maxclass = (chunk_size >> 1); 3228 3229 UTRACE(0, 0, 0); 3230 3231#ifdef MALLOC_STATS 3232 memset(&stats_chunks, 0, sizeof(chunk_stats_t)); 3233#endif 3234 3235 /* Various sanity checks that regard configuration. */ 3236 assert(quantum >= sizeof(void *)); 3237 assert(quantum <= pagesize); 3238 assert(chunk_size >= pagesize); 3239 assert(quantum * 4 <= chunk_size); 3240 3241 /* Initialize chunks data. */ 3242 malloc_mutex_init(&chunks_mtx); 3243 RB_INIT(&huge); 3244#ifdef USE_BRK 3245 brk_base = sbrk(0); 3246 brk_prev = brk_base; 3247 brk_max = brk_base; 3248#endif 3249#ifdef MALLOC_STATS 3250 huge_nmalloc = 0; 3251 huge_ndalloc = 0; 3252 huge_allocated = 0; 3253#endif 3254 RB_INIT(&old_chunks); 3255 3256 /* Initialize base allocation data structures. */ 3257#ifdef MALLOC_STATS 3258 base_total = 0; 3259#endif 3260#ifdef USE_BRK 3261 /* 3262 * Do special brk allocation here, since the base chunk doesn't really 3263 * need to be chunk-aligned. 3264 */ 3265 { 3266 void *brk_cur; 3267 intptr_t incr; 3268 3269 do { 3270 /* Get the current end of brk. */ 3271 brk_cur = sbrk(0); 3272 3273 /* 3274 * Calculate how much padding is necessary to 3275 * chunk-align the end of brk. Don't worry about 3276 * brk_cur not being chunk-aligned though. 3277 */ 3278 incr = (intptr_t)chunk_size 3279 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur); 3280 3281 brk_prev = sbrk(incr); 3282 if (brk_prev == brk_cur) { 3283 /* Success. */ 3284 break; 3285 } 3286 } while (brk_prev != (void *)-1); 3287 3288 base_chunk = brk_cur; 3289 base_next_addr = base_chunk; 3290 base_past_addr = (void *)((uintptr_t)base_chunk + incr); 3291#ifdef MALLOC_STATS 3292 base_total += incr; 3293 stats_chunks.nchunks = 1; 3294 stats_chunks.curchunks = 1; 3295 stats_chunks.highchunks = 1; 3296#endif 3297 } 3298#else 3299 /* 3300 * The first base chunk will be allocated when needed by base_alloc(). 3301 */ 3302 base_chunk = NULL; 3303 base_next_addr = NULL; 3304 base_past_addr = NULL; 3305#endif 3306 base_chunk_nodes = NULL; 3307 malloc_mutex_init(&base_mtx); 3308 3309 if (ncpus > 1) { 3310 /* 3311 * For SMP systems, create four times as many arenas as there 3312 * are CPUs by default. 3313 */ 3314 opt_narenas_lshift += 2; 3315 } 3316 3317 /* Determine how many arenas to use. */ 3318 narenas = ncpus; 3319 if (opt_narenas_lshift > 0) { 3320 if ((narenas << opt_narenas_lshift) > narenas) 3321 narenas <<= opt_narenas_lshift; 3322 /* 3323 * Make sure not to exceed the limits of what base_malloc() 3324 * can handle. 3325 */ 3326 if (narenas * sizeof(arena_t *) > chunk_size) 3327 narenas = chunk_size / sizeof(arena_t *); 3328 } else if (opt_narenas_lshift < 0) { 3329 if ((narenas << opt_narenas_lshift) < narenas) 3330 narenas <<= opt_narenas_lshift; 3331 /* Make sure there is at least one arena. */ 3332 if (narenas == 0) 3333 narenas = 1; 3334 } 3335 3336#ifdef NO_TLS 3337 if (narenas > 1) { 3338 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, 3339 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 3340 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 3341 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 3342 223, 227, 229, 233, 239, 241, 251, 257, 263}; 3343 unsigned i, nprimes, parenas; 3344 3345 /* 3346 * Pick a prime number of hash arenas that is more than narenas 3347 * so that direct hashing of pthread_self() pointers tends to 3348 * spread allocations evenly among the arenas. 3349 */ 3350 assert((narenas & 1) == 0); /* narenas must be even. */ 3351 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW); 3352 parenas = primes[nprimes - 1]; /* In case not enough primes. */ 3353 for (i = 1; i < nprimes; i++) { 3354 if (primes[i] > narenas) { 3355 parenas = primes[i]; 3356 break; 3357 } 3358 } 3359 narenas = parenas; 3360 } 3361#endif 3362 3363#ifndef NO_TLS 3364 next_arena = 0; 3365#endif 3366 3367 /* Allocate and initialize arenas. */ 3368 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); 3369 if (arenas == NULL) { 3370 malloc_mutex_unlock(&init_lock); 3371 return (true); 3372 } 3373 /* 3374 * Zero the array. In practice, this should always be pre-zeroed, 3375 * since it was just mmap()ed, but let's be sure. 3376 */ 3377 memset(arenas, 0, sizeof(arena_t *) * narenas); 3378 3379 /* 3380 * Initialize one arena here. The rest are lazily created in 3381 * arena_choose_hard(). 3382 */ 3383 arenas_extend(0); 3384 if (arenas[0] == NULL) { 3385 malloc_mutex_unlock(&init_lock); 3386 return (true); 3387 } 3388 3389 malloc_mutex_init(&arenas_mtx); 3390 3391 malloc_initialized = true; 3392 malloc_mutex_unlock(&init_lock); 3393 return (false); 3394} 3395 3396/* 3397 * End general internal functions. 3398 */ 3399/******************************************************************************/ 3400/* 3401 * Begin malloc(3)-compatible functions. 3402 */ 3403 3404void * 3405malloc(size_t size) 3406{ 3407 void *ret; 3408 3409 if (malloc_init()) { 3410 ret = NULL; 3411 goto RETURN; 3412 } 3413 3414 if (size == 0) { 3415 if (opt_sysv == false) 3416 size = 1; 3417 else { 3418 ret = NULL; 3419 goto RETURN; 3420 } 3421 } 3422 3423 ret = imalloc(size); 3424 3425RETURN: 3426 if (ret == NULL) { 3427 if (opt_xmalloc) { 3428 malloc_printf("%s: (malloc) Error in malloc(%zu):" 3429 " out of memory\n", _getprogname(), size); 3430 abort(); 3431 } 3432 errno = ENOMEM; 3433 } 3434 3435 UTRACE(0, size, ret); 3436 return (ret); 3437} 3438 3439int 3440posix_memalign(void **memptr, size_t alignment, size_t size) 3441{ 3442 int ret; 3443 void *result; 3444 3445 if (malloc_init()) 3446 result = NULL; 3447 else { 3448 /* Make sure that alignment is a large enough power of 2. */ 3449 if (((alignment - 1) & alignment) != 0 3450 || alignment < sizeof(void *)) { 3451 if (opt_xmalloc) { 3452 malloc_printf("%s: (malloc) Error in" 3453 " posix_memalign(%zu, %zu):" 3454 " invalid alignment\n", 3455 _getprogname(), alignment, size); 3456 abort(); 3457 } 3458 result = NULL; 3459 ret = EINVAL; 3460 goto RETURN; 3461 } 3462 3463 result = ipalloc(alignment, size); 3464 } 3465 3466 if (result == NULL) { 3467 if (opt_xmalloc) { 3468 malloc_printf("%s: (malloc) Error in" 3469 " posix_memalign(%zu, %zu): out of memory\n", 3470 _getprogname(), alignment, size); 3471 abort(); 3472 } 3473 ret = ENOMEM; 3474 goto RETURN; 3475 } 3476 3477 *memptr = result; 3478 ret = 0; 3479 3480RETURN: 3481 UTRACE(0, size, result); 3482 return (ret); 3483} 3484 3485void * 3486calloc(size_t num, size_t size) 3487{ 3488 void *ret; 3489 size_t num_size; 3490 3491 if (malloc_init()) { 3492 ret = NULL; 3493 goto RETURN; 3494 } 3495 3496 num_size = num * size; 3497 if (num_size == 0) { 3498 if (opt_sysv == false) 3499 num_size = 1; 3500 else { 3501 ret = NULL; 3502 goto RETURN; 3503 } 3504 /* 3505 * Try to avoid division here. We know that it isn't possible to 3506 * overflow during multiplication if neither operand uses any of the 3507 * most significant half of the bits in a size_t. 3508 */ 3509 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) 3510 && (num_size / size != num)) { 3511 /* size_t overflow. */ 3512 ret = NULL; 3513 goto RETURN; 3514 } 3515 3516 ret = icalloc(num_size); 3517 3518RETURN: 3519 if (ret == NULL) { 3520 if (opt_xmalloc) { 3521 malloc_printf("%s: (malloc) Error in" 3522 " calloc(%zu, %zu): out of memory\n", 3523 _getprogname(), num, size); 3524 abort(); 3525 } 3526 errno = ENOMEM; 3527 } 3528 3529 UTRACE(0, num_size, ret); 3530 return (ret); 3531} 3532 3533void * 3534realloc(void *ptr, size_t size) 3535{ 3536 void *ret; 3537 3538 if (size == 0) { 3539 if (opt_sysv == false) 3540 size = 1; 3541 else { 3542 if (ptr != NULL) 3543 idalloc(ptr); 3544 ret = NULL; 3545 goto RETURN; 3546 } 3547 } 3548 3549 if (ptr != NULL) { 3550 assert(malloc_initialized); 3551 3552 ret = iralloc(ptr, size); 3553 3554 if (ret == NULL) { 3555 if (opt_xmalloc) { 3556 malloc_printf("%s: (malloc) Error in" 3557 " realloc(%p, %zu): out of memory\n", 3558 _getprogname(), ptr, size); 3559 abort(); 3560 } 3561 errno = ENOMEM; 3562 } 3563 } else { 3564 if (malloc_init()) 3565 ret = NULL; 3566 else 3567 ret = imalloc(size); 3568 3569 if (ret == NULL) { 3570 if (opt_xmalloc) { 3571 malloc_printf("%s: (malloc) Error in" 3572 " realloc(%p, %zu): out of memory\n", 3573 _getprogname(), ptr, size); 3574 abort(); 3575 } 3576 errno = ENOMEM; 3577 } 3578 } 3579 3580RETURN: 3581 UTRACE(ptr, size, ret); 3582 return (ret); 3583} 3584 3585void 3586free(void *ptr) 3587{ 3588 3589 UTRACE(ptr, 0, 0); 3590 if (ptr != NULL) { 3591 assert(malloc_initialized); 3592 3593 idalloc(ptr); 3594 } 3595} 3596 3597/* 3598 * End malloc(3)-compatible functions. 3599 */ 3600/******************************************************************************/ 3601/* 3602 * Begin non-standard functions. 3603 */ 3604 3605size_t 3606malloc_usable_size(const void *ptr) 3607{ 3608 3609 assert(ptr != NULL); 3610 3611 return (isalloc(ptr)); 3612} 3613 3614/* 3615 * End non-standard functions. 3616 */ 3617/******************************************************************************/ 3618/* 3619 * Begin library-private functions, used by threading libraries for protection 3620 * of malloc during fork(). These functions are only called if the program is 3621 * running in threaded mode, so there is no need to check whether the program 3622 * is threaded here. 3623 */ 3624 3625void 3626_malloc_prefork(void) 3627{ 3628 unsigned i; 3629 3630 /* Acquire all mutexes in a safe order. */ 3631 3632 malloc_mutex_lock(&arenas_mtx); 3633 for (i = 0; i < narenas; i++) { 3634 if (arenas[i] != NULL) 3635 malloc_mutex_lock(&arenas[i]->mtx); 3636 } 3637 malloc_mutex_unlock(&arenas_mtx); 3638 3639 malloc_mutex_lock(&base_mtx); 3640 3641 malloc_mutex_lock(&chunks_mtx); 3642} 3643 3644void 3645_malloc_postfork(void) 3646{ 3647 unsigned i; 3648 3649 /* Release all mutexes, now that fork() has completed. */ 3650 3651 malloc_mutex_unlock(&chunks_mtx); 3652 3653 malloc_mutex_unlock(&base_mtx); 3654 3655 malloc_mutex_lock(&arenas_mtx); 3656 for (i = 0; i < narenas; i++) { 3657 if (arenas[i] != NULL) 3658 malloc_mutex_unlock(&arenas[i]->mtx); 3659 } 3660 malloc_mutex_unlock(&arenas_mtx); 3661} 3662 3663/* 3664 * End library-private functions. 3665 */ 3666/******************************************************************************/
|