arena.h (jemalloc) — revision 234543
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since when heap profiling is enabled
 * there is one pointer of header overhead per object (plus a constant).  This
 * constraint is relaxed (ignored) for runs that are so small that the
 * per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)))
 */
#define RUN_BFP                 12
/*                                    \/   Implicit binary fixed point. */
#define RUN_MAX_OVRHD           0x0000003dU
#define RUN_MAX_OVRHD_RELAX     0x00001800U
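
/*
 * Worked example (editor's sketch, not part of the original header): with
 * RUN_BFP == 12, overhead ratios are expressed in units of 1/4096, so
 * RUN_MAX_OVRHD == 0x3d == 61 caps desired header overhead at 61/4096
 * (~1.5%).  Run sizing effectively searches for the smallest run_size
 * satisfying
 *
 *   (header_waste << RUN_BFP) <= RUN_MAX_OVRHD * run_size
 *
 * e.g. a one-page (4 KiB) run may devote at most 61 bytes to header overhead
 * before a larger run size must be tried.
 */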

/* Maximum number of regions in one run. */
#define LG_RUN_MAXREGS          11
#define RUN_MAXREGS             (1U << LG_RUN_MAXREGS)

/*
 * Minimum redzone size.  Redzones may be larger than this if necessary to
 * preserve region alignment.
 */
#define REDZONE_MINSIZE         16

/*
 * The minimum ratio of active:dirty pages per arena is computed as:
 *
 *   (nactive >> opt_lg_dirty_mult) >= ndirty
 *
 * So, supposing that opt_lg_dirty_mult is 5, there must be at least 32 times
 * as many active pages as dirty pages.
 */
#define LG_DIRTY_MULT_DEFAULT   5

typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
#ifndef JEMALLOC_PROF
        /*
         * Overlay prof_ctx in order to allow it to be referenced by dead code.
         * Such antics aren't warranted for per arena data structures, but
         * chunk map overhead accounts for a percentage of memory, rather than
         * being just a fixed cost.
         */
        union {
#endif
        union {
                /*
                 * Linkage for run trees.  There are two disjoint uses:
                 *
                 * 1) arena_t's runs_avail_{clean,dirty} trees.
                 * 2) arena_run_t conceptually uses this linkage for in-use
                 *    non-full runs, rather than directly embedding linkage.
                 */
                rb_node(arena_chunk_map_t)      rb_link;
                /*
                 * List of runs currently in purgatory.  arena_chunk_purge()
                 * temporarily allocates runs that contain dirty pages while
                 * purging, so that other threads cannot use the runs while
                 * the purging thread is operating without the arena lock
                 * held.
                 */
                ql_elm(arena_chunk_map_t)       ql_link;
        } u;

        /* Profile counters, used for large object runs. */
        prof_ctx_t              *prof_ctx;
#ifndef JEMALLOC_PROF
        }; /* union { ... }; */
#endif

        /*
         * Run address (or size) and various flags are stored together.  The
         * bit layout looks like (assuming 32-bit system):
         *
         *   ???????? ???????? ????---- ----dula
         *
         * ? : Unallocated: Run address for first/last pages, unset for
         *                  internal pages.
         *     Small: Run page offset.
         *     Large: Run size for first page, unset for trailing pages.
         * - : Unused.
         * d : dirty?
         * u : unzeroed?
         * l : large?
         * a : allocated?
         *
         * Following are example bit patterns for the three types of runs.
         *
         * p : run page offset
         * s : run size
         * c : (binind+1) for size class (used only if prof_promote is true)
         * x : don't care
         * - : 0
         * + : 1
         * [DULA] : bit set
         * [dula] : bit unset
         *
         *   Unallocated (clean):
         *     ssssssss ssssssss ssss---- ----du-a
         *     xxxxxxxx xxxxxxxx xxxx---- -----Uxx
         *     ssssssss ssssssss ssss---- ----dU-a
         *
         *   Unallocated (dirty):
         *     ssssssss ssssssss ssss---- ----D--a
         *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
         *     ssssssss ssssssss ssss---- ----D--a
         *
         *   Small:
         *     pppppppp pppppppp pppp---- ----d--A
         *     pppppppp pppppppp pppp---- -------A
         *     pppppppp pppppppp pppp---- ----d--A
         *
         *   Large:
         *     ssssssss ssssssss ssss---- ----D-LA
         *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
         *     -------- -------- -------- ----D-LA
         *
         *   Large (sampled, size <= PAGE):
         *     ssssssss ssssssss sssscccc ccccD-LA
         *
         *   Large (not sampled, size == PAGE):
         *     ssssssss ssssssss ssss---- ----D-LA
         */
        size_t                  bits;
#define CHUNK_MAP_CLASS_SHIFT   4
#define CHUNK_MAP_CLASS_MASK    ((size_t)0xff0U)
#define CHUNK_MAP_FLAGS_MASK    ((size_t)0xfU)
#define CHUNK_MAP_DIRTY         ((size_t)0x8U)
#define CHUNK_MAP_UNZEROED      ((size_t)0x4U)
#define CHUNK_MAP_LARGE         ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED     ((size_t)0x1U)
#define CHUNK_MAP_KEY           CHUNK_MAP_ALLOCATED
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
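
/*
 * Illustrative decode (editor's sketch, not part of the original header):
 * the low flag bits classify a page, and the remaining bits are interpreted
 * according to that classification:
 *
 *   size_t bits = chunk->map[pageind - map_bias].bits;
 *
 *   if ((bits & CHUNK_MAP_ALLOCATED) == 0)
 *           run_size = bits & ~PAGE_MASK;        unallocated first/last page
 *   else if ((bits & CHUNK_MAP_LARGE) != 0)
 *           run_size = bits & ~PAGE_MASK;        large run, first page only
 *   else
 *           run_pageind = pageind - (bits >> LG_PAGE);   small: page offset
 */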

/* Arena chunk header. */
struct arena_chunk_s {
        /* Arena that owns the chunk. */
        arena_t                 *arena;

        /* Linkage for the arena's chunks_dirty list. */
        ql_elm(arena_chunk_t)   link_dirty;

        /*
         * True if the chunk is currently in the chunks_dirty list, due to
         * having at some point contained one or more dirty pages.  Removal
         * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
         */
        bool                    dirtied;

        /* Number of dirty pages. */
        size_t                  ndirty;

        /*
         * Map of pages within chunk that keeps track of free/large/small.
         * The first map_bias entries are omitted, since the chunk header
         * does not need to be tracked in the map.  This omission saves a
         * header page for common chunk sizes (e.g. 4 MiB).
         */
        arena_chunk_map_t       map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;

struct arena_run_s {
        /* Bin this run is associated with. */
        arena_bin_t     *bin;

        /* Index of next region that has never been allocated, or nregs. */
        uint32_t        nextind;

        /* Number of free regions in run. */
        unsigned        nfree;
};

/*
 * Read-only information associated with each element of arena_t's bins array
 * is stored separately, partly to reduce memory usage (only one copy, rather
 * than one per arena), but mainly to avoid false cacheline sharing.
 *
 * Each run has the following layout:
 *
 *               /--------------------\
 *               | arena_run_t header |
 *               | ...                |
 * bitmap_offset | bitmap             |
 *               | ...                |
 *   ctx0_offset | ctx map            |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *   reg0_offset | region 0           |
 *               | redzone            |
 *               |--------------------| \
 *               | redzone            | |
 *               | region 1           |  > reg_interval
 *               | redzone            | /
 *               |--------------------|
 *               | ...                |
 *               | ...                |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *               | region nregs-1     |
 *               | redzone            |
 *               |--------------------|
 *               | alignment pad?     |
 *               \--------------------/
 *
 * reg_interval has at least the same minimum alignment as reg_size; this
 * preserves the alignment constraint that sa2u() depends on.  Alignment pad
 * is either 0 or redzone_size; it is present only if needed to align
 * reg0_offset.
 */
struct arena_bin_info_s {
        /* Size of regions in a run for this bin's size class. */
        size_t          reg_size;

        /* Redzone size. */
        size_t          redzone_size;

        /* Interval between regions (reg_size + (redzone_size << 1)). */
        size_t          reg_interval;

        /* Total size of a run for this bin's size class. */
        size_t          run_size;

        /* Total number of regions in a run for this bin's size class. */
        uint32_t        nregs;

        /*
         * Offset of first bitmap_t element in a run header for this bin's
         * size class.
         */
        uint32_t        bitmap_offset;

        /*
         * Metadata used to manipulate bitmaps for runs associated with this
         * bin.
         */
        bitmap_info_t   bitmap_info;

        /*
         * Offset of first (prof_ctx_t *) in a run header for this bin's size
         * class, or 0 if (config_prof == false || opt_prof == false).
         */
        uint32_t        ctx0_offset;

        /* Offset of first region in a run for this bin's size class. */
        uint32_t        reg0_offset;
};
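
/*
 * Worked example (editor's sketch; values hypothetical): for a bin with
 * reg_size == 32 and redzone_size == 16 when redzones are enabled,
 *
 *   reg_interval = reg_size + (redzone_size << 1) = 64
 *
 * and region regind of a given run starts at
 *
 *   (uintptr_t)run + reg0_offset + regind * reg_interval
 *
 * where reg0_offset itself points past the run header, bitmap, optional ctx
 * map, and the leading redzone, as pictured above.
 */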

struct arena_bin_s {
        /*
         * All operations on runcur, runs, and stats require that lock be
         * locked.  Run allocation/deallocation are protected by the arena
         * lock, which may be acquired while holding one or more bin locks,
         * but not vice versa.
         */
        malloc_mutex_t  lock;

        /*
         * Current run being used to service allocations of this bin's size
         * class.
         */
        arena_run_t     *runcur;

        /*
         * Tree of non-full runs.  This tree is used when looking for an
         * existing run when runcur is no longer usable.  We choose the
         * non-full run that is lowest in memory; this policy tends to keep
         * objects packed well, and it can also help reduce the number of
         * almost-empty chunks.
         */
        arena_run_tree_t runs;

        /* Bin statistics. */
        malloc_bin_stats_t stats;
};
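
/*
 * Lock-ordering sketch (editor's addition): a bin lock may be held when the
 * owning arena's lock is subsequently acquired (e.g. when a bin needs a
 * fresh run), but never the reverse:
 *
 *   malloc_mutex_lock(&bin->lock);
 *   malloc_mutex_lock(&arena->lock);      bin lock -> arena lock: OK
 *   malloc_mutex_unlock(&arena->lock);
 *   malloc_mutex_unlock(&bin->lock);
 *
 * Acquiring a bin lock while holding the arena lock would invert this order
 * and risk deadlock.
 */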

struct arena_s {
        /* This arena's index within the arenas array. */
        unsigned        ind;

        /*
         * Number of threads currently assigned to this arena.  This field is
         * protected by arenas_lock.
         */
        unsigned        nthreads;

        /*
         * There are three classes of arena operations from a locking
         * perspective:
         * 1) Thread assignment (modifies nthreads) is protected by
         *    arenas_lock.
         * 2) Bin-related operations are protected by bin locks.
         * 3) Chunk- and run-related operations are protected by this mutex.
         */
        malloc_mutex_t  lock;

        arena_stats_t   stats;
        /*
         * List of tcaches for extant threads associated with this arena.
         * Stats from these are merged incrementally, and at exit.
         */
        ql_head(tcache_t) tcache_ql;

        uint64_t        prof_accumbytes;

        /* List of dirty-page-containing chunks this arena manages. */
        ql_head(arena_chunk_t) chunks_dirty;

        /*
         * In order to avoid rapid chunk allocation/deallocation when an arena
         * oscillates right on the cusp of needing a new chunk, cache the most
         * recently freed chunk.  The spare is left in the arena's chunk trees
         * until it is deleted.
         *
         * There is one spare chunk per arena, rather than one spare total, in
         * order to avoid interactions between multiple threads that could
         * make a single spare inadequate.
         */
        arena_chunk_t   *spare;

        /* Number of pages in active runs. */
        size_t          nactive;

        /*
         * Current count of pages within unused runs that are potentially
         * dirty, and for which madvise(... MADV_DONTNEED) has not been
         * called.  By tracking this, we can institute a limit on how much
         * dirty unused memory is mapped for each arena.
         */
        size_t          ndirty;

        /*
         * Approximate number of pages being purged.  It is possible for
         * multiple threads to purge dirty pages concurrently, and they use
         * npurgatory to indicate the total number of pages all threads are
         * attempting to purge.
         */
        size_t          npurgatory;

        /*
         * Size/address-ordered trees of this arena's available runs.  The
         * trees are used for first-best-fit run allocation.  The dirty tree
         * contains runs with dirty pages (i.e. very likely to have been
         * touched and therefore have associated physical pages), whereas the
         * clean tree contains runs with pages that either have no associated
         * physical pages, or have pages that the kernel may recycle at any
         * time due to previous madvise(2) calls.  The dirty tree is used in
         * preference to the clean tree for allocations, because using dirty
         * pages reduces the amount of dirty purging necessary to keep the
         * active:dirty page ratio below the purge threshold.
         */
        arena_avail_tree_t      runs_avail_clean;
        arena_avail_tree_t      runs_avail_dirty;

        /* bins is used to store trees of free regions. */
        arena_bin_t             bins[NBINS];
};
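
/*
 * Illustrative check (editor's sketch): purging enforces the invariant
 * documented with LG_DIRTY_MULT_DEFAULT.  With opt_lg_dirty_mult == 5, an
 * arena with nactive == 1024 tolerates at most 1024 >> 5 == 32 dirty pages;
 * once ndirty exceeds that, dirty runs are purged until
 *
 *   (arena->nactive >> opt_lg_dirty_mult) >= arena->ndirty
 *
 * holds again.
 */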

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern ssize_t  opt_lg_dirty_mult;
/*
 * small_size2bin is a compact lookup table that rounds request sizes up to
 * size classes.  In order to reduce cache footprint, the table is compressed,
 * and all accesses are via the SMALL_SIZE2BIN macro.
 */
extern uint8_t const    small_size2bin[];
#define SMALL_SIZE2BIN(s)       (small_size2bin[(s-1) >> LG_TINY_MIN])
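
/*
 * Usage sketch (editor's addition; LG_TINY_MIN == 3 is an assumption, the
 * actual value is configuration-dependent): a 13-byte request maps to
 * small_size2bin[(13-1) >> 3] == small_size2bin[1], i.e. one table byte
 * covers each 2^LG_TINY_MIN-byte step of request size:
 *
 *   size_t binind = SMALL_SIZE2BIN(size);
 *   arena_bin_info_t *bin_info = &arena_bin_info[binind];
 */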

extern arena_bin_info_t arena_bin_info[NBINS];

/* Number of large size classes. */
#define nlclasses (chunk_npages - map_bias)

void    arena_purge_all(arena_t *arena);
void    arena_prof_accum(arena_t *arena, uint64_t accumbytes);
void    arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
    size_t binind, uint64_t prof_accumbytes);
void    arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
    bool zero);
void    arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
void    *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void    *arena_malloc_large(arena_t *arena, size_t size, bool zero);
void    *arena_palloc(arena_t *arena, size_t size, size_t alignment,
    bool zero);
void    arena_prof_promoted(const void *ptr, size_t size);
void    arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm);
void    arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
void    arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
    arena_stats_t *astats, malloc_bin_stats_t *bstats,
    malloc_large_stats_t *lstats);
void    *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
    size_t extra, bool zero);
void    *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
    size_t alignment, bool zero, bool try_tcache);
bool    arena_new(arena_t *arena, unsigned ind);
void    arena_boot(void);
void    arena_prefork(arena_t *arena);
void    arena_postfork_parent(arena_t *arena);
void    arena_postfork_child(arena_t *arena);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
size_t  arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned        arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
    const void *ptr);
prof_ctx_t      *arena_prof_ctx_get(const void *ptr);
void    arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
void    *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t  arena_salloc(const void *ptr, bool demote);
void    arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    bool try_tcache);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
        size_t binind = bin - arena->bins;
        assert(binind < NBINS);
        return (binind);
}

JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
        unsigned shift, diff, regind;
        size_t interval;

        /*
         * Freeing a pointer lower than region zero can cause assertion
         * failure.
         */
        assert((uintptr_t)ptr >= (uintptr_t)run +
            (uintptr_t)bin_info->reg0_offset);

        /*
         * Avoid doing division with a variable divisor if possible.  Using
         * actual division here can reduce allocator throughput by over 20%!
         */
        diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
            bin_info->reg0_offset);

        /* Rescale (factor powers of 2 out of the numerator and denominator). */
        interval = bin_info->reg_interval;
        shift = ffs(interval) - 1;
        diff >>= shift;
        interval >>= shift;

        if (interval == 1) {
                /* The divisor was a power of 2. */
                regind = diff;
        } else {
                /*
                 * To divide by a number D that is not a power of two we
                 * multiply by (2^21 / D) and then right shift by 21
                 * positions.
                 *
                 *   X / D
                 *
                 * becomes
                 *
                 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
                 *
                 * We can omit the first three elements, because we never
                 * divide by 0, and 1 and 2 are both powers of two, which are
                 * handled above.
                 */
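                /*
                 * Worked example (editor's addition): with unsigned 32 bits
                 * wide and LG_RUN_MAXREGS == 11, SIZE_INV_SHIFT == 21, so
                 * SIZE_INV(3) == (2^21 / 3) + 1 == 699051.  For diff == 6:
                 *
                 *   (6 * 699051) >> 21 == 4194306 >> 21 == 2 == 6 / 3
                 *
                 * The "+ 1" rounds the reciprocal up, and SIZE_INV_SHIFT is
                 * sized against LG_RUN_MAXREGS so that the truncated
                 * multiply is exact for every valid region index.
                 */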
#define SIZE_INV_SHIFT  ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define SIZE_INV(s)     (((1U << SIZE_INV_SHIFT) / (s)) + 1)
                static const unsigned interval_invs[] = {
                    SIZE_INV(3),
                    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
                    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
                    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
                    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
                    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
                    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
                    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
                };

                if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
                    2)) {
                        regind = (diff * interval_invs[interval - 3]) >>
                            SIZE_INV_SHIFT;
                } else
                        regind = diff / interval;
#undef SIZE_INV
#undef SIZE_INV_SHIFT
        }
        assert(diff == regind * interval);
        assert(regind < bin_info->nregs);

        return (regind);
}

JEMALLOC_INLINE prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
        prof_ctx_t *ret;
        arena_chunk_t *chunk;
        size_t pageind, mapbits;

        cassert(config_prof);
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapbits = chunk->map[pageind-map_bias].bits;
        assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
        if ((mapbits & CHUNK_MAP_LARGE) == 0) {
                if (prof_promote)
                        ret = (prof_ctx_t *)(uintptr_t)1U;
                else {
                        arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
                            (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
                            LG_PAGE));
                        size_t binind = arena_bin_index(chunk->arena,
                            run->bin);
                        arena_bin_info_t *bin_info = &arena_bin_info[binind];
                        unsigned regind;

                        regind = arena_run_regind(run, bin_info, ptr);
                        ret = *(prof_ctx_t **)((uintptr_t)run +
                            bin_info->ctx0_offset + (regind *
                            sizeof(prof_ctx_t *)));
                }
        } else
                ret = chunk->map[pageind-map_bias].prof_ctx;

        return (ret);
}
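
/*
 * Editor's note on the run-lookup idiom above: for a small allocation, the
 * map entry's upper bits hold ptr's page offset within its run (see the bit
 * layout comment in arena_chunk_map_s), so
 *
 *   run = (arena_run_t *)((uintptr_t)chunk +
 *       ((pageind - (mapbits >> LG_PAGE)) << LG_PAGE));
 *
 * rewinds from ptr's page to the run's first page, where the arena_run_t
 * header lives.
 */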

JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
        arena_chunk_t *chunk;
        size_t pageind, mapbits;

        cassert(config_prof);
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapbits = chunk->map[pageind-map_bias].bits;
        assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
        if ((mapbits & CHUNK_MAP_LARGE) == 0) {
                if (prof_promote == false) {
                        arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
                            (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
                            LG_PAGE));
                        arena_bin_t *bin = run->bin;
                        size_t binind;
                        arena_bin_info_t *bin_info;
                        unsigned regind;

                        binind = arena_bin_index(chunk->arena, bin);
                        bin_info = &arena_bin_info[binind];
                        regind = arena_run_regind(run, bin_info, ptr);

                        *((prof_ctx_t **)((uintptr_t)run +
                            bin_info->ctx0_offset + (regind *
                            sizeof(prof_ctx_t *)))) = ctx;
                } else
                        assert((uintptr_t)ctx == (uintptr_t)1U);
        } else
                chunk->map[pageind-map_bias].prof_ctx = ctx;
}

JEMALLOC_INLINE void *
arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
{
        tcache_t *tcache;

        assert(size != 0);
        assert(size <= arena_maxclass);

        if (size <= SMALL_MAXCLASS) {
                if (try_tcache && (tcache = tcache_get(true)) != NULL)
                        return (tcache_alloc_small(tcache, size, zero));
                else {
                        return (arena_malloc_small(choose_arena(arena), size,
                            zero));
                }
        } else {
                /*
                 * Initialize tcache after checking size in order to avoid
                 * infinite recursion during tcache initialization.
                 */
                if (try_tcache && size <= tcache_maxclass && (tcache =
                    tcache_get(true)) != NULL)
                        return (tcache_alloc_large(tcache, size, zero));
                else {
                        return (arena_malloc_large(choose_arena(arena), size,
                            zero));
                }
        }
}
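
/*
 * Usage sketch (editor's addition; sizes assume 4 KiB pages and default
 * size classes): callers dispatch on size, and passing arena == NULL lets
 * choose_arena() select the calling thread's arena:
 *
 *   void *p = arena_malloc(NULL, 100, false, true);    small, tcache first
 *   void *q = arena_malloc(NULL, 8192, true, true);    zeroed large
 */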

/* Return the size of the allocation pointed to by ptr. */
JEMALLOC_INLINE size_t
arena_salloc(const void *ptr, bool demote)
{
        size_t ret;
        arena_chunk_t *chunk;
        size_t pageind, mapbits;

        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapbits = chunk->map[pageind-map_bias].bits;
        assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
        if ((mapbits & CHUNK_MAP_LARGE) == 0) {
                arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
                    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE));
                size_t binind = arena_bin_index(chunk->arena, run->bin);
                arena_bin_info_t *bin_info = &arena_bin_info[binind];
                assert(((uintptr_t)ptr - ((uintptr_t)run +
                    (uintptr_t)bin_info->reg0_offset)) %
                    bin_info->reg_interval == 0);
                ret = bin_info->reg_size;
        } else {
                assert(((uintptr_t)ptr & PAGE_MASK) == 0);
                ret = mapbits & ~PAGE_MASK;
                if (config_prof && demote && prof_promote && ret == PAGE &&
                    (mapbits & CHUNK_MAP_CLASS_MASK) != 0) {
                        size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
                            CHUNK_MAP_CLASS_SHIFT) - 1;
                        assert(binind < NBINS);
                        ret = arena_bin_info[binind].reg_size;
                }
                assert(ret != 0);
        }

        return (ret);
}
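
/*
 * Editor's note: the demote argument controls how heap-profiling-promoted
 * allocations are reported.  When a sampled small request is promoted to a
 * PAGE-sized large run, (binind+1) is recorded in CHUNK_MAP_CLASS_MASK, so
 * arena_salloc(ptr, true) reports the original small size class, whereas
 * arena_salloc(ptr, false) reports the promoted PAGE-sized allocation.
 */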

JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
{
        size_t pageind;
        arena_chunk_map_t *mapelm;
        tcache_t *tcache;

        assert(arena != NULL);
        assert(chunk->arena == arena);
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapelm = &chunk->map[pageind-map_bias];
        assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
        if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
                /* Small allocation. */
                if (try_tcache && (tcache = tcache_get(false)) != NULL)
                        tcache_dalloc_small(tcache, ptr);
                else {
                        arena_run_t *run;
                        arena_bin_t *bin;

                        run = (arena_run_t *)((uintptr_t)chunk +
                            (uintptr_t)((pageind - (mapelm->bits >>
                            LG_PAGE)) << LG_PAGE));
                        bin = run->bin;
                        if (config_debug) {
                                size_t binind = arena_bin_index(arena, bin);
                                UNUSED arena_bin_info_t *bin_info =
                                    &arena_bin_info[binind];
                                assert(((uintptr_t)ptr - ((uintptr_t)run +
                                    (uintptr_t)bin_info->reg0_offset)) %
                                    bin_info->reg_interval == 0);
                        }
                        malloc_mutex_lock(&bin->lock);
                        arena_dalloc_bin(arena, chunk, ptr, mapelm);
                        malloc_mutex_unlock(&bin->lock);
                }
        } else {
                size_t size = mapelm->bits & ~PAGE_MASK;

                assert(((uintptr_t)ptr & PAGE_MASK) == 0);

                if (try_tcache && size <= tcache_maxclass && (tcache =
                    tcache_get(false)) != NULL) {
                        tcache_dalloc_large(tcache, ptr, size);
                } else {
                        malloc_mutex_lock(&arena->lock);
                        arena_dalloc_large(arena, chunk, ptr);
                        malloc_mutex_unlock(&arena->lock);
                }
        }
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/