/*
 * Copyright (c) 1999, 2000, 2003, 2005, 2008, 2012 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#ifdef __LP64__ /* nano_malloc for 64bit ABI */
#define NDEBUG 1
#define NANO_FREE_DEQUEUE_DILIGENCE 1 /* Check for corrupt free list */

#include <_simple.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <TargetConditionals.h>

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>

#include <mach/mach.h>
#include <mach/mach_vm.h>

#include <libkern/OSAtomic.h>
#include <mach-o/dyld_priv.h> /* for _dyld_get_image_slide() */
#include <crt_externs.h> /* for _NSGetMachExecuteHeader() */

#include <os/tsd.h>

#if defined(__x86_64__)
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
#define _COMM_PAGE_VERSION_REQD 9
#undef __APPLE_API_PRIVATE
#else
#error Unknown Architecture
#endif

#include "scalable_malloc.h"
#include "malloc_internal.h"
#include "malloc_printf.h"

#include <CrashReporterClient.h>

#include "bitarray.h"

#ifndef VM_MEMORY_MALLOC_NANO /* Until osfmk/mach/vm_statistics.h is updated in xnu */
#define VM_MEMORY_MALLOC_NANO 11
#endif

extern uint64_t malloc_entropy[2];

/********************* DEFINITIONS ************************/

#define INLINE __inline__
#define ALWAYSINLINE __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))

#if defined(__x86_64__)
#define CACHE_LINE 64
#define CACHE_ALIGN __attribute__ ((aligned (64) ))
#else
#define CACHE_ALIGN /* TBD for other platforms */
#endif

#define NANO_MAG_INDEX(nz) (_os_cpu_number() >> nz->hyper_shift)

#define SCRIBBLE_BYTE 0xaa /* allocated scribble */
#define SCRABBLE_BYTE 0x55 /* free()'d scribble */
#define SCRUBBLE_BYTE 0xdd /* madvise(..., MADV_FREE) scribble */

#define MAX_RECORDER_BUFFER 256

/************* nanozone address field layout ******************/

#if defined(__x86_64__)
#define NANO_SIGNATURE_BITS 20
#define NANOZONE_SIGNATURE 0x00006ULL // 0x00006nnnnnnnnnnn the address range devoted to us.
#define NANO_MAG_BITS 5
#define NANO_BAND_BITS 18
#define NANO_SLOT_BITS 4
#define NANO_OFFSET_BITS 17

#else
#error Unknown Architecture
#endif

#if defined(__BIG_ENDIAN__)
struct nano_blk_addr_s {
    uint64_t
    nano_signature:NANO_SIGNATURE_BITS, // 0x00006nnnnnnnnnnn the address range devoted to us.
    nano_mag_index:NANO_MAG_BITS,       // the core that allocated this block
    nano_band:NANO_BAND_BITS,
    nano_slot:NANO_SLOT_BITS,           // bucket of homogeneous quanta-multiple blocks
    nano_offset:NANO_OFFSET_BITS;       // locates the block
};
#else
// least significant bits declared first
struct nano_blk_addr_s {
    uint64_t
    nano_offset:NANO_OFFSET_BITS,       // locates the block
    nano_slot:NANO_SLOT_BITS,           // bucket of homogeneous quanta-multiple blocks
    nano_band:NANO_BAND_BITS,
    nano_mag_index:NANO_MAG_BITS,       // the core that allocated this block
    nano_signature:NANO_SIGNATURE_BITS; // 0x00006nnnnnnnnnnn the address range devoted to us.
};
#endif

typedef union {
    uint64_t addr;
    struct nano_blk_addr_s fields;
} nano_blk_addr_t;
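/*
 * Worked example (illustrative only, not referenced by the code): with the
 * x86_64 field widths above, the union decomposes the address
 * 0x0000600000200040 as
 *
 *   nano_signature = 0x00006  (bits 44..63, matches NANOZONE_SIGNATURE)
 *   nano_mag_index = 0        (bits 39..43, CPU/magazine 0)
 *   nano_band      = 1        (bits 21..38, second 2MB band)
 *   nano_slot      = 0        (bits 17..20, the 16-byte size class)
 *   nano_offset    = 0x00040  (bits  0..16, byte offset within the slot's window)
 *
 * i.e. the 16-byte block 64 bytes into slot 0 of band 1 in magazine 0.
 */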
/* Are we using the nano allocator? Set by the initializer. */
__attribute__((visibility("hidden")))
boolean_t _malloc_engaged_nano;

#define NANO_MAX_SIZE 256 /* Buckets sized {16, 32, 48, 64, 80, 96, 112, ...} */
#define SHIFT_NANO_QUANTUM 4
#define NANO_REGIME_QUANTA_SIZE (1 << SHIFT_NANO_QUANTUM) // 16
#define NANO_QUANTA_MASK 0xFULL // NANO_REGIME_QUANTA_SIZE - 1

#define SLOT_IN_BAND_SIZE (1 << NANO_OFFSET_BITS)
#define SLOT_KEY_LIMIT (1 << NANO_SLOT_BITS) /* Must track nano_slot width */
#define BAND_SIZE (1 << (NANO_SLOT_BITS + NANO_OFFSET_BITS)) /* == Number of bytes covered by a page table entry */
#define NANO_MAG_SIZE (1 << NANO_MAG_BITS)
#define NANO_SLOT_SIZE (1 << NANO_SLOT_BITS)
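/*
 * Derived values for the x86_64 parameters above (for orientation; the code
 * relies only on the #defines):
 *
 *   SLOT_IN_BAND_SIZE = 2^17 = 128KB   window per size class per band
 *   SLOT_KEY_LIMIT    = 16             size classes: {16, 32, ..., 256} bytes
 *   BAND_SIZE         = 2^21 = 2MB     16 slots x 128KB
 *   NANO_MAG_SIZE     = 32             maximum number of magazines (cores)
 *   NANO_SLOT_SIZE    = 16             slots per magazine
 */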
/****************************** zone itself ***********************************/

/*
 * Note that objects whose addresses are held in pointers here must be pursued
 * individually in the nano_in_use_enumeration() routines.
 */

typedef struct chained_block_s {
    uintptr_t double_free_guard;
    struct chained_block_s *next;
} *chained_block_t;

typedef struct nano_meta_s {
    OSQueueHead slot_LIFO CACHE_ALIGN;
    unsigned int slot_madvised_log_page_count;
    volatile uintptr_t slot_current_base_addr;
    volatile uintptr_t slot_limit_addr;
    volatile size_t slot_objects_mapped;
    volatile size_t slot_objects_skipped;
    bitarray_t slot_madvised_pages;
    volatile uintptr_t slot_bump_addr CACHE_ALIGN; // position on cache line distinct from that of slot_LIFO
    volatile boolean_t slot_exhausted;
    unsigned int slot_bytes;
    unsigned int slot_objects;
} *nano_meta_admin_t;

typedef struct nanozone_s { // vm_allocate()'d, so page-aligned to begin with.
    malloc_zone_t basic_zone; // first page will be given read-only protection
    uint8_t pad[PAGE_MAX_SIZE - sizeof(malloc_zone_t)];

    // remainder of structure is R/W (contains no function pointers)
    // page-aligned
    struct nano_meta_s meta_data[NANO_MAG_SIZE][NANO_SLOT_SIZE]; // max: NANO_MAG_SIZE cores x NANO_SLOT_SIZE slots for nano blocks {16 .. 256}
    _malloc_lock_s band_resupply_lock[NANO_MAG_SIZE];
    uintptr_t band_max_mapped_baseaddr[NANO_MAG_SIZE];
    size_t core_mapped_size[NANO_MAG_SIZE];

    unsigned debug_flags;
    unsigned our_signature;
    unsigned phys_ncpus;
    unsigned logical_ncpus;
    unsigned hyper_shift;

    /* security cookie */
    uintptr_t cookie;

    /*
     * The nano zone constructed by create_nano_zone() would like to hand off tiny, small, and large
     * allocations to the default scalable zone. Record the latter as the "helper" zone here.
     */
    malloc_zone_t *helper_zone;
} nanozone_t;

#define SZONE_PAGED_SIZE ((sizeof(nanozone_t) + vm_page_size - 1) & ~(vm_page_size - 1))

/********************* PROTOTYPES ***********************/
extern void malloc_error_break(void);

// msg prints after fmt, ...
static NOINLINE void nanozone_error(nanozone_t *nanozone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
    __printflike(5, 6);

static void nano_statistics(nanozone_t *nanozone, malloc_statistics_t *stats);

/********************* VERY LOW LEVEL UTILITIES ************************/
// msg prints after fmt, ...

static NOINLINE void
nanozone_error(nanozone_t *nanozone, int is_corruption, const char *msg, const void *ptr, const char *fmt, ...)
{
    va_list ap;
    _SIMPLE_STRING b = _simple_salloc();

    if (b) {
        if (fmt) {
            va_start(ap, fmt);
            _simple_vsprintf(b, fmt, ap);
            va_end(ap);
        }
        if (ptr) {
            _simple_sprintf(b, "*** error for object %p: %s\n", ptr, msg);
        } else {
            _simple_sprintf(b, "*** error: %s\n", msg);
        }
        malloc_printf("%s*** set a breakpoint in malloc_error_break to debug\n", _simple_string(b));
    } else {
        /*
         * Should only get here if vm_allocate() can't get a single page of
         * memory, implying _simple_asl_log() would also fail. So we just
         * print to the file descriptor.
         */
        if (fmt) {
            va_start(ap, fmt);
            _malloc_vprintf(MALLOC_PRINTF_NOLOG, fmt, ap);
            va_end(ap);
        }
        if (ptr) {
            _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error for object %p: %s\n", ptr, msg);
        } else {
            _malloc_printf(MALLOC_PRINTF_NOLOG, "*** error: %s\n", msg);
        }
        _malloc_printf(MALLOC_PRINTF_NOLOG, "*** set a breakpoint in malloc_error_break to debug\n");
    }
    malloc_error_break();

    // Call abort() if this is a memory corruption error and the abort on
    // corruption flag is set, or if any error should abort.
    if ((is_corruption && (nanozone->debug_flags & SCALABLE_MALLOC_ABORT_ON_CORRUPTION)) ||
        (nanozone->debug_flags & SCALABLE_MALLOC_ABORT_ON_ERROR)) {
        CRSetCrashLogMessage(b ? _simple_string(b) : msg);
        abort();
    } else if (b) {
        _simple_sfree(b);
    }
}
static void
protect(void *address, size_t size, unsigned protection, unsigned debug_flags)
{
    kern_return_t err;

    if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_PRELUDE)) {
        err = mprotect((void *)((uintptr_t)address - vm_page_size), vm_page_size, protection);
        if (err) {
            malloc_printf("*** can't protect(%p) region for prelude guard page at %p\n",
                    protection, (uintptr_t)address - (1 << vm_page_shift));
        }
    }
    if (!(debug_flags & SCALABLE_MALLOC_DONT_PROTECT_POSTLUDE)) {
        err = mprotect((void *)((uintptr_t)address + size), vm_page_size, protection);
        if (err) {
            malloc_printf("*** can't protect(%p) region for postlude guard page at %p\n",
                    protection, (uintptr_t)address + size);
        }
    }
}

static void *
allocate_based_pages(nanozone_t *nanozone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label, void *base_addr)
{
    boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
    mach_vm_address_t vm_addr;
    uintptr_t addr;
    mach_vm_size_t allocation_size = round_page(size);
    mach_vm_offset_t allocation_mask = ((mach_vm_offset_t)1 << align) - 1;
    int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(vm_page_label);
    kern_return_t kr;

    if (!allocation_size) allocation_size = vm_page_size;
    if (add_guard_pages) allocation_size += 2 * vm_page_size;
    if (allocation_size < size) // size_t arithmetic wrapped!
        return NULL;

    vm_addr = round_page((mach_vm_address_t)base_addr);
    if (!vm_addr) vm_addr = vm_page_size;
    kr = mach_vm_map(mach_task_self(), &vm_addr, allocation_size,
            allocation_mask, alloc_flags, MEMORY_OBJECT_NULL, 0, FALSE,
            VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
    if (kr) {
        nanozone_error(nanozone, 0, "can't allocate pages", NULL,
                "*** mach_vm_map(size=%lu) failed (error code=%d)\n",
                size, kr);
        return NULL;
    }
    addr = (uintptr_t)vm_addr;

    if (add_guard_pages) {
        addr += vm_page_size;
        protect((void *)addr, size, PROT_NONE, debug_flags);
    }
    return (void *)addr;
}

static void *
allocate_pages(nanozone_t *nanozone, size_t size, unsigned char align, unsigned debug_flags, int vm_page_label)
{
    return allocate_based_pages(nanozone, size, align, debug_flags, vm_page_label, 0);
}

static void
deallocate_pages(nanozone_t *nanozone, void *addr, size_t size, unsigned debug_flags)
{
    boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES;
    mach_vm_address_t vm_addr = (mach_vm_address_t)addr;
    mach_vm_size_t allocation_size = size;
    kern_return_t kr;

    if (add_guard_pages) {
        vm_addr -= vm_page_size;
        allocation_size += 2 * vm_page_size;
    }
    kr = mach_vm_deallocate(mach_task_self(), vm_addr, allocation_size);
    if (kr && nanozone)
        nanozone_error(nanozone, 0, "Can't deallocate_pages at", addr, NULL);
}
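/*
 * Sketch of the layout produced by allocate_based_pages() when
 * SCALABLE_MALLOC_ADD_GUARD_PAGES is set (illustrative only):
 *
 *   vm_addr                                           vm_addr + allocation_size
 *   +------------+-----------------------------------+------------+
 *   | guard page | usable region (returned pointer)  | guard page |
 *   +------------+-----------------------------------+------------+
 *                 ^ addr = vm_addr + vm_page_size
 *
 * protect() then marks both guard pages PROT_NONE, and deallocate_pages()
 * widens the region by the same two pages before unmapping.
 */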
/*
 * We maintain separate free lists for each (quantized) size. The literature
 * calls this the "segregated policy".
 */

static boolean_t
segregated_band_grow(nanozone_t *nanozone, nano_meta_admin_t pMeta, unsigned int slot_bytes, unsigned int mag_index)
{
    nano_blk_addr_t u; // the compiler holds this in a register
    uintptr_t p, s;
    size_t watermark, hiwater;

    if (0 == pMeta->slot_current_base_addr) { // First encounter?
        u.fields.nano_signature = NANOZONE_SIGNATURE;
        u.fields.nano_mag_index = mag_index;
        u.fields.nano_band = 0;
        u.fields.nano_slot = (slot_bytes >> SHIFT_NANO_QUANTUM) - 1;
        u.fields.nano_offset = 0;

        p = u.addr;
        pMeta->slot_bytes = slot_bytes;
        pMeta->slot_objects = SLOT_IN_BAND_SIZE / slot_bytes;
    } else {
        p = pMeta->slot_current_base_addr + BAND_SIZE; // Growing, so stride ahead by BAND_SIZE

        u.addr = (uint64_t)p;
        if (0 == u.fields.nano_band) // Did the band index wrap?
            return FALSE;

        assert(slot_bytes == pMeta->slot_bytes);
    }
    pMeta->slot_current_base_addr = p;

    mach_vm_address_t vm_addr = p & ~((uintptr_t)(BAND_SIZE - 1)); // Address of the (2MB) band covering this (128KB) slot

    if (nanozone->band_max_mapped_baseaddr[mag_index] < vm_addr) {
        // Obtain the next band to cover this slot
        kern_return_t kr = mach_vm_map(mach_task_self(), &vm_addr, BAND_SIZE,
                0, VM_MAKE_TAG(VM_MEMORY_MALLOC_NANO), MEMORY_OBJECT_NULL, 0, FALSE,
                VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

        void *q = (void *)vm_addr;
        if (kr || q != (void *)(p & ~((uintptr_t)(BAND_SIZE - 1)))) // Must get exactly what we asked for
            return FALSE;

        nanozone->band_max_mapped_baseaddr[mag_index] = vm_addr;
    }

    // Randomize the starting allocation from this slot (introduces 11 to 14 bits of entropy)
    if (0 == pMeta->slot_objects_mapped) { // First encounter?
        pMeta->slot_objects_skipped = (malloc_entropy[1] % (SLOT_IN_BAND_SIZE / slot_bytes));
        pMeta->slot_bump_addr = p + (pMeta->slot_objects_skipped * slot_bytes);
    } else {
        pMeta->slot_bump_addr = p;
    }

    pMeta->slot_limit_addr = p + (SLOT_IN_BAND_SIZE / slot_bytes) * slot_bytes;
    pMeta->slot_objects_mapped += (SLOT_IN_BAND_SIZE / slot_bytes);

    u.fields.nano_signature = NANOZONE_SIGNATURE;
    u.fields.nano_mag_index = mag_index;
    u.fields.nano_band = 0;
    u.fields.nano_slot = 0;
    u.fields.nano_offset = 0;
    s = u.addr; // Base for this core.

    // Set the high water mark for this CPU's entire magazine, if this resupply raised it.
    watermark = nanozone->core_mapped_size[mag_index];
    hiwater = MAX(watermark, p - s + SLOT_IN_BAND_SIZE);
    nanozone->core_mapped_size[mag_index] = hiwater;

    return TRUE;
}

static inline unsigned long
divrem(unsigned long a, unsigned int b, unsigned int *remainder)
{
    // Encapsulating the modulo and division in an in-lined function convinces the compiler
    // to issue just a single divide instruction to obtain quotient and remainder. Go figure.
    *remainder = a % b;
    return a / b;
}
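/*
 * Usage sketch (illustrative): both results come from one divide.
 *
 *   unsigned int rem;
 *   unsigned long quo = divrem(300, 48, &rem); // quo == 6, rem == 12
 */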
static INLINE void *
segregated_next_block(nanozone_t *nanozone, nano_meta_admin_t pMeta, unsigned int slot_bytes, unsigned int mag_index)
{
    while (1) {
        uintptr_t theLimit = pMeta->slot_limit_addr; // Capture the slot limit that bounds slot_bump_addr right now
        uintptr_t b = OSAtomicAdd64Barrier(slot_bytes, (volatile int64_t *)&(pMeta->slot_bump_addr));
        b -= slot_bytes; // Atomic op returned addr of *next* free block. Subtract to get addr for *this* allocation.

        if (b < theLimit) { // Did we stay within the bound of the present slot allocation?
            return (void *)b; // Yep, so the slot_bump_addr this thread incremented is good to go
        } else {
            if (pMeta->slot_exhausted) { // exhausted all the bands available for this slot?
                return 0; // We're toast
            } else {
                // One thread will grow the heap, others will see it's been grown and retry allocation
                _malloc_lock_lock(&nanozone->band_resupply_lock[mag_index]);
                // re-check state now that we've taken the lock
                if (pMeta->slot_exhausted) {
                    _malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
                    return 0; // Toast
                } else if (b < pMeta->slot_limit_addr) {
                    _malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
                    continue; // ... the slot was successfully grown by first-taker (not us). Now try again.
                } else if (segregated_band_grow(nanozone, pMeta, slot_bytes, mag_index)) {
                    _malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
                    continue; // ... the slot has been successfully grown by us. Now try again.
                } else {
                    pMeta->slot_exhausted = TRUE;
                    _malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
                    return 0;
                }
            }
        }
    }
}

static INLINE unsigned int
segregated_size_to_fit(nanozone_t *nanozone, size_t size, unsigned int *pKey)
{
    unsigned int k, slot_bytes;

    if (0 == size)
        size = NANO_REGIME_QUANTA_SIZE; // Historical behavior

    k = (size + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM; // round up and shift for number of quanta
    slot_bytes = k << SHIFT_NANO_QUANTUM; // multiply by power of two quanta size
    *pKey = k - 1; // Zero-based!

    return slot_bytes;
}
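/*
 * Worked example (illustrative): a request for 37 bytes rounds up to
 * k = (37 + 15) >> 4 = 3 quanta, so slot_bytes == 48 and *pKey == 2;
 * a request for 0 bytes is served from the 16-byte slot (*pKey == 0).
 */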
static INLINE index_t
offset_to_index(nanozone_t *nanozone, nano_meta_admin_t pMeta, uintptr_t offset)
{
    unsigned int slot_bytes = pMeta->slot_bytes;
    unsigned int slot_objects = pMeta->slot_objects; // SLOT_IN_BAND_SIZE / slot_bytes;
    unsigned int rem;
    unsigned long quo = divrem(offset, BAND_SIZE, &rem);

    assert(0 == rem % slot_bytes);
    return (quo * slot_objects) + (rem / slot_bytes);
}

static INLINE uintptr_t
index_to_offset(nanozone_t *nanozone, nano_meta_admin_t pMeta, index_t i)
{
    unsigned int slot_bytes = pMeta->slot_bytes;
    unsigned int slot_objects = pMeta->slot_objects; // SLOT_IN_BAND_SIZE / slot_bytes;
    unsigned int rem;
    unsigned long quo = divrem(i, slot_objects, &rem);

    return (quo * BAND_SIZE) + (rem * slot_bytes);
}
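/*
 * These two are inverses. Illustrative round trip for a 16-byte slot
 * (slot_bytes == 16, slot_objects == 8192): the block at offset
 * BAND_SIZE + 32, i.e. the third block in the slot's second band, maps to
 * index (1 * 8192) + (32 / 16) == 8194, and index_to_offset(8194) lands
 * back on (1 * BAND_SIZE) + (2 * 16).
 */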
static kern_return_t
segregated_in_use_enumerator(task_t task, void *context, unsigned type_mask, nanozone_t *nanozone,
        memory_reader_t reader, vm_range_recorder_t recorder)
{
    unsigned int mag_index, slot_key;
    vm_range_t ptr_range;
    vm_range_t buffer[MAX_RECORDER_BUFFER];
    kern_return_t err;
    unsigned count = 0;

    for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
        uintptr_t clone_magazine; // magazine base for ourselves
        nano_blk_addr_t p; // slot base for remote
        uintptr_t clone_slot_base; // slot base for ourselves (tracks with "p")

        // Establish p as base address for slot 0 in remote
        p.fields.nano_signature = NANOZONE_SIGNATURE;
        p.fields.nano_mag_index = mag_index;
        p.fields.nano_band = 0;
        p.fields.nano_slot = 0;
        p.fields.nano_offset = 0;

        if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
            mach_vm_address_t vm_addr;
            mach_vm_size_t alloc_size = nanozone->core_mapped_size[mag_index];
            int alloc_flags = VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_MALLOC);

            vm_addr = vm_page_size;
            kern_return_t kr = mach_vm_allocate(mach_task_self(), &vm_addr, alloc_size, alloc_flags);
            if (kr) {
                return kr;
            }
            clone_magazine = (uintptr_t)vm_addr;
            clone_slot_base = clone_magazine; // base for slot 0 in this local magazine
        } else {
            clone_slot_base = clone_magazine = 0; // and won't be used in this loop
        }

        for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
                p.addr += SLOT_IN_BAND_SIZE, // Advance to next slot base for remote
                clone_slot_base += SLOT_IN_BAND_SIZE, // Advance to next slot base for ourselves
                slot_key++) {
            nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
            size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count

            if (0 == slot_objects_mapped) // Nothing allocated in this magazine for this slot?
                continue;

            if (type_mask & MALLOC_ADMIN_REGION_RANGE_TYPE) {
                /* do NOTHING as there is no distinct admin region */
            }

            if (type_mask & (MALLOC_PTR_REGION_RANGE_TYPE | MALLOC_ADMIN_REGION_RANGE_TYPE)) {
                nano_blk_addr_t q = p;
                uintptr_t skip_adj = index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);

                while (q.addr < pMeta->slot_limit_addr) {
                    ptr_range.address = q.addr + skip_adj;
                    ptr_range.size = SLOT_IN_BAND_SIZE - skip_adj;
                    skip_adj = 0;
                    recorder(task, context, MALLOC_PTR_REGION_RANGE_TYPE, &ptr_range, 1);
                    q.addr += BAND_SIZE;
                }
            }

            if (type_mask & MALLOC_PTR_IN_USE_RANGE_TYPE) {
                nano_blk_addr_t q = p;
                uintptr_t slot_band, clone_slot_band_base = clone_slot_base;
                uintptr_t skip_adj = index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);

                while (q.addr < pMeta->slot_limit_addr) {
                    // read slot in each remote band. Lands in some random location.
                    size_t len = MIN(pMeta->slot_bump_addr - q.addr, SLOT_IN_BAND_SIZE);
                    err = reader(task, (vm_address_t)(q.addr + skip_adj), len - skip_adj, (void **)&slot_band);
                    if (err)
                        return err;

                    // Place the data just read in the correct position relative to the local magazine.
                    memcpy((void *)(clone_slot_band_base + skip_adj), (void *)slot_band, len - skip_adj);

                    // Simultaneously advance pointers in remote and ourselves to the next band.
                    q.addr += BAND_SIZE;
                    clone_slot_band_base += BAND_SIZE;
                    skip_adj = 0;
                }

                // Walk the slot free list and populate a bitarray_t
                int log_size = 64 - __builtin_clzl(slot_objects_mapped);
                bitarray_t slot_bitarray = bitarray_create(log_size);

                if (!slot_bitarray)
                    return errno;

                chained_block_t t;
                unsigned stoploss = slot_objects_mapped;
                while ((t = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next) + (clone_slot_base - p.addr)))) {
                    if (0 == stoploss) {
                        malloc_printf("Free list walk in segregated_in_use_enumerator exceeded object count.");
                        break;
                    }
                    stoploss--;

                    uintptr_t offset = ((uintptr_t)t - p.addr); // offset from beginning of slot, task-independent
                    index_t block_index = offset_to_index(nanozone, pMeta, offset);

                    if (block_index < slot_objects_mapped)
                        bitarray_set(slot_bitarray, log_size, block_index);
                }
                // N.B. pMeta->slot_LIFO in *this* task is now drained (remote free list has *not* been disturbed)

                // Copy the bitarray_t denoting madvise()'d pages (if any) into *this* task's address space
                bitarray_t madv_page_bitarray;
                int log_page_count;

                if (pMeta->slot_madvised_pages) {
                    log_page_count = pMeta->slot_madvised_log_page_count;
                    err = reader(task, (vm_address_t)(pMeta->slot_madvised_pages), bitarray_size(log_page_count), (void **)&madv_page_bitarray);
                    if (err)
                        return err;
                } else {
                    madv_page_bitarray = NULL;
                    log_page_count = 0;
                }

                // Enumerate all the block indices issued to date, and report those not on the free list
                index_t i;
                for (i = pMeta->slot_objects_skipped; i < slot_objects_mapped; ++i) {
                    uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
                    if (p.addr + block_offset >= pMeta->slot_bump_addr)
                        break;

                    // blocks falling on madvise()'d pages are free! So not enumerated.
                    if (madv_page_bitarray) {
                        nano_blk_addr_t q;
                        index_t pgnum, pgnum_end;

                        q.addr = p.addr + block_offset;
                        pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
                        q.addr += pMeta->slot_bytes - 1;
                        pgnum_end = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;

                        if (pgnum < (1 << log_page_count)) { // bounds check for bitarray_get()'s that follow
                            if (bitarray_get(madv_page_bitarray, log_page_count, pgnum) ||
                                bitarray_get(madv_page_bitarray, log_page_count, pgnum_end)) {
                                continue;
                            }
                        }
                    }

                    if (!bitarray_get(slot_bitarray, log_size, i)) {
                        buffer[count].address = p.addr + block_offset;
                        buffer[count].size = (slot_key + 1) << SHIFT_NANO_QUANTUM;
                        count++;
                        if (count >= MAX_RECORDER_BUFFER) {
                            recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
                            count = 0;
                        }
                    }
                }
                if (count) {
                    recorder(task, context, MALLOC_PTR_IN_USE_RANGE_TYPE, buffer, count);
                    count = 0;
                }

                free(slot_bitarray);
            }
        }
        if (clone_magazine) {
            mach_vm_address_t vm_addr = clone_magazine;
            mach_vm_size_t alloc_size = nanozone->core_mapped_size[mag_index];
            mach_vm_deallocate(mach_task_self(), vm_addr, alloc_size);
        }
    }
    return 0;
}

/****************** nanozone methods **********************/
/*
 * These methods are called with "ptr" known to possess the nano signature (from
 * which we can additionally infer "ptr" is not NULL), and with "size" bounded to
 * the extent of the nano allocation regime -- (0, 256].
 */

static INLINE size_t
__nano_vet_and_size(nanozone_t *nanozone, const void *ptr)
{
    // Extracts the size of the block in bytes. Checks for a plausible ptr.
    nano_blk_addr_t p; // the compiler holds this in a register
    nano_meta_admin_t pMeta;

    p.addr = (uint64_t)ptr; // Begin the dissection of ptr

    if (nanozone->phys_ncpus <= p.fields.nano_mag_index)
        return 0;

    if (p.fields.nano_offset & NANO_QUANTA_MASK) // stray low-order bits?
        return 0;

    pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);
    if ((void *)(pMeta->slot_bump_addr) <= ptr)
        return 0; // Beyond what's ever been allocated!

    if ((p.fields.nano_offset % pMeta->slot_bytes) != 0)
        return 0; // Not an exact multiple of the block size for this slot

    return pMeta->slot_bytes;
}
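/*
 * Illustrative checks (not exhaustive): in the 48-byte slot, a pointer at
 * slot offset 0x60 (96 == 2 * 48) passes both the quanta-mask test and the
 * modulo test, while offset 0x40 (64) is 16-byte aligned yet fails
 * 64 % 48 == 16 != 0, so it cannot be a block base in that slot.
 */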
static INLINE size_t
_nano_vet_and_size_of_live(nanozone_t *nanozone, const void *ptr)
{
    size_t size = __nano_vet_and_size(nanozone, ptr);
    if (size && ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) != 0xBADDC0DEDEADBEADULL))
        return size; // Common case: not on a free list, hence live. Return its size.
    else
        // ptr is either on a free list (it's got the correct canary), in which case return zero, OR
        // the caller has stored the canary value in the double_free_guard slot entirely by coincidence
        // and the block is a live allocation. The latter is very unlikely (1 in 2^64), so just return 0.
        return 0;
}

static INLINE size_t
_nano_vet_and_size_of_free(nanozone_t *nanozone, const void *ptr)
{
    size_t size = __nano_vet_and_size(nanozone, ptr);
    if (size && ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) == 0xBADDC0DEDEADBEADULL))
        return size;
    else
        return 0;
}
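/*
 * The free-list canary, in brief (sketch): _nano_free_trusted_size_check_scribble()
 * below stamps each freed block with
 *
 *   block->double_free_guard = 0xBADDC0DEDEADBEADULL ^ nanozone->cookie;
 *
 * so "guard ^ cookie == 0xBADDC0DEDEADBEADULL" distinguishes free from live
 * above, and the per-process random cookie keeps the stored value
 * unforgeable by code that doesn't know it.
 */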
static void *
_nano_malloc_check_clear(nanozone_t *nanozone, size_t size, boolean_t cleared_requested)
{
    void *ptr;
    unsigned int slot_key;
    unsigned int slot_bytes = segregated_size_to_fit(nanozone, size, &slot_key); // Note slot_key is set here
    unsigned int mag_index = NANO_MAG_INDEX(nanozone);

    nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);

    ptr = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next));
    if (ptr) {
#if NANO_FREE_DEQUEUE_DILIGENCE
        size_t gotSize;
        nano_blk_addr_t p; // the compiler holds this in a register

        p.addr = (uint64_t)ptr; // Begin the dissection of ptr
        if (nanozone->our_signature != p.fields.nano_signature) {
            nanozone_error(nanozone, 1,
                    "Invalid signature for pointer dequeued from free list", ptr, NULL);
        }

        if (mag_index != p.fields.nano_mag_index) {
            nanozone_error(nanozone, 1,
                    "Mismatched magazine for pointer dequeued from free list", ptr, NULL);
        }

        gotSize = _nano_vet_and_size_of_free(nanozone, ptr);
        if (0 == gotSize) {
            nanozone_error(nanozone, 1,
                    "Invalid pointer dequeued from free list", ptr, NULL);
        }
        if (gotSize != slot_bytes) {
            nanozone_error(nanozone, 1,
                    "Mismatched size for pointer dequeued from free list", ptr, NULL);
        }

        if ((((chained_block_t)ptr)->double_free_guard ^ nanozone->cookie) != 0xBADDC0DEDEADBEADULL) {
            nanozone_error(nanozone, 1,
                    "Heap corruption detected, free list canary is damaged", ptr, NULL);
        }
#if defined(DEBUG)
        void *next = (void *)(((chained_block_t)ptr)->next);
        if (next) {
            p.addr = (uint64_t)next; // Begin the dissection of next
            if (nanozone->our_signature != p.fields.nano_signature) {
                nanozone_error(nanozone, 1,
                        "Invalid next signature for pointer dequeued from free list (showing ptr, next)",
                        ptr, ", %p", next);
            }

            if (mag_index != p.fields.nano_mag_index) {
                nanozone_error(nanozone, 1,
                        "Mismatched next magazine for pointer dequeued from free list (showing ptr, next)",
                        ptr, ", %p", next);
            }

            gotSize = _nano_vet_and_size_of_free(nanozone, next);
            if (0 == gotSize) {
                nanozone_error(nanozone, 1,
                        "Invalid next for pointer dequeued from free list (showing ptr, next)",
                        ptr, ", %p", next);
            }
            if (gotSize != slot_bytes) {
                nanozone_error(nanozone, 1,
                        "Mismatched next size for pointer dequeued from free list (showing ptr, next)",
                        ptr, ", %p", next);
            }
        }
#endif /* DEBUG */
#endif /* NANO_FREE_DEQUEUE_DILIGENCE */

        ((chained_block_t)ptr)->double_free_guard = 0;
        ((chained_block_t)ptr)->next = NULL; // clear out next pointer to protect free list
    } else {
        ptr = segregated_next_block(nanozone, pMeta, slot_bytes, mag_index);
    }

    if (cleared_requested && ptr)
        memset(ptr, 0, slot_bytes); // TODO: Needs a memory barrier after memset to ensure zeroes land first?

    return ptr;
}

static void *
_nano_malloc_check_scribble(nanozone_t *nanozone, size_t size)
{
    void *ptr = _nano_malloc_check_clear(nanozone, size, 0);

    /*
     * Scribble on allocated memory when requested.
     */
    if ((nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) && ptr && size)
        memset(ptr, SCRIBBLE_BYTE, _nano_vet_and_size_of_live(nanozone, ptr));

    return ptr;
}

static INLINE boolean_t
_nano_block_inuse_p(nanozone_t *nanozone, const void *ptr)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register
    nano_meta_admin_t pMeta;
    chained_block_t head = NULL, tail = NULL, t;
    boolean_t inuse = TRUE;

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table

    pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);

    if ((void *)(pMeta->slot_bump_addr) <= ptr)
        return FALSE; // Beyond what's ever been allocated, so trivially not in use.

    // pop elements off the free list all the while looking for ptr.
    unsigned stoploss = pMeta->slot_objects_mapped;
    while ((t = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next)))) {
        if (0 == stoploss) {
            nanozone_error(nanozone, 1, "Free list walk in _nano_block_inuse_p exceeded object count.",
                    (void *)&(pMeta->slot_LIFO), NULL);
        }
        stoploss--;

        if (NULL == head)
            head = t;
        else
            tail->next = t;
        tail = t;

        if (ptr == t) {
            inuse = FALSE;
            break;
        }
    }
    if (tail)
        tail->next = NULL;

    // push the free list extracted above back onto the LIFO, all at once
    if (head)
        OSAtomicEnqueue(&(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s, next));

    return inuse;
}

static INLINE size_t
_nano_size(nanozone_t *nanozone, const void *ptr)
{
    return _nano_vet_and_size_of_live(nanozone, ptr);
}

static INLINE size_t
_nano_good_size(nanozone_t *nanozone, size_t size)
{
    return (size <= NANO_REGIME_QUANTA_SIZE) ?
            NANO_REGIME_QUANTA_SIZE :
            (((size + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM) << SHIFT_NANO_QUANTUM);
}
static INLINE void _nano_free_trusted_size_check_scribble(nanozone_t *nanozone, void *ptr, size_t trusted_size, boolean_t do_scribble) ALWAYSINLINE;

static INLINE void
_nano_free_trusted_size_check_scribble(nanozone_t *nanozone, void *ptr, size_t trusted_size, boolean_t do_scribble)
{
    if (trusted_size) {
        nano_blk_addr_t p; // happily, the compiler holds this in a register
        nano_meta_admin_t pMeta;

        if (do_scribble)
            (void)memset(ptr, SCRABBLE_BYTE, trusted_size);
        ((chained_block_t)ptr)->double_free_guard = (0xBADDC0DEDEADBEADULL ^ nanozone->cookie);

        p.addr = (uint64_t)ptr; // place ptr on the dissecting table
        pMeta = &(nanozone->meta_data[p.fields.nano_mag_index][p.fields.nano_slot]);
        OSAtomicEnqueue(&(pMeta->slot_LIFO), ptr, offsetof(struct chained_block_s, next));
    } else {
        nanozone_error(nanozone, 1, "Freeing unallocated pointer", ptr, NULL);
    }
}

static INLINE void _nano_free_check_scribble(nanozone_t *nanozone, void *ptr, boolean_t do_scribble) ALWAYSINLINE;

static INLINE void
_nano_free_check_scribble(nanozone_t *nanozone, void *ptr, boolean_t do_scribble)
{
    _nano_free_trusted_size_check_scribble(nanozone, ptr, _nano_vet_and_size_of_live(nanozone, ptr), do_scribble);
}

static INLINE void *
_nano_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
{
    size_t old_size, new_good_size, valid_size;
    void *new_ptr;

    if (FALSE && NULL == ptr) { // ptr has our_signature so can't be NULL, but if it were Posix sez ...
        // If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
        return _nano_malloc_check_scribble(nanozone, new_size);
    } else if (0 == new_size) {
        // If size is 0 and ptr is not a null pointer, the object pointed to is freed.
        _nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));
        // If size is 0, either a null pointer or a unique pointer that can be successfully passed
        // to free() shall be returned.
        return _nano_malloc_check_scribble(nanozone, 1);
    }

    old_size = _nano_vet_and_size_of_live(nanozone, ptr);
    if (!old_size) {
        nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
        return NULL;
    }

    new_good_size = _nano_good_size(nanozone, new_size);
    if (new_good_size > old_size) {
        /* Must grow. FALL THROUGH to alloc/copy/free. */
    } else if (new_good_size <= (old_size >> 1)) {
        /* Serious shrinkage (more than half). FALL THROUGH to alloc/copy/free. */
    } else {
        /* Let's hang on to what we got. */
        if (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
            memset(ptr + new_size, SCRIBBLE_BYTE, old_size - new_size);
        return ptr;
    }

    /*
     * Allocate a new buffer and copy.
     */
    new_ptr = _nano_malloc_check_scribble(nanozone, new_good_size);
    if (new_ptr == NULL)
        return NULL;

    valid_size = MIN(old_size, new_good_size);
    memcpy(new_ptr, ptr, valid_size);
    _nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));

    return new_ptr;
}

static INLINE void
_nano_destroy(nanozone_t *nanozone)
{
    /* Now destroy the separate nanozone region */
    deallocate_pages(nanozone, (void *)nanozone, SZONE_PAGED_SIZE, 0);
}

/****************** nanozone dispatch **********************/

static void *
nano_malloc(nanozone_t *nanozone, size_t size)
{
    if (size <= NANO_MAX_SIZE) {
        void *p = _nano_malloc_check_clear(nanozone, size, 0);
        if (p) {
            return p;
        } else {
            /* FALLTHROUGH to helper zone */
        }
    }

    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->malloc(zone, size);
}

static void *
nano_forked_malloc(nanozone_t *nanozone, size_t size)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->malloc(zone, size);
}

static void *
nano_malloc_scribble(nanozone_t *nanozone, size_t size)
{
    if (size <= NANO_MAX_SIZE) {
        void *ptr = _nano_malloc_check_clear(nanozone, size, 0);
        if (ptr) {
            /*
             * Scribble on allocated memory.
             */
            if (size)
                memset(ptr, SCRIBBLE_BYTE, _nano_vet_and_size_of_live(nanozone, ptr));

            return ptr;
        } else {
            /* FALLTHROUGH to helper zone */
        }
    }
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->malloc(zone, size);
}

static void *
nano_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
{
    size_t total_bytes = num_items * size;

    // Check for overflow of integer multiplication
    if (num_items > 1) {
        /* size_t is uint64_t */
        if ((num_items | size) & 0xffffffff00000000ul) {
            // num_items or size equals or exceeds sqrt(2^64) == 2^32, appeal to wider arithmetic
            __uint128_t product = ((__uint128_t)num_items) * ((__uint128_t)size);
            if ((uint64_t)(product >> 64)) // compiles to test on upper register of register pair
                return NULL;
        }
    }

    if (total_bytes <= NANO_MAX_SIZE) {
        void *p = _nano_malloc_check_clear(nanozone, total_bytes, 1);
        if (p) {
            return p;
        } else {
            /* FALLTHROUGH to helper zone */
        }
    }
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->calloc(zone, 1, total_bytes);
}
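/*
 * Overflow example for the check above (illustrative): with
 * num_items == 1ULL << 33 and size == 1ULL << 31, the 64-bit product wraps
 * to 0, but num_items has a bit at or above 2^32, so the 128-bit path runs,
 * sees (product >> 64) == 1, and correctly returns NULL. When both operands
 * are below 2^32 the 64-bit product cannot wrap, so the fast path is safe.
 */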
static void *
nano_forked_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->calloc(zone, num_items, size);
}

static void *
nano_valloc(nanozone_t *nanozone, size_t size)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->valloc(zone, size);
}

static INLINE void __nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size, boolean_t do_scribble) ALWAYSINLINE;

static INLINE void
__nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size, boolean_t do_scribble)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table
    if (nanozone->our_signature == p.fields.nano_signature) {
        if (size == ((p.fields.nano_slot + 1) << SHIFT_NANO_QUANTUM)) { // "Trust but verify."
            _nano_free_trusted_size_check_scribble(nanozone, ptr, size, do_scribble);
            return;
        } else {
            nanozone_error(nanozone, 1, "Freeing pointer whose size was misdeclared", ptr, NULL);
        }
    } else {
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
        zone->free_definite_size(zone, ptr, size);
        return;
    }
    /* NOTREACHED */
}

static void
nano_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size)
{
    __nano_free_definite_size(nanozone, ptr, size, 0);
}

static void
nano_free_definite_size_scribble(nanozone_t *nanozone, void *ptr, size_t size)
{
    __nano_free_definite_size(nanozone, ptr, size, 1);
}

static INLINE void __nano_free(nanozone_t *nanozone, void *ptr, boolean_t do_scribble) ALWAYSINLINE;

static INLINE void
__nano_free(nanozone_t *nanozone, void *ptr, boolean_t do_scribble)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    if (!ptr)
        return; // Protect against malloc_zone_free() passing NULL.

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table
    if (nanozone->our_signature == p.fields.nano_signature) {
        _nano_free_check_scribble(nanozone, ptr, do_scribble);
        return;
    } else {
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
        zone->free(zone, ptr);
        return;
    }
    /* NOTREACHED */
}

static void
nano_free(nanozone_t *nanozone, void *ptr)
{
    __nano_free(nanozone, ptr, 0);
}

static void
nano_forked_free(nanozone_t *nanozone, void *ptr)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    if (!ptr)
        return; // Protect against malloc_zone_free() passing NULL.

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table
    if (nanozone->our_signature == p.fields.nano_signature) {
        /* NOTHING. Drop it on the floor as nanozone metadata could be fouled by fork. */
        return;
    } else {
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
        zone->free(zone, ptr);
        return;
    }
    /* NOTREACHED */
}

static void
nano_forked_free_definite_size(nanozone_t *nanozone, void *ptr, size_t size)
{
    nano_forked_free(nanozone, ptr);
}

static void
nano_free_scribble(nanozone_t *nanozone, void *ptr)
{
    __nano_free(nanozone, ptr, 1);
}

static size_t
nano_size(nanozone_t *nanozone, const void *ptr)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table

    if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
        return _nano_size(nanozone, ptr);
    } else {
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
        return zone->size(zone, ptr); // Not nano. Try other sizes.
    }
    /* NOTREACHED */
}

static void *
nano_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table

    if (NULL == ptr) { // could occur through malloc_zone_realloc() path
        // If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
        return nano_malloc(nanozone, new_size);
    } else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
        if (new_size <= NANO_MAX_SIZE) { // nano to nano?
            void *q = _nano_realloc(nanozone, ptr, new_size);
            if (q) {
                return q;
            } else { // nano exhausted
                /* FALLTHROUGH to helper zone copying case */
            }
        }

        // nano to larger-than-nano (or FALLTHROUGH from just above)
        size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);

        if (!old_size) {
            nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
            return NULL;
        } else {
            malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
            void *new_ptr = zone->malloc(zone, new_size);

            if (new_ptr) {
                size_t valid_size = MIN(old_size, new_size);
                memcpy(new_ptr, ptr, valid_size);
                _nano_free_check_scribble(nanozone, ptr, (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE));
                return new_ptr;
            } else {
                /* Original ptr is left intact */
                return NULL;
            }
            /* NOTREACHED */
        }
    } else {
        // other-than-nano (not necessarily larger! possibly NULL!) to whatever
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);

        return zone->realloc(zone, ptr, new_size);
    }
    /* NOTREACHED */
}

static void *
nano_forked_realloc(nanozone_t *nanozone, void *ptr, size_t new_size)
{
    nano_blk_addr_t p; // happily, the compiler holds this in a register

    p.addr = (uint64_t)ptr; // place ptr on the dissecting table

    if (NULL == ptr) { // could occur through malloc_zone_realloc() path
        // If ptr is a null pointer, realloc() shall be equivalent to malloc() for the specified size.
        return nano_forked_malloc(nanozone, new_size);
    } else if (nanozone->our_signature == p.fields.nano_signature) { // Our signature?
        if (0 == new_size) {
            // If size is 0 and ptr is not a null pointer, the object pointed to is freed.
            // However as nanozone metadata could be fouled by fork, we'll intentionally leak it.

            // If size is 0, either a null pointer or a unique pointer that can be successfully passed
            // to free() shall be returned.
            return nano_forked_malloc(nanozone, 1);
        }

        size_t old_size = _nano_vet_and_size_of_live(nanozone, ptr);

        if (!old_size) {
            nanozone_error(nanozone, 1, "pointer being reallocated was not allocated", ptr, NULL);
            return NULL;
        } else {
            malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
            void *new_ptr = zone->malloc(zone, new_size);

            if (new_ptr) {
                size_t valid_size = MIN(old_size, new_size);
                memcpy(new_ptr, ptr, valid_size);
                /* Original pointer is intentionally leaked as nanozone metadata could be fouled by fork. */
                return new_ptr;
            } else {
                /* Original ptr is left intact */
                return NULL;
            }
            /* NOTREACHED */
        }
    } else {
        // other-than-nano (not necessarily larger! possibly NULL!) to whatever
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);

        return zone->realloc(zone, ptr, new_size);
    }
    /* NOTREACHED */
}

static void
nano_destroy(nanozone_t *nanozone)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    zone->destroy(zone);

    _nano_destroy(nanozone);
}

static unsigned
nano_batch_malloc(nanozone_t *nanozone, size_t size, void **results, unsigned count)
{
    unsigned found = 0;

    if (size <= NANO_MAX_SIZE) {
        while (found < count) {
            void *ptr = _nano_malloc_check_clear(nanozone, size, 0);
            if (!ptr)
                break;

            *results++ = ptr;
            found++;
        }
        if (found == count) {
            return found;
        } else {
            /* FALLTHROUGH to mop-up in the helper zone */
        }
    }

    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return found + zone->batch_malloc(zone, size, results, count - found);
}

static unsigned
nano_forked_batch_malloc(nanozone_t *nanozone, size_t size, void **results, unsigned count)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->batch_malloc(zone, size, results, count);
}

static void
nano_batch_free(nanozone_t *nanozone, void **to_be_freed, unsigned count)
{
    void *ptr;

    // frees all the pointers in to_be_freed
    // note that to_be_freed may be overwritten during the process
    if (!count)
        return;

    while (count--) {
        ptr = to_be_freed[count];
        if (ptr)
            nano_free(nanozone, ptr);
    }
}

static void
nano_forked_batch_free(nanozone_t *nanozone, void **to_be_freed, unsigned count)
{
    void *ptr;

    // frees all the pointers in to_be_freed
    // note that to_be_freed may be overwritten during the process
    if (!count)
        return;

    while (count--) {
        ptr = to_be_freed[count];
        if (ptr)
            nano_forked_free(nanozone, ptr);
    }
}

static void *
nano_memalign(nanozone_t *nanozone, size_t alignment, size_t size)
{
    malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
    return zone->memalign(zone, alignment, size);
}
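/*
 * nano_try_madvise() below repeatedly converts a block address into a page
 * number via
 *
 *   pgnum = ((nano_band << NANO_OFFSET_BITS) | nano_offset) >> vm_page_shift;
 *
 * Dropping the slot bits concatenates the slot's 128KB windows from
 * successive bands, so pgnum indexes pages within that single slot's
 * storage. Illustrative: band 1, offset 0x40 yields byte offset
 * (1 << 17) | 0x40 == 0x20040, i.e. page 0x20 with 4KB pages.
 */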
static size_t
nano_try_madvise(nanozone_t *nanozone, size_t goal)
{
    unsigned int mag_index, slot_key;
    size_t bytes_toward_goal = 0;

    for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
        nano_blk_addr_t p;

        // Establish p as base address for band 0, slot 0, offset 0
        p.fields.nano_signature = NANOZONE_SIGNATURE;
        p.fields.nano_mag_index = mag_index;
        p.fields.nano_band = 0;
        p.fields.nano_slot = 0;
        p.fields.nano_offset = 0;

        for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
                p.addr += SLOT_IN_BAND_SIZE, // Advance to next slot base
                slot_key++) {

            // _malloc_printf(ASL_LEVEL_WARNING, "nano_try_madvise examining slot base %p\n", p.addr);
            nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
            uintptr_t slot_bump_addr = pMeta->slot_bump_addr; // capture this volatile pointer
            size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count

            if (0 == slot_objects_mapped) { // Nothing allocated in this magazine for this slot?
                continue;
            } else {
                // Walk the slot free list and populate a bitarray_t
                int log_size = 64 - __builtin_clzl(slot_objects_mapped);
                bitarray_t slot_bitarray = bitarray_create(log_size);

                unsigned int slot_bytes = pMeta->slot_bytes;
                int log_page_count = 64 - __builtin_clzl((slot_objects_mapped * slot_bytes) / vm_page_size);
                log_page_count = 1 + MAX(0, log_page_count);
                bitarray_t page_bitarray = bitarray_create(log_page_count);

                // _malloc_printf(ASL_LEVEL_WARNING, "slot_bitarray: %db page_bitarray: %db\n", bitarray_size(log_size), bitarray_size(log_page_count));
                if (!slot_bitarray) {
                    malloc_printf("bitarray_create(%d) in nano_try_madvise returned errno=%d.", log_size, errno);
                    return bytes_toward_goal;
                }

                if (!page_bitarray) {
                    malloc_printf("bitarray_create(%d) in nano_try_madvise returned errno=%d.", log_page_count, errno);
                    free(slot_bitarray);
                    return bytes_toward_goal;
                }

                chained_block_t head = NULL, tail = NULL, t;
                unsigned stoploss = slot_objects_mapped;
                while ((t = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next)))) {
                    if (0 == stoploss) {
                        malloc_printf("Free list walk in nano_try_madvise exceeded object count.");
                        break;
                    }
                    stoploss--;

                    uintptr_t offset = ((uintptr_t)t - p.addr); // offset from beginning of slot
                    index_t block_index = offset_to_index(nanozone, pMeta, offset);

                    // build a simple linked list of the free blocks we're able to obtain
                    if (NULL == head)
                        head = t;
                    else
                        tail->next = t;
                    tail = t;

                    // take note in a bitarray_t of each free block we're able to obtain (allows fast lookup below)
                    if (block_index < slot_objects_mapped)
                        bitarray_set(slot_bitarray, log_size, block_index);
                }
                if (tail)
                    tail->next = NULL;

                if (NULL == head) {
                    free(slot_bitarray);
                    free(page_bitarray);
                    continue;
                }

                index_t i;
                nano_blk_addr_t q;
                size_t pgnum;
                for (i = pMeta->slot_objects_skipped; i < slot_objects_mapped; ++i) {
                    uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
                    if (p.addr + block_offset >= slot_bump_addr)
                        break;

                    if (!bitarray_get(slot_bitarray, log_size, i)) { // is block i allocated or already on an madvise'd page?
                        // Mark the page(s) it resides on as live
                        q.addr = p.addr + block_offset;
                        pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
                        bitarray_set(page_bitarray, log_page_count, pgnum);

                        q.addr += slot_bytes - 1;
                        pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;
                        bitarray_set(page_bitarray, log_page_count, pgnum);
                    }
                }

                free(slot_bitarray);

                q.addr = p.addr + index_to_offset(nanozone, pMeta, pMeta->slot_objects_skipped);
                index_t pgstart = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;

                q.addr = slot_bump_addr - slot_bytes;
                pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;

                // _malloc_printf(ASL_LEVEL_WARNING, "Examining %d pages. Slot base %p.\n", pgnum - pgstart + 1, p.addr);
Slot base %p.\n", pgnum - pgstart + 1, p.addr); 1467 1468 if (pMeta->slot_madvised_pages) { 1469 if (pMeta->slot_madvised_log_page_count < log_page_count) { 1470 bitarray_t new_madvised_pages = bitarray_create(log_page_count); 1471 index_t index; 1472 while (bitarray_zap_first_set(pMeta->slot_madvised_pages, pMeta->slot_madvised_log_page_count, &index)) { 1473 bitarray_set(new_madvised_pages, log_page_count, index); 1474 } 1475 free(pMeta->slot_madvised_pages); 1476 pMeta->slot_madvised_pages = new_madvised_pages; 1477 pMeta->slot_madvised_log_page_count = log_page_count; 1478 } 1479 } else { 1480 pMeta->slot_madvised_pages = bitarray_create(log_page_count); 1481 pMeta->slot_madvised_log_page_count = log_page_count; 1482 } 1483 1484 bitarray_t will_madvise_pages = bitarray_create(log_page_count); 1485 int num_advised = 0; 1486 1487 for (i = pgstart; i < pgnum; ++i) { 1488 if ((i < (1 << log_page_count)) && // bounds check for the bitarray_get()'s that follow. 1489 !bitarray_get(pMeta->slot_madvised_pages, log_page_count, i) && // already madvise'd? 1490 !bitarray_get(page_bitarray, log_page_count, i)) // no live allocations? 1491 { 1492 num_advised++; 1493 bitarray_set(will_madvise_pages, log_page_count, i); 1494 } 1495 } 1496 free(page_bitarray); 1497 1498 if (num_advised) { 1499 chained_block_t new_head = NULL, new_tail = NULL; 1500 // _malloc_printf(ASL_LEVEL_WARNING,"Constructing residual free list starting at %p num_advised %d\n", head, num_advised); 1501 t = head; 1502 while (t) { 1503 q.addr = (uintptr_t)t; 1504 index_t pgnum_start = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift; 1505 q.addr += slot_bytes - 1; 1506 index_t pgnum_end = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift; 1507 1508 // bounds check for the bitarray_get()'s that follow. If the pgnum is beyond the 1509 // capacity of the will_madvise_pages just restore the block to the free list. 1510 if (pgnum_start >= (1 << log_page_count)) { 1511 if (NULL == new_head) 1512 new_head = t; 1513 else 1514 new_tail->next = t; 1515 new_tail = t; 1516 } 1517 // If the block nowhere lies on an madvise()'d page restore it to the slot free list. 1518 else if (!bitarray_get(will_madvise_pages, log_page_count, pgnum_start) && 1519 !bitarray_get(will_madvise_pages, log_page_count, pgnum_end)) { 1520 if (NULL == new_head) 1521 new_head = t; 1522 else 1523 new_tail->next = t; 1524 new_tail = t; 1525 } 1526 1527 t = t->next; 1528 } 1529 if (new_tail) 1530 new_tail->next = NULL; 1531 1532 // push the free list extracted above back onto the LIFO, all at once 1533 if (new_head) 1534 OSAtomicEnqueue( &(pMeta->slot_LIFO), new_head, 1535 (uintptr_t)new_tail - (uintptr_t)new_head + offsetof(struct chained_block_s,next)); 1536 } else { 1537 // _malloc_printf(ASL_LEVEL_WARNING,"Reinstating free list since no pages were madvised (%d).\n", num_advised); 1538 if (head) 1539 OSAtomicEnqueue( &(pMeta->slot_LIFO), head, 1540 (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next)); 1541 } 1542 1543 for (i = pgstart; i < pgnum; ++i) { 1544 if ((i < (1 << log_page_count)) && bitarray_get(will_madvise_pages, log_page_count, i)) { 1545 q = p; 1546 q.fields.nano_band = (i << vm_page_shift) >> NANO_OFFSET_BITS; 1547 q.fields.nano_offset = (i << vm_page_shift) & ((1 << NANO_OFFSET_BITS) - 1); 1548 // _malloc_printf(ASL_LEVEL_WARNING,"Entire page non-live: %d. 
                        if (nanozone->debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE)
                            memset((void *)q.addr, SCRUBBLE_BYTE, vm_page_size);
#if TARGET_OS_EMBEDDED
                        if (-1 == madvise((void *)q.addr, vm_page_size, MADV_FREE))
#else
                        if (-1 == madvise((void *)q.addr, vm_page_size, MADV_FREE_REUSABLE))
#endif
                        {
                            /* -1 return: VM map entry change makes this unfit for reuse. Something evil lurks. */
#if DEBUG_MADVISE
                            nanozone_error(nanozone, 0, "madvise(..., MADV_FREE_REUSABLE) failed",
                                    (void *)q.addr, "length=%d\n", vm_page_size);
#endif
                        } else {
                            bytes_toward_goal += vm_page_size;
                            bitarray_set(pMeta->slot_madvised_pages, log_page_count, i);
                        }
                    }
                }
                free(will_madvise_pages);

                if (!bitarray_first_set(pMeta->slot_madvised_pages, log_page_count)) {
                    free(pMeta->slot_madvised_pages);
                    pMeta->slot_madvised_pages = NULL;
                    pMeta->slot_madvised_log_page_count = 0;
                }

                if (goal && bytes_toward_goal >= goal)
                    return bytes_toward_goal;
            }
        }
    }
    return bytes_toward_goal;
}

static size_t
nano_pressure_relief(nanozone_t *nanozone, size_t goal)
{
    return nano_try_madvise(nanozone, goal);
}

/**************** introspection methods *********************/

static kern_return_t
nanozone_default_reader(task_t task, vm_address_t address, vm_size_t size, void **ptr)
{
    *ptr = (void *)address;
    return 0;
}

static kern_return_t
nano_ptr_in_use_enumerator(task_t task, void *context, unsigned type_mask, vm_address_t zone_address,
        memory_reader_t reader, vm_range_recorder_t recorder)
{
    nanozone_t *nanozone;
    kern_return_t err;

    if (!reader) reader = nanozone_default_reader;

    err = reader(task, zone_address, sizeof(nanozone_t), (void **)&nanozone);
    if (err) return err;

    err = segregated_in_use_enumerator(task, context, type_mask, nanozone, reader, recorder);
    return err;
}

static size_t
nano_good_size(nanozone_t *nanozone, size_t size)
{
    if (size <= NANO_MAX_SIZE)
        return _nano_good_size(nanozone, size);
    else {
        malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
        return zone->introspect->good_size(zone, size);
    }
}

// TODO sanity checks
unsigned nanozone_check_counter = 0;
unsigned nanozone_check_start = 0;
unsigned nanozone_check_modulo = 1;

static boolean_t
nano_check_all(nanozone_t *nanozone, const char *function)
{
    return 1;
}

static boolean_t
nanozone_check(nanozone_t *nanozone)
{
    if ((++nanozone_check_counter % 10000) == 0)
        _malloc_printf(ASL_LEVEL_NOTICE, "at nanozone_check counter=%d\n", nanozone_check_counter);

    if (nanozone_check_counter < nanozone_check_start)
        return 1;

    if (nanozone_check_counter % nanozone_check_modulo)
        return 1;

    return nano_check_all(nanozone, "");
}

static unsigned
count_free(nanozone_t *nanozone, nano_meta_admin_t pMeta)
{
    chained_block_t head = NULL, tail = NULL, t;
    unsigned count = 0;

    unsigned stoploss = pMeta->slot_objects_mapped;
    while ((t = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next)))) {
        if (0 == stoploss) {
            nanozone_error(nanozone, 1, "Free list walk in count_free exceeded object count.",
                    (void *)&(pMeta->slot_LIFO), NULL);
static unsigned
count_free(nanozone_t *nanozone, nano_meta_admin_t pMeta)
{
	chained_block_t head = NULL, tail = NULL, t;
	unsigned count = 0;

	unsigned stoploss = pMeta->slot_objects_mapped;
	while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
		if (0 == stoploss) {
			nanozone_error(nanozone, 1, "Free list walk in count_free exceeded object count.",
				(void *)&(pMeta->slot_LIFO), NULL);
		}
		stoploss--;

		if (NULL == head)
			head = t;
		else
			tail->next = t;
		tail = t;

		count++;
	}
	if (tail)
		tail->next = NULL;

	// push the free list extracted above back onto the LIFO, all at once
	if (head)
		OSAtomicEnqueue( &(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));

	return count;
}
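/*
 * Print a per-magazine, per-slot summary of the zone. When verbose, each
 * realized slot is followed by a one-character-per-block map:
 *
 *	'_'  block skipped at the start of the slot (never handed out)
 *	'F'  block currently on the slot's free list
 *	'M'  block on a page that has been madvised back to the VM system
 *	'.'  block presumed in use
 *
 * Building the map drains the slot's free list into a bitarray_t and then
 * pushes it back with the same single-enqueue splice used by count_free().
 */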
static void
nano_print(nanozone_t *nanozone, boolean_t verbose)
{
	unsigned int mag_index, slot_key;
	malloc_statistics_t stats;

	nano_statistics(nanozone, &stats);
	_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
		"Nanozone %p: inUse=%d(%dKB) touched=%dKB allocated=%dMB\n",
		nanozone, stats.blocks_in_use, stats.size_in_use>>10, stats.max_size_in_use>>10, stats.size_allocated>>20);

	for (mag_index = 0; mag_index < nanozone->phys_ncpus; mag_index++) {
		nano_blk_addr_t p;

		// Establish p as base address for band 0, slot 0, offset 0
		p.fields.nano_signature = NANOZONE_SIGNATURE;
		p.fields.nano_mag_index = mag_index;
		p.fields.nano_band = 0;
		p.fields.nano_slot = 0;
		p.fields.nano_offset = 0;

		for (slot_key = 0; slot_key < SLOT_KEY_LIMIT;
			 p.addr += SLOT_IN_BAND_SIZE, // Advance to next slot base
			 slot_key++) {

			nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);
			uintptr_t slot_bump_addr = pMeta->slot_bump_addr; // capture this volatile pointer
			size_t slot_objects_mapped = pMeta->slot_objects_mapped; // capture this volatile count

			if (0 == slot_objects_mapped) { // Nothing allocated in this magazine for this slot?
				_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
					"Magazine %2d(%3d) Unrealized\n", mag_index, (slot_key + 1) << SHIFT_NANO_QUANTUM);
				continue;
			}

			uintptr_t offset = (0 == slot_bump_addr ? 0 : slot_bump_addr - p.addr);
			unsigned blocks_touched = offset_to_index(nanozone, pMeta, offset) - pMeta->slot_objects_skipped;
			unsigned blocks_now_free = count_free(nanozone, pMeta);
			unsigned blocks_in_use = blocks_touched - blocks_now_free;

			size_t size_hiwater = ((slot_key + 1) << SHIFT_NANO_QUANTUM) * blocks_touched;
			size_t size_in_use = ((slot_key + 1) << SHIFT_NANO_QUANTUM) * blocks_in_use;
			size_t size_allocated = ((offset / BAND_SIZE) + 1) * SLOT_IN_BAND_SIZE;

			_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX,
				"Magazine %2d(%3d) [%p, %3dKB] \t Allocations in use=%4d \t Bytes in use=%db \t Untouched=%dKB\n",
				mag_index, (slot_key + 1) << SHIFT_NANO_QUANTUM, (void *)p.addr,
				(size_allocated>>10), blocks_in_use, size_in_use, (size_allocated - size_hiwater)>>10);

			if (!verbose) {
				continue;
			} else {
				// Walk the slot free list and populate a bitarray_t
				int log_size = 64 - __builtin_clzl(slot_objects_mapped);
				bitarray_t slot_bitarray = bitarray_create(log_size);

				if (!slot_bitarray) {
					malloc_printf("bitarray_create(%d) in nano_print returned errno=%d.", log_size, errno);
					return;
				}

				chained_block_t head = NULL, tail = NULL, t;
				unsigned stoploss = slot_objects_mapped;
				while ((t = OSAtomicDequeue( &(pMeta->slot_LIFO), offsetof(struct chained_block_s,next)))) {
					if (0 == stoploss) {
						malloc_printf("Free list walk in nano_print exceeded object count.");
						break;
					}
					stoploss--;

					uintptr_t offset = ((uintptr_t)t - p.addr); // offset from beginning of slot
					index_t block_index = offset_to_index(nanozone, pMeta, offset);

					if (NULL == head)
						head = t;
					else
						tail->next = t;
					tail = t;

					if (block_index < slot_objects_mapped)
						bitarray_set(slot_bitarray, log_size, block_index);
				}
				if (tail)
					tail->next = NULL;

				index_t i;
				for (i = 0; i < slot_objects_mapped; ++i) {
					nano_blk_addr_t q;
					size_t pgnum;
					uintptr_t block_offset = index_to_offset(nanozone, pMeta, i);
					if (p.addr + block_offset >= slot_bump_addr)
						break;

					q.addr = p.addr + block_offset;
					pgnum = ((((unsigned)q.fields.nano_band) << NANO_OFFSET_BITS) | ((unsigned)q.fields.nano_offset)) >> vm_page_shift;

					if (i < pMeta->slot_objects_skipped) {
						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "_");
					} else if (bitarray_get(slot_bitarray, log_size, i)) {
						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "F");
					} else if (pMeta->slot_madvised_pages && (pgnum < (1 << pMeta->slot_madvised_log_page_count)) &&
							   bitarray_get(pMeta->slot_madvised_pages, pMeta->slot_madvised_log_page_count, pgnum)) {
						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "M");
					} else {
						_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, ".");
					}
				}
				_malloc_printf(MALLOC_PRINTF_NOLOG | MALLOC_PRINTF_NOPREFIX, "\n");

				free(slot_bitarray);

				// push the free list extracted above back onto the LIFO, all at once
				if (head)
					OSAtomicEnqueue( &(pMeta->slot_LIFO), head, (uintptr_t)tail - (uintptr_t)head + offsetof(struct chained_block_s,next));
			}
		}
	}
	return;
}

static void
nano_log(malloc_zone_t *zone, void *log_address)
{
}
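/*
 * force_lock/force_unlock are this zone's fork hooks: the malloc layer calls
 * force_lock on every registered zone before fork() and force_unlock in the
 * parent afterward, ensuring no band resupply lock is held by a thread that
 * will not exist in the child. Only the per-magazine resupply locks need to
 * be taken here; the slot free lists themselves are lock-free.
 */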
static void
nano_force_lock(nanozone_t *nanozone)
{
	int i;

	for (i = 0; i < nanozone->phys_ncpus; ++i) {
		_malloc_lock_lock(&nanozone->band_resupply_lock[i]);
	}
}

static void
nano_force_unlock(nanozone_t *nanozone)
{
	int i;

	for (i = 0; i < nanozone->phys_ncpus; ++i) {
		_malloc_lock_unlock(&nanozone->band_resupply_lock[i]);
	}
}
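/*
 * Statistics are derived from each slot's bump allocator rather than from
 * per-block bookkeeping:
 *
 *	blocks_touched  = index of slot_bump_addr within the slot, less the
 *	                  blocks deliberately skipped at the start of the slot
 *	blocks_in_use   = blocks_touched - (current free list length)
 *	size_allocated  = whole SLOT_IN_BAND_SIZE strides spanned so far
 *
 * count_free() drains and restores the slot LIFO to measure the free list,
 * so the numbers are best-effort snapshots while other threads allocate.
 */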
static void
nano_statistics(nanozone_t *nanozone, malloc_statistics_t *stats)
{
	int i, j;

	bzero(stats, sizeof(*stats));

	for (i = 0; i < nanozone->phys_ncpus; ++i) {
		nano_blk_addr_t p;

		// Establish p as base address for slot 0 in this CPU magazine
		p.fields.nano_signature = NANOZONE_SIGNATURE;
		p.fields.nano_mag_index = i;
		p.fields.nano_band = 0;
		p.fields.nano_slot = 0;
		p.fields.nano_offset = 0;

		for (j = 0; j < NANO_SLOT_SIZE;
			 p.addr += SLOT_IN_BAND_SIZE, // Advance to next slot base
			 ++j) {
			nano_meta_admin_t pMeta = &nanozone->meta_data[i][j];

			if (0 == pMeta->slot_current_base_addr) { // Nothing allocated in this magazine for this slot?
				continue;
			} else {
				uintptr_t offset = pMeta->slot_bump_addr - p.addr;
				unsigned blocks_touched = offset_to_index(nanozone, pMeta, offset) - pMeta->slot_objects_skipped;
				unsigned blocks_now_free = count_free(nanozone, pMeta);
				unsigned blocks_in_use = blocks_touched - blocks_now_free;

				size_t size_hiwater = ((j + 1) << SHIFT_NANO_QUANTUM) * blocks_touched;
				size_t size_in_use = ((j + 1) << SHIFT_NANO_QUANTUM) * blocks_in_use;
				size_t size_allocated = ((offset / BAND_SIZE) + 1) * SLOT_IN_BAND_SIZE;

				stats->blocks_in_use += blocks_in_use;

				stats->max_size_in_use += size_hiwater;
				stats->size_in_use += size_in_use;
				stats->size_allocated += size_allocated;
			}
		}
	}
}

static boolean_t
_nano_locked(nanozone_t *nanozone)
{
	int i;

	for (i = 0; i < nanozone->phys_ncpus; ++i) {
		if (_malloc_lock_trylock(&nanozone->band_resupply_lock[i])) {
			_malloc_lock_unlock(&nanozone->band_resupply_lock[i]);
			return TRUE;
		}
	}
	return FALSE;
}

static boolean_t
nano_locked(nanozone_t *nanozone)
{
	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);

	return _nano_locked(nanozone) || zone->introspect->zone_locked(zone);
}

static const struct malloc_introspection_t nano_introspect = {
	(void *)nano_ptr_in_use_enumerator,
	(void *)nano_good_size,
	(void *)nanozone_check,
	(void *)nano_print,
	nano_log,
	(void *)nano_force_lock,
	(void *)nano_force_unlock,
	(void *)nano_statistics,
	(void *)nano_locked,
	NULL, NULL, NULL, NULL, /* Zone enumeration version 7 and forward. */
}; // marked as const to spare the DATA section

__attribute__((visibility("hidden")))
void
nano_forked_zone(nanozone_t *nanozone)
{
	/*
	 * Hobble the nano zone in the child of a fork prior to an exec, since
	 * the state of the zone can be made inconsistent by a parent thread
	 * while the fork is underway.
	 * All new allocations will be referred to the helper zone (which is more stable).
	 * All free()'s of existing nano objects will be leaked.
	 */

	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ | PROT_WRITE);

	nanozone->basic_zone.size = (void *)nano_size; /* Unchanged. */
	nanozone->basic_zone.malloc = (void *)nano_forked_malloc;
	nanozone->basic_zone.calloc = (void *)nano_forked_calloc;
	nanozone->basic_zone.valloc = (void *)nano_valloc; /* Unchanged, already always obtained from helper zone. */
	nanozone->basic_zone.free = (void *)nano_forked_free;
	nanozone->basic_zone.realloc = (void *)nano_forked_realloc;
	nanozone->basic_zone.destroy = (void *)nano_destroy; /* Unchanged. */
	nanozone->basic_zone.batch_malloc = (void *)nano_forked_batch_malloc;
	nanozone->basic_zone.batch_free = (void *)nano_forked_batch_free;
	nanozone->basic_zone.introspect = (struct malloc_introspection_t *)&nano_introspect; /* Unchanged. */
	nanozone->basic_zone.memalign = (void *)nano_memalign; /* Unchanged. */
	nanozone->basic_zone.free_definite_size = (void *)nano_forked_free_definite_size;

	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ);
}
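#if 0
/*
 * Illustrative sketch only, excluded from the build: roughly how a caller
 * might wire the nano zone in front of its helper using the two creation
 * routines. The real bootstrap lives in the malloc initialization code and
 * differs in detail; example_bootstrap is a hypothetical name.
 */
static malloc_zone_t *
example_bootstrap(unsigned debug_flags)
{
	malloc_zone_t *helper = create_scalable_zone(0, debug_flags);
	malloc_zone_t *nano = create_nano_zone(0, helper, debug_flags);

	/* create_nano_zone() returns NULL when nano is disengaged or the CPU
	 * topology is unsuitable; fall back to the helper zone alone. */
	return nano ? nano : helper;
}
#endif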
__attribute__((visibility("hidden")))
malloc_zone_t *
create_nano_zone(size_t initial_size, malloc_zone_t *helper_zone, unsigned debug_flags)
{
	nanozone_t *nanozone;
	int i, j;

	if (!_malloc_engaged_nano) return NULL;

#if defined(__x86_64__)
	if (_COMM_PAGE_VERSION_REQD > (*((uint16_t *)_COMM_PAGE_VERSION))) {
		malloc_printf("*** FATAL ERROR - comm page version mismatch.\n");
		exit(-1);
	}
#endif

	/* get memory for the zone. */
	nanozone = allocate_pages(NULL, SZONE_PAGED_SIZE, 0, 0, VM_MEMORY_MALLOC);
	if (!nanozone)
		return NULL;

	/* set up the basic_zone portion of the nanozone structure */
	nanozone->basic_zone.version = 8;
	nanozone->basic_zone.size = (void *)nano_size;
	nanozone->basic_zone.malloc = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ? (void *)nano_malloc_scribble : (void *)nano_malloc;
	nanozone->basic_zone.calloc = (void *)nano_calloc;
	nanozone->basic_zone.valloc = (void *)nano_valloc;
	nanozone->basic_zone.free = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ? (void *)nano_free_scribble : (void *)nano_free;
	nanozone->basic_zone.realloc = (void *)nano_realloc;
	nanozone->basic_zone.destroy = (void *)nano_destroy;
	nanozone->basic_zone.batch_malloc = (void *)nano_batch_malloc;
	nanozone->basic_zone.batch_free = (void *)nano_batch_free;
	nanozone->basic_zone.introspect = (struct malloc_introspection_t *)&nano_introspect;
	nanozone->basic_zone.memalign = (void *)nano_memalign;
	nanozone->basic_zone.free_definite_size = (debug_flags & SCALABLE_MALLOC_DO_SCRIBBLE) ?
		(void *)nano_free_definite_size_scribble : (void *)nano_free_definite_size;

	nanozone->basic_zone.pressure_relief = (void *)nano_pressure_relief;

	nanozone->basic_zone.reserved1 = 0; /* Set to zero once and for all as required by CFAllocator. */
	nanozone->basic_zone.reserved2 = 0; /* Set to zero once and for all as required by CFAllocator. */

	mprotect(nanozone, sizeof(nanozone->basic_zone), PROT_READ); /* Prevent overwriting the function pointers in basic_zone. */

	/* set up the remainder of the nanozone structure */
	nanozone->debug_flags = debug_flags;
	nanozone->our_signature = NANOZONE_SIGNATURE;

	/* Query the number of configured processors. */
#if defined(__x86_64__)
	nanozone->phys_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_PHYSICAL_CPUS;
	nanozone->logical_ncpus = *(uint8_t *)(uintptr_t)_COMM_PAGE_LOGICAL_CPUS;
#else
#error Unknown architecture
#endif

	if (nanozone->phys_ncpus > sizeof(nanozone->core_mapped_size)/sizeof(nanozone->core_mapped_size[0])) {
		_malloc_printf(ASL_LEVEL_NOTICE, "nano zone abandoned because NCPUS mismatch.\n");
		return NULL;
	}

	if (0 != (nanozone->logical_ncpus % nanozone->phys_ncpus)) {
		malloc_printf("*** FATAL ERROR - logical_ncpus %% phys_ncpus != 0.\n");
		exit(-1);
	}

	switch (nanozone->logical_ncpus/nanozone->phys_ncpus) {
	case 1:
		nanozone->hyper_shift = 0;
		break;
	case 2:
		nanozone->hyper_shift = 1;
		break;
	case 4:
		nanozone->hyper_shift = 2;
		break;
	default:
		malloc_printf("*** FATAL ERROR - logical_ncpus / phys_ncpus not 1, 2, or 4.\n");
		exit(-1);
	}

	/* Initialize slot queue heads and resupply locks. */
	OSQueueHead q0 = OS_ATOMIC_QUEUE_INIT;
	for (i = 0; i < nanozone->phys_ncpus; ++i) {
		_malloc_lock_init(&nanozone->band_resupply_lock[i]);

		for (j = 0; j < NANO_SLOT_SIZE; ++j) {
			nanozone->meta_data[i][j].slot_LIFO = q0;
		}
	}

	/* Initialize the security token. */
	if (0 == _dyld_get_image_slide((const struct mach_header*)_NSGetMachExecuteHeader())) {
		// zero slide when ASLR has been disabled by boot-arg. Eliminate cloaking.
		malloc_entropy[0] = 0;
		malloc_entropy[1] = 0;
	}
	nanozone->cookie = (uintptr_t)malloc_entropy[0] & 0x0000ffffffff0000ULL; // scramble central 32 bits with this cookie

	/* Nano zone does not support SCALABLE_MALLOC_ADD_GUARD_PAGES. */
	if (nanozone->debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES) {
		_malloc_printf(ASL_LEVEL_INFO, "nano zone does not support guard pages\n");
		nanozone->debug_flags &= ~SCALABLE_MALLOC_ADD_GUARD_PAGES;
	}

	nanozone->helper_zone = helper_zone;

	return (malloc_zone_t *)nanozone;
}
#endif /* defined(__LP64__) */

/* vim: set noet:ts=4:sw=4:cindent: */