/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	kern/zalloc.c
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Zone-based memory allocator.  A zone is a collection of fixed size
 *	data blocks for which quick allocation/deallocation is possible.
 */
#include <zone_debug.h>
#include <zone_alias_addr.h>

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach/task_server.h>
#include <mach/machine/vm_types.h>
#include <mach_debug/zone_info.h>
#include <mach/vm_map.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/host.h>
#include <kern/macro_help.h>
#include <kern/sched.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/thread_call.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <pexpert/pexpert.h>

#include <machine/machparam.h>

#include <libkern/OSDebug.h>
#include <libkern/OSAtomic.h>
#include <sys/kdebug.h>

/*
 * Zone Corruption Debugging
 *
 * We use three methods to detect use of a zone element after it has been freed.  These
 * checks are enabled for every N'th element (counted per-zone) by specifying
 * "zp-factor=N" as a boot-arg.  To turn this feature off, set "zp-factor=0" or "-no-zp".
 *
 * (1) Range-check the free-list "next" pointer for sanity.
 * (2) Store the pointer in two different words, one at the beginning of the freed element
 *     and one at the end, and compare them against each other when re-using the element,
 *     to detect modifications.
 * (3) Poison the freed memory by overwriting it with 0xdeadbeef, and check it when the
 *     memory is being reused to make sure it is still poisoned.
 *
 * As a result, each element (that is large enough to hold this data inside) must be marked
 * as either "ZP_POISONED" or "ZP_NOT_POISONED" in the first integer within the would-be
 * poisoned segment after the first free-list pointer.
 *
 * Performance slowdown is inversely proportional to the frequency with which you check
 * (as would be expected), with a 4-5% hit around N=1, down to ~0.3% at N=16 and just
 * "noise" at N=32 and higher.  You can expect to find a 100% reproducible
 * bug in an average of N tries, with a standard deviation of about N, but you will probably
 * want to set "zp-factor=1" or "-zp" if you are attempting to reproduce a known bug.
 *
 *
 * Zone corruption logging
 *
 * You can also track where corruptions come from by using the boot-arguments
 * "zlog=<zone name to log> -zc".  Search for "Zone corruption logging" later in this
 * document for more implementation and usage information.
 */

#define ZP_POISON		0xdeadbeef
#define ZP_POISONED		0xfeedface
#define ZP_NOT_POISONED		0xbaddecaf

#if CONFIG_EMBEDDED
	#define ZP_DEFAULT_SAMPLING_FACTOR 0
#else /* CONFIG_EMBEDDED */
	#define ZP_DEFAULT_SAMPLING_FACTOR 16
#endif /* CONFIG_EMBEDDED */

uint32_t	free_check_sample_factor = 0;		/* set by zp-factor=N boot arg */
boolean_t	corruption_debug_flag = FALSE;		/* enabled by "-zc" boot-arg */

/*
 * Zone checking helper macro.
 */
#define is_kernel_data_addr(a)	(!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
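
/*
 * Illustrative sketch (editor's addition, not used by the code below): assuming a
 * 64-bit kernel and a hypothetical 48-byte zone element, a freed element that was
 * selected for checking is laid out roughly as:
 *
 *	offset  0      : free-list "next" pointer
 *	offset  8      : ZP_POISONED marker (0xfeedface)
 *	offset 12 .. 39: ZP_POISON fill (0xdeadbeef)
 *	offset 40      : backup copy of the free-list pointer, written over the last
 *	                 poison words and compared against offset 0 on reallocation
 *
 * Elements not selected for checking carry only ZP_NOT_POISONED at offset 8.
 * A typical debugging boot-arg line might be "zp-factor=1 zlog=kalloc.512 -zc"
 * (the zone name is an example only).
 */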
/*
 * Frees the specified element, which is within the specified zone.  If this
 * element should be poisoned and its free list checker should be set, both are
 * done here.  These checks will only be enabled if the element size is at least
 * large enough to hold two vm_offset_t's and one uint32_t (to enable both types
 * of checks).
 */
static inline void
free_to_zone(zone_t zone, void *elem) {
	/* get the index of the first uint32_t beyond the 'next' pointer */
	unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t);

	/* should we run checks on this piece of memory? */
	if (free_check_sample_factor != 0 &&
	    zone->free_check_count++ % free_check_sample_factor == 0 &&
	    zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {
		zone->free_check_count = 1;
		((uint32_t *) elem)[i] = ZP_POISONED;
		for (i++; i < zone->elem_size / sizeof(uint32_t); i++) {
			((uint32_t *) elem)[i] = ZP_POISON;
		}
		((vm_offset_t *) elem)[((zone->elem_size)/sizeof(vm_offset_t))-1] = zone->free_elements;
	} else {
		((uint32_t *) elem)[i] = ZP_NOT_POISONED;
	}

	/* maintain free list and decrement number of active objects in zone */
	((vm_offset_t *) elem)[0] = zone->free_elements;
	zone->free_elements = (vm_offset_t) elem;
	zone->count--;
}

/*
 * Allocates an element from the specified zone, storing its address in the
 * return arg.  This function will look for corruptions revealed through zone
 * poisoning and free list checks.
 */
static inline void
alloc_from_zone(zone_t zone, void **ret) {
	void *elem = (void *) zone->free_elements;
	if (elem != NULL) {
		/* get the index of the first uint32_t beyond the 'next' pointer */
		unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t);

		/* first int in data section must be ZP_POISONED or ZP_NOT_POISONED */
		if (((uint32_t *) elem)[i] == ZP_POISONED &&
		    zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) {
			/* check the free list pointers */
			if (!is_kernel_data_addr(((vm_offset_t *) elem)[0]) ||
			    ((vm_offset_t *) elem)[0] !=
			    ((vm_offset_t *) elem)[(zone->elem_size/sizeof(vm_offset_t))-1]) {
				panic("a freed zone element has been modified in zone: %s (0x%08x)",
				      zone->zone_name, ((uint32_t *) elem)[i]);
			}

			/* check for poisoning in free space */
			for (i++;
			     i < zone->elem_size / sizeof(uint32_t) -
			         sizeof(vm_offset_t) / sizeof(uint32_t);
			     i++) {
				if (((uint32_t *) elem)[i] != ZP_POISON) {
					panic("a freed zone element has been modified in zone: %s, element is %08x but expected %08x (element: %p)",
					      zone->zone_name, ((uint32_t *) elem)[i], ZP_POISON, elem);
				}
			}
		} else if (((uint32_t *) elem)[i] != ZP_NOT_POISONED) {
			panic("a freed zone element has been modified in zone: %s, element is %08x but expected %08x (element: %p)",
			      zone->zone_name, ((uint32_t *) elem)[i], ZP_NOT_POISONED, elem);
		}

		zone->count++;
		zone->sum_count++;
		zone->free_elements = ((vm_offset_t *) elem)[0];
	}
	*ret = elem;
}


/*
 * Fake zones for things that want to report via zprint but are not actually zones.
 */
struct fake_zone_info {
	const char* name;
	void (*init)(int);
	void (*query)(int *,
		      vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
		      uint64_t *, int *, int *, int *);
};

static const struct fake_zone_info fake_zones[] = {
	{
		.name = "kernel_stacks",
		.init = stack_fake_zone_init,
		.query = stack_fake_zone_info,
	},
	{
		.name = "page_tables",
		.init = pt_fake_zone_init,
		.query = pt_fake_zone_info,
	},
	{
		.name = "kalloc.large",
		.init = kalloc_fake_zone_init,
		.query = kalloc_fake_zone_info,
	},
};
static const unsigned int num_fake_zones =
	sizeof (fake_zones) / sizeof (fake_zones[0]);

/*
 * Zone info options
 */
boolean_t zinfo_per_task = FALSE;		/* enabled by -zinfop in boot-args */
#define ZINFO_SLOTS 200				/* for now */
#define ZONES_MAX (ZINFO_SLOTS - num_fake_zones - 1)

/*
 * Support for garbage collection of unused zone pages
 *
 * The kernel virtually allocates the "zone map" submap of the kernel
 * map. When an individual zone needs more storage, memory is allocated
 * out of the zone map, and the two-level "zone_page_table" is
 * on-demand expanded so that it has entries for those pages.
 * zone_page_init()/zone_page_alloc() initialize "alloc_count"
 * to the number of zone elements that occupy the zone page (which may
 * be a minimum of 1, including if a zone element spans multiple
 * pages).
 *
 * Asynchronously, the zone_gc() logic attempts to walk zone free
 * lists to see if all the elements on a zone page are free. If
 * "collect_count" (which it increments during the scan) matches
 * "alloc_count", the zone page is a candidate for collection and the
 * physical page is returned to the VM system. During this process, the
 * first word of the zone page is re-used to maintain a linked list of
 * to-be-collected zone pages.
 */
typedef uint32_t zone_page_index_t;
#define ZONE_PAGE_INDEX_INVALID		((zone_page_index_t)0xFFFFFFFFU)

struct zone_page_table_entry {
	volatile	uint16_t	alloc_count;
	volatile	uint16_t	collect_count;
};

#define	ZONE_PAGE_USED		0
#define ZONE_PAGE_UNUSED	0xffff

/* Forwards */
void		zone_page_init(
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_alloc(
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_free_element(
				zone_page_index_t	*free_page_head,
				zone_page_index_t	*free_page_tail,
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_collect(
				vm_offset_t	addr,
				vm_size_t	size);

boolean_t	zone_page_collectable(
				vm_offset_t	addr,
				vm_size_t	size);

void		zone_page_keep(
				vm_offset_t	addr,
				vm_size_t	size);

void		zalloc_async(
				thread_call_param_t	p0,
				thread_call_param_t	p1);

void		zone_display_zprint( void );

vm_map_t	zone_map = VM_MAP_NULL;

zone_t		zone_zone = ZONE_NULL;	/* the zone containing other zones */

zone_t		zinfo_zone = ZONE_NULL;	/* zone of per-task zone info */

/*
 *	The VM system gives us an initial chunk of memory.
 *	It has to be big enough to allocate the zone_zone
 *	all the way through the pmap zone.
 */

vm_offset_t	zdata;
vm_size_t	zdata_size;

#define zone_wakeup(zone) thread_wakeup((event_t)(zone))
#define zone_sleep(zone)				\
	(void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT);


#define lock_zone_init(zone)					\
MACRO_BEGIN							\
	char _name[32];						\
	(void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \
	lck_grp_attr_setdefault(&(zone)->lock_grp_attr);	\
	lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \
	lck_attr_setdefault(&(zone)->lock_attr);		\
	lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext,	\
	    &(zone)->lock_grp, &(zone)->lock_attr);		\
MACRO_END

#define lock_try_zone(zone)	lck_mtx_try_lock_spin(&zone->lock)

/*
 *	Garbage collection map information
 */
#define ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE (32)
struct zone_page_table_entry * volatile zone_page_table[ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE];
vm_size_t			zone_page_table_used_size;
vm_offset_t			zone_map_min_address;
vm_offset_t			zone_map_max_address;
unsigned int			zone_pages;
unsigned int			zone_page_table_second_level_size;	/* power of 2 */
unsigned int			zone_page_table_second_level_shift_amount;

#define zone_page_table_first_level_slot(x)  ((x) >> zone_page_table_second_level_shift_amount)
#define zone_page_table_second_level_slot(x) ((x) & (zone_page_table_second_level_size - 1))

void	zone_page_table_expand(zone_page_index_t pindex);
struct zone_page_table_entry *zone_page_table_lookup(zone_page_index_t pindex);

/*
 *	Exclude more than one concurrent garbage collection
 */
decl_lck_mtx_data(,		zone_gc_lock)

lck_attr_t      zone_lck_attr;
lck_grp_t       zone_lck_grp;
lck_grp_attr_t  zone_lck_grp_attr;
lck_mtx_ext_t   zone_lck_ext;

#if	!ZONE_ALIAS_ADDR
#define from_zone_map(addr, size) \
	((vm_offset_t)(addr) >= zone_map_min_address && \
	 ((vm_offset_t)(addr) + size - 1) < zone_map_max_address)
#else
#define from_zone_map(addr, size) \
	((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) >= zone_map_min_address && \
	 ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) + size - 1) < zone_map_max_address)
#endif

/*
 *	Protects first_zone, last_zone, num_zones,
 *	and the next_zone field of zones.
 */
decl_simple_lock_data(,	all_zones_lock)
zone_t			first_zone;
zone_t			*last_zone;
unsigned int		num_zones;

boolean_t zone_gc_allowed = TRUE;
boolean_t zone_gc_forced = FALSE;
boolean_t panic_include_zprint = FALSE;
boolean_t zone_gc_allowed_by_time_throttle = TRUE;

/*
 * Zone leak debugging code
 *
 * When enabled, this code keeps a log to track allocations to a particular zone that have not
 * yet been freed.  Examining this log will reveal the source of a zone leak.  The log is allocated
 * only when logging is enabled, so there is no effect on the system when it's turned off.  Logging is
 * off by default.
 *
 * Enable the logging via the boot-args.  Add the parameter "zlog=<zone>" to boot-args where <zone>
 * is the name of the zone you wish to log.
 *
 * This code only tracks one zone, so you need to identify which one is leaking first.
 * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
 * garbage collector.  Note that the zone name printed in the panic message is not necessarily the one
 * containing the leak.
 * So do a zprint from gdb and locate the zone with the bloated size.  This
 * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test.  The
 * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
 * See the help in the kgmacros for usage info.
 *
 *
 * Zone corruption logging
 *
 * Logging can also be used to help identify the source of a zone corruption.  First, identify the zone
 * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args.  When -zc is used in conjunction
 * with zlog, it changes the logging style to track both allocations and frees to the zone.  So when the
 * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
 * and freed any particular element in the zone.  Use the findelem kgmacro with the address of the element that's been
 * corrupted to examine its history.  This should lead to the source of the corruption.
 */

static int log_records;	/* size of the log, expressed in number of records */

#define MAX_ZONE_NAME	32	/* max length of a zone name we can take from the boot-args */

static char zone_name_to_log[MAX_ZONE_NAME] = "";	/* the zone name we're logging, if any */

/*
 * The number of records in the log is configurable via the zrecs parameter in boot-args.  Set this to
 * the number of records you want in the log.  For example, "zrecs=1000" sets it to 1000 records.  Note
 * that the larger the size of the log, the slower the system will run due to linear searching in the log,
 * but one doesn't generally care about performance when tracking down a leak.  The log is capped at 8000
 * records since going much larger than this tends to make the system unresponsive and unbootable on small
 * memory configurations.  The default value is 4000 records.
 */

#if	defined(__LP64__)
#define ZRECORDS_MAX		128000		/* Max records allowed in the log */
#else
#define ZRECORDS_MAX		8000		/* Max records allowed in the log */
#endif
#define ZRECORDS_DEFAULT	4000		/* default records in log if zrecs is not specified in boot-args */
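
/*
 * Example boot-args (editor's illustration; zone names are hypothetical):
 *
 *	leak hunting:        zlog=vm.objects zrecs=8000
 *	corruption hunting:  zlog=kalloc.512 -zc
 *
 * With -zc, both allocations and frees are logged and the log is treated as a
 * circular buffer; without it, only outstanding allocations are kept.
 */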
/*
 * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
 * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace.  A
 * record is added to the log each time a zalloc() is done in the zone_of_interest.  For leak debugging,
 * the record is cleared when a zfree() is done.  For corruption debugging, the log tracks both allocs and frees.
 * If the log fills, old records are replaced as if it were a circular buffer.
 */

struct zrecord {
	void		*z_element;		/* the element that was zalloc'ed or zfree'ed */
	uint32_t	z_opcode:1,		/* whether it was a zalloc or zfree */
			z_time:31;		/* time index when operation was done */
	void		*z_pc[MAX_ZTRACE_DEPTH];	/* stack trace of caller */
};

/*
 * Opcodes for the z_opcode field:
 */

#define ZOP_ALLOC	1
#define ZOP_FREE	0

/*
 * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
 */

static struct zrecord *zrecords;		/* the log itself, dynamically allocated when logging is enabled */
static int zcurrent  = 0;			/* index of the next slot in the log to use */
static int zrecorded = 0;			/* number of allocations recorded in the log */
static unsigned int ztime = 0;			/* a timestamp of sorts */
static zone_t zone_of_interest = NULL;		/* the zone being watched; corresponds to zone_name_to_log */

/*
 * Decide if we want to log this zone by doing a string compare between a zone name and the name
 * of the zone to log.  Return true if the strings are equal, false otherwise.  Because it's not
 * possible to include spaces in strings passed in via the boot-args, a period in the logname will
 * match a space in the zone name.
 */

static int
log_this_zone(const char *zonename, const char *logname)
{
	int len;
	const char *zc = zonename;
	const char *lc = logname;

	/*
	 * Compare the strings.  We bound the compare by MAX_ZONE_NAME.
	 */

	for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {

		/*
		 * If the current characters don't match, check for a space in
		 * the zone name and a corresponding period in the log name.
		 * If that's not there, then the strings don't match.
		 */

		if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
			break;

		/*
		 * The strings are equal so far.  If we're at the end, then it's a match.
		 */

		if (*zc == '\0')
			return TRUE;
	}

	return FALSE;
}


/*
 * Test if we want to log this zalloc/zfree event.  We log if this is the zone we're interested in and
 * the buffer for the records has been allocated.
 */

#define DO_LOGGING(z)		(zrecords && (z) == zone_of_interest)

extern boolean_t zlog_ready;

#if CONFIG_ZLEAKS
#pragma mark -
#pragma mark Zone Leak Detection

/*
 * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
 * allocations made by the zone allocator.  Every zleak_sample_factor allocations in each zone, we capture a
 * backtrace.  Every free, we examine the table and determine if the allocation was being tracked,
 * and stop tracking it if it was being tracked.
 *
 * We track the allocations in the zallocations hash table, which stores the address that was returned from
 * the zone allocator.  Each stored entry in the zallocations table points to an entry in the ztraces table, which
 * stores the backtrace associated with that allocation.  This provides uniquing for the relatively large
 * backtraces - we don't store them more than once.
 *
 * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
 * a large amount of virtual space.
 */
#define ZLEAK_STATE_ENABLED		0x01	/* Zone leak monitoring should be turned on if zone_map fills up. */
#define ZLEAK_STATE_ACTIVE		0x02	/* We are actively collecting traces. */
#define ZLEAK_STATE_ACTIVATING		0x04	/* Some thread is doing setup; others should move along. */
#define ZLEAK_STATE_FAILED		0x08	/* Attempt to allocate tables failed.  We will not try again. */
uint32_t	zleak_state = 0;		/* State of collection, as above */

boolean_t	panic_include_ztrace = FALSE;		/* Enable zleak logging on panic */
vm_size_t	zleak_global_tracking_threshold;	/* Size of zone map at which to start collecting data */
vm_size_t	zleak_per_zone_tracking_threshold;	/* Size a zone will have before we will collect data on it */
unsigned int	zleak_sample_factor = 1000;		/* Allocations per sample attempt */

/*
 * Counters for allocation statistics.
 */

/* Times two active records want to occupy the same spot */
unsigned int z_alloc_collisions = 0;
unsigned int z_trace_collisions = 0;

/* Times a new record lands on a spot previously occupied by a freed allocation */
unsigned int z_alloc_overwrites = 0;
unsigned int z_trace_overwrites = 0;

/* Times a new alloc or trace is put into the hash table */
unsigned int z_alloc_recorded = 0;
unsigned int z_trace_recorded = 0;

/* Times zleak_log returned false due to not being able to acquire the lock */
unsigned int z_total_conflicts = 0;


#pragma mark struct zallocation
/*
 * Structure for keeping track of an allocation
 * An allocation bucket is in use if its element is not NULL
 */
struct zallocation {
	uintptr_t	za_element;	/* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
	vm_size_t	za_size;	/* how much memory did this allocation take up? */
	uint32_t	za_trace_index;	/* index into ztraces for backtrace associated with allocation */
	/* TODO: #if this out */
	uint32_t	za_hit_count;	/* for determining effectiveness of hash function */
};

/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;

vm_size_t zleak_max_zonemap_size;

/* Hashmaps of allocations and their corresponding traces */
static struct zallocation*	zallocations;
static struct ztrace*		ztraces;

/* not static so that panic can see this, see kern/debug.c */
struct ztrace*			top_ztrace;

/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
static lck_spin_t		zleak_lock;
static lck_attr_t		zleak_lock_attr;
static lck_grp_t		zleak_lock_grp;
static lck_grp_attr_t		zleak_lock_grp_attr;

/*
 * Initializes the zone leak monitor.
 * Called from zone_init()
 */
static void
zleak_init(vm_size_t max_zonemap_size)
{
	char			scratch_buf[16];
	boolean_t		zleak_enable_flag = FALSE;

	zleak_max_zonemap_size = max_zonemap_size;
	zleak_global_tracking_threshold = max_zonemap_size / 2;
	zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;

#if CONFIG_EMBEDDED
	if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
		zleak_enable_flag = TRUE;
		printf("zone leak detection enabled\n");
	} else {
		zleak_enable_flag = FALSE;
		printf("zone leak detection disabled\n");
	}
#else /* CONFIG_EMBEDDED */
	/* -zleakoff (flag to disable zone leak monitor) */
	if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
		zleak_enable_flag = FALSE;
		printf("zone leak detection disabled\n");
	} else {
		zleak_enable_flag = TRUE;
		printf("zone leak detection enabled\n");
	}
#endif /* CONFIG_EMBEDDED */

	/* zfactor=XXXX (override how often to sample the zone allocator) */
	if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
		printf("Zone leak factor override:%u\n", zleak_sample_factor);
	}

	/* zleak-allocs=XXXX (override number of buckets in zallocations) */
	if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
		printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets);
		/* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
		if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) {
			printf("Override isn't a power of two, bad things might happen!");
		}
	}

	/* zleak-traces=XXXX (override number of buckets in ztraces) */
	if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
		printf("Zone leak trace buckets override:%u\n", zleak_trace_buckets);
		/* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
		if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) {
			printf("Override isn't a power of two, bad things might happen!");
		}
	}

	/* allocate the zleak_lock */
	lck_grp_attr_setdefault(&zleak_lock_grp_attr);
	lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
	lck_attr_setdefault(&zleak_lock_attr);
	lck_spin_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);

	if (zleak_enable_flag) {
		zleak_state = ZLEAK_STATE_ENABLED;
	}
}
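
/*
 * Example boot-args accepted by zleak_init() (editor's illustration; the values
 * are arbitrary and the bucket counts must remain powers of two):
 *
 *	zfactor=500 zleak-allocs=16384 zleak-traces=8192
 *
 * On non-embedded configurations zleak starts out enabled and can be turned off
 * with -zleakoff; on embedded configurations it must be turned on with -zleakon.
 */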
#if CONFIG_ZLEAKS

/*
 * Support for kern.zleak.active sysctl - a simplified
 * version of the zleak_state variable.
 */
int
get_zleak_state(void)
{
	if (zleak_state & ZLEAK_STATE_FAILED)
		return (-1);
	if (zleak_state & ZLEAK_STATE_ACTIVE)
		return (1);
	return (0);
}

#endif


kern_return_t
zleak_activate(void)
{
	kern_return_t retval;
	vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
	vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
	void *allocations_ptr = NULL;
	void *traces_ptr = NULL;

	/* Only one thread attempts to activate at a time */
	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
		return KERN_SUCCESS;
	}

	/* Indicate that we're doing the setup */
	lck_spin_lock(&zleak_lock);
	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
		lck_spin_unlock(&zleak_lock);
		return KERN_SUCCESS;
	}

	zleak_state |= ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	/* Allocate and zero tables */
	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
	if (retval != KERN_SUCCESS) {
		goto fail;
	}

	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size);
	if (retval != KERN_SUCCESS) {
		goto fail;
	}

	bzero(allocations_ptr, z_alloc_size);
	bzero(traces_ptr, z_trace_size);

	/* Everything's set.  Install tables, mark active. */
	zallocations = allocations_ptr;
	ztraces = traces_ptr;

	/*
	 * Initialize the top_ztrace to the first entry in ztraces,
	 * so we don't have to check for null in zleak_log
	 */
	top_ztrace = &ztraces[0];

	/*
	 * Note that we do need a barrier between installing
	 * the tables and setting the active flag, because the zfree()
	 * path accesses the table without a lock if we're active.
	 */
	lck_spin_lock(&zleak_lock);
	zleak_state |= ZLEAK_STATE_ACTIVE;
	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	return 0;

fail:
	/*
	 * If we fail to allocate memory, don't further tax
	 * the system by trying again.
	 */
	lck_spin_lock(&zleak_lock);
	zleak_state |= ZLEAK_STATE_FAILED;
	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
	lck_spin_unlock(&zleak_lock);

	if (allocations_ptr != NULL) {
		kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
	}

	if (traces_ptr != NULL) {
		kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
	}

	return retval;
}

/*
 * TODO: What about allocations that never get deallocated,
 * especially ones with unique backtraces?  Should we wait to record
 * until after boot has completed?
 * (How many persistent zallocs are there?)
 */

/*
 * This function records the allocation in the allocations table,
 * and stores the associated backtrace in the traces table
 * (or just increments the refcount if the trace is already recorded)
 * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
 * the associated trace's refcount is decremented.
 * If the trace slot is in use, it returns.
 * The refcount is incremented by the amount of memory the allocation consumes.
 * The return value indicates whether to try again next time.
 */
static boolean_t
zleak_log(uintptr_t* bt,
	  uintptr_t addr,
	  uint32_t depth,
	  vm_size_t allocation_size)
{
	/* Quit if there's someone else modifying the hash tables */
	if (!lck_spin_try_lock(&zleak_lock)) {
		z_total_conflicts++;
		return FALSE;
	}

	struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];

	uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
	struct ztrace* trace = &ztraces[trace_index];

	allocation->za_hit_count++;
	trace->zt_hit_count++;

	/*
	 * If the allocation bucket we want to be in is occupied, and if the occupier
	 * has the same trace as us, just bail.
	 */
	if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
		z_alloc_collisions++;

		lck_spin_unlock(&zleak_lock);
		return TRUE;
	}

	/* STEP 1: Store the backtrace in the traces array. */
	/* A size of zero indicates that the trace bucket is free. */

	if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0 ) {
		/*
		 * Different unique trace with same hash!
		 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
		 * and get out of the way for later chances
		 */
		trace->zt_collisions++;
		z_trace_collisions++;

		lck_spin_unlock(&zleak_lock);
		return TRUE;
	} else if (trace->zt_size > 0) {
		/* Same trace, already added, so increment refcount */
		trace->zt_size += allocation_size;
	} else {
		/* Found an unused trace bucket, record the trace here! */
		if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */
			z_trace_overwrites++;

		z_trace_recorded++;
		trace->zt_size = allocation_size;
		memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)) );

		trace->zt_depth = depth;
		trace->zt_collisions = 0;
	}

	/* STEP 2: Store the allocation record in the allocations array. */

	if (allocation->za_element != (uintptr_t) 0) {
		/*
		 * Straight up replace any allocation record that was there.  We don't want to do the work
		 * to preserve the allocation entries that were there, because we only record a subset of the
		 * allocations anyways.
		 */

		z_alloc_collisions++;

		struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
		/* Knock off old allocation's size, not the new allocation */
		associated_trace->zt_size -= allocation->za_size;
	} else if (allocation->za_trace_index != 0) {
		/* Slot previously used but not currently in use */
		z_alloc_overwrites++;
	}

	allocation->za_element = addr;
	allocation->za_trace_index = trace_index;
	allocation->za_size = allocation_size;

	z_alloc_recorded++;

	if (top_ztrace->zt_size < trace->zt_size)
		top_ztrace = trace;

	lck_spin_unlock(&zleak_lock);
	return TRUE;
}

/*
 * Free the allocation record and release the stacktrace.
 * This should be as fast as possible because it will be called for every free.
 */
static void
zleak_free(uintptr_t addr,
	   vm_size_t allocation_size)
{
	if (addr == (uintptr_t) 0)
		return;

	struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];

	/* Double-checked locking: check to find out if we're interested, lock, check to make
	 * sure it hasn't changed, then modify it, and release the lock.
	 */

	if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
		/* if the allocation was the one, grab the lock, check again, then delete it */
		lck_spin_lock(&zleak_lock);

		if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
			struct ztrace *trace;

			/* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
			if (allocation->za_size != allocation_size) {
				panic("Freeing as size %lu memory that was allocated with size %lu\n",
				      (uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
			}

			trace = &ztraces[allocation->za_trace_index];

			/* size of 0 indicates trace bucket is unused */
			if (trace->zt_size > 0) {
				trace->zt_size -= allocation_size;
			}

			/* A NULL element means the allocation bucket is unused */
			allocation->za_element = 0;
		}
		lck_spin_unlock(&zleak_lock);
	}
}

#endif /* CONFIG_ZLEAKS */

/* These functions outside of CONFIG_ZLEAKS because they are also used in
 * mbuf.c for mbuf leak-detection.  This is why they lack the z_ prefix.
 */

/*
 * This function captures a backtrace from the current stack and
 * returns the number of frames captured, limited by max_frames.
 * It's fast because it does no checking to make sure there isn't bad data.
 * Since it's only called from threads that we're going to keep executing,
 * if there's bad data we were going to die eventually.
 * If this function is inlined, it doesn't record the frame of the function it's inside.
 * (because there's no stack frame!)
 */

uint32_t
fastbacktrace(uintptr_t* bt, uint32_t max_frames)
{
	uintptr_t* frameptr = NULL, *frameptr_next = NULL;
	uintptr_t retaddr = 0;
	uint32_t frame_index = 0, frames = 0;
	uintptr_t kstackb, kstackt;
	thread_t cthread = current_thread();

	if (__improbable(cthread == NULL))
		return 0;

	kstackb = cthread->kernel_stack;
	kstackt = kstackb + kernel_stack_size;
	/* Load stack frame pointer (EBP on x86) into frameptr */
	frameptr = __builtin_frame_address(0);

	while (frameptr != NULL && frame_index < max_frames ) {
		/* Next frame pointer is pointed to by the previous one */
		frameptr_next = (uintptr_t*) *frameptr;

		/* Bail if we see a zero in the stack frame, that means we've reached the top of the stack */
		/* That also means the return address is worthless, so don't record it */
		if (frameptr_next == NULL)
			break;
		/* Verify thread stack bounds */
		if (((uintptr_t)frameptr_next > kstackt) || ((uintptr_t)frameptr_next < kstackb))
			break;
		/* Pull return address from one spot above the frame pointer */
		retaddr = *(frameptr + 1);

		/* Store it in the backtrace array */
		bt[frame_index++] = retaddr;

		frameptr = frameptr_next;
	}

	/* Save the number of frames captured for return value */
	frames = frame_index;

	/* Fill in the rest of the backtrace with zeros */
	while (frame_index < max_frames)
		bt[frame_index++] = 0;

	return frames;
}
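
/*
 * Minimal usage sketch for the backtrace and hash helpers in this block
 * (editor's addition, mirroring what zalloc_canblock() and zleak_log() do;
 * MAX_ZTRACE_DEPTH comes from the zleak headers):
 *
 *	uintptr_t bt[MAX_ZTRACE_DEPTH];
 *	uint32_t  depth     = fastbacktrace(bt, MAX_ZTRACE_DEPTH);
 *	uint32_t  trace_idx = hashbacktrace(bt, depth, zleak_trace_buckets);
 *	uint32_t  alloc_idx = hashaddr((uintptr_t)addr, zleak_alloc_buckets);
 *
 * Both hash functions assume their max_size argument is a power of two,
 * since they mask rather than take a modulus.
 */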
/*
 * "Thomas Wang's 32/64 bit mix functions."
 * http://www.concentric.net/~Ttwang/tech/inthash.htm
 */
uintptr_t
hash_mix(uintptr_t x)
{
#ifndef __LP64__
	x += ~(x << 15);
	x ^=  (x >> 10);
	x +=  (x << 3 );
	x ^=  (x >> 6 );
	x += ~(x << 11);
	x ^=  (x >> 16);
#else
	x += ~(x << 32);
	x ^=  (x >> 22);
	x += ~(x << 13);
	x ^=  (x >> 8 );
	x +=  (x << 3 );
	x ^=  (x >> 15);
	x += ~(x << 27);
	x ^=  (x >> 31);
#endif
	return x;
}

uint32_t
hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
{

	uintptr_t hash = 0;
	uintptr_t mask = max_size - 1;

	while (depth) {
		hash += bt[--depth];
	}

	hash = hash_mix(hash) & mask;

	assert(hash < max_size);

	return (uint32_t) hash;
}

/*
 * TODO: Determine how well distributed this is
 * max_size must be a power of 2. i.e. 0x10000 because 0x10000-1 is 0x0FFFF which is a great bitmask
 */
uint32_t
hashaddr(uintptr_t pt, uint32_t max_size)
{
	uintptr_t hash = 0;
	uintptr_t mask = max_size - 1;

	hash = hash_mix(pt) & mask;

	assert(hash < max_size);

	return (uint32_t) hash;
}

/* End of all leak-detection code */
#pragma mark -

/*
 *	zinit initializes a new zone.  The zone data structures themselves
 *	are stored in a zone, which is initially a static structure that
 *	is initialized by zone_init.
 */
zone_t
zinit(
	vm_size_t	size,		/* the size of an element */
	vm_size_t	max,		/* maximum memory to use */
	vm_size_t	alloc,		/* allocation size */
	const char	*name)		/* a name for the zone */
{
	zone_t		z;

	if (zone_zone == ZONE_NULL) {

		z = (struct zone *)zdata;
		zdata += sizeof(*z);
		zdata_size -= sizeof(*z);
	} else
		z = (zone_t) zalloc(zone_zone);

	if (z == ZONE_NULL)
		return(ZONE_NULL);

	/*
	 *	Round off all the parameters appropriately.
	 */
	if (size < sizeof(z->free_elements))
		size = sizeof(z->free_elements);
	size = ((size-1) + sizeof(z->free_elements)) -
		((size-1) % sizeof(z->free_elements));
	if (alloc == 0)
		alloc = PAGE_SIZE;
	alloc = round_page(alloc);
	max   = round_page(max);
	/*
	 * we look for an allocation size with less than 1% waste
	 * up to 5 pages in size...
	 * otherwise, we look for an allocation size with least fragmentation
	 * in the range of 1 - 5 pages
	 * This size will be used unless
	 * the user suggestion is larger AND has less fragmentation
	 */
#if	ZONE_ALIAS_ADDR
	if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10))
		alloc = PAGE_SIZE;
	else
#endif
#if	defined(__LP64__)
		if (((alloc % size) != 0) || (alloc > PAGE_SIZE * 8))
#endif
	{
		vm_size_t best, waste; unsigned int i;
		best  = PAGE_SIZE;
		waste = best % size;

		for (i = 1; i <= 5; i++) {
			vm_size_t tsize, twaste;

			tsize = i * PAGE_SIZE;

			if ((tsize % size) < (tsize / 100)) {
				alloc = tsize;
				goto use_this_allocation;
			}
			twaste = tsize % size;
			if (twaste < waste)
				best = tsize, waste = twaste;
		}
		if (alloc <= best || (alloc % size >= waste))
			alloc = best;
	}
use_this_allocation:
	if (max && (max < alloc))
		max = alloc;

	z->free_elements = 0;
	z->cur_size = 0;
	z->max_size = max;
	z->elem_size = size;
	z->alloc_size = alloc;
	z->zone_name = name;
	z->count = 0;
	z->sum_count = 0LL;
	z->doing_alloc = FALSE;
	z->doing_gc = FALSE;
	z->exhaustible = FALSE;
	z->collectable = TRUE;
	z->allows_foreign = FALSE;
	z->expandable  = TRUE;
	z->waiting = FALSE;
	z->async_pending = FALSE;
	z->caller_acct = TRUE;
	z->noencrypt = FALSE;
	z->no_callout = FALSE;
	z->async_prio_refill = FALSE;
	z->gzalloc_exempt = FALSE;
	z->alignment_required = FALSE;
	z->prio_refill_watermark = 0;
	z->zone_replenish_thread = NULL;
#if CONFIG_ZLEAKS
	z->num_allocs = 0;
	z->num_frees = 0;
	z->zleak_capture = 0;
	z->zleak_on = FALSE;
#endif /* CONFIG_ZLEAKS */

#if	ZONE_DEBUG
	z->active_zones.next = z->active_zones.prev = NULL;
	zone_debug_enable(z);
#endif	/* ZONE_DEBUG */
	lock_zone_init(z);

	/*
	 *	Add the zone to the all-zones list.
	 *	If we are tracking zone info per task, and we have
	 *	already used all the available stat slots, then keep
	 *	using the overflow zone slot.
	 */
	z->next_zone = ZONE_NULL;
	thread_call_setup(&z->call_async_alloc, zalloc_async, z);
	simple_lock(&all_zones_lock);
	*last_zone = z;
	last_zone = &z->next_zone;
	z->index = num_zones;
	if (zinfo_per_task) {
		if (num_zones > ZONES_MAX)
			z->index = ZONES_MAX;
	}
	num_zones++;
	simple_unlock(&all_zones_lock);

	/*
	 * Check if we should be logging this zone.  If so, remember the zone pointer.
	 */
	if (log_this_zone(z->zone_name, zone_name_to_log)) {
		zone_of_interest = z;
	}

	/*
	 * If we want to log a zone, see if we need to allocate buffer space for the log.  Some vm related zones are
	 * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case.  zlog_ready is set to
	 * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work.  If we want to log one
	 * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
	 * later on some other zone.  So note we may be allocating a buffer to log a zone other than the one being initialized
	 * right now.
	 */
	if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {

			/*
			 * We got the memory for the log.  Zero it out since the code needs this to identify unused records.
			 * At this point, everything is set up and we're ready to start logging this zone.
			 */

			bzero((void *)zrecords, log_records * sizeof(struct zrecord));
			printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);

		} else {
			printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
			zone_of_interest = NULL;
		}
	}
#if	CONFIG_GZALLOC
	gzalloc_zone_init(z);
#endif
	return(z);
}
unsigned	zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated;

static void zone_replenish_thread(zone_t);

/* High priority VM privileged thread used to asynchronously refill a designated
 * zone, such as the reserved VM map entry zone.
 */
static void zone_replenish_thread(zone_t z) {
	vm_size_t free_size;
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
		lock_zone(z);
		assert(z->prio_refill_watermark != 0);
		while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) {
			assert(z->doing_alloc == FALSE);
			assert(z->async_prio_refill == TRUE);

			unlock_zone(z);
			int	zflags = KMA_KOBJECT|KMA_NOPAGEWAIT;
			vm_offset_t space, alloc_size;
			kern_return_t kr;

			if (vm_pool_low())
				alloc_size = round_page(z->elem_size);
			else
				alloc_size = z->alloc_size;

			if (z->noencrypt)
				zflags |= KMA_NOENCRYPT;

			kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags);

			if (kr == KERN_SUCCESS) {
#if	ZONE_ALIAS_ADDR
				if (alloc_size == PAGE_SIZE)
					space = zone_alias_addr(space);
#endif
				zcram(z, space, alloc_size);
			} else if (kr == KERN_RESOURCE_SHORTAGE) {
				VM_PAGE_WAIT();
			} else if (kr == KERN_NO_SPACE) {
				kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags);
				if (kr == KERN_SUCCESS) {
#if	ZONE_ALIAS_ADDR
					if (alloc_size == PAGE_SIZE)
						space = zone_alias_addr(space);
#endif
					zcram(z, space, alloc_size);
				} else {
					assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC);
					thread_block(THREAD_CONTINUE_NULL);
				}
			}

			lock_zone(z);
			zone_replenish_loops++;
		}

		unlock_zone(z);
		assert_wait(&z->zone_replenish_thread, THREAD_UNINT);
		thread_block(THREAD_CONTINUE_NULL);
		zone_replenish_wakeups++;
	}
}

void
zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) {
	z->prio_refill_watermark = low_water_mark;

	z->async_prio_refill = TRUE;
	OSMemoryBarrier();
	kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread);

	if (tres != KERN_SUCCESS) {
		panic("zone_prio_refill_configure, thread create: 0x%x", tres);
	}

	thread_deallocate(z->zone_replenish_thread);
}
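
/*
 * Typical client usage of the interfaces above (editor's sketch; the zone name,
 * element type and watermark value are hypothetical, not taken from this file):
 *
 *	static zone_t my_widget_zone;
 *
 *	my_widget_zone = zinit(sizeof(struct my_widget),
 *	                       1024 * sizeof(struct my_widget),
 *	                       PAGE_SIZE, "my widgets");
 *	zone_change(my_widget_zone, Z_NOENCRYPT, TRUE);
 *	zone_prio_refill_configure(my_widget_zone, 32);
 *
 * The second argument is the maximum memory the zone may use and the third the
 * preferred allocation chunk; zone_prio_refill_configure() is only needed for
 * zones that must be refilled asynchronously by the replenish thread.
 */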
/*
 *	Cram the given memory into the specified zone.
 */
void
zcram(
	zone_t		zone,
	vm_offset_t	newmem,
	vm_size_t	size)
{
	vm_size_t	elem_size;
	boolean_t	from_zm = FALSE;

	/* Basic sanity checks */
	assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
	assert(!zone->collectable || zone->allows_foreign
	       || (from_zone_map(newmem, size)));

	elem_size = zone->elem_size;

	if (from_zone_map(newmem, size))
		from_zm = TRUE;

	if (from_zm)
		zone_page_init(newmem, size);

	lock_zone(zone);
	while (size >= elem_size) {
		free_to_zone(zone, (void *) newmem);
		if (from_zm)
			zone_page_alloc(newmem, elem_size);
		zone->count++;	/* compensate for free_to_zone */
		size -= elem_size;
		newmem += elem_size;
		zone->cur_size += elem_size;
	}
	unlock_zone(zone);
}


/*
 *	Steal memory for the zone package.  Called from
 *	vm_page_bootstrap().
 */
void
zone_steal_memory(void)
{
#if	CONFIG_GZALLOC
	gzalloc_configure();
#endif
	/* Request enough early memory to get to the pmap zone */
	zdata_size = 12 * sizeof(struct zone);
	zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size));
}


/*
 * Fill a zone with enough memory to contain at least nelem elements.
 * Memory is obtained with kmem_alloc_kobject from the kernel_map.
 * Return the number of elements actually put into the zone, which may
 * be more than the caller asked for since the memory allocation is
 * rounded up to a full page.
 */
int
zfill(
	zone_t	zone,
	int	nelem)
{
	kern_return_t	kr;
	vm_size_t	size;
	vm_offset_t	memory;
	int		nalloc;

	assert(nelem > 0);
	if (nelem <= 0)
		return 0;
	size = nelem * zone->elem_size;
	size = round_page(size);
	kr = kmem_alloc_kobject(kernel_map, &memory, size);
	if (kr != KERN_SUCCESS)
		return 0;

	zone_change(zone, Z_FOREIGN, TRUE);
	zcram(zone, memory, size);
	nalloc = (int)(size / zone->elem_size);
	assert(nalloc >= nelem);

	return nalloc;
}

/*
 * Initialize the "zone of zones" which uses fixed memory allocated
 * earlier in memory initialization.  zone_bootstrap is called
 * before zone_init.
 */
void
zone_bootstrap(void)
{
	char temp_buf[16];

	if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof(temp_buf))) {
		zinfo_per_task = TRUE;
	}

	/* do we want corruption-style debugging with zlog? */
	if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) {
		corruption_debug_flag = TRUE;
	}

	/* Debug */
#if 0
	corruption_debug_flag = TRUE;
	log_records = ZRECORDS_DEFAULT;
	strcpy(zone_name_to_log, "kalloc.512");
#endif

	/* Set up zone poisoning */

	free_check_sample_factor = ZP_DEFAULT_SAMPLING_FACTOR;

	/* support for old zone poisoning boot-args */
	if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
		free_check_sample_factor = 1;
	}
	if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
		free_check_sample_factor = 0;
	}

	/* zp-factor=XXXX (override how often to poison freed zone elements) */
	if (PE_parse_boot_argn("zp-factor", &free_check_sample_factor, sizeof(free_check_sample_factor))) {
		printf("Zone poisoning factor override:%u\n", free_check_sample_factor);
	}

	/*
	 * Check for and set up zone leak detection if requested via boot-args.  We recognize two
	 * boot-args:
	 *
	 *	zlog=<zone_to_log>
	 *	zrecs=<num_records_in_log>
	 *
	 * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
	 * control the size of the log.  If zrecs is not specified, a default value is used.
	 */

	if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
		if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {

			/*
			 * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
			 * This prevents accidentally hogging too much kernel memory and making the system
			 * unusable.
			 */

			log_records = MIN(ZRECORDS_MAX, log_records);

		} else {
			log_records = ZRECORDS_DEFAULT;
		}
	}

	simple_lock_init(&all_zones_lock, 0);

	first_zone = ZONE_NULL;
	last_zone = &first_zone;
	num_zones = 0;

	/* assertion: nobody else called zinit before us */
	assert(zone_zone == ZONE_NULL);
	zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
			  sizeof(struct zone), "zones");
	zone_change(zone_zone, Z_COLLECT, FALSE);
	zone_change(zone_zone, Z_CALLERACCT, FALSE);
	zone_change(zone_zone, Z_NOENCRYPT, TRUE);

	zcram(zone_zone, zdata, zdata_size);

	/* initialize fake zones and zone info if tracking by task */
	if (zinfo_per_task) {
		vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
		unsigned int i;

		for (i = 0; i < num_fake_zones; i++)
			fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
		zinfo_zone = zinit(zisize, zisize * CONFIG_TASK_MAX,
				   zisize, "per task zinfo");
		zone_change(zinfo_zone, Z_CALLERACCT, FALSE);
	}
}

void
zinfo_task_init(task_t task)
{
	if (zinfo_per_task) {
		task->tkm_zinfo = zalloc(zinfo_zone);
		memset(task->tkm_zinfo, 0, sizeof(zinfo_usage_store_t) * ZINFO_SLOTS);
	} else {
		task->tkm_zinfo = NULL;
	}
}

void
zinfo_task_free(task_t task)
{
	assert(task != kernel_task);
	if (task->tkm_zinfo != NULL) {
		zfree(zinfo_zone, task->tkm_zinfo);
		task->tkm_zinfo = NULL;
	}
}

void
zone_init(
	vm_size_t max_zonemap_size)
{
	kern_return_t	retval;
	vm_offset_t	zone_min;
	vm_offset_t	zone_max;

	retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size,
			       FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT,
			       &zone_map);

	if (retval != KERN_SUCCESS)
		panic("zone_init: kmem_suballoc failed");
	zone_max = zone_min + round_page(max_zonemap_size);
#if	CONFIG_GZALLOC
	gzalloc_init(max_zonemap_size);
#endif
	/*
	 * Setup garbage collection information:
	 */
	zone_map_min_address = zone_min;
	zone_map_max_address = zone_max;

	zone_pages = (unsigned int)atop_kernel(zone_max - zone_min);
	zone_page_table_used_size = sizeof(zone_page_table);

	zone_page_table_second_level_size = 1;
	zone_page_table_second_level_shift_amount = 0;

	/*
	 * Find the power of 2 for the second level that allows
	 * the first level to fit in ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE
	 * slots.
	 */
	while ((zone_page_table_first_level_slot(zone_pages-1)) >= ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE) {
		zone_page_table_second_level_size <<= 1;
		zone_page_table_second_level_shift_amount++;
	}

	lck_grp_attr_setdefault(&zone_lck_grp_attr);
	lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr);
	lck_attr_setdefault(&zone_lck_attr);
	lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr);

#if CONFIG_ZLEAKS
	/*
	 * Initialize the zone leak monitor
	 */
	zleak_init(max_zonemap_size);
#endif /* CONFIG_ZLEAKS */
}

void
zone_page_table_expand(zone_page_index_t pindex)
{
	unsigned int first_index;
	struct zone_page_table_entry * volatile * first_level_ptr;

	assert(pindex < zone_pages);

	first_index = zone_page_table_first_level_slot(pindex);
	first_level_ptr = &zone_page_table[first_index];

	if (*first_level_ptr == NULL) {
		/*
		 * We were able to verify the old first-level slot
		 * had NULL, so attempt to populate it.
		 */

		vm_offset_t second_level_array = 0;
		vm_size_t second_level_size = round_page(zone_page_table_second_level_size * sizeof(struct zone_page_table_entry));
		zone_page_index_t i;
		struct zone_page_table_entry *entry_array;

		if (kmem_alloc_kobject(zone_map, &second_level_array,
				       second_level_size) != KERN_SUCCESS) {
			panic("zone_page_table_expand");
		}

		/*
		 * zone_gc() may scan the "zone_page_table" directly,
		 * so make sure any slots have a valid unused state.
		 */
		entry_array = (struct zone_page_table_entry *)second_level_array;
		for (i=0; i < zone_page_table_second_level_size; i++) {
			entry_array[i].alloc_count = ZONE_PAGE_UNUSED;
			entry_array[i].collect_count = 0;
		}

		if (OSCompareAndSwapPtr(NULL, entry_array, first_level_ptr)) {
			/* Old slot was NULL, replaced with expanded level */
			OSAddAtomicLong(second_level_size, &zone_page_table_used_size);
		} else {
			/* Old slot was not NULL, someone else expanded first */
			kmem_free(zone_map, second_level_array, second_level_size);
		}
	} else {
		/* Old slot was not NULL, already been expanded */
	}
}

struct zone_page_table_entry *
zone_page_table_lookup(zone_page_index_t pindex)
{
	unsigned int first_index = zone_page_table_first_level_slot(pindex);
	struct zone_page_table_entry *second_level = zone_page_table[first_index];

	if (second_level) {
		return &second_level[zone_page_table_second_level_slot(pindex)];
	}

	return NULL;
}

extern volatile SInt32 kfree_nop_count;

#pragma mark -
#pragma mark zalloc_canblock

/*
 *	zalloc returns an element from the specified zone.
 */
void *
zalloc_canblock(
	register zone_t	zone,
	boolean_t canblock)
{
	vm_offset_t	addr = 0;
	kern_return_t	retval;
	uintptr_t	zbt[MAX_ZTRACE_DEPTH];	/* used in zone leak logging and zone leak detection */
	int		numsaved = 0;
	int		i;
	boolean_t	zone_replenish_wakeup = FALSE;
	boolean_t	did_gzalloc;

	did_gzalloc = FALSE;
#if	CONFIG_ZLEAKS
	uint32_t	zleak_tracedepth = 0;	/* log this allocation if nonzero */
#endif /* CONFIG_ZLEAKS */

	assert(zone != ZONE_NULL);

#if	CONFIG_GZALLOC
	addr = gzalloc_alloc(zone, canblock);
	did_gzalloc = (addr != 0);
#endif

	lock_zone(zone);

	/*
	 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
	 */

	if (DO_LOGGING(zone))
		numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);

#if CONFIG_ZLEAKS
	/*
	 * Zone leak detection: capture a backtrace every zleak_sample_factor
	 * allocations in this zone.
	 */
	if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) {
		zone->zleak_capture = 1;

		/* Avoid backtracing twice if zone logging is on */
		if (numsaved == 0 )
			zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
		else
			zleak_tracedepth = numsaved;
	}
#endif /* CONFIG_ZLEAKS */

	if (__probable(addr == 0))
		alloc_from_zone(zone, (void **) &addr);

	if (zone->async_prio_refill &&
	    ((zone->cur_size - (zone->count * zone->elem_size)) <
	     (zone->prio_refill_watermark * zone->elem_size))) {
		zone_replenish_wakeup = TRUE;
		zone_replenish_wakeups_initiated++;
	}

	while ((addr == 0) && canblock) {
		/*
		 *	If nothing was there, try to get more
		 */
		if (zone->doing_alloc) {
			/*
			 *	Someone is allocating memory for this zone.
			 *	Wait for it to show up, then try again.
			 */
			zone->waiting = TRUE;
			zone_sleep(zone);
		} else if (zone->doing_gc) {
			/* zone_gc() is running. Since we need an element
			 * from the free list that is currently being
			 * collected, set the waiting bit and try to
			 * interrupt the GC process, and try again
			 * when we obtain the lock.
1736 */ 1737 zone->waiting = TRUE; 1738 zone_sleep(zone); 1739 } else { 1740 vm_offset_t space; 1741 vm_size_t alloc_size; 1742 int retry = 0; 1743 1744 if ((zone->cur_size + zone->elem_size) > 1745 zone->max_size) { 1746 if (zone->exhaustible) 1747 break; 1748 if (zone->expandable) { 1749 /* 1750 * We're willing to overflow certain 1751 * zones, but not without complaining. 1752 * 1753 * This is best used in conjunction 1754 * with the collectable flag. What we 1755 * want is an assurance we can get the 1756 * memory back, assuming there's no 1757 * leak. 1758 */ 1759 zone->max_size += (zone->max_size >> 1); 1760 } else { 1761 unlock_zone(zone); 1762 1763 panic_include_zprint = TRUE; 1764#if CONFIG_ZLEAKS 1765 if (zleak_state & ZLEAK_STATE_ACTIVE) 1766 panic_include_ztrace = TRUE; 1767#endif /* CONFIG_ZLEAKS */ 1768 panic("zalloc: zone \"%s\" empty.", zone->zone_name); 1769 } 1770 } 1771 zone->doing_alloc = TRUE; 1772 unlock_zone(zone); 1773 1774 for (;;) { 1775 int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; 1776 1777 if (vm_pool_low() || retry >= 1) 1778 alloc_size = 1779 round_page(zone->elem_size); 1780 else 1781 alloc_size = zone->alloc_size; 1782 1783 if (zone->noencrypt) 1784 zflags |= KMA_NOENCRYPT; 1785 1786 retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); 1787 if (retval == KERN_SUCCESS) { 1788#if ZONE_ALIAS_ADDR 1789 if (alloc_size == PAGE_SIZE) 1790 space = zone_alias_addr(space); 1791#endif 1792 1793#if CONFIG_ZLEAKS 1794 if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) { 1795 if (zone_map->size >= zleak_global_tracking_threshold) { 1796 kern_return_t kr; 1797 1798 kr = zleak_activate(); 1799 if (kr != KERN_SUCCESS) { 1800 printf("Failed to activate live zone leak debugging (%d).\n", kr); 1801 } 1802 } 1803 } 1804 1805 if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) { 1806 if (zone->cur_size > zleak_per_zone_tracking_threshold) { 1807 zone->zleak_on = TRUE; 1808 } 1809 } 1810#endif /* CONFIG_ZLEAKS */ 1811 1812 zcram(zone, space, alloc_size); 1813 1814 break; 1815 } else if (retval != KERN_RESOURCE_SHORTAGE) { 1816 retry++; 1817 1818 if (retry == 2) { 1819 zone_gc(TRUE); 1820 printf("zalloc did gc\n"); 1821 zone_display_zprint(); 1822 } 1823 if (retry == 3) { 1824 panic_include_zprint = TRUE; 1825#if CONFIG_ZLEAKS 1826 if ((zleak_state & ZLEAK_STATE_ACTIVE)) { 1827 panic_include_ztrace = TRUE; 1828 } 1829#endif /* CONFIG_ZLEAKS */ 1830 /* TODO: Change this to something more descriptive, perhaps 1831 * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE). 1832 */ 1833 panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); 1834 } 1835 } else { 1836 break; 1837 } 1838 } 1839 lock_zone(zone); 1840 zone->doing_alloc = FALSE; 1841 if (zone->waiting) { 1842 zone->waiting = FALSE; 1843 zone_wakeup(zone); 1844 } 1845 alloc_from_zone(zone, (void **) &addr); 1846 if (addr == 0 && 1847 retval == KERN_RESOURCE_SHORTAGE) { 1848 unlock_zone(zone); 1849 1850 VM_PAGE_WAIT(); 1851 lock_zone(zone); 1852 } 1853 } 1854 if (addr == 0) 1855 alloc_from_zone(zone, (void **) &addr); 1856 } 1857 1858#if CONFIG_ZLEAKS 1859 /* Zone leak detection: 1860 * If we're sampling this allocation, add it to the zleaks hash table. 1861 */ 1862 if (addr && zleak_tracedepth > 0) { 1863 /* Sampling can fail if another sample is happening at the same time in a different zone. 
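 * If zleak_log() fails we reset zone->zleak_capture to zleak_sample_factor,
 * which makes the very next allocation from this zone get sampled instead.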
*/ 1864 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) { 1865 /* If it failed, roll back the counter so we sample the next allocation instead. */ 1866 zone->zleak_capture = zleak_sample_factor; 1867 } 1868 } 1869#endif /* CONFIG_ZLEAKS */ 1870 1871 1872 /* 1873 * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is 1874 * suspected, so this code rarely executes. We need to do this code while still holding the zone lock 1875 * since it protects the various log related data structures. 1876 */ 1877 1878 if (DO_LOGGING(zone) && addr) { 1879 1880 /* 1881 * Look for a place to record this new allocation. We implement two different logging strategies 1882 * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking 1883 * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker 1884 * among all the records. So we look for an unused slot in the log and fill that in before overwriting 1885 * an old entry. When looking for a corruption however, it's better to have a chronological log of all 1886 * the allocations and frees done in the zone so that the history of operations for a specific zone 1887 * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the 1888 * oldest entry whenever a new one needs to be added. 1889 * 1890 * The corruption_debug_flag flag tells us what style of logging to do. It's set if we're supposed to be 1891 * doing corruption style logging (indicated via -zc in the boot-args). 1892 */ 1893 1894 if (!corruption_debug_flag && zrecords[zcurrent].z_element && zrecorded < log_records) { 1895 1896 /* 1897 * If we get here, we're doing leak style logging and there's still some unused entries in 1898 * the log (since zrecorded is smaller than the size of the log). Look for an unused slot 1899 * starting at zcurrent and wrap-around if we reach the end of the buffer. If the buffer 1900 * is already full, we just fall through and overwrite the element indexed by zcurrent. 
1901 */ 1902 1903 for (i = zcurrent; i < log_records; i++) { 1904 if (zrecords[i].z_element == NULL) { 1905 zcurrent = i; 1906 goto empty_slot; 1907 } 1908 } 1909 1910 for (i = 0; i < zcurrent; i++) { 1911 if (zrecords[i].z_element == NULL) { 1912 zcurrent = i; 1913 goto empty_slot; 1914 } 1915 } 1916 } 1917 1918 /* 1919 * Save a record of this allocation 1920 */ 1921 1922empty_slot: 1923 if (zrecords[zcurrent].z_element == NULL) 1924 zrecorded++; 1925 1926 zrecords[zcurrent].z_element = (void *)addr; 1927 zrecords[zcurrent].z_time = ztime++; 1928 zrecords[zcurrent].z_opcode = ZOP_ALLOC; 1929 1930 for (i = 0; i < numsaved; i++) 1931 zrecords[zcurrent].z_pc[i] = (void*) zbt[i]; 1932 1933 for (; i < MAX_ZTRACE_DEPTH; i++) 1934 zrecords[zcurrent].z_pc[i] = 0; 1935 1936 zcurrent++; 1937 1938 if (zcurrent >= log_records) 1939 zcurrent = 0; 1940 } 1941 1942 if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { 1943 zone->async_pending = TRUE; 1944 unlock_zone(zone); 1945 thread_call_enter(&zone->call_async_alloc); 1946 lock_zone(zone); 1947 alloc_from_zone(zone, (void **) &addr); 1948 } 1949 1950#if ZONE_DEBUG 1951 if (!did_gzalloc && addr && zone_debug_enabled(zone)) { 1952 enqueue_tail(&zone->active_zones, (queue_entry_t)addr); 1953 addr += ZONE_DEBUG_OFFSET; 1954 } 1955#endif 1956 1957#if CONFIG_ZLEAKS 1958 if (addr != 0) { 1959 zone->num_allocs++; 1960 } 1961#endif /* CONFIG_ZLEAKS */ 1962 1963 unlock_zone(zone); 1964 1965 if (zone_replenish_wakeup) 1966 thread_wakeup(&zone->zone_replenish_thread); 1967 1968 TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); 1969 1970 if (addr) { 1971 thread_t thr = current_thread(); 1972 task_t task; 1973 zinfo_usage_t zinfo; 1974 vm_size_t sz = zone->elem_size; 1975 1976 if (zone->caller_acct) 1977 ledger_credit(thr->t_ledger, task_ledgers.tkm_private, sz); 1978 else 1979 ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, sz); 1980 1981 if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) 1982 OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].alloc); 1983 } 1984 return((void *)addr); 1985} 1986 1987 1988void * 1989zalloc( 1990 register zone_t zone) 1991{ 1992 return( zalloc_canblock(zone, TRUE) ); 1993} 1994 1995void * 1996zalloc_noblock( 1997 register zone_t zone) 1998{ 1999 return( zalloc_canblock(zone, FALSE) ); 2000} 2001 2002void 2003zalloc_async( 2004 thread_call_param_t p0, 2005 __unused thread_call_param_t p1) 2006{ 2007 void *elt; 2008 2009 elt = zalloc_canblock((zone_t)p0, TRUE); 2010 zfree((zone_t)p0, elt); 2011 lock_zone(((zone_t)p0)); 2012 ((zone_t)p0)->async_pending = FALSE; 2013 unlock_zone(((zone_t)p0)); 2014} 2015 2016/* 2017 * zget returns an element from the specified zone 2018 * and immediately returns nothing if there is nothing there. 2019 * 2020 * This form should be used when you can not block (like when 2021 * processing an interrupt). 2022 * 2023 * XXX: It seems like only vm_page_grab_fictitious_common uses this, and its 2024 * friend vm_page_more_fictitious can block, so it doesn't seem like 2025 * this is used for interrupts any more.... 
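 * Note that zget() only uses lock_try_zone() and never attempts to grow the
 * zone, so callers must be prepared for a NULL return even when memory is
 * available.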
2026 */ 2027void * 2028zget( 2029 register zone_t zone) 2030{ 2031 vm_offset_t addr; 2032 2033#if CONFIG_ZLEAKS 2034 uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used for zone leak detection */ 2035 uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ 2036#endif /* CONFIG_ZLEAKS */ 2037 2038 assert( zone != ZONE_NULL ); 2039 2040 if (!lock_try_zone(zone)) 2041 return NULL; 2042 2043#if CONFIG_ZLEAKS 2044 /* 2045 * Zone leak detection: capture a backtrace 2046 */ 2047 if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) { 2048 zone->zleak_capture = 1; 2049 zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); 2050 } 2051#endif /* CONFIG_ZLEAKS */ 2052 2053 alloc_from_zone(zone, (void **) &addr); 2054#if ZONE_DEBUG 2055 if (addr && zone_debug_enabled(zone)) { 2056 enqueue_tail(&zone->active_zones, (queue_entry_t)addr); 2057 addr += ZONE_DEBUG_OFFSET; 2058 } 2059#endif /* ZONE_DEBUG */ 2060 2061#if CONFIG_ZLEAKS 2062 /* 2063 * Zone leak detection: record the allocation 2064 */ 2065 if (zone->zleak_on && zleak_tracedepth > 0 && addr) { 2066 /* Sampling can fail if another sample is happening at the same time in a different zone. */ 2067 if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) { 2068 /* If it failed, roll back the counter so we sample the next allocation instead. */ 2069 zone->zleak_capture = zleak_sample_factor; 2070 } 2071 } 2072 2073 if (addr != 0) { 2074 zone->num_allocs++; 2075 } 2076#endif /* CONFIG_ZLEAKS */ 2077 2078 unlock_zone(zone); 2079 2080 return((void *) addr); 2081} 2082 2083/* Keep this FALSE by default. Large memory machine run orders of magnitude 2084 slower in debug mode when true. Use debugger to enable if needed */ 2085/* static */ boolean_t zone_check = FALSE; 2086 2087static zone_t zone_last_bogus_zone = ZONE_NULL; 2088static vm_offset_t zone_last_bogus_elem = 0; 2089 2090void 2091zfree( 2092 register zone_t zone, 2093 void *addr) 2094{ 2095 vm_offset_t elem = (vm_offset_t) addr; 2096 void *zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ 2097 int numsaved = 0; 2098 boolean_t gzfreed = FALSE; 2099 2100 assert(zone != ZONE_NULL); 2101 2102 /* 2103 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. 2104 */ 2105 2106 if (DO_LOGGING(zone)) 2107 numsaved = OSBacktrace(&zbt[0], MAX_ZTRACE_DEPTH); 2108 2109#if MACH_ASSERT 2110 /* Basic sanity checks */ 2111 if (zone == ZONE_NULL || elem == (vm_offset_t)0) 2112 panic("zfree: NULL"); 2113 /* zone_gc assumes zones are never freed */ 2114 if (zone == zone_zone) 2115 panic("zfree: freeing to zone_zone breaks zone_gc!"); 2116#endif 2117 2118#if CONFIG_GZALLOC 2119 gzfreed = gzalloc_free(zone, addr); 2120#endif 2121 2122 TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr); 2123 2124 if (__improbable(!gzfreed && zone->collectable && !zone->allows_foreign && 2125 !from_zone_map(elem, zone->elem_size))) { 2126#if MACH_ASSERT 2127 panic("zfree: non-allocated memory in collectable zone!"); 2128#endif 2129 zone_last_bogus_zone = zone; 2130 zone_last_bogus_elem = elem; 2131 return; 2132 } 2133 2134 lock_zone(zone); 2135 2136 /* 2137 * See if we're doing logging on this zone. There are two styles of logging used depending on 2138 * whether we're trying to catch a leak or corruption. See comments above in zalloc for details. 2139 */ 2140 2141 if (DO_LOGGING(zone)) { 2142 int i; 2143 2144 if (corruption_debug_flag) { 2145 2146 /* 2147 * We're logging to catch a corruption. 
Add a record of this zfree operation 2148 * to log. 2149 */ 2150 2151 if (zrecords[zcurrent].z_element == NULL) 2152 zrecorded++; 2153 2154 zrecords[zcurrent].z_element = (void *)addr; 2155 zrecords[zcurrent].z_time = ztime++; 2156 zrecords[zcurrent].z_opcode = ZOP_FREE; 2157 2158 for (i = 0; i < numsaved; i++) 2159 zrecords[zcurrent].z_pc[i] = zbt[i]; 2160 2161 for (; i < MAX_ZTRACE_DEPTH; i++) 2162 zrecords[zcurrent].z_pc[i] = 0; 2163 2164 zcurrent++; 2165 2166 if (zcurrent >= log_records) 2167 zcurrent = 0; 2168 2169 } else { 2170 2171 /* 2172 * We're logging to catch a leak. Remove any record we might have for this 2173 * element since it's being freed. Note that we may not find it if the buffer 2174 * overflowed and that's OK. Since the log is of a limited size, old records 2175 * get overwritten if there are more zallocs than zfrees. 2176 */ 2177 2178 for (i = 0; i < log_records; i++) { 2179 if (zrecords[i].z_element == addr) { 2180 zrecords[i].z_element = NULL; 2181 zcurrent = i; 2182 zrecorded--; 2183 break; 2184 } 2185 } 2186 } 2187 } 2188 2189 2190#if ZONE_DEBUG 2191 if (!gzfreed && zone_debug_enabled(zone)) { 2192 queue_t tmp_elem; 2193 2194 elem -= ZONE_DEBUG_OFFSET; 2195 if (zone_check) { 2196 /* check the zone's consistency */ 2197 2198 for (tmp_elem = queue_first(&zone->active_zones); 2199 !queue_end(tmp_elem, &zone->active_zones); 2200 tmp_elem = queue_next(tmp_elem)) 2201 if (elem == (vm_offset_t)tmp_elem) 2202 break; 2203 if (elem != (vm_offset_t)tmp_elem) 2204 panic("zfree()ing element from wrong zone"); 2205 } 2206 remqueue((queue_t) elem); 2207 } 2208#endif /* ZONE_DEBUG */ 2209 if (zone_check) { 2210 vm_offset_t this; 2211 2212 /* check the zone's consistency */ 2213 2214 for (this = zone->free_elements; 2215 this != 0; 2216 this = * (vm_offset_t *) this) 2217 if (!pmap_kernel_va(this) || this == elem) 2218 panic("zfree"); 2219 } 2220 2221 if (__probable(!gzfreed)) 2222 free_to_zone(zone, (void *) elem); 2223 2224#if MACH_ASSERT 2225 if (zone->count < 0) 2226 panic("zfree: count < 0!"); 2227#endif 2228 2229 2230#if CONFIG_ZLEAKS 2231 zone->num_frees++; 2232 2233 /* 2234 * Zone leak detection: un-track the allocation 2235 */ 2236 if (zone->zleak_on) { 2237 zleak_free(elem, zone->elem_size); 2238 } 2239#endif /* CONFIG_ZLEAKS */ 2240 2241 /* 2242 * If elements have one or more pages, and memory is low, 2243 * request to run the garbage collection in the zone the next 2244 * time the pageout thread runs. 2245 */ 2246 if (zone->elem_size >= PAGE_SIZE && 2247 vm_pool_low()){ 2248 zone_gc_forced = TRUE; 2249 } 2250 unlock_zone(zone); 2251 2252 { 2253 thread_t thr = current_thread(); 2254 task_t task; 2255 zinfo_usage_t zinfo; 2256 vm_size_t sz = zone->elem_size; 2257 2258 if (zone->caller_acct) 2259 ledger_debit(thr->t_ledger, task_ledgers.tkm_private, sz); 2260 else 2261 ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, sz); 2262 2263 if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL) 2264 OSAddAtomic64(sz, (int64_t *)&zinfo[zone->index].free); 2265 } 2266} 2267 2268 2269/* Change a zone's flags. 2270 * This routine must be called immediately after zinit. 
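 * For example, zone_change(z, Z_NOENCRYPT, TRUE) sets z->noencrypt, which in
 * turn makes zalloc_canblock() request the zone's backing pages with
 * KMA_NOENCRYPT.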
2271 */ 2272void 2273zone_change( 2274 zone_t zone, 2275 unsigned int item, 2276 boolean_t value) 2277{ 2278 assert( zone != ZONE_NULL ); 2279 assert( value == TRUE || value == FALSE ); 2280 2281 switch(item){ 2282 case Z_NOENCRYPT: 2283 zone->noencrypt = value; 2284 break; 2285 case Z_EXHAUST: 2286 zone->exhaustible = value; 2287 break; 2288 case Z_COLLECT: 2289 zone->collectable = value; 2290 break; 2291 case Z_EXPAND: 2292 zone->expandable = value; 2293 break; 2294 case Z_FOREIGN: 2295 zone->allows_foreign = value; 2296 break; 2297 case Z_CALLERACCT: 2298 zone->caller_acct = value; 2299 break; 2300 case Z_NOCALLOUT: 2301 zone->no_callout = value; 2302 break; 2303 case Z_GZALLOC_EXEMPT: 2304 zone->gzalloc_exempt = value; 2305#if CONFIG_GZALLOC 2306 gzalloc_reconfigure(zone); 2307#endif 2308 break; 2309 case Z_ALIGNMENT_REQUIRED: 2310 zone->alignment_required = value; 2311#if ZONE_DEBUG 2312 zone_debug_disable(zone); 2313#endif 2314#if CONFIG_GZALLOC 2315 gzalloc_reconfigure(zone); 2316#endif 2317 break; 2318 default: 2319 panic("Zone_change: Wrong Item Type!"); 2320 /* break; */ 2321 } 2322} 2323 2324/* 2325 * Return the expected number of free elements in the zone. 2326 * This calculation will be incorrect if items are zfree'd that 2327 * were never zalloc'd/zget'd. The correct way to stuff memory 2328 * into a zone is by zcram. 2329 */ 2330 2331integer_t 2332zone_free_count(zone_t zone) 2333{ 2334 integer_t free_count; 2335 2336 lock_zone(zone); 2337 free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count); 2338 unlock_zone(zone); 2339 2340 assert(free_count >= 0); 2341 2342 return(free_count); 2343} 2344 2345/* 2346 * Zone garbage collection subroutines 2347 */ 2348 2349boolean_t 2350zone_page_collectable( 2351 vm_offset_t addr, 2352 vm_size_t size) 2353{ 2354 struct zone_page_table_entry *zp; 2355 zone_page_index_t i, j; 2356 2357#if ZONE_ALIAS_ADDR 2358 addr = zone_virtual_addr(addr); 2359#endif 2360#if MACH_ASSERT 2361 if (!from_zone_map(addr, size)) 2362 panic("zone_page_collectable"); 2363#endif 2364 2365 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2366 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2367 2368 for (; i <= j; i++) { 2369 zp = zone_page_table_lookup(i); 2370 if (zp->collect_count == zp->alloc_count) 2371 return (TRUE); 2372 } 2373 2374 return (FALSE); 2375} 2376 2377void 2378zone_page_keep( 2379 vm_offset_t addr, 2380 vm_size_t size) 2381{ 2382 struct zone_page_table_entry *zp; 2383 zone_page_index_t i, j; 2384 2385#if ZONE_ALIAS_ADDR 2386 addr = zone_virtual_addr(addr); 2387#endif 2388#if MACH_ASSERT 2389 if (!from_zone_map(addr, size)) 2390 panic("zone_page_keep"); 2391#endif 2392 2393 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2394 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2395 2396 for (; i <= j; i++) { 2397 zp = zone_page_table_lookup(i); 2398 zp->collect_count = 0; 2399 } 2400} 2401 2402void 2403zone_page_collect( 2404 vm_offset_t addr, 2405 vm_size_t size) 2406{ 2407 struct zone_page_table_entry *zp; 2408 zone_page_index_t i, j; 2409 2410#if ZONE_ALIAS_ADDR 2411 addr = zone_virtual_addr(addr); 2412#endif 2413#if MACH_ASSERT 2414 if (!from_zone_map(addr, size)) 2415 panic("zone_page_collect"); 2416#endif 2417 2418 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2419 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2420 2421 for (; i <= j; i++) { 2422 zp = zone_page_table_lookup(i); 2423 ++zp->collect_count; 
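        /*
         * collect_count counts the free elements zone_gc() has seen on this
         * page; once it reaches alloc_count the page contains nothing but
         * free elements and zone_page_collectable() reports it reclaimable.
         */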
2424 } 2425} 2426 2427void 2428zone_page_init( 2429 vm_offset_t addr, 2430 vm_size_t size) 2431{ 2432 struct zone_page_table_entry *zp; 2433 zone_page_index_t i, j; 2434 2435#if ZONE_ALIAS_ADDR 2436 addr = zone_virtual_addr(addr); 2437#endif 2438#if MACH_ASSERT 2439 if (!from_zone_map(addr, size)) 2440 panic("zone_page_init"); 2441#endif 2442 2443 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2444 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2445 2446 for (; i <= j; i++) { 2447 /* make sure entry exists before marking unused */ 2448 zone_page_table_expand(i); 2449 2450 zp = zone_page_table_lookup(i); 2451 assert(zp); 2452 zp->alloc_count = ZONE_PAGE_UNUSED; 2453 zp->collect_count = 0; 2454 } 2455} 2456 2457void 2458zone_page_alloc( 2459 vm_offset_t addr, 2460 vm_size_t size) 2461{ 2462 struct zone_page_table_entry *zp; 2463 zone_page_index_t i, j; 2464 2465#if ZONE_ALIAS_ADDR 2466 addr = zone_virtual_addr(addr); 2467#endif 2468#if MACH_ASSERT 2469 if (!from_zone_map(addr, size)) 2470 panic("zone_page_alloc"); 2471#endif 2472 2473 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2474 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2475 2476 for (; i <= j; i++) { 2477 zp = zone_page_table_lookup(i); 2478 assert(zp); 2479 2480 /* 2481 * Set alloc_count to ZONE_PAGE_USED if 2482 * it was previously set to ZONE_PAGE_UNUSED. 2483 */ 2484 if (zp->alloc_count == ZONE_PAGE_UNUSED) 2485 zp->alloc_count = ZONE_PAGE_USED; 2486 2487 ++zp->alloc_count; 2488 } 2489} 2490 2491void 2492zone_page_free_element( 2493 zone_page_index_t *free_page_head, 2494 zone_page_index_t *free_page_tail, 2495 vm_offset_t addr, 2496 vm_size_t size) 2497{ 2498 struct zone_page_table_entry *zp; 2499 zone_page_index_t i, j; 2500 2501#if ZONE_ALIAS_ADDR 2502 addr = zone_virtual_addr(addr); 2503#endif 2504#if MACH_ASSERT 2505 if (!from_zone_map(addr, size)) 2506 panic("zone_page_free_element"); 2507#endif 2508 2509 i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); 2510 j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); 2511 2512 for (; i <= j; i++) { 2513 zp = zone_page_table_lookup(i); 2514 2515 if (zp->collect_count > 0) 2516 --zp->collect_count; 2517 if (--zp->alloc_count == 0) { 2518 vm_address_t free_page_address; 2519 vm_address_t prev_free_page_address; 2520 2521 zp->alloc_count = ZONE_PAGE_UNUSED; 2522 zp->collect_count = 0; 2523 2524 2525 /* 2526 * This element was the last one on this page, re-use the page's 2527 * storage for a page freelist 2528 */ 2529 free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)i); 2530 *(zone_page_index_t *)free_page_address = ZONE_PAGE_INDEX_INVALID; 2531 2532 if (*free_page_head == ZONE_PAGE_INDEX_INVALID) { 2533 *free_page_head = i; 2534 *free_page_tail = i; 2535 } else { 2536 prev_free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)(*free_page_tail)); 2537 *(zone_page_index_t *)prev_free_page_address = i; 2538 *free_page_tail = i; 2539 } 2540 } 2541 } 2542} 2543 2544 2545/* This is used for walking through a zone's free element list. 2546 */ 2547struct zone_free_element { 2548 struct zone_free_element * next; 2549}; 2550 2551/* 2552 * Add a linked list of pages starting at base back into the zone 2553 * free list. Tail points to the last element on the list. 
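 * When the element is large enough to carry one, the macro also updates the
 * backup next pointer stored in the tail element's last word, keeping both
 * copies of the free-list link in sync.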
2554 */ 2555#define ADD_LIST_TO_ZONE(zone, base, tail) \ 2556MACRO_BEGIN \ 2557 (tail)->next = (void *)((zone)->free_elements); \ 2558 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { \ 2559 ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ 2560 (zone)->free_elements; \ 2561 } \ 2562 (zone)->free_elements = (unsigned long)(base); \ 2563MACRO_END 2564 2565/* 2566 * Add an element to the chain pointed to by prev. 2567 */ 2568#define ADD_ELEMENT(zone, prev, elem) \ 2569MACRO_BEGIN \ 2570 (prev)->next = (elem); \ 2571 if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { \ 2572 ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ 2573 (vm_offset_t)(elem); \ 2574 } \ 2575MACRO_END 2576 2577struct { 2578 uint32_t pgs_freed; 2579 2580 uint32_t elems_collected, 2581 elems_freed, 2582 elems_kept; 2583} zgc_stats; 2584 2585/* Zone garbage collection 2586 * 2587 * zone_gc will walk through all the free elements in all the 2588 * zones that are marked collectable looking for reclaimable 2589 * pages. zone_gc is called by consider_zone_gc when the system 2590 * begins to run out of memory. 2591 */ 2592void 2593zone_gc(boolean_t all_zones) 2594{ 2595 unsigned int max_zones; 2596 zone_t z; 2597 unsigned int i; 2598 zone_page_index_t zone_free_page_head; 2599 zone_page_index_t zone_free_page_tail; 2600 thread_t mythread = current_thread(); 2601 2602 lck_mtx_lock(&zone_gc_lock); 2603 2604 simple_lock(&all_zones_lock); 2605 max_zones = num_zones; 2606 z = first_zone; 2607 simple_unlock(&all_zones_lock); 2608 2609 2610 /* 2611 * it's ok to allow eager kernel preemption while 2612 * while holding a zone lock since it's taken 2613 * as a spin lock (which prevents preemption) 2614 */ 2615 thread_set_eager_preempt(mythread); 2616 2617#if MACH_ASSERT 2618 for (i = 0; i < zone_pages; i++) { 2619 struct zone_page_table_entry *zp; 2620 2621 zp = zone_page_table_lookup(i); 2622 assert(!zp || (zp->collect_count == 0)); 2623 } 2624#endif /* MACH_ASSERT */ 2625 2626 for (i = 0; i < max_zones; i++, z = z->next_zone) { 2627 unsigned int n, m; 2628 vm_size_t elt_size, size_freed; 2629 struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail; 2630 int kmem_frees = 0; 2631 2632 assert(z != ZONE_NULL); 2633 2634 if (!z->collectable) 2635 continue; 2636 2637 if (all_zones == FALSE && z->elem_size < PAGE_SIZE) 2638 continue; 2639 2640 lock_zone(z); 2641 2642 elt_size = z->elem_size; 2643 2644 /* 2645 * Do a quick feasibility check before we scan the zone: 2646 * skip unless there is likelihood of getting pages back 2647 * (i.e we need a whole allocation block's worth of free 2648 * elements before we can garbage collect) and 2649 * the zone has more than 10 percent of it's elements free 2650 * or the element size is a multiple of the PAGE_SIZE 2651 */ 2652 if ((elt_size & PAGE_MASK) && 2653 (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) || 2654 ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) { 2655 unlock_zone(z); 2656 continue; 2657 } 2658 2659 z->doing_gc = TRUE; 2660 2661 /* 2662 * Snatch all of the free elements away from the zone. 2663 */ 2664 2665 scan = (void *)z->free_elements; 2666 z->free_elements = 0; 2667 2668 unlock_zone(z); 2669 2670 /* 2671 * Pass 1: 2672 * 2673 * Determine which elements we can attempt to collect 2674 * and count them up in the page table. Foreign elements 2675 * are returned to the zone. 
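 * Elements that live outside the zone map can never be reclaimed a page at a
 * time, so they are simply threaded onto a local list and handed back to the
 * zone rather than being counted.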
2676 */ 2677 2678 prev = (void *)&scan; 2679 elt = scan; 2680 n = 0; tail = keep = NULL; 2681 2682 zone_free_page_head = ZONE_PAGE_INDEX_INVALID; 2683 zone_free_page_tail = ZONE_PAGE_INDEX_INVALID; 2684 2685 2686 while (elt != NULL) { 2687 if (from_zone_map(elt, elt_size)) { 2688 zone_page_collect((vm_offset_t)elt, elt_size); 2689 2690 prev = elt; 2691 elt = elt->next; 2692 2693 ++zgc_stats.elems_collected; 2694 } 2695 else { 2696 if (keep == NULL) 2697 keep = tail = elt; 2698 else { 2699 ADD_ELEMENT(z, tail, elt); 2700 tail = elt; 2701 } 2702 2703 ADD_ELEMENT(z, prev, elt->next); 2704 elt = elt->next; 2705 ADD_ELEMENT(z, tail, NULL); 2706 } 2707 2708 /* 2709 * Dribble back the elements we are keeping. 2710 */ 2711 2712 if (++n >= 50) { 2713 if (z->waiting == TRUE) { 2714 /* z->waiting checked without lock held, rechecked below after locking */ 2715 lock_zone(z); 2716 2717 if (keep != NULL) { 2718 ADD_LIST_TO_ZONE(z, keep, tail); 2719 tail = keep = NULL; 2720 } else { 2721 m =0; 2722 base_elt = elt; 2723 base_prev = prev; 2724 while ((elt != NULL) && (++m < 50)) { 2725 prev = elt; 2726 elt = elt->next; 2727 } 2728 if (m !=0 ) { 2729 ADD_LIST_TO_ZONE(z, base_elt, prev); 2730 ADD_ELEMENT(z, base_prev, elt); 2731 prev = base_prev; 2732 } 2733 } 2734 2735 if (z->waiting) { 2736 z->waiting = FALSE; 2737 zone_wakeup(z); 2738 } 2739 2740 unlock_zone(z); 2741 } 2742 n =0; 2743 } 2744 } 2745 2746 /* 2747 * Return any remaining elements. 2748 */ 2749 2750 if (keep != NULL) { 2751 lock_zone(z); 2752 2753 ADD_LIST_TO_ZONE(z, keep, tail); 2754 2755 if (z->waiting) { 2756 z->waiting = FALSE; 2757 zone_wakeup(z); 2758 } 2759 2760 unlock_zone(z); 2761 } 2762 2763 /* 2764 * Pass 2: 2765 * 2766 * Determine which pages we can reclaim and 2767 * free those elements. 2768 */ 2769 2770 size_freed = 0; 2771 elt = scan; 2772 n = 0; tail = keep = NULL; 2773 2774 while (elt != NULL) { 2775 if (zone_page_collectable((vm_offset_t)elt, elt_size)) { 2776 struct zone_free_element *next_elt = elt->next; 2777 2778 size_freed += elt_size; 2779 2780 /* 2781 * If this is the last allocation on the page(s), 2782 * we may use their storage to maintain the linked 2783 * list of free-able pages. So store elt->next because 2784 * "elt" may be scribbled over. 2785 */ 2786 zone_page_free_element(&zone_free_page_head, &zone_free_page_tail, (vm_offset_t)elt, elt_size); 2787 2788 elt = next_elt; 2789 2790 ++zgc_stats.elems_freed; 2791 } 2792 else { 2793 zone_page_keep((vm_offset_t)elt, elt_size); 2794 2795 if (keep == NULL) 2796 keep = tail = elt; 2797 else { 2798 ADD_ELEMENT(z, tail, elt); 2799 tail = elt; 2800 } 2801 2802 elt = elt->next; 2803 ADD_ELEMENT(z, tail, NULL); 2804 2805 ++zgc_stats.elems_kept; 2806 } 2807 2808 /* 2809 * Dribble back the elements we are keeping, 2810 * and update the zone size info. 2811 */ 2812 2813 if (++n >= 50) { 2814 lock_zone(z); 2815 2816 z->cur_size -= size_freed; 2817 size_freed = 0; 2818 2819 if (keep != NULL) { 2820 ADD_LIST_TO_ZONE(z, keep, tail); 2821 } 2822 2823 if (z->waiting) { 2824 z->waiting = FALSE; 2825 zone_wakeup(z); 2826 } 2827 2828 unlock_zone(z); 2829 2830 n = 0; tail = keep = NULL; 2831 } 2832 } 2833 2834 /* 2835 * Return any remaining elements, and update 2836 * the zone size info. 
2837 */ 2838 2839 lock_zone(z); 2840 2841 if (size_freed > 0 || keep != NULL) { 2842 2843 z->cur_size -= size_freed; 2844 2845 if (keep != NULL) { 2846 ADD_LIST_TO_ZONE(z, keep, tail); 2847 } 2848 2849 } 2850 2851 z->doing_gc = FALSE; 2852 if (z->waiting) { 2853 z->waiting = FALSE; 2854 zone_wakeup(z); 2855 } 2856 unlock_zone(z); 2857 2858 2859 if (zone_free_page_head == ZONE_PAGE_INDEX_INVALID) 2860 continue; 2861 2862 /* 2863 * we don't want to allow eager kernel preemption while holding the 2864 * various locks taken in the kmem_free path of execution 2865 */ 2866 thread_clear_eager_preempt(mythread); 2867 2868 /* 2869 * Reclaim the pages we are freeing. 2870 */ 2871 while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) { 2872 zone_page_index_t zind = zone_free_page_head; 2873 vm_address_t free_page_address; 2874 int page_count; 2875 2876 /* 2877 * Use the first word of the page about to be freed to find the next free page 2878 */ 2879 free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind); 2880 zone_free_page_head = *(zone_page_index_t *)free_page_address; 2881 2882 page_count = 1; 2883 2884 while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) { 2885 zone_page_index_t next_zind = zone_free_page_head; 2886 vm_address_t next_free_page_address; 2887 2888 next_free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)next_zind); 2889 2890 if (next_free_page_address == (free_page_address - PAGE_SIZE)) { 2891 free_page_address = next_free_page_address; 2892 } else if (next_free_page_address != (free_page_address + (PAGE_SIZE * page_count))) 2893 break; 2894 2895 zone_free_page_head = *(zone_page_index_t *)next_free_page_address; 2896 page_count++; 2897 } 2898 kmem_free(zone_map, free_page_address, page_count * PAGE_SIZE); 2899 2900 zgc_stats.pgs_freed += page_count; 2901 2902 if (++kmem_frees == 32) { 2903 thread_yield_internal(1); 2904 kmem_frees = 0; 2905 } 2906 } 2907 thread_set_eager_preempt(mythread); 2908 } 2909 thread_clear_eager_preempt(mythread); 2910 2911 lck_mtx_unlock(&zone_gc_lock); 2912 2913} 2914 2915extern vm_offset_t kmapoff_kaddr; 2916extern unsigned int kmapoff_pgcnt; 2917 2918/* 2919 * consider_zone_gc: 2920 * 2921 * Called by the pageout daemon when the system needs more free pages. 2922 */ 2923 2924void 2925consider_zone_gc(boolean_t force) 2926{ 2927 boolean_t all_zones = FALSE; 2928 2929 if (kmapoff_kaddr != 0) { 2930 /* 2931 * One-time reclaim of kernel_map resources we allocated in 2932 * early boot. 2933 */ 2934 (void) vm_deallocate(kernel_map, 2935 kmapoff_kaddr, kmapoff_pgcnt * PAGE_SIZE_64); 2936 kmapoff_kaddr = 0; 2937 } 2938 2939 if (zone_gc_allowed && 2940 (zone_gc_allowed_by_time_throttle || 2941 zone_gc_forced || 2942 force)) { 2943 if (zone_gc_allowed_by_time_throttle == TRUE) { 2944 zone_gc_allowed_by_time_throttle = FALSE; 2945 all_zones = TRUE; 2946 } 2947 zone_gc_forced = FALSE; 2948 2949 zone_gc(all_zones); 2950 } 2951} 2952 2953/* 2954 * By default, don't attempt zone GC more frequently 2955 * than once / 1 minutes. 
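 * compute_zone_gc_throttle() re-arms zone_gc_allowed_by_time_throttle, and
 * consider_zone_gc() clears it again when it performs an all-zones collection.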
2956 */ 2957void 2958compute_zone_gc_throttle(void *arg __unused) 2959{ 2960 zone_gc_allowed_by_time_throttle = TRUE; 2961} 2962 2963 2964#if CONFIG_TASK_ZONE_INFO 2965 2966kern_return_t 2967task_zone_info( 2968 task_t task, 2969 mach_zone_name_array_t *namesp, 2970 mach_msg_type_number_t *namesCntp, 2971 task_zone_info_array_t *infop, 2972 mach_msg_type_number_t *infoCntp) 2973{ 2974 mach_zone_name_t *names; 2975 vm_offset_t names_addr; 2976 vm_size_t names_size; 2977 task_zone_info_t *info; 2978 vm_offset_t info_addr; 2979 vm_size_t info_size; 2980 unsigned int max_zones, i; 2981 zone_t z; 2982 mach_zone_name_t *zn; 2983 task_zone_info_t *zi; 2984 kern_return_t kr; 2985 2986 vm_size_t used; 2987 vm_map_copy_t copy; 2988 2989 2990 if (task == TASK_NULL) 2991 return KERN_INVALID_TASK; 2992 2993 /* 2994 * We assume that zones aren't freed once allocated. 2995 * We won't pick up any zones that are allocated later. 2996 */ 2997 2998 simple_lock(&all_zones_lock); 2999 max_zones = (unsigned int)(num_zones + num_fake_zones); 3000 z = first_zone; 3001 simple_unlock(&all_zones_lock); 3002 3003 names_size = round_page(max_zones * sizeof *names); 3004 kr = kmem_alloc_pageable(ipc_kernel_map, 3005 &names_addr, names_size); 3006 if (kr != KERN_SUCCESS) 3007 return kr; 3008 names = (mach_zone_name_t *) names_addr; 3009 3010 info_size = round_page(max_zones * sizeof *info); 3011 kr = kmem_alloc_pageable(ipc_kernel_map, 3012 &info_addr, info_size); 3013 if (kr != KERN_SUCCESS) { 3014 kmem_free(ipc_kernel_map, 3015 names_addr, names_size); 3016 return kr; 3017 } 3018 3019 info = (task_zone_info_t *) info_addr; 3020 3021 zn = &names[0]; 3022 zi = &info[0]; 3023 3024 for (i = 0; i < max_zones - num_fake_zones; i++) { 3025 struct zone zcopy; 3026 3027 assert(z != ZONE_NULL); 3028 3029 lock_zone(z); 3030 zcopy = *z; 3031 unlock_zone(z); 3032 3033 simple_lock(&all_zones_lock); 3034 z = z->next_zone; 3035 simple_unlock(&all_zones_lock); 3036 3037 /* assuming here the name data is static */ 3038 (void) strncpy(zn->mzn_name, zcopy.zone_name, 3039 sizeof zn->mzn_name); 3040 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; 3041 3042 zi->tzi_count = (uint64_t)zcopy.count; 3043 zi->tzi_cur_size = (uint64_t)zcopy.cur_size; 3044 zi->tzi_max_size = (uint64_t)zcopy.max_size; 3045 zi->tzi_elem_size = (uint64_t)zcopy.elem_size; 3046 zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size; 3047 zi->tzi_sum_size = zcopy.sum_count * zcopy.elem_size; 3048 zi->tzi_exhaustible = (uint64_t)zcopy.exhaustible; 3049 zi->tzi_collectable = (uint64_t)zcopy.collectable; 3050 zi->tzi_caller_acct = (uint64_t)zcopy.caller_acct; 3051 if (task->tkm_zinfo != NULL) { 3052 zi->tzi_task_alloc = task->tkm_zinfo[zcopy.index].alloc; 3053 zi->tzi_task_free = task->tkm_zinfo[zcopy.index].free; 3054 } else { 3055 zi->tzi_task_alloc = 0; 3056 zi->tzi_task_free = 0; 3057 } 3058 zn++; 3059 zi++; 3060 } 3061 3062 /* 3063 * loop through the fake zones and fill them using the specialized 3064 * functions 3065 */ 3066 for (i = 0; i < num_fake_zones; i++) { 3067 int count, collectable, exhaustible, caller_acct, index; 3068 vm_size_t cur_size, max_size, elem_size, alloc_size; 3069 uint64_t sum_size; 3070 3071 strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name); 3072 zn->mzn_name[sizeof zn->mzn_name - 1] = '\0'; 3073 fake_zones[i].query(&count, &cur_size, 3074 &max_size, &elem_size, 3075 &alloc_size, &sum_size, 3076 &collectable, &exhaustible, &caller_acct); 3077 zi->tzi_count = (uint64_t)count; 3078 zi->tzi_cur_size = (uint64_t)cur_size; 3079 zi->tzi_max_size 
= (uint64_t)max_size;
        zi->tzi_elem_size = (uint64_t)elem_size;
        zi->tzi_alloc_size = (uint64_t)alloc_size;
        zi->tzi_sum_size = sum_size;
        zi->tzi_collectable = (uint64_t)collectable;
        zi->tzi_exhaustible = (uint64_t)exhaustible;
        zi->tzi_caller_acct = (uint64_t)caller_acct;
        if (task->tkm_zinfo != NULL) {
            index = ZINFO_SLOTS - num_fake_zones + i;
            zi->tzi_task_alloc = task->tkm_zinfo[index].alloc;
            zi->tzi_task_free = task->tkm_zinfo[index].free;
        } else {
            zi->tzi_task_alloc = 0;
            zi->tzi_task_free = 0;
        }
        zn++;
        zi++;
    }

    used = max_zones * sizeof *names;
    if (used != names_size)
        bzero((char *) (names_addr + used), names_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                       (vm_map_size_t)names_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *namesp = (mach_zone_name_t *) copy;
    *namesCntp = max_zones;

    used = max_zones * sizeof *info;

    if (used != info_size)
        bzero((char *) (info_addr + used), info_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                       (vm_map_size_t)info_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *infop = (task_zone_info_t *) copy;
    *infoCntp = max_zones;

    return KERN_SUCCESS;
}

#else  /* CONFIG_TASK_ZONE_INFO */

kern_return_t
task_zone_info(
    __unused task_t                  task,
    __unused mach_zone_name_array_t *namesp,
    __unused mach_msg_type_number_t *namesCntp,
    __unused task_zone_info_array_t *infop,
    __unused mach_msg_type_number_t *infoCntp)
{
    return KERN_FAILURE;
}

#endif  /* CONFIG_TASK_ZONE_INFO */

kern_return_t
mach_zone_info(
    host_priv_t             host,
    mach_zone_name_array_t *namesp,
    mach_msg_type_number_t *namesCntp,
    mach_zone_info_array_t *infop,
    mach_msg_type_number_t *infoCntp)
{
    mach_zone_name_t *names;
    vm_offset_t       names_addr;
    vm_size_t         names_size;
    mach_zone_info_t *info;
    vm_offset_t       info_addr;
    vm_size_t         info_size;
    unsigned int      max_zones, i;
    zone_t            z;
    mach_zone_name_t *zn;
    mach_zone_info_t *zi;
    kern_return_t     kr;

    vm_size_t         used;
    vm_map_copy_t     copy;


    if (host == HOST_NULL)
        return KERN_INVALID_HOST;
#if CONFIG_DEBUGGER_FOR_ZONE_INFO
    if (!PE_i_can_has_debugger(NULL))
        return KERN_INVALID_HOST;
#endif

    /*
     * We assume that zones aren't freed once allocated.
     * We won't pick up any zones that are allocated later.
     */

    simple_lock(&all_zones_lock);
    max_zones = (unsigned int)(num_zones + num_fake_zones);
    z = first_zone;
    simple_unlock(&all_zones_lock);

    names_size = round_page(max_zones * sizeof *names);
    kr = kmem_alloc_pageable(ipc_kernel_map,
                             &names_addr, names_size);
    if (kr != KERN_SUCCESS)
        return kr;
    names = (mach_zone_name_t *) names_addr;

    info_size = round_page(max_zones * sizeof *info);
    kr = kmem_alloc_pageable(ipc_kernel_map,
                             &info_addr, info_size);
    if (kr != KERN_SUCCESS) {
        kmem_free(ipc_kernel_map,
                  names_addr, names_size);
        return kr;
    }

    info = (mach_zone_info_t *) info_addr;

    zn = &names[0];
    zi = &info[0];

    for (i = 0; i < max_zones - num_fake_zones; i++) {
        struct zone zcopy;

        assert(z != ZONE_NULL);

        lock_zone(z);
        zcopy = *z;
        unlock_zone(z);

        simple_lock(&all_zones_lock);
        z = z->next_zone;
        simple_unlock(&all_zones_lock);

        /* assuming here the name data is static */
        (void) strncpy(zn->mzn_name, zcopy.zone_name,
                       sizeof zn->mzn_name);
        zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';

        zi->mzi_count = (uint64_t)zcopy.count;
        zi->mzi_cur_size = (uint64_t)zcopy.cur_size;
        zi->mzi_max_size = (uint64_t)zcopy.max_size;
        zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
        zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
        zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size;
        zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible;
        zi->mzi_collectable = (uint64_t)zcopy.collectable;
        zn++;
        zi++;
    }

    /*
     * loop through the fake zones and fill them using the specialized
     * functions
     */
    for (i = 0; i < num_fake_zones; i++) {
        int count, collectable, exhaustible, caller_acct;
        vm_size_t cur_size, max_size, elem_size, alloc_size;
        uint64_t sum_size;

        strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
        zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
        fake_zones[i].query(&count, &cur_size,
                            &max_size, &elem_size,
                            &alloc_size, &sum_size,
                            &collectable, &exhaustible, &caller_acct);
        zi->mzi_count = (uint64_t)count;
        zi->mzi_cur_size = (uint64_t)cur_size;
        zi->mzi_max_size = (uint64_t)max_size;
        zi->mzi_elem_size = (uint64_t)elem_size;
        zi->mzi_alloc_size = (uint64_t)alloc_size;
        zi->mzi_sum_size = sum_size;
        zi->mzi_collectable = (uint64_t)collectable;
        zi->mzi_exhaustible = (uint64_t)exhaustible;

        zn++;
        zi++;
    }

    used = max_zones * sizeof *names;
    if (used != names_size)
        bzero((char *) (names_addr + used), names_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                       (vm_map_size_t)names_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *namesp = (mach_zone_name_t *) copy;
    *namesCntp = max_zones;

    used = max_zones * sizeof *info;

    if (used != info_size)
        bzero((char *) (info_addr + used), info_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                       (vm_map_size_t)info_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *infop = (mach_zone_info_t *) copy;
    *infoCntp = max_zones;

    return KERN_SUCCESS;
}

/*
 * host_zone_info - LEGACY user interface for Mach zone information
 * Should use mach_zone_info() instead!
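 * The legacy zone_info_t / zone_name_t structures use natural-width fields,
 * which is presumably why the caller's word size must match the kernel's
 * (see the thread_is_64bit() checks below).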
 */
kern_return_t
host_zone_info(
    host_priv_t             host,
    zone_name_array_t      *namesp,
    mach_msg_type_number_t *namesCntp,
    zone_info_array_t      *infop,
    mach_msg_type_number_t *infoCntp)
{
    zone_name_t   *names;
    vm_offset_t    names_addr;
    vm_size_t      names_size;
    zone_info_t   *info;
    vm_offset_t    info_addr;
    vm_size_t      info_size;
    unsigned int   max_zones, i;
    zone_t         z;
    zone_name_t   *zn;
    zone_info_t   *zi;
    kern_return_t  kr;

    vm_size_t      used;
    vm_map_copy_t  copy;


    if (host == HOST_NULL)
        return KERN_INVALID_HOST;
#if CONFIG_DEBUGGER_FOR_ZONE_INFO
    if (!PE_i_can_has_debugger(NULL))
        return KERN_INVALID_HOST;
#endif

#if defined(__LP64__)
    if (!thread_is_64bit(current_thread()))
        return KERN_NOT_SUPPORTED;
#else
    if (thread_is_64bit(current_thread()))
        return KERN_NOT_SUPPORTED;
#endif

    /*
     * We assume that zones aren't freed once allocated.
     * We won't pick up any zones that are allocated later.
     */

    simple_lock(&all_zones_lock);
    max_zones = (unsigned int)(num_zones + num_fake_zones);
    z = first_zone;
    simple_unlock(&all_zones_lock);

    names_size = round_page(max_zones * sizeof *names);
    kr = kmem_alloc_pageable(ipc_kernel_map,
                             &names_addr, names_size);
    if (kr != KERN_SUCCESS)
        return kr;
    names = (zone_name_t *) names_addr;

    info_size = round_page(max_zones * sizeof *info);
    kr = kmem_alloc_pageable(ipc_kernel_map,
                             &info_addr, info_size);
    if (kr != KERN_SUCCESS) {
        kmem_free(ipc_kernel_map,
                  names_addr, names_size);
        return kr;
    }

    info = (zone_info_t *) info_addr;

    zn = &names[0];
    zi = &info[0];

    for (i = 0; i < max_zones - num_fake_zones; i++) {
        struct zone zcopy;

        assert(z != ZONE_NULL);

        lock_zone(z);
        zcopy = *z;
        unlock_zone(z);

        simple_lock(&all_zones_lock);
        z = z->next_zone;
        simple_unlock(&all_zones_lock);

        /* assuming here the name data is static */
        (void) strncpy(zn->zn_name, zcopy.zone_name,
                       sizeof zn->zn_name);
        zn->zn_name[sizeof zn->zn_name - 1] = '\0';

        zi->zi_count = zcopy.count;
        zi->zi_cur_size = zcopy.cur_size;
        zi->zi_max_size = zcopy.max_size;
        zi->zi_elem_size = zcopy.elem_size;
        zi->zi_alloc_size = zcopy.alloc_size;
        zi->zi_exhaustible = zcopy.exhaustible;
        zi->zi_collectable = zcopy.collectable;

        zn++;
        zi++;
    }

    /*
     * loop through the fake zones and fill them using the specialized
     * functions
     */
    for (i = 0; i < num_fake_zones; i++) {
        int caller_acct;
        uint64_t sum_space;
        strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
        zn->zn_name[sizeof zn->zn_name - 1] = '\0';
        fake_zones[i].query(&zi->zi_count, &zi->zi_cur_size,
                            &zi->zi_max_size, &zi->zi_elem_size,
                            &zi->zi_alloc_size, &sum_space,
                            &zi->zi_collectable, &zi->zi_exhaustible, &caller_acct);
        zn++;
        zi++;
    }

    used = max_zones * sizeof *names;
    if (used != names_size)
        bzero((char *) (names_addr + used), names_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
                       (vm_map_size_t)names_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *namesp = (zone_name_t *) copy;
    *namesCntp = max_zones;

    used = max_zones * sizeof *info;
    if (used != info_size)
        bzero((char *) (info_addr + used), info_size - used);

    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
                       (vm_map_size_t)info_size, TRUE, &copy);
    assert(kr == KERN_SUCCESS);

    *infop = (zone_info_t *) copy;
    *infoCntp = max_zones;

    return KERN_SUCCESS;
}

kern_return_t
mach_zone_force_gc(
    host_t host)
{

    if (host == HOST_NULL)
        return KERN_INVALID_HOST;

    consider_zone_gc(TRUE);

    return (KERN_SUCCESS);
}

extern unsigned int stack_total;
extern unsigned long long stack_allocs;

#if defined(__i386__) || defined (__x86_64__)
extern unsigned int inuse_ptepages_count;
extern long long alloc_ptepages_count;
#endif

void zone_display_zprint()
{
    unsigned int    i;
    zone_t          the_zone;

    if(first_zone!=NULL) {
        the_zone = first_zone;
        for (i = 0; i < num_zones; i++) {
            if(the_zone->cur_size > (1024*1024)) {
                printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size);
            }

            if(the_zone->next_zone == NULL) {
                break;
            }

            the_zone = the_zone->next_zone;
        }
    }

    printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total));

#if defined(__i386__) || defined (__x86_64__)
    printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
#endif

    printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total);
}

#if ZONE_DEBUG

/* should we care about locks here ? */

#define zone_in_use(z)  ( z->count || z->free_elements )

void
zone_debug_enable(
    zone_t          z)
{
    if (zone_debug_enabled(z) || zone_in_use(z) ||
        z->alloc_size < (z->elem_size + ZONE_DEBUG_OFFSET))
        return;
    queue_init(&z->active_zones);
    z->elem_size += ZONE_DEBUG_OFFSET;
}

void
zone_debug_disable(
    zone_t          z)
{
    if (!zone_debug_enabled(z) || zone_in_use(z))
        return;
    z->elem_size -= ZONE_DEBUG_OFFSET;
    z->active_zones.next = z->active_zones.prev = NULL;
}


#endif  /* ZONE_DEBUG */