/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>

/*
 *	We allocate stacks from generic kernel VM.
 *
 *	The stack_free_list can only be accessed at splsched,
 *	because stack_alloc_try/thread_invoke operate at splsched.
 */
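/*
 *	Free stacks are kept on two levels: each processor caches up to
 *	STACK_CACHE_SIZE stacks in its processor-private stack_cache
 *	(see stack_free_stack and stack_alloc_try), and the overflow goes
 *	on the global stack_free_list below, which is trimmed back to a
 *	computed target by stack_collect().
 */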
decl_simple_lock_data(static,stack_lock_data)
#define stack_lock()		simple_lock(&stack_lock_data)
#define stack_unlock()		simple_unlock(&stack_lock_data)

#define STACK_CACHE_SIZE	2

static vm_offset_t		stack_free_list;

static unsigned int		stack_free_count, stack_free_hiwat;	/* free list count */
static unsigned int		stack_hiwat;
unsigned int			stack_total;		/* current total count */
unsigned long long		stack_allocs;		/* total count of allocations */

static int			stack_fake_zone_index = -1;	/* index in zone_info array */

static unsigned int		stack_free_target;
static int			stack_free_delta;

static unsigned int		stack_new_count;	/* total new stack allocations */

static vm_offset_t		stack_addr_mask;

unsigned int			kernel_stack_pages;
vm_offset_t			kernel_stack_size;
vm_offset_t			kernel_stack_mask;
vm_offset_t			kernel_stack_depth_max;

static inline void
STACK_ZINFO_PALLOC(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_credit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
}

static inline void
STACK_ZINFO_PFREE(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_debit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].free);
}

static inline void
STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
{
	ledger_debit(from->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
	ledger_credit(to->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1) {
		task_t task;
		zinfo_usage_t zinfo;

		if ((task = from->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].free);

		if ((task = to->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
	}
}

/*
 *	The next field is at the base of the stack,
 *	so the low end is left unsullied.
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))
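/*
 *	Layout of a single allocation, as set up by stack_alloc_internal():
 *
 *	  [ lower guard page | kernel_stack_size bytes ... next | upper guard page ]
 *	                     ^
 *	                     vm_offset_t handed around as "the stack"
 *
 *	The "next" link accessed through stack_next() occupies the last
 *	vm_offset_t of the stack region and is only written while the
 *	stack sits on a free list.
 */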
static inline int
log2(vm_offset_t size)
{
	int	result;
	for (result = 0; size > 0; result++)
		size >>= 1;
	return result;
}

static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

	kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
	kernel_stack_size = KERNEL_STACK_SIZE;
	kernel_stack_mask = -KERNEL_STACK_SIZE;
	kernel_stack_depth_max = 0;

	if (PE_parse_boot_argn("kernel_stack_pages",
			       &kernel_stack_pages,
			       sizeof (kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
		       kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size))
		panic("stack_init: stack size %p not a multiple of page size %d\n",
		      (void *) kernel_stack_size, PAGE_SIZE);

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}

/*
 *	stack_alloc:
 *
 *	Allocate a stack for a thread, may
 *	block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t	stack;
	spl_t		s;
	int		guard_flags;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	}
	else {
		if (++stack_total > stack_hiwat)
			stack_hiwat = stack_total;
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {

		/*
		 * Request guard pages on either side of the stack. Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */

		guard_flags = KMA_GUARD_FIRST | KMA_GUARD_LAST;
		if (kernel_memory_allocate(kernel_map, &stack,
					   kernel_stack_size + (2*PAGE_SIZE),
					   stack_addr_mask,
					   KMA_KSTACK | KMA_KOBJECT | guard_flags)
		    != KERN_SUCCESS)
			panic("stack_alloc: kernel_memory_allocate");

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page. Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{

	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
	STACK_ZINFO_PALLOC(thread);
}
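/*
 *	stack_handoff:
 *
 *	Hand the current thread's kernel stack directly to another
 *	thread; machine_stack_handoff() moves the stack itself and
 *	STACK_ZINFO_HANDOFF() moves the ledger / fake-zone accounting.
 */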
void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
	STACK_ZINFO_HANDOFF(from, to);
}

/*
 *	stack_free:
 *
 *	Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t	stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		STACK_ZINFO_PFREE(thread);
		stack_free_stack(stack);
	}
}

void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
		STACK_ZINFO_PFREE(thread);
	}
}

static void
stack_free_stack(
	vm_offset_t		stack)
{
	struct stack_cache	*cache;
	spl_t			s;

	s = splsched();
	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	}
	else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat)
			stack_free_hiwat = stack_free_count;
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 *	stack_alloc_try:
 *
 *	Non-blocking attempt to allocate a
 *	stack for a thread.
 *
 *	Returns TRUE on success.
 *
 *	Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t		thread)
{
	struct stack_cache	*cache;
	vm_offset_t		stack;

	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	stack = cache->free;
	if (stack != 0) {
		STACK_ZINFO_PALLOC(thread);
		cache->free = stack_next(stack);
		cache->count--;
	}
	else {
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				STACK_ZINFO_PALLOC(thread);
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return (TRUE);
	}

	return (FALSE);
}

static unsigned int		stack_collect_tick, last_stack_tick;

/*
 *	stack_collect:
 *
 *	Free excess kernel stacks, may
 *	block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int	target;
		vm_offset_t	stack;
		spl_t		s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page. Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(
				stack,
				VM_MAP_PAGE_MASK(kernel_map));
			stack -= PAGE_SIZE;
			if (vm_map_remove(
				    kernel_map,
				    stack,
				    stack + kernel_stack_size+(2*PAGE_SIZE),
				    VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS)
				panic("stack_collect: vm_map_remove");
			stack = 0;

			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}
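/*
 *	stack_collect() only runs once stack_collect_tick has moved past
 *	last_stack_tick; compute_stack_target() below advances the tick
 *	once per computation period, so excess stacks are reclaimed at
 *	most once per period.  Illustrative target computation: with
 *	stack_free_target == 10 and a net free-list change of 3 stacks
 *	over the last period, the next target is (4 * 10) / 5 + 3 == 11.
 */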
/*
 *	compute_stack_target:
 *
 *	Computes a new target free list count
 *	based on recent alloc / free activity.
 *
 *	Limits stack collection to once per
 *	computation period.
 */
void
compute_stack_target(
__unused void		*arg)
{
	spl_t		s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5)
		stack_free_target = (4 * stack_free_target) / 5;
	else
	if (stack_free_target > 0)
		stack_free_target--;

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}

void
stack_fake_zone_init(int zone_index)
{
	stack_fake_zone_index = zone_index;
}

void
stack_fake_zone_info(int *count,
		     vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		     uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	unsigned int	total, hiwat, free;
	unsigned long long all;
	spl_t		s;

	s = splsched();
	stack_lock();
	all = stack_allocs;
	total = stack_total;
	hiwat = stack_hiwat;
	free = stack_free_count;
	stack_unlock();
	splx(s);

	*count      = total - free;
	*cur_size   = kernel_stack_size * total;
	*max_size   = kernel_stack_size * hiwat;
	*elem_size  = kernel_stack_size;
	*alloc_size = kernel_stack_size;
	*sum_size   = all * kernel_stack_size;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

/* OBSOLETE */
void	stack_privilege(
		thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 *	Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int total;
	vm_size_t maxusage;
	vm_offset_t maxstack;

	register thread_t *thread_list;
	register thread_t thread;

	unsigned int actual;	/* this many things */
	unsigned int i;

	vm_size_t size, size_needed;
	void *addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0)
		return KERN_INVALID_ARGUMENT;

	size = 0;
	addr = NULL;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */
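		/*
		 * If not, drop the lock, grow the buffer, and retry;
		 * threads_count can change while the lock is dropped,
		 * so the sizing check is repeated each time around.
		 */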
		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size)
			break;

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return KERN_RESOURCE_SHORTAGE;
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t)(void *) queue_first(&threads);
	     !queue_end(&threads, (queue_entry_t) thread);
	     thread = (thread_t)(void *) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/* calculate maxusage and free thread references */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t threadref = thread_list[--i];

		if (threadref->kernel_stack != 0)
			total++;

		thread_deallocate(threadref);
	}

	if (size != 0)
		kfree(addr, size);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif	/* MACH_DEBUG */
}

vm_offset_t min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}