/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>

/*
 *	We allocate stacks from generic kernel VM.
 *
 *	The stack_free_list can only be accessed at splsched,
 *	because stack_alloc_try/thread_invoke operate at splsched.
 */
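/*
 *	Layout of a single allocation, as built by stack_alloc_internal()
 *	below (summary added for clarity; see also stack_collect()):
 *
 *	  base          base + PAGE_SIZE      base + PAGE_SIZE + kernel_stack_size
 *	  +------------+----------------------------------------+------------+
 *	  | guard page |      stack (kernel_stack_size bytes)   | guard page |
 *	  +------------+----------------------------------------+------------+
 *
 *	Callers see base + PAGE_SIZE; stack_collect() backs the pointer
 *	up by one page before returning the whole region to the VM map.
 */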
decl_simple_lock_data(static,stack_lock_data)
#define stack_lock()		simple_lock(&stack_lock_data)
#define stack_unlock()		simple_unlock(&stack_lock_data)

#define STACK_CACHE_SIZE	2

static vm_offset_t		stack_free_list;

static unsigned int		stack_free_count, stack_free_hiwat;	/* free list count */
static unsigned int		stack_hiwat;
unsigned int			stack_total;		/* current total count */
unsigned long long		stack_allocs;		/* total count of allocations */

static int			stack_fake_zone_index = -1;	/* index in zone_info array */

static unsigned int		stack_free_target;
static int			stack_free_delta;

static unsigned int		stack_new_count;	/* total new stack allocations */

static vm_offset_t		stack_addr_mask;

unsigned int			kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
vm_offset_t			kernel_stack_size = KERNEL_STACK_SIZE;
vm_offset_t			kernel_stack_mask = -KERNEL_STACK_SIZE;
vm_offset_t			kernel_stack_depth_max = 0;

static inline void
STACK_ZINFO_PALLOC(thread_t thread)
{
	task_t		task;
	zinfo_usage_t	zinfo;

	ledger_credit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
}

static inline void
STACK_ZINFO_PFREE(thread_t thread)
{
	task_t		task;
	zinfo_usage_t	zinfo;

	ledger_debit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].free);
}

static inline void
STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
{
	ledger_debit(from->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
	ledger_credit(to->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1) {
		task_t		task;
		zinfo_usage_t	zinfo;

		if ((task = from->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].free);

		if ((task = to->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
	}
}

/*
 *	The next field is at the base of the stack,
 *	so the low end is left unsullied.
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))
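/*
 *	Since kernel stacks grow downward, (stack + kernel_stack_size)
 *	is the logical base; a free stack's link word sits just below
 *	it, leaving the deep (low-address) end untouched:
 *
 *	  stack                       stack + kernel_stack_size
 *	  +---------------------------------+--------+
 *	  |       ...unused low end...      |  next  |
 *	  +---------------------------------+--------+
 */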
static inline int
log2(vm_offset_t size)
{
	int	result;

	for (result = 0; size > 0; result++)
		size >>= 1;
	return result;
}

static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

	if (PE_parse_boot_argn("kernel_stack_pages",
			       &kernel_stack_pages,
			       sizeof (kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
			kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size))
		panic("stack_init: stack size %p not a multiple of page size %d\n",
			(void *) kernel_stack_size, PAGE_SIZE);

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}
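/*
 *	Worked example (illustrative numbers only, not the guaranteed
 *	configuration): with 4K pages and a 16K (0x4000) stack, log2()
 *	as written counts significant bits, so log2(0x3fff) == 14 and
 *	roundup_pow2(0x4000) == 1UL << 15 == 0x8000.  That yields
 *	stack_addr_mask == 0x7fff and kernel_stack_mask == ~0x7fff,
 *	i.e. each stack is carved out of a naturally aligned window
 *	large enough to hold the stack plus both guard pages.
 */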
/*
 *	stack_alloc:
 *
 *	Allocate a stack for a thread, may
 *	block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t	stack;
	spl_t		s;
	int		guard_flags;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	}
	else {
		if (++stack_total > stack_hiwat)
			stack_hiwat = stack_total;
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {
		/*
		 * Request guard pages on either side of the stack. Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */
		guard_flags = KMA_GUARD_FIRST | KMA_GUARD_LAST;
		if (kernel_memory_allocate(kernel_map, &stack,
					   kernel_stack_size + (2*PAGE_SIZE),
					   stack_addr_mask,
					   KMA_KSTACK | KMA_KOBJECT | guard_flags)
		    != KERN_SUCCESS)
			panic("stack_alloc: kernel_memory_allocate");

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page. Skip past it to get the actual stack base address.
		 */
		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{
	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
	STACK_ZINFO_PALLOC(thread);
}

void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
	STACK_ZINFO_HANDOFF(from, to);
}

/*
 *	stack_free:
 *
 *	Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t	stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		STACK_ZINFO_PFREE(thread);
		stack_free_stack(stack);
	}
}

void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
		STACK_ZINFO_PFREE(thread);
	}
}

static void
stack_free_stack(
	vm_offset_t	stack)
{
	struct stack_cache	*cache;
	spl_t			s;

	s = splsched();
	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	}
	else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat)
			stack_free_hiwat = stack_free_count;
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 *	stack_alloc_try:
 *
 *	Non-blocking attempt to allocate a
 *	stack for a thread.
 *
 *	Returns TRUE on success.
 *
 *	Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t		thread)
{
	struct stack_cache	*cache;
	vm_offset_t		stack;

	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	stack = cache->free;
	if (stack != 0) {
		STACK_ZINFO_PALLOC(thread);
		cache->free = stack_next(stack);
		cache->count--;
	}
	else {
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				STACK_ZINFO_PALLOC(thread);
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return (TRUE);
	}

	return (FALSE);
}
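/*
 *	Freed stacks take a two-level path: up to STACK_CACHE_SIZE of
 *	them park on the local processor's cache (touched only at
 *	splsched, with preemption disabled, so no lock is needed), and
 *	the overflow goes to the global stack_free_list under
 *	stack_lock().  The sketch below shows how the two allocation
 *	entry points relate; the helper is hypothetical (the real
 *	dispatch lives in the scheduler, which defers blocking
 *	allocations to a daemon thread rather than calling
 *	stack_alloc() inline).
 */
#if 0	/* Illustrative only, not compiled. */
static void
example_provide_stack(thread_t thread)
{
	/* fast path: runs at splsched, never blocks */
	if (stack_alloc_try(thread))
		return;

	/* slow path: only legal from a context that may block */
	stack_alloc(thread);
}
#endif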
static unsigned int stack_collect_tick, last_stack_tick;

/*
 *	stack_collect:
 *
 *	Free excess kernel stacks, may
 *	block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int	target;
		vm_offset_t	stack;
		spl_t		s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page. Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */
			stack = (vm_offset_t)vm_map_trunc_page(stack);
			stack -= PAGE_SIZE;
			if (vm_map_remove(
					kernel_map,
					stack,
					stack + kernel_stack_size+(2*PAGE_SIZE),
					VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS)
				panic("stack_collect: vm_map_remove");
			stack = 0;

			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 *	compute_stack_target:
 *
 *	Computes a new target free list count
 *	based on recent alloc / free activity.
 *
 *	Limits stack collection to once per
 *	computation period.
 */
void
compute_stack_target(
__unused void		*arg)
{
	spl_t		s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5)
		stack_free_target = (4 * stack_free_target) / 5;
	else
	if (stack_free_target > 0)
		stack_free_target--;

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}
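/*
 *	Summary of the feedback loop above: each computation period
 *	decays stack_free_target by roughly 20% and adds back the
 *	magnitude of that period's net alloc/free swing
 *	(|stack_free_delta|).  stack_collect() then trims the free list
 *	only down to that target plus one cache's worth per processor,
 *	so a burst of context switches does not immediately send the
 *	allocator back to the VM map.
 */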
void
stack_fake_zone_init(int zone_index)
{
	stack_fake_zone_index = zone_index;
}

void
stack_fake_zone_info(int *count,
		     vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		     uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	unsigned int		total, hiwat, free;
	unsigned long long	all;
	spl_t			s;

	s = splsched();
	stack_lock();
	all = stack_allocs;
	total = stack_total;
	hiwat = stack_hiwat;
	free = stack_free_count;
	stack_unlock();
	splx(s);

	*count      = total - free;
	*cur_size   = kernel_stack_size * total;
	*max_size   = kernel_stack_size * hiwat;
	*elem_size  = kernel_stack_size;
	*alloc_size = kernel_stack_size;
	*sum_size   = all * kernel_stack_size;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

/* OBSOLETE */
void	stack_privilege(
		thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 *	Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int	total;
	vm_size_t	maxusage;
	vm_offset_t	maxstack;

	register thread_t	*thread_list;
	register thread_t	thread;

	unsigned int	actual;		/* this many things */
	unsigned int	i;

	vm_size_t	size, size_needed;
	void		*addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0)
		return KERN_INVALID_ARGUMENT;

	size = 0;
	addr = NULL;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */
		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size)
			break;

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return KERN_RESOURCE_SHORTAGE;
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t) queue_first(&threads);
	     !queue_end(&threads, (queue_entry_t) thread);
	     thread = (thread_t) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/* calculate maxusage and free thread references */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t threadref = thread_list[--i];

		if (threadref->kernel_stack != 0)
			total++;

		thread_deallocate(threadref);
	}

	if (size != 0)
		kfree(addr, size);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif	/* MACH_DEBUG */
}

vm_offset_t
min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t
max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}
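/*
 *	Note on the two accessors above: stacks come from generic kernel
 *	VM (see the comment at the top of this file), so the kernel
 *	map's bounds bound every kernel stack; machine-dependent code
 *	can use these routines to cheaply reject impossible
 *	stack-pointer values before walking a stack.
 */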