/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58 59#include <mach/boolean.h> 60#include <mach/thread_switch.h> 61#include <ipc/ipc_port.h> 62#include <ipc/ipc_space.h> 63#include <kern/counters.h> 64#include <kern/ipc_kobject.h> 65#include <kern/processor.h> 66#include <kern/sched.h> 67#include <kern/sched_prim.h> 68#include <kern/spl.h> 69#include <kern/task.h> 70#include <kern/thread.h> 71#include <mach/policy.h> 72 73#include <kern/syscall_subr.h> 74#include <mach/mach_host_server.h> 75#include <mach/mach_syscalls.h> 76#include <sys/kdebug.h> 77 78#ifdef MACH_BSD 79extern void workqueue_thread_yielded(void); 80extern sched_call_t workqueue_get_sched_callback(void); 81#endif /* MACH_BSD */ 82 83 84/* Called from commpage to take a delayed preemption when exiting 85 * the "Preemption Free Zone" (PFZ). 86 */ 87kern_return_t 88pfz_exit( 89__unused struct pfz_exit_args *args) 90{ 91 /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */ 92 93 return (KERN_SUCCESS); 94} 95 96 97/* 98 * swtch and swtch_pri both attempt to context switch (logic in 99 * thread_block no-ops the context switch if nothing would happen). 100 * A boolean is returned that indicates whether there is anything 101 * else runnable. 102 * 103 * This boolean can be used by a thread waiting on a 104 * lock or condition: If FALSE is returned, the thread is justified 105 * in becoming a resource hog by continuing to spin because there's 106 * nothing else useful that the processor could do. 
If TRUE is 107 * returned, the thread should make one more check on the 108 * lock and then be a good citizen and really suspend. 109 */ 110 111static void 112swtch_continue(void) 113{ 114 register processor_t myprocessor; 115 boolean_t result; 116 117 disable_preemption(); 118 myprocessor = current_processor(); 119 result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0; 120 enable_preemption(); 121 122 thread_syscall_return(result); 123 /*NOTREACHED*/ 124} 125 126boolean_t 127swtch( 128 __unused struct swtch_args *args) 129{ 130 register processor_t myprocessor; 131 boolean_t result; 132 133 disable_preemption(); 134 myprocessor = current_processor(); 135 if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) { 136 mp_enable_preemption(); 137 138 return (FALSE); 139 } 140 enable_preemption(); 141 142 counter(c_swtch_block++); 143 144 thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD); 145 146 disable_preemption(); 147 myprocessor = current_processor(); 148 result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0; 149 enable_preemption(); 150 151 return (result); 152} 153 154static void 155swtch_pri_continue(void) 156{ 157 register processor_t myprocessor; 158 boolean_t result; 159 160 thread_depress_abort_internal(current_thread()); 161 162 disable_preemption(); 163 myprocessor = current_processor(); 164 result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0; 165 mp_enable_preemption(); 166 167 thread_syscall_return(result); 168 /*NOTREACHED*/ 169} 170 171boolean_t 172swtch_pri( 173__unused struct swtch_pri_args *args) 174{ 175 register processor_t myprocessor; 176 boolean_t result; 177 178 disable_preemption(); 179 myprocessor = current_processor(); 180 if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) { 181 mp_enable_preemption(); 182 183 return (FALSE); 184 } 185 enable_preemption(); 186 187 counter(c_swtch_pri_block++); 188 189 
thread_depress_abstime(thread_depress_time); 190 191 thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD); 192 193 thread_depress_abort_internal(current_thread()); 194 195 disable_preemption(); 196 myprocessor = current_processor(); 197 result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0; 198 enable_preemption(); 199 200 return (result); 201} 202 203static int 204thread_switch_disable_workqueue_sched_callback(void) 205{ 206 sched_call_t callback = workqueue_get_sched_callback(); 207 thread_t self = current_thread(); 208 if (!callback || self->sched_call != callback) { 209 return FALSE; 210 } 211 spl_t s = splsched(); 212 thread_lock(self); 213 thread_sched_call(self, NULL); 214 thread_unlock(self); 215 splx(s); 216 return TRUE; 217} 218 219static void 220thread_switch_enable_workqueue_sched_callback(void) 221{ 222 sched_call_t callback = workqueue_get_sched_callback(); 223 thread_t self = current_thread(); 224 spl_t s = splsched(); 225 thread_lock(self); 226 thread_sched_call(self, callback); 227 thread_unlock(self); 228 splx(s); 229} 230 231static void 232thread_switch_continue(void) 233{ 234 register thread_t self = current_thread(); 235 int option = self->saved.swtch.option; 236 boolean_t reenable_workq_callback = self->saved.swtch.reenable_workq_callback; 237 238 239 if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) 240 thread_depress_abort_internal(self); 241 242 if (reenable_workq_callback) 243 thread_switch_enable_workqueue_sched_callback(); 244 245 thread_syscall_return(KERN_SUCCESS); 246 /*NOTREACHED*/ 247} 248 249/* 250 * thread_switch: 251 * 252 * Context switch. User may supply thread hint. 
253 */ 254kern_return_t 255thread_switch( 256 struct thread_switch_args *args) 257{ 258 register thread_t thread, self = current_thread(); 259 mach_port_name_t thread_name = args->thread_name; 260 int option = args->option; 261 mach_msg_timeout_t option_time = args->option_time; 262 uint32_t scale_factor = NSEC_PER_MSEC; 263 boolean_t reenable_workq_callback = FALSE; 264 boolean_t depress_option = FALSE; 265 boolean_t wait_option = FALSE; 266 267 /* 268 * Validate and process option. 269 */ 270 switch (option) { 271 272 case SWITCH_OPTION_NONE: 273 workqueue_thread_yielded(); 274 break; 275 case SWITCH_OPTION_WAIT: 276 wait_option = TRUE; 277 workqueue_thread_yielded(); 278 break; 279 case SWITCH_OPTION_DEPRESS: 280 depress_option = TRUE; 281 workqueue_thread_yielded(); 282 break; 283 case SWITCH_OPTION_DISPATCH_CONTENTION: 284 scale_factor = NSEC_PER_USEC; 285 wait_option = TRUE; 286 if (thread_switch_disable_workqueue_sched_callback()) 287 reenable_workq_callback = TRUE; 288 break; 289 case SWITCH_OPTION_OSLOCK_DEPRESS: 290 depress_option = TRUE; 291 if (thread_switch_disable_workqueue_sched_callback()) 292 reenable_workq_callback = TRUE; 293 break; 294 case SWITCH_OPTION_OSLOCK_WAIT: 295 wait_option = TRUE; 296 if (thread_switch_disable_workqueue_sched_callback()) 297 reenable_workq_callback = TRUE; 298 break; 299 default: 300 return (KERN_INVALID_ARGUMENT); 301 } 302 303 /* 304 * Translate the port name if supplied. 
305 */ 306 if (thread_name != MACH_PORT_NULL) { 307 ipc_port_t port; 308 309 if (ipc_port_translate_send(self->task->itk_space, 310 thread_name, &port) == KERN_SUCCESS) { 311 ip_reference(port); 312 ip_unlock(port); 313 314 thread = convert_port_to_thread(port); 315 ip_release(port); 316 317 if (thread == self) { 318 (void)thread_deallocate_internal(thread); 319 thread = THREAD_NULL; 320 } 321 } 322 else 323 thread = THREAD_NULL; 324 } 325 else 326 thread = THREAD_NULL; 327 328 329 if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) { 330 if (thread != THREAD_NULL) { 331 332 if (thread->task != self->task) { 333 /* 334 * OSLock boosting only applies to other threads 335 * in your same task (even if you have a port for 336 * a thread in another task) 337 */ 338 339 (void)thread_deallocate_internal(thread); 340 thread = THREAD_NULL; 341 } else { 342 /* 343 * Attempt to kick the lock owner up to our same IO throttling tier. 344 * If the thread is currently blocked in throttle_lowpri_io(), 345 * it will immediately break out. 346 */ 347 int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO); 348 349 set_thread_iotier_override(thread, new_policy); 350 } 351 } 352 } 353 354 /* 355 * Try to handoff if supplied. 356 */ 357 if (thread != THREAD_NULL) { 358 processor_t processor; 359 spl_t s; 360 361 s = splsched(); 362 thread_lock(thread); 363 364 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE, 365 thread_tid(thread), thread->state, 0, 0, 0); 366 367 /* 368 * Check that the thread is not bound 369 * to a different processor, and that realtime 370 * is not involved. 371 * 372 * Next, pull it off its run queue. If it 373 * doesn't come, it's not eligible. 
374 */ 375 processor = current_processor(); 376 if (processor->current_pri < BASEPRI_RTQUEUES && 377 thread->sched_pri < BASEPRI_RTQUEUES && 378 (thread->bound_processor == PROCESSOR_NULL || 379 thread->bound_processor == processor) && 380 thread_run_queue_remove(thread) ) { 381 /* 382 * Hah, got it!! 383 */ 384 thread_unlock(thread); 385 386 (void)thread_deallocate_internal(thread); 387 388 if (wait_option) 389 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, 390 option_time, scale_factor); 391 else 392 if (depress_option) 393 thread_depress_ms(option_time); 394 395 self->saved.swtch.option = option; 396 self->saved.swtch.reenable_workq_callback = reenable_workq_callback; 397 398 thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread); 399 /* NOTREACHED */ 400 } 401 402 thread_unlock(thread); 403 splx(s); 404 405 thread_deallocate(thread); 406 } 407 408 if (wait_option) 409 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor); 410 else 411 if (depress_option) 412 thread_depress_ms(option_time); 413 414 self->saved.swtch.option = option; 415 self->saved.swtch.reenable_workq_callback = reenable_workq_callback; 416 417 thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD); 418 419 if (depress_option) 420 thread_depress_abort_internal(self); 421 422 if (reenable_workq_callback) 423 thread_switch_enable_workqueue_sched_callback(); 424 425 return (KERN_SUCCESS); 426} 427 428/* 429 * Depress thread's priority to lowest possible for the specified interval, 430 * with a value of zero resulting in no timeout being scheduled. 
431 */ 432void 433thread_depress_abstime( 434 uint64_t interval) 435{ 436 register thread_t self = current_thread(); 437 uint64_t deadline; 438 spl_t s; 439 440 s = splsched(); 441 thread_lock(self); 442 if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) { 443 processor_t myprocessor = self->last_processor; 444 445 self->sched_pri = DEPRESSPRI; 446 myprocessor->current_pri = self->sched_pri; 447 self->sched_flags |= TH_SFLAG_DEPRESS; 448 449 if (interval != 0) { 450 clock_absolutetime_interval_to_deadline(interval, &deadline); 451 if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) 452 self->depress_timer_active++; 453 } 454 } 455 thread_unlock(self); 456 splx(s); 457} 458 459void 460thread_depress_ms( 461 mach_msg_timeout_t interval) 462{ 463 uint64_t abstime; 464 465 clock_interval_to_absolutetime_interval( 466 interval, NSEC_PER_MSEC, &abstime); 467 thread_depress_abstime(abstime); 468} 469 470/* 471 * Priority depression expiration. 472 */ 473void 474thread_depress_expire( 475 void *p0, 476 __unused void *p1) 477{ 478 thread_t thread = p0; 479 spl_t s; 480 481 s = splsched(); 482 thread_lock(thread); 483 if (--thread->depress_timer_active == 0) { 484 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; 485 SCHED(compute_priority)(thread, FALSE); 486 } 487 thread_unlock(thread); 488 splx(s); 489} 490 491/* 492 * Prematurely abort priority depression if there is one. 
493 */ 494kern_return_t 495thread_depress_abort_internal( 496 thread_t thread) 497{ 498 kern_return_t result = KERN_NOT_DEPRESSED; 499 spl_t s; 500 501 s = splsched(); 502 thread_lock(thread); 503 if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) { 504 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { 505 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; 506 SCHED(compute_priority)(thread, FALSE); 507 result = KERN_SUCCESS; 508 } 509 510 if (timer_call_cancel(&thread->depress_timer)) 511 thread->depress_timer_active--; 512 } 513 thread_unlock(thread); 514 splx(s); 515 516 return (result); 517} 518 519void 520thread_poll_yield( 521 thread_t self) 522{ 523 spl_t s; 524 525 assert(self == current_thread()); 526 527 s = splsched(); 528 if (self->sched_mode == TH_MODE_FIXED) { 529 uint64_t total_computation, abstime; 530 531 abstime = mach_absolute_time(); 532 total_computation = abstime - self->computation_epoch; 533 total_computation += self->computation_metered; 534 if (total_computation >= max_poll_computation) { 535 processor_t myprocessor = current_processor(); 536 ast_t preempt; 537 538 thread_lock(self); 539 if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) { 540 self->sched_pri = DEPRESSPRI; 541 myprocessor->current_pri = self->sched_pri; 542 } 543 self->computation_epoch = abstime; 544 self->computation_metered = 0; 545 self->sched_flags |= TH_SFLAG_POLLDEPRESS; 546 547 abstime += (total_computation >> sched_poll_yield_shift); 548 if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL)) 549 self->depress_timer_active++; 550 551 if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE) 552 ast_on(preempt); 553 554 thread_unlock(self); 555 } 556 } 557 splx(s); 558} 559 560 561void 562thread_yield_internal( 563 mach_msg_timeout_t ms) 564{ 565 processor_t myprocessor; 566 567 disable_preemption(); 568 myprocessor = current_processor(); 569 if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) { 570 
mp_enable_preemption(); 571 572 return; 573 } 574 enable_preemption(); 575 576 thread_depress_ms(ms); 577 578 thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD); 579 580 thread_depress_abort_internal(current_thread()); 581} 582 583