1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59#include <mach/boolean.h>
60#include <mach/thread_switch.h>
61#include <ipc/ipc_port.h>
62#include <ipc/ipc_space.h>
63#include <kern/counters.h>
64#include <kern/ipc_kobject.h>
65#include <kern/processor.h>
66#include <kern/sched.h>
67#include <kern/sched_prim.h>
68#include <kern/spl.h>
69#include <kern/task.h>
70#include <kern/thread.h>
71#include <mach/policy.h>
72
73#include <kern/syscall_subr.h>
74#include <mach/mach_host_server.h>
75#include <mach/mach_syscalls.h>
76#include <sys/kdebug.h>
77
78#ifdef MACH_BSD
79extern void workqueue_thread_yielded(void);
80extern sched_call_t workqueue_get_sched_callback(void);
81#endif /* MACH_BSD */
82
83
84/* Called from commpage to take a delayed preemption when exiting
85 * the "Preemption Free Zone" (PFZ).
86 */
87kern_return_t
88pfz_exit(
89__unused	struct pfz_exit_args *args)
90{
91	/* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */
92
93	return (KERN_SUCCESS);
94}
95
96
97/*
98 *	swtch and swtch_pri both attempt to context switch (logic in
99 *	thread_block no-ops the context switch if nothing would happen).
100 *	A boolean is returned that indicates whether there is anything
101 *	else runnable.
102 *
103 *	This boolean can be used by a thread waiting on a
104 *	lock or condition:  If FALSE is returned, the thread is justified
105 *	in becoming a resource hog by continuing to spin because there's
106 *	nothing else useful that the processor could do.  If TRUE is
107 *	returned, the thread should make one more check on the
108 *	lock and then be a good citizen and really suspend.
109 */
110
111static void
112swtch_continue(void)
113{
114	register processor_t	myprocessor;
115    boolean_t				result;
116
117    disable_preemption();
118	myprocessor = current_processor();
119	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
120	enable_preemption();
121
122	thread_syscall_return(result);
123	/*NOTREACHED*/
124}
125
126boolean_t
127swtch(
128	__unused struct swtch_args *args)
129{
130	register processor_t	myprocessor;
131	boolean_t				result;
132
133	disable_preemption();
134	myprocessor = current_processor();
135	if (SCHED(processor_queue_empty)(myprocessor) &&	rt_runq.count == 0) {
136		mp_enable_preemption();
137
138		return (FALSE);
139	}
140	enable_preemption();
141
142	counter(c_swtch_block++);
143
144	thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD);
145
146	disable_preemption();
147	myprocessor = current_processor();
148	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
149	enable_preemption();
150
151	return (result);
152}
153
154static void
155swtch_pri_continue(void)
156{
157	register processor_t	myprocessor;
158    boolean_t				result;
159
160	thread_depress_abort_internal(current_thread());
161
162    disable_preemption();
163	myprocessor = current_processor();
164	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
165	mp_enable_preemption();
166
167	thread_syscall_return(result);
168	/*NOTREACHED*/
169}
170
171boolean_t
172swtch_pri(
173__unused	struct swtch_pri_args *args)
174{
175	register processor_t	myprocessor;
176	boolean_t				result;
177
178	disable_preemption();
179	myprocessor = current_processor();
180	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
181		mp_enable_preemption();
182
183		return (FALSE);
184	}
185	enable_preemption();
186
187	counter(c_swtch_pri_block++);
188
189	thread_depress_abstime(thread_depress_time);
190
191	thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);
192
193	thread_depress_abort_internal(current_thread());
194
195	disable_preemption();
196	myprocessor = current_processor();
197	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
198	enable_preemption();
199
200	return (result);
201}
202
203static int
204thread_switch_disable_workqueue_sched_callback(void)
205{
206	sched_call_t callback = workqueue_get_sched_callback();
207	thread_t self = current_thread();
208	if (!callback || self->sched_call != callback) {
209		return FALSE;
210	}
211	spl_t s = splsched();
212	thread_lock(self);
213	thread_sched_call(self, NULL);
214	thread_unlock(self);
215	splx(s);
216	return TRUE;
217}
218
219static void
220thread_switch_enable_workqueue_sched_callback(void)
221{
222	sched_call_t callback = workqueue_get_sched_callback();
223	thread_t self = current_thread();
224	spl_t s = splsched();
225	thread_lock(self);
226	thread_sched_call(self, callback);
227	thread_unlock(self);
228	splx(s);
229}
230
231static void
232thread_switch_continue(void)
233{
234	register thread_t	self = current_thread();
235	int					option = self->saved.swtch.option;
236	boolean_t			reenable_workq_callback = self->saved.swtch.reenable_workq_callback;
237
238
239	if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
240		thread_depress_abort_internal(self);
241
242	if (reenable_workq_callback)
243		thread_switch_enable_workqueue_sched_callback();
244
245	thread_syscall_return(KERN_SUCCESS);
246	/*NOTREACHED*/
247}
248
249/*
250 *	thread_switch:
251 *
252 *	Context switch.  User may supply thread hint.
253 */
254kern_return_t
255thread_switch(
256	struct thread_switch_args *args)
257{
258	register thread_t		thread, self = current_thread();
259	mach_port_name_t		thread_name = args->thread_name;
260	int						option = args->option;
261	mach_msg_timeout_t		option_time = args->option_time;
262	uint32_t				scale_factor = NSEC_PER_MSEC;
263	boolean_t				reenable_workq_callback = FALSE;
264	boolean_t				depress_option = FALSE;
265	boolean_t				wait_option = FALSE;
266
267    /*
268     *	Validate and process option.
269     */
270    switch (option) {
271
272	case SWITCH_OPTION_NONE:
273		workqueue_thread_yielded();
274		break;
275	case SWITCH_OPTION_WAIT:
276		wait_option = TRUE;
277		workqueue_thread_yielded();
278		break;
279	case SWITCH_OPTION_DEPRESS:
280		depress_option = TRUE;
281		workqueue_thread_yielded();
282		break;
283	case SWITCH_OPTION_DISPATCH_CONTENTION:
284		scale_factor = NSEC_PER_USEC;
285		wait_option = TRUE;
286		if (thread_switch_disable_workqueue_sched_callback())
287			reenable_workq_callback = TRUE;
288		break;
289	case SWITCH_OPTION_OSLOCK_DEPRESS:
290		depress_option = TRUE;
291		if (thread_switch_disable_workqueue_sched_callback())
292			reenable_workq_callback = TRUE;
293		break;
294	case SWITCH_OPTION_OSLOCK_WAIT:
295		wait_option = TRUE;
296		if (thread_switch_disable_workqueue_sched_callback())
297			reenable_workq_callback = TRUE;
298		break;
299	default:
300	    return (KERN_INVALID_ARGUMENT);
301    }
302
303	/*
304	 * Translate the port name if supplied.
305	 */
306    if (thread_name != MACH_PORT_NULL) {
307		ipc_port_t			port;
308
309		if (ipc_port_translate_send(self->task->itk_space,
310									thread_name, &port) == KERN_SUCCESS) {
311			ip_reference(port);
312			ip_unlock(port);
313
314			thread = convert_port_to_thread(port);
315			ip_release(port);
316
317			if (thread == self) {
318				(void)thread_deallocate_internal(thread);
319				thread = THREAD_NULL;
320			}
321		}
322		else
323			thread = THREAD_NULL;
324	}
325	else
326		thread = THREAD_NULL;
327
328
329	if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
330		if (thread != THREAD_NULL) {
331
332			if (thread->task != self->task) {
333				/*
334				 * OSLock boosting only applies to other threads
335				 * in your same task (even if you have a port for
336				 * a thread in another task)
337				 */
338
339				(void)thread_deallocate_internal(thread);
340				thread = THREAD_NULL;
341			} else {
342				/*
343				 * Attempt to kick the lock owner up to our same IO throttling tier.
344				 * If the thread is currently blocked in throttle_lowpri_io(),
345				 * it will immediately break out.
346				 */
347				int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
348
349				set_thread_iotier_override(thread, new_policy);
350			}
351		}
352	}
353
354	/*
355	 * Try to handoff if supplied.
356	 */
357	if (thread != THREAD_NULL) {
358		processor_t		processor;
359		spl_t			s;
360
361		s = splsched();
362		thread_lock(thread);
363
364		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
365							  thread_tid(thread), thread->state, 0, 0, 0);
366
367		/*
368		 *	Check that the thread is not bound
369		 *	to a different processor, and that realtime
370		 *	is not involved.
371		 *
372		 *	Next, pull it off its run queue.  If it
373		 *	doesn't come, it's not eligible.
374		 */
375		processor = current_processor();
376		if (processor->current_pri < BASEPRI_RTQUEUES			&&
377			thread->sched_pri < BASEPRI_RTQUEUES				&&
378			(thread->bound_processor == PROCESSOR_NULL	||
379			 thread->bound_processor == processor)				&&
380				thread_run_queue_remove(thread)							) {
381			/*
382			 *	Hah, got it!!
383			 */
384			thread_unlock(thread);
385
386			(void)thread_deallocate_internal(thread);
387
388			if (wait_option)
389				assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
390														option_time, scale_factor);
391			else
392			if (depress_option)
393				thread_depress_ms(option_time);
394
395			self->saved.swtch.option = option;
396			self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
397
398			thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread);
399			/* NOTREACHED */
400		}
401
402		thread_unlock(thread);
403		splx(s);
404
405		thread_deallocate(thread);
406	}
407
408	if (wait_option)
409		assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
410	else
411	if (depress_option)
412		thread_depress_ms(option_time);
413
414	self->saved.swtch.option = option;
415	self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
416
417	thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD);
418
419	if (depress_option)
420		thread_depress_abort_internal(self);
421
422	if (reenable_workq_callback)
423		thread_switch_enable_workqueue_sched_callback();
424
425    return (KERN_SUCCESS);
426}
427
428/*
429 * Depress thread's priority to lowest possible for the specified interval,
430 * with a value of zero resulting in no timeout being scheduled.
431 */
432void
433thread_depress_abstime(
434	uint64_t				interval)
435{
436	register thread_t		self = current_thread();
437	uint64_t				deadline;
438    spl_t					s;
439
440    s = splsched();
441    thread_lock(self);
442	if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
443		processor_t		myprocessor = self->last_processor;
444
445		self->sched_pri = DEPRESSPRI;
446		myprocessor->current_pri = self->sched_pri;
447		self->sched_flags |= TH_SFLAG_DEPRESS;
448
449		if (interval != 0) {
450			clock_absolutetime_interval_to_deadline(interval, &deadline);
451			if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
452				self->depress_timer_active++;
453		}
454	}
455	thread_unlock(self);
456    splx(s);
457}
458
459void
460thread_depress_ms(
461	mach_msg_timeout_t		interval)
462{
463	uint64_t		abstime;
464
465	clock_interval_to_absolutetime_interval(
466							interval, NSEC_PER_MSEC, &abstime);
467	thread_depress_abstime(abstime);
468}
469
470/*
471 *	Priority depression expiration.
472 */
473void
474thread_depress_expire(
475	void			*p0,
476	__unused void	*p1)
477{
478	thread_t		thread = p0;
479    spl_t			s;
480
481    s = splsched();
482    thread_lock(thread);
483	if (--thread->depress_timer_active == 0) {
484		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
485		SCHED(compute_priority)(thread, FALSE);
486	}
487    thread_unlock(thread);
488    splx(s);
489}
490
491/*
492 *	Prematurely abort priority depression if there is one.
493 */
494kern_return_t
495thread_depress_abort_internal(
496	thread_t				thread)
497{
498    kern_return_t 			result = KERN_NOT_DEPRESSED;
499    spl_t					s;
500
501    s = splsched();
502    thread_lock(thread);
503	if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
504		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
505			thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
506			SCHED(compute_priority)(thread, FALSE);
507			result = KERN_SUCCESS;
508		}
509
510		if (timer_call_cancel(&thread->depress_timer))
511			thread->depress_timer_active--;
512	}
513	thread_unlock(thread);
514    splx(s);
515
516    return (result);
517}
518
519void
520thread_poll_yield(
521	thread_t		self)
522{
523	spl_t			s;
524
525	assert(self == current_thread());
526
527	s = splsched();
528	if (self->sched_mode == TH_MODE_FIXED) {
529		uint64_t			total_computation, abstime;
530
531		abstime = mach_absolute_time();
532		total_computation = abstime - self->computation_epoch;
533		total_computation += self->computation_metered;
534		if (total_computation >= max_poll_computation) {
535			processor_t		myprocessor = current_processor();
536			ast_t			preempt;
537
538			thread_lock(self);
539			if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
540				self->sched_pri = DEPRESSPRI;
541				myprocessor->current_pri = self->sched_pri;
542			}
543			self->computation_epoch = abstime;
544			self->computation_metered = 0;
545			self->sched_flags |= TH_SFLAG_POLLDEPRESS;
546
547			abstime += (total_computation >> sched_poll_yield_shift);
548			if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
549				self->depress_timer_active++;
550
551			if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
552				ast_on(preempt);
553
554			thread_unlock(self);
555		}
556	}
557	splx(s);
558}
559
560
561void
562thread_yield_internal(
563	mach_msg_timeout_t	ms)
564{
565	processor_t	myprocessor;
566
567	disable_preemption();
568	myprocessor = current_processor();
569	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
570		mp_enable_preemption();
571
572		return;
573	}
574	enable_preemption();
575
576	thread_depress_ms(ms);
577
578	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
579
580	thread_depress_abort_internal(current_thread());
581}
582
583