/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 100913 2002-07-30 06:54:05Z tanimura $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT((td)->td_state == TDS_RUNNING,				\
	    ("%s: not TDS_RUNNING", __func__));				\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);
static void cv_check_upcall(struct thread *td);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
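
/*
 * Illustrative lifecycle sketch (not part of this file; the "sc" softc,
 * its fields and the wait message below are hypothetical).  A cv must be
 * initialized before any thread waits on or signals it, and may only be
 * destroyed once its wait queue is known to be empty:
 *
 *	cv_init(&sc->sc_cv, "scwait");
 *	...
 *	cv_destroy(&sc->sc_cv);
 */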

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Decide if we need to queue an upcall.
 * This is copied from msleep(), perhaps this should be a common function.
 */
static void
cv_check_upcall(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run. Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
	    (td->td_flags & TDF_INMSLEEP) == 0) {
		/*
		 * If we have no queued work to do,
		 * upcall to the UTS to see if it has more work.
		 * We don't need to upcall now, just queue it.
		 */
		if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
			/* Don't recurse here! */
			td->td_flags |= TDF_INMSLEEP;
			thread_schedule_upcall(td, td->td_kse);
			td->td_flags &= ~TDF_INMSLEEP;
		}
	}
}

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	td->td_state = TDS_SLP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	cv_check_upcall(td);
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);	/* XXXKSE */
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_state = TDS_RUNNING;	/* XXXKSE */
	} else if (td->td_wchan != NULL) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	/*
	 * The thread may be sitting on a slpque if asleep() was called;
	 * remove it before re-adding.
	 */
	if (td->td_wchan != NULL)
		unsleep(td);

	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
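
/*
 * Illustrative consumer-side sketch (hypothetical "sc" softc and fields):
 * a waiter holds the mutex, re-checks its predicate in a loop, and lets
 * cv_wait drop and re-acquire the mutex around the sleep.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (!sc->sc_ready)
 *		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 *	... consume the now-true condition ...
 *	mtx_unlock(&sc->sc_mtx);
 */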

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
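
/*
 * Illustrative sketch for the interruptible variant (hypothetical names):
 * the return value must be checked, and EINTR/ERESTART normally propagated
 * so the syscall layer can deliver or restart on the pending signal.
 *
 *	error = 0;
 *	mtx_lock(&sc->sc_mtx);
 *	while (!sc->sc_ready && error == 0)
 *		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
 *	mtx_unlock(&sc->sc_mtx);
 *	if (error != 0)
 *		return (error);
 */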

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
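
/*
 * Illustrative sketch for the bounded wait (hypothetical names): the
 * timeout is given in ticks, so a caller converts from seconds with hz
 * and treats EWOULDBLOCK as "condition still not true".
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (!sc->sc_ready) {
 *		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, 5 * hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 */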

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART
 * if a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = 0;
	if (td->td_state == TDS_SLP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
			updatepri(td);
		td->td_ksegrp->kg_slptime = 0;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_state = TDS_SWAPPED;
			if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) {
				td->td_proc->p_sflag |= PS_SWAPINREQ;
				wakeup(&proc0);
			}
		}
		/* END INLINE EXPANSION */
	}
}

/*
 * Signal a condition variable, waking up one waiting thread.  This will also
 * wake up the swapper if the process is not in memory, so that it can bring
 * the sleeping process in.  Note that this may also result in additional
 * threads being made runnable.  Should be called with the same mutex as was
 * passed to cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
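
/*
 * Illustrative producer-side sketch (hypothetical names): the predicate is
 * updated with the same mutex the waiters pass to cv_wait, then either one
 * waiter (cv_signal) or all of them (cv_broadcast) are woken.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_ready = 1;
 *	cv_broadcast(&sc->sc_cv);
 *	mtx_unlock(&sc->sc_mtx);
 */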

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_lock_spin(&sched_lock);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wchan = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		setrunqueue(td);
	} else if (td->td_wchan != NULL) {
		if (td->td_state == TDS_SLP)	/* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (td->td_wchan != NULL) {
			if (td->td_state == TDS_SLP)
				setrunnable(td);
			else
				cv_waitq_remove(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}