/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 109862 2003-01-26 04:00:39Z jeff $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
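
/*
 * Example lifecycle (an illustrative sketch, not part of this file; the
 * softc field names are hypothetical): code using this API pairs cv_init
 * with cv_destroy over the lifetime of the object embedding the cv, and
 * wakes all waiters before destroying it:
 *
 *	cv_init(&sc->sc_cv, "foowt");
 *	...use the cv...
 *	cv_broadcast(&sc->sc_cv);
 *	cv_destroy(&sc->sc_cv);
 *
 * cv_broadcast dequeues every waiter, so the wait queue is empty by the
 * time cv_destroy runs; destroying a cv with threads still queued trips
 * the KASSERT above under INVARIANTS.
 */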

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
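
/*
 * Example usage (an illustrative sketch, not part of this file; the
 * mutex "foo_mtx", cv "foo_cv" and flag "foo_ready" are hypothetical).
 * Since a thread can be resumed by cv_broadcast without its particular
 * condition being true, the wait belongs in a loop that rechecks the
 * predicate while the mutex is held:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0)
 *		cv_wait(&foo_cv, &foo_mtx);
 *	foo_ready = 0;
 *	mtx_unlock(&foo_mtx);
 *
 * cv_wait re-acquires foo_mtx before returning, so each predicate check
 * is performed under the lock.
 */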

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
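
/*
 * Example (an illustrative sketch with hypothetical names): callers of
 * cv_wait_sig recheck the predicate on success and propagate a nonzero
 * return so the system call can be interrupted (EINTR) or restarted
 * (ERESTART):
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0) {
 *		error = cv_wait_sig(&foo_cv, &foo_mtx);
 *		if (error != 0) {
 *			mtx_unlock(&foo_mtx);
 *			return (error);
 *		}
 *	}
 *	mtx_unlock(&foo_mtx);
 */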

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
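
/*
 * Example (an illustrative sketch with hypothetical names): a bounded
 * wait that treats EWOULDBLOCK as an ordinary timeout rather than an
 * error, waiting at most one second per pass.  timo is in ticks, so hz
 * ticks correspond to one second:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0) {
 *		error = cv_timedwait(&foo_cv, &foo_mtx, hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&foo_mtx);
 */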

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
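
/*
 * Example (an illustrative sketch with hypothetical names): the three
 * return classes of cv_timedwait_sig are typically handled separately:
 * 0 means a wakeup, so recheck the predicate; EWOULDBLOCK means the
 * timo ticks ran out; EINTR/ERESTART must be returned so the pending
 * signal is serviced:
 *
 *	error = cv_timedwait_sig(&foo_cv, &foo_mtx, 10 * hz);
 *	if (error == 0)
 *		...woken; recheck foo_ready under foo_mtx...
 *	else if (error == EWOULDBLOCK)
 *		...timed out after 10 seconds...
 *	else
 *		return (error);		(EINTR or ERESTART)
 */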

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
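
/*
 * Example (an illustrative sketch with hypothetical names): a producer
 * makes the condition true and then signals, holding the same mutex the
 * waiters pass to cv_wait, as recommended above:
 *
 *	mtx_lock(&foo_mtx);
 *	foo_ready = 1;
 *	cv_signal(&foo_cv);
 *	mtx_unlock(&foo_mtx);
 *
 * Holding the mutex across cv_signal closes the window in which a
 * waiter could test the predicate and go to sleep after the signal has
 * already been delivered.
 */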

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
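
/*
 * Example (an illustrative sketch with hypothetical names): broadcast is
 * the right call when every waiter must reevaluate, e.g. on teardown:
 *
 *	mtx_lock(&foo_mtx);
 *	foo_dying = 1;
 *	cv_broadcast(&foo_cv);
 *	mtx_unlock(&foo_mtx);
 *
 * Each waiter wakes in turn, rechecks its predicate under foo_mtx and
 * sees the dying flag.
 */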

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}