/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 111883 2003-03-04 21:03:05Z jhb $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

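/*
 * Illustrative sketch (an editorial example, not part of the original
 * file): the expected life cycle of a condition variable.  The softc
 * pointer "sc" and field "sc_cv" are hypothetical names.
 *
 *	cv_init(&sc->sc_cv, "example");		(before first use)
 *	...
 *	cv_destroy(&sc->sc_cv);			(wait queue must be empty)
 */
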
/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}

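/*
 * Illustrative usage (hypothetical names sc, sc_mtx, sc_cv, sc_avail):
 * the canonical consumer pattern.  The predicate is re-tested in a loop
 * because a waiter may be woken while the condition no longer holds;
 * cv_wait releases sc_mtx while asleep and reacquires it before
 * returning.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_avail == 0)
 *		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 *	sc->sc_avail--;
 *	mtx_unlock(&sc->sc_mtx);
 */
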
/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

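/*
 * Illustrative usage (hypothetical names): an interruptible wait must
 * distinguish a caught signal from a real wakeup and hand EINTR/ERESTART
 * back to the caller so the system call can fail or be restarted.
 *
 *	error = 0;
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_avail == 0) {
 *		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
 *		if (error != 0)
 *			break;
 *	}
 *	if (error == 0)
 *		sc->sc_avail--;
 *	mtx_unlock(&sc->sc_mtx);
 *	return (error);
 */
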
/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

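/*
 * Illustrative usage (hypothetical names): bounding a wait.  A timo of
 * hz ticks is roughly one second; EWOULDBLOCK means the timeout fired
 * before any cv_signal or cv_broadcast arrived.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_avail == 0)
 *		if (cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz) ==
 *		    EWOULDBLOCK)
 *			break;
 *	mtx_unlock(&sc->sc_mtx);
 */
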
/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

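/*
 * Illustrative usage (hypothetical names): a caller of the combined
 * variant typically treats EWOULDBLOCK as a timeout, EINTR/ERESTART as
 * a caught signal to bubble up, and 0 as a wakeup worth re-testing.
 *
 *	error = cv_timedwait_sig(&sc->sc_cv, &sc->sc_mtx, 10 * hz);
 *	if (error == EWOULDBLOCK)
 *		...			(roughly 10 seconds elapsed)
 *	else if (error == EINTR || error == ERESTART)
 *		...			(signal; return error to the caller)
 *	else
 *		...			(woken by cv_signal/cv_broadcast)
 */
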
/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

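/*
 * Illustrative producer side (hypothetical names): holding the same
 * mutex across the wakeup, as the comments above recommend, closes the
 * window between a waiter's predicate test and its sleep.  cv_signal
 * wakes one waiter, cv_broadcast all of them.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_avail++;
 *	cv_signal(&sc->sc_cv);		(or cv_broadcast(&sc->sc_cv))
 *	mtx_unlock(&sc->sc_mtx);
 */
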
/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}