kern_condvar.c revision 122352
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_condvar.c 122352 2003-11-09 09:17:26Z tanimura $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
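
/*
 * The condvar/mutex pairing is easiest to see in a small usage sketch.
 * The block below is illustrative only and not part of this file; the
 * structure and the names example_softc, sc_mtx, sc_cv and sc_ready are
 * hypothetical.  A condition variable typically lives next to the mutex
 * that protects the predicate it describes, is initialized once before
 * use and is destroyed only when no thread can still be waiting on it.
 */
#if 0
struct example_softc {
	struct mtx	sc_mtx;		/* protects sc_ready */
	struct cv	sc_cv;		/* signalled when sc_ready changes */
	int		sc_ready;
};

static void
example_attach(struct example_softc *sc)
{

	mtx_init(&sc->sc_mtx, "example", NULL, MTX_DEF);
	cv_init(&sc->sc_cv, "examplecv");
	sc->sc_ready = 0;
}

static void
example_detach(struct example_softc *sc)
{

	cv_destroy(&sc->sc_cv);		/* asserts the wait queue is empty */
	mtx_destroy(&sc->sc_mtx);
}
#endif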

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	sig = cursig(td);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
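
/*
 * A minimal consumer sketch for cv_wait() (hypothetical names, reusing the
 * example_softc from the sketch above): because the mutex is released while
 * the thread sleeps and reacquired before cv_wait() returns, other threads
 * may have changed the predicate in between, so it must be rechecked in a
 * loop rather than assumed true after a single wakeup.
 */
#if 0
static void
example_wait_ready(struct example_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	while (sc->sc_ready == 0)
		cv_wait(&sc->sc_cv, &sc->sc_mtx);
	/* sc_ready is nonzero and sc_mtx is held again at this point. */
	mtx_unlock(&sc->sc_mtx);
}
#endif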

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0) {
		sig = cursig(td);	/* XXXKSE */
		if (sig == 0 && td->td_flags & TDF_INTERRUPT)
			rval = td->td_intrval;
	}
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
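
/*
 * Interruptible-wait sketch (hypothetical names): cv_wait_sig() returns 0
 * on a normal wakeup and EINTR or ERESTART when a signal was caught, so
 * the loop must also stop on error and hand the value back to the caller,
 * which lets the syscall layer fail or restart the system call.
 */
#if 0
static int
example_wait_ready_sig(struct example_softc *sc)
{
	int error;

	error = 0;
	mtx_lock(&sc->sc_mtx);
	while (sc->sc_ready == 0 && error == 0)
		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
	mtx_unlock(&sc->sc_mtx);
	return (error);
}
#endif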

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
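
/*
 * Timed-wait sketch (hypothetical names): the timeout argument is in
 * ticks, so a roughly five second bound is written as 5 * hz.  A return
 * value of EWOULDBLOCK means the callout fired before the condition
 * variable was signalled.
 */
#if 0
static int
example_wait_ready_timed(struct example_softc *sc)
{
	int error;

	error = 0;
	mtx_lock(&sc->sc_mtx);
	while (sc->sc_ready == 0 && error == 0)
		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, 5 * hz);
	mtx_unlock(&sc->sc_mtx);
	return (error);		/* 0 on wakeup, EWOULDBLOCK on timeout */
}
#endif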

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0) {
		sig = cursig(td);
		if (sig == 0 && td->td_flags & TDF_INTERRUPT)
			rval = td->td_intrval;
	}
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
 * the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcastpri(struct cv *cvp, int pri)
{
	struct thread	*td;

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		if (pri >= PRI_MIN && pri <= PRI_MAX) {
			td = TAILQ_FIRST(&cvp->cv_waitq);
			if (td->td_priority > pri)
				td->td_priority = pri;
		}
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
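
/*
 * Producer-side sketch (hypothetical names): the predicate is updated and
 * the wakeup issued while holding the same mutex the waiters pass to
 * cv_wait(), as the comments above recommend.  cv_broadcast() is assumed
 * here to be the condvar.h wrapper around cv_broadcastpri() that leaves
 * waiter priorities alone; cv_signal() would wake only the first waiter.
 */
#if 0
static void
example_set_ready(struct example_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	sc->sc_ready = 1;
	cv_broadcast(&sc->sc_cv);
	mtx_unlock(&sc->sc_mtx);
}
#endif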

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}