/* kern_condvar.c revision 78637 */
1/*-
2 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/kern/kern_condvar.c 78637 2001-06-22 23:06:38Z jhb $
27 */
28
29#include "opt_ktrace.h"
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <sys/proc.h>
36#include <sys/kernel.h>
37#include <sys/ktr.h>
38#include <sys/condvar.h>
39#include <sys/signalvar.h>
40#include <sys/resourcevar.h>
41#ifdef KTRACE
42#include <sys/uio.h>
43#include <sys/ktrace.h>
44#endif
45
46/*
47 * Common sanity checks for cv_wait* functions.
48 */
49#define	CV_ASSERT(cvp, mp, p) do {					\
50	KASSERT((p) != NULL, ("%s: curproc NULL", __FUNCTION__));	\
51	KASSERT((p)->p_stat == SRUN, ("%s: not SRUN", __FUNCTION__));	\
52	KASSERT((cvp) != NULL, ("%s: cvp NULL", __FUNCTION__));		\
53	KASSERT((mp) != NULL, ("%s: mp NULL", __FUNCTION__));		\
54	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
55} while (0)
56
57#ifdef CV_DEBUG
58#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
59	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
60		/* Only waiter. */					\
61		(cvp)->cv_mtx = (mp);					\
62	} else {							\
63		/*							\
64		 * Other waiter; assert that we're using the		\
65		 * same mutex.						\
66		 */							\
67		KASSERT((cvp)->cv_mtx == (mp),				\
68		    ("%s: Multiple mutexes", __FUNCTION__));		\
69	}								\
70} while (0)
71#define	CV_SIGNAL_VALIDATE(cvp) do {					\
72	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
73		KASSERT(mtx_owned((cvp)->cv_mtx),			\
74		    ("%s: Mutex not owned", __FUNCTION__));		\
75	}								\
76} while (0)
77#else
78#define	CV_WAIT_VALIDATE(cvp, mp)
79#define	CV_SIGNAL_VALIDATE(cvp)
80#endif
81
82static void cv_timedwait_end(void *arg);
83
84/*
85 * Initialize a condition variable.  Must be called before use.
86 */
87void
88cv_init(struct cv *cvp, const char *desc)
89{
90
91	TAILQ_INIT(&cvp->cv_waitq);
92	cvp->cv_mtx = NULL;
93	cvp->cv_description = desc;
94}
95
96/*
97 * Destroy a condition variable.  The condition variable must be re-initialized
98 * in order to be re-used.
99 */
100void
101cv_destroy(struct cv *cvp)
102{
103
104	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __FUNCTION__));
105}
106
107/*
108 * Common code for cv_wait* functions.  All require sched_lock.
109 */
110
111/*
112 * Switch context.
113 */
114static __inline void
115cv_switch(struct proc *p)
116{
117
118	p->p_stat = SSLEEP;
119	p->p_stats->p_ru.ru_nvcsw++;
120	mi_switch();
121	CTR3(KTR_PROC, "cv_switch: resume proc %p (pid %d, %s)", p, p->p_pid,
122	    p->p_comm);
123}
124
125/*
126 * Switch context, catching signals.
127 */
128static __inline int
129cv_switch_catch(struct proc *p)
130{
131	int sig;
132
133	/*
134	 * We put ourselves on the sleep queue and start our timeout before
135	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
136	 * both) could occur while we were stopped.  A SIGCONT would cause us to
137	 * be marked as SSLEEP without resuming us, thus we must be ready for
138	 * sleep when CURSIG is called.  If the wakeup happens while we're
139	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
140	 */
141	p->p_sflag |= PS_SINTR;
142	mtx_unlock_spin(&sched_lock);
143	PROC_LOCK(p);
144	sig = CURSIG(p);
145	mtx_lock_spin(&sched_lock);
146	PROC_UNLOCK_NOSWITCH(p);
147	if (sig != 0) {
148		if (p->p_wchan != NULL)
149			cv_waitq_remove(p);
150		p->p_stat = SRUN;
151	} else if (p->p_wchan != NULL) {
152		cv_switch(p);
153	}
154	p->p_sflag &= ~PS_SINTR;
155
156	return sig;
157}
158
159/*
160 * Add a process to the wait queue of a condition variable.
161 */
162static __inline void
163cv_waitq_add(struct cv *cvp, struct proc *p)
164{
165
166	/*
167	 * Process may be sitting on a slpque if asleep() was called, remove it
168	 * before re-adding.
169	 */
170	if (p->p_wchan != NULL)
171		unsleep(p);
172
173	p->p_sflag |= PS_CVWAITQ;
174	p->p_wchan = cvp;
175	p->p_wmesg = cvp->cv_description;
176	p->p_slptime = 0;
177	p->p_pri.pri_native = p->p_pri.pri_level;
178	CTR3(KTR_PROC, "cv_waitq_add: proc %p (pid %d, %s)", p, p->p_pid,
179	    p->p_comm);
180	TAILQ_INSERT_TAIL(&cvp->cv_waitq, p, p_slpq);
181}
182
183/*
184 * Wait on a condition variable.  The current process is placed on the condition
185 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
186 * condition variable will resume the process.  The mutex is released before
187 * sleeping and will be held on return.  It is recommended that the mutex be
188 * held when cv_signal or cv_broadcast are called.
189 */
190void
191cv_wait(struct cv *cvp, struct mtx *mp)
192{
193	struct proc *p;
194	WITNESS_SAVE_DECL(mp);
195
196	p = CURPROC;
197#ifdef KTRACE
198	if (p && KTRPOINT(p, KTR_CSW))
199		ktrcsw(p->p_tracep, 1, 0);
200#endif
201	CV_ASSERT(cvp, mp, p);
202	WITNESS_SLEEP(0, &mp->mtx_object);
203	WITNESS_SAVE(&mp->mtx_object, mp);
204
205	mtx_lock_spin(&sched_lock);
206	if (cold || panicstr) {
207		/*
208		 * After a panic, or during autoconfiguration, just give
209		 * interrupts a chance, then just return; don't run any other
210		 * procs or panic below, in case this is the idle process and
211		 * already asleep.
212		 */
213		mtx_unlock_spin(&sched_lock);
214		return;
215	}
216	CV_WAIT_VALIDATE(cvp, mp);
217
218	DROP_GIANT_NOSWITCH();
219	mtx_unlock_flags(mp, MTX_NOSWITCH);
220
221	cv_waitq_add(cvp, p);
222	cv_switch(p);
223
224	mtx_unlock_spin(&sched_lock);
225#ifdef KTRACE
226	if (KTRPOINT(p, KTR_CSW))
227		ktrcsw(p->p_tracep, 0, 0);
228#endif
229	PICKUP_GIANT();
230	mtx_lock(mp);
231	WITNESS_RESTORE(&mp->mtx_object, mp);
232}
233
234/*
235 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
236 * the process was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
237 * a signal was caught.  If ERESTART is returned the system call should be
238 * restarted if possible.
239 */
240int
241cv_wait_sig(struct cv *cvp, struct mtx *mp)
242{
243	struct proc *p;
244	int rval;
245	int sig;
246	WITNESS_SAVE_DECL(mp);
247
248	p = CURPROC;
249	rval = 0;
250#ifdef KTRACE
251	if (p && KTRPOINT(p, KTR_CSW))
252		ktrcsw(p->p_tracep, 1, 0);
253#endif
254	CV_ASSERT(cvp, mp, p);
255	WITNESS_SLEEP(0, &mp->mtx_object);
256	WITNESS_SAVE(&mp->mtx_object, mp);
257
258	mtx_lock_spin(&sched_lock);
259	if (cold || panicstr) {
260		/*
261		 * After a panic, or during autoconfiguration, just give
262		 * interrupts a chance, then just return; don't run any other
263		 * procs or panic below, in case this is the idle process and
264		 * already asleep.
265		 */
266		mtx_unlock_spin(&sched_lock);
267		return 0;
268	}
269	CV_WAIT_VALIDATE(cvp, mp);
270
271	DROP_GIANT_NOSWITCH();
272	mtx_unlock_flags(mp, MTX_NOSWITCH);
273
274	cv_waitq_add(cvp, p);
275	sig = cv_switch_catch(p);
276
277	mtx_unlock_spin(&sched_lock);
278	PICKUP_GIANT();
279
280	PROC_LOCK(p);
281	if (sig == 0)
282		sig = CURSIG(p);
283	if (sig != 0) {
284		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
285			rval = EINTR;
286		else
287			rval = ERESTART;
288	}
289	PROC_UNLOCK(p);
290
291#ifdef KTRACE
292	mtx_lock(&Giant);
293	if (KTRPOINT(p, KTR_CSW))
294		ktrcsw(p->p_tracep, 0, 0);
295	mtx_unlock(&Giant);
296#endif
297	mtx_lock(mp);
298	WITNESS_RESTORE(&mp->mtx_object, mp);
299
300	return (rval);
301}
302
303/*
304 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
305 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
306 * expires.
307 */
308int
309cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
310{
311	struct proc *p;
312	int rval;
313	WITNESS_SAVE_DECL(mp);
314
315	p = CURPROC;
316	rval = 0;
317#ifdef KTRACE
318	if (p && KTRPOINT(p, KTR_CSW))
319		ktrcsw(p->p_tracep, 1, 0);
320#endif
321	CV_ASSERT(cvp, mp, p);
322	WITNESS_SLEEP(0, &mp->mtx_object);
323	WITNESS_SAVE(&mp->mtx_object, mp);
324
325	mtx_lock_spin(&sched_lock);
326	if (cold || panicstr) {
327		/*
328		 * After a panic, or during autoconfiguration, just give
329		 * interrupts a chance, then just return; don't run any other
330		 * procs or panic below, in case this is the idle process and
331		 * already asleep.
332		 */
333		mtx_unlock_spin(&sched_lock);
334		return 0;
335	}
336	CV_WAIT_VALIDATE(cvp, mp);
337
338	DROP_GIANT_NOSWITCH();
339	mtx_unlock_flags(mp, MTX_NOSWITCH);
340
341	cv_waitq_add(cvp, p);
342	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
343	cv_switch(p);
344
345	if (p->p_sflag & PS_TIMEOUT) {
346		p->p_sflag &= ~PS_TIMEOUT;
347		rval = EWOULDBLOCK;
348	} else
349		callout_stop(&p->p_slpcallout);
350
351	mtx_unlock_spin(&sched_lock);
352#ifdef KTRACE
353	if (KTRPOINT(p, KTR_CSW))
354		ktrcsw(p->p_tracep, 0, 0);
355#endif
356	PICKUP_GIANT();
357	mtx_lock(mp);
358	WITNESS_RESTORE(&mp->mtx_object, mp);
359
360	return (rval);
361}
362
363/*
364 * Wait on a condition variable for at most timo/hz seconds, allowing
365 * interruption by signals.  Returns 0 if the process was resumed by cv_signal
366 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
367 * a signal was caught.
368 */
369int
370cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
371{
372	struct proc *p;
373	int rval;
374	int sig;
375	WITNESS_SAVE_DECL(mp);
376
377	p = CURPROC;
378	rval = 0;
379#ifdef KTRACE
380	if (p && KTRPOINT(p, KTR_CSW))
381		ktrcsw(p->p_tracep, 1, 0);
382#endif
383	CV_ASSERT(cvp, mp, p);
384	WITNESS_SLEEP(0, &mp->mtx_object);
385	WITNESS_SAVE(&mp->mtx_object, mp);
386
387	mtx_lock_spin(&sched_lock);
388	if (cold || panicstr) {
389		/*
390		 * After a panic, or during autoconfiguration, just give
391		 * interrupts a chance, then just return; don't run any other
392		 * procs or panic below, in case this is the idle process and
393		 * already asleep.
394		 */
395		mtx_unlock_spin(&sched_lock);
396		return 0;
397	}
398	CV_WAIT_VALIDATE(cvp, mp);
399
400	DROP_GIANT_NOSWITCH();
401	mtx_unlock_flags(mp, MTX_NOSWITCH);
402
403	cv_waitq_add(cvp, p);
404	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
405	sig = cv_switch_catch(p);
406
407	if (p->p_sflag & PS_TIMEOUT) {
408		p->p_sflag &= ~PS_TIMEOUT;
409		rval = EWOULDBLOCK;
410	} else
411		callout_stop(&p->p_slpcallout);
412
413	mtx_unlock_spin(&sched_lock);
414	PICKUP_GIANT();
415
416	PROC_LOCK(p);
417	if (sig == 0)
418		sig = CURSIG(p);
419	if (sig != 0) {
420		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
421			rval = EINTR;
422		else
423			rval = ERESTART;
424	}
425	PROC_UNLOCK(p);
426
427#ifdef KTRACE
428	mtx_lock(&Giant);
429	if (KTRPOINT(p, KTR_CSW))
430		ktrcsw(p->p_tracep, 0, 0);
431	mtx_unlock(&Giant);
432#endif
433	mtx_lock(mp);
434	WITNESS_RESTORE(&mp->mtx_object, mp);
435
436	return (rval);
437}
438
439/*
440 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
441 * called with sched_lock held.
442 */
443static __inline void
444cv_wakeup(struct cv *cvp)
445{
446	struct proc *p;
447
448	mtx_assert(&sched_lock, MA_OWNED);
449	p = TAILQ_FIRST(&cvp->cv_waitq);
450	KASSERT(p->p_wchan == cvp, ("%s: bogus wchan", __FUNCTION__));
451	KASSERT(p->p_sflag & PS_CVWAITQ, ("%s: not on waitq", __FUNCTION__));
452	TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
453	p->p_sflag &= ~PS_CVWAITQ;
454	p->p_wchan = 0;
455	if (p->p_stat == SSLEEP) {
456		/* OPTIMIZED EXPANSION OF setrunnable(p); */
457		CTR3(KTR_PROC, "cv_signal: proc %p (pid %d, %s)",
458		    p, p->p_pid, p->p_comm);
459		if (p->p_slptime > 1)
460			updatepri(p);
461		p->p_slptime = 0;
462		p->p_stat = SRUN;
463		if (p->p_sflag & PS_INMEM) {
464			setrunqueue(p);
465			maybe_resched(p);
466		} else {
467			p->p_sflag |= PS_SWAPINREQ;
468			wakeup(&proc0);
469		}
470		/* END INLINE EXPANSION */
471	}
472}
473
474/*
475 * Signal a condition variable, wakes up one waiting process.  Will also wakeup
476 * the swapper if the process is not in memory, so that it can bring the
477 * sleeping process in.  Note that this may also result in additional processes
478 * being made runnable.  Should be called with the same mutex as was passed to
479 * cv_wait held.
480 */
481void
482cv_signal(struct cv *cvp)
483{
484
485	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
486	mtx_lock_spin(&sched_lock);
487	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
488		CV_SIGNAL_VALIDATE(cvp);
489		cv_wakeup(cvp);
490	}
491	mtx_unlock_spin(&sched_lock);
492}
493
494/*
495 * Broadcast a signal to a condition variable.  Wakes up all waiting processes.
496 * Should be called with the same mutex as was passed to cv_wait held.
497 */
498void
499cv_broadcast(struct cv *cvp)
500{
501
502	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
503	mtx_lock_spin(&sched_lock);
504	CV_SIGNAL_VALIDATE(cvp);
505	while (!TAILQ_EMPTY(&cvp->cv_waitq))
506		cv_wakeup(cvp);
507	mtx_unlock_spin(&sched_lock);
508}
509
510/*
511 * Remove a process from the wait queue of its condition variable.  This may be
512 * called externally.
513 */
514void
515cv_waitq_remove(struct proc *p)
516{
517	struct cv *cvp;
518
519	mtx_lock_spin(&sched_lock);
520	if ((cvp = p->p_wchan) != NULL && p->p_sflag & PS_CVWAITQ) {
521		TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
522		p->p_sflag &= ~PS_CVWAITQ;
523		p->p_wchan = NULL;
524	}
525	mtx_unlock_spin(&sched_lock);
526}
527
528/*
529 * Timeout function for cv_timedwait.  Put the process on the runqueue and set
530 * its timeout flag.
531 */
532static void
533cv_timedwait_end(void *arg)
534{
535	struct proc *p;
536
537	p = arg;
538	CTR3(KTR_PROC, "cv_timedwait_end: proc %p (pid %d, %s)", p, p->p_pid,
539	    p->p_comm);
540	mtx_lock_spin(&sched_lock);
541	if (p->p_wchan != NULL) {
542		if (p->p_stat == SSLEEP)
543			setrunnable(p);
544		else
545			cv_waitq_remove(p);
546		p->p_sflag |= PS_TIMEOUT;
547	}
548	mtx_unlock_spin(&sched_lock);
549}
550