/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 71088 2001-01-16 01:00:43Z jasone $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions: the caller must be the
 * current, running (SRUN) process, must pass non-NULL condition variable
 * and mutex pointers, and must hold the mutex non-recursed.
 */
#define	CV_ASSERT(cvp, mp, p) do {					\
	KASSERT((p) != NULL, ("%s: curproc NULL", __FUNCTION__));	\
	KASSERT((p)->p_stat == SRUN, ("%s: not SRUN", __FUNCTION__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __FUNCTION__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __FUNCTION__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef CV_DEBUG
/*
 * CV_WAIT_VALIDATE: enforce that all concurrent waiters on a condition
 * variable use the same mutex.  The first waiter records its mutex in
 * cv_mtx; later waiters must pass a matching mutex.
 */
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __FUNCTION__));		\
	}								\
} while (0)
/*
 * CV_SIGNAL_VALIDATE: if there are waiters, assert that the signalling
 * process holds the mutex those waiters went to sleep with.
 */
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __FUNCTION__));		\
	}								\
} while (0)
#else
/* The validation hooks compile away entirely unless CV_DEBUG is defined. */
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

83/*
84 * Initialize a condition variable.  Must be called before use.
85 */
86void
87cv_init(struct cv *cvp, const char *desc)
88{
89
90	TAILQ_INIT(&cvp->cv_waitq);
91	cvp->cv_mtx = NULL;
92	cvp->cv_description = desc;
93}
94
/*
 * Destroy a condition variable.  The wait queue must be empty; a destroyed
 * condition variable may only be used again after being re-initialized with
 * cv_init().
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __FUNCTION__));
}

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.  Caller holds sched_lock and has already placed the
 * process on the condition variable's wait queue.
 */
static __inline void
cv_switch(struct proc *p)
{

	/*
	 * Mark the process asleep and charge a voluntary context switch
	 * before handing off the CPU; mi_switch() does not return until
	 * the process has been made runnable again.
	 */
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
}

/*
 * Switch context, catching signals.  Returns the signal number that aborted
 * the sleep, or 0 if the process slept and was resumed normally.  Called
 * with sched_lock held; sched_lock is held again on return.
 */
static __inline int
cv_switch_catch(struct proc *p)
{
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as SSLEEP without resuming us, thus we must be ready for
	 * sleep when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	p->p_flag |= P_SINTR;
	/* sched_lock is dropped around CURSIG, which may stop the process. */
	mtx_exit(&sched_lock, MTX_SPIN);
	/* proc_lock(p); */
	sig = CURSIG(p);
	mtx_enter(&sched_lock, MTX_SPIN);
	/* proc_unlock_noswitch(p); */
	if (sig != 0) {
		/* Signal pending: abort the sleep instead of switching out. */
		if (p->p_wchan != NULL)
			cv_waitq_remove(p);
		p->p_stat = SRUN;
	} else if (p->p_wchan != NULL) {
		/* Still on the wait queue; go to sleep until woken. */
		cv_switch(p);
	}
	p->p_flag &= ~P_SINTR;

	return sig;
}

158/*
159 * Add a process to the wait queue of a condition variable.
160 */
161static __inline void
162cv_waitq_add(struct cv *cvp, struct proc *p)
163{
164
165	/*
166	 * Process may be sitting on a slpque if asleep() was called, remove it
167	 * before re-adding.
168	 */
169	if (p->p_wchan != NULL)
170		unsleep(p);
171
172	p->p_flag |= P_CVWAITQ;
173	p->p_wchan = cvp;
174	p->p_wmesg = cvp->cv_description;
175	p->p_slptime = 0;
176	p->p_nativepri = p->p_priority;
177	CTR3(KTR_PROC, "cv_waitq_add: proc %p (pid %d, %s)", p, p->p_pid,
178	    p->p_comm);
179	TAILQ_INSERT_TAIL(&cvp->cv_waitq, p, p_slpq);
180}
181
/*
 * Wait on a condition variable.  The current process is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the process.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct proc *p;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
#ifdef KTRACE
	/* Record the context-switch event for ktrace, if enabled. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, mp);
	WITNESS_SAVE(mp, mp);

	mtx_enter(&sched_lock, MTX_SPIN);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_exit(&sched_lock, MTX_SPIN);
		return;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Release Giant and the caller's mutex without switching;
	 * sched_lock is still held across the queue insertion and switch.
	 */
	DROP_GIANT_NOSWITCH();
	mtx_exit(mp, MTX_DEF | MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	cv_switch(p);
	/* Resumed; adopt our (possibly recomputed) user priority. */
	curpriority = p->p_usrpri;

	mtx_exit(&sched_lock, MTX_SPIN);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	/* Reacquire Giant and the caller's mutex before returning. */
	PICKUP_GIANT();
	mtx_enter(mp, MTX_DEF);
	WITNESS_RESTORE(mp, mp);
}

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the process was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Record the context-switch event for ktrace, if enabled. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, mp);
	WITNESS_SAVE(mp, mp);

	mtx_enter(&sched_lock, MTX_SPIN);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_exit(&sched_lock, MTX_SPIN);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/* Release Giant and the caller's mutex without switching. */
	DROP_GIANT_NOSWITCH();
	mtx_exit(mp, MTX_DEF | MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	sig = cv_switch_catch(p);
	curpriority = p->p_usrpri;

	mtx_exit(&sched_lock, MTX_SPIN);
	PICKUP_GIANT();

	/* proc_lock(p); */
	/*
	 * Re-check for a signal delivered while we slept; translate a caught
	 * signal into EINTR or ERESTART based on its ps_sigintr disposition.
	 */
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	/* proc_unlock(p); */

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	mtx_enter(mp, MTX_DEF);
	WITNESS_RESTORE(mp, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct proc *p;
	int rval;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Record the context-switch event for ktrace, if enabled. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, mp);
	WITNESS_SAVE(mp, mp);

	mtx_enter(&sched_lock, MTX_SPIN);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_exit(&sched_lock, MTX_SPIN);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/* Release Giant and the caller's mutex without switching. */
	DROP_GIANT_NOSWITCH();
	mtx_exit(mp, MTX_DEF | MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	/* Arm the timeout before sleeping; cv_timedwait_end() wakes us. */
	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
	cv_switch(p);
	curpriority = p->p_usrpri;

	if (p->p_flag & P_TIMEOUT) {
		/* The timeout fired first; report EWOULDBLOCK. */
		p->p_flag &= ~P_TIMEOUT;
		rval = EWOULDBLOCK;
	} else
		/* Woken normally; cancel the still-pending timeout. */
		callout_stop(&p->p_slpcallout);

	mtx_exit(&sched_lock, MTX_SPIN);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	mtx_enter(mp, MTX_DEF);
	WITNESS_RESTORE(mp, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the process was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Record the context-switch event for ktrace, if enabled. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, mp);
	WITNESS_SAVE(mp, mp);

	mtx_enter(&sched_lock, MTX_SPIN);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_exit(&sched_lock, MTX_SPIN);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/* Release Giant and the caller's mutex without switching. */
	DROP_GIANT_NOSWITCH();
	mtx_exit(mp, MTX_DEF | MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	/* Arm the timeout before sleeping; cv_timedwait_end() wakes us. */
	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
	sig = cv_switch_catch(p);
	curpriority = p->p_usrpri;

	if (p->p_flag & P_TIMEOUT) {
		/* The timeout fired first; report EWOULDBLOCK. */
		p->p_flag &= ~P_TIMEOUT;
		rval = EWOULDBLOCK;
	} else
		/* Woken normally or by a signal; cancel the timeout. */
		callout_stop(&p->p_slpcallout);

	mtx_exit(&sched_lock, MTX_SPIN);
	PICKUP_GIANT();

	/* proc_lock(p); */
	/*
	 * Re-check for a signal delivered while we slept; a caught signal
	 * overrides rval with EINTR or ERESTART per its ps_sigintr
	 * disposition (even if the timeout also fired).
	 */
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	/* proc_unlock(p); */

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	mtx_enter(mp, MTX_DEF);
	WITNESS_RESTORE(mp, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.  Dequeues the head waiter (FIFO order) and,
 * if it is asleep, makes it runnable again.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct proc *p;

	p = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(p->p_wchan == cvp, ("%s: bogus wchan", __FUNCTION__));
	KASSERT(p->p_flag & P_CVWAITQ, ("%s: not on waitq", __FUNCTION__));
	TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
	p->p_flag &= ~P_CVWAITQ;
	p->p_wchan = 0;
	if (p->p_stat == SSLEEP) {
		/* OPTIMIZED EXPANSION OF setrunnable(p); */
		/*
		 * NOTE(review): trace label reads "cv_signal" although this
		 * path is shared with cv_broadcast.
		 */
		CTR3(KTR_PROC, "cv_signal: proc %p (pid %d, %s)",
		    p, p->p_pid, p->p_comm);
		if (p->p_slptime > 1)
			updatepri(p);	/* Priority boost after a long sleep. */
		p->p_slptime = 0;
		p->p_stat = SRUN;
		if (p->p_flag & P_INMEM) {
			setrunqueue(p);
			maybe_resched(p);
		} else {
			/* Swapped out: ask the swapper (proc0) to bring it in. */
			p->p_flag |= P_SWAPINREQ;
			wakeup(&proc0);
		}
		/* END INLINE EXPANSION */
	}
}

472/*
473 * Signal a condition variable, wakes up one waiting process.  Will also wakeup
474 * the swapper if the process is not in memory, so that it can bring the
475 * sleeping process in.  Note that this may also result in additional processes
476 * being made runnable.  Should be called with the same mutex as was passed to
477 * cv_wait held.
478 */
479void
480cv_signal(struct cv *cvp)
481{
482
483	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
484	mtx_enter(&sched_lock, MTX_SPIN);
485	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
486		CV_SIGNAL_VALIDATE(cvp);
487		cv_wakeup(cvp);
488	}
489	mtx_exit(&sched_lock, MTX_SPIN);
490}
491
492/*
493 * Broadcast a signal to a condition variable.  Wakes up all waiting processes.
494 * Should be called with the same mutex as was passed to cv_wait held.
495 */
496void
497cv_broadcast(struct cv *cvp)
498{
499
500	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
501	mtx_enter(&sched_lock, MTX_SPIN);
502	CV_SIGNAL_VALIDATE(cvp);
503	while (!TAILQ_EMPTY(&cvp->cv_waitq))
504		cv_wakeup(cvp);
505	mtx_exit(&sched_lock, MTX_SPIN);
506}
507
508/*
509 * Remove a process from the wait queue of its condition variable.  This may be
510 * called externally.
511 */
512void
513cv_waitq_remove(struct proc *p)
514{
515	struct cv *cvp;
516
517	mtx_enter(&sched_lock, MTX_SPIN);
518	if ((cvp = p->p_wchan) != NULL && p->p_flag & P_CVWAITQ) {
519		TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
520		p->p_flag &= ~P_CVWAITQ;
521		p->p_wchan = NULL;
522	}
523	mtx_exit(&sched_lock, MTX_SPIN);
524}
525
/*
 * Timeout function for cv_timedwait.  Put the process on the runqueue and set
 * its timeout flag.  Runs from the callout armed by cv_timedwait /
 * cv_timedwait_sig.
 */
static void
cv_timedwait_end(void *arg)
{
	struct proc *p;

	p = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
	mtx_enter(&sched_lock, MTX_SPIN);
	if (p->p_wchan != NULL) {
		/*
		 * Still waiting: if asleep, make it runnable; otherwise it
		 * is queued but not yet switched out, so just dequeue it.
		 * NOTE(review): cv_waitq_remove() re-enters sched_lock while
		 * it is already held here; assumes sched_lock permits
		 * recursion -- confirm.
		 */
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			cv_waitq_remove(p);
		p->p_flag |= P_TIMEOUT;
	}
	mtx_exit(&sched_lock, MTX_SPIN);
}
