/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 90538 2002-02-11 20:37:54Z julian $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT((td)->td_proc->p_stat == SRUN, ("%s: not SRUN", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef CV_DEBUG
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));		\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
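
/*
 * Usage sketch (illustrative only; the "xx_softc" structure and the "xx_"
 * names below are hypothetical and not part of this file): a condition
 * variable is typically embedded next to the mutex that protects its
 * predicate, initialized once before use and destroyed when no waiters
 * remain.
 *
 *	struct xx_softc {
 *		struct mtx	xx_mtx;
 *		struct cv	xx_cv;
 *		int		xx_ready;
 *	};
 *
 *	static void
 *	xx_init(struct xx_softc *sc)
 *	{
 *		mtx_init(&sc->xx_mtx, "xx lock", MTX_DEF);
 *		cv_init(&sc->xx_cv, "xxwait");
 *	}
 *
 *	static void
 *	xx_fini(struct xx_softc *sc)
 *	{
 *		cv_destroy(&sc->xx_cv);
 *		mtx_destroy(&sc->xx_mtx);
 *	}
 */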

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	td->td_proc->p_stat = SSLEEP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as SSLEEP without resuming us, thus we must be ready for
	 * sleep when CURSIG is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from CURSIG.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = CURSIG(p);	/* XXXKSE */
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_proc->p_stat = SRUN;
	} else if (td->td_wchan != NULL) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	/*
	 * The thread may be sitting on a slpque if asleep() was called; remove
	 * it before re-adding.
	 */
	if (td->td_wchan != NULL)
		unsleep(td);

	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_kse->ke_slptime = 0; /* XXXKSE */
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast is called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
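
/*
 * Usage sketch (illustrative only; "sc", "xx_mtx", "xx_cv" and "xx_ready"
 * are hypothetical): a consumer takes the mutex, re-checks its predicate in
 * a loop and calls cv_wait each time the predicate is false, since a
 * broadcast or a stale wakeup may resume it before the condition holds.
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0)
 *		cv_wait(&sc->xx_cv, &sc->xx_mtx);
 *	... consume the data protected by xx_mtx ...
 *	mtx_unlock(&sc->xx_mtx);
 */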

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	PROC_LOCK(p);
	if (sig == 0)
		sig = CURSIG(p);  /* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
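
/*
 * Usage sketch (illustrative only; the "sc" fields are hypothetical): a
 * caller that must remain interruptible checks the return value and backs
 * out on EINTR or ERESTART, typically propagating the error so the syscall
 * layer can restart or fail the request.
 *
 *	int error;
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0) {
 *		error = cv_wait_sig(&sc->xx_cv, &sc->xx_mtx);
 *		if (error != 0) {
 *			mtx_unlock(&sc->xx_mtx);
 *			return (error);	/* EINTR or ERESTART */
 *		}
 *	}
 *	mtx_unlock(&sc->xx_mtx);
 */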

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
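
/*
 * Usage sketch (illustrative only; the "sc" fields are hypothetical): the
 * timeout is expressed in ticks, so a caller converts from seconds by
 * multiplying by hz, and treats EWOULDBLOCK as "the condition never came
 * true within the interval".
 *
 *	int error;
 *
 *	mtx_lock(&sc->xx_mtx);
 *	error = 0;
 *	while (sc->xx_ready == 0 && error == 0)
 *		error = cv_timedwait(&sc->xx_cv, &sc->xx_mtx, 5 * hz);
 *	mtx_unlock(&sc->xx_mtx);
 *	if (error == EWOULDBLOCK)
 *		... handle the timeout ...
 */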

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	PROC_LOCK(p);
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
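
/*
 * Usage sketch (illustrative only; the "sc" fields are hypothetical): the
 * caller distinguishes three outcomes: 0 (awakened), EWOULDBLOCK (timed
 * out) and EINTR/ERESTART (signal), and usually returns anything non-zero
 * to its own caller.
 *
 *	int error;
 *
 *	mtx_lock(&sc->xx_mtx);
 *	error = 0;
 *	while (sc->xx_ready == 0 && error == 0)
 *		error = cv_timedwait_sig(&sc->xx_cv, &sc->xx_mtx, 5 * hz);
 *	mtx_unlock(&sc->xx_mtx);
 *	return (error);
 */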

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = 0;
	if (td->td_proc->p_stat == SSLEEP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
			updatepri(td);
		td->td_kse->ke_slptime = 0;
		td->td_ksegrp->kg_slptime = 0;
		td->td_proc->p_stat = SRUN;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_proc->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0); /* XXXKSE */
		}
		/* END INLINE EXPANSION */
	}
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
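
/*
 * Usage sketch (illustrative only; the "sc" fields are hypothetical): the
 * producer changes the predicate while holding the same mutex the waiters
 * passed to cv_wait, then signals (one waiter) or broadcasts (all waiters)
 * before dropping the lock.
 *
 *	mtx_lock(&sc->xx_mtx);
 *	sc->xx_ready = 1;
 *	cv_broadcast(&sc->xx_cv);	/* or cv_signal(&sc->xx_cv) */
 *	mtx_unlock(&sc->xx_mtx);
 */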

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_lock_spin(&sched_lock);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wchan = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td, td->td_proc->p_pid,
	    td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		setrunqueue(td);
	} else if (td->td_wchan != NULL) {
		if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}
579