kern_condvar.c revision 102544
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 102544 2002-08-28 23:45:15Z peter $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT((td)->td_state == TDS_RUNNING,				\
	    ("%s: not TDS_RUNNING", __func__));				\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);
static void cv_check_upcall(struct thread *td);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
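
/*
 * Illustrative lifecycle sketch (not part of this file; "sc" and its
 * members are hypothetical consumer state): a caller typically embeds a
 * cv next to the mutex protecting its state, calls cv_init() once before
 * any thread can wait on it, and calls cv_destroy() only after all
 * waiters are gone, e.g.:
 *
 *	cv_init(&sc->sc_cv, "foowait");
 *	...
 *	cv_destroy(&sc->sc_cv);
 */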

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Decide if we need to queue an upcall.
 * This is copied from msleep(), perhaps this should be a common function.
 */
static void
cv_check_upcall(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run. Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
	    (td->td_flags & TDF_INMSLEEP) == 0) {
		/*
		 * If we have no queued work to do,
		 * upcall to the UTS to see if it has more work.
		 * We don't need to upcall now, just queue it.
		 */
		if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
			/* Don't recurse here! */
			td->td_flags |= TDF_INMSLEEP;
			thread_schedule_upcall(td, td->td_kse);
			td->td_flags &= ~TDF_INMSLEEP;
		}
	}
}

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	td->td_state = TDS_SLP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	cv_check_upcall(td);
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);	/* XXXKSE */
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_state = TDS_RUNNING;	/* XXXKSE */
	} else if (td->td_wchan != NULL) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
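
/*
 * Usage sketch (illustrative only; "sc", sc_mtx, sc_cv and sc_count are
 * hypothetical consumer state, not part of this file): callers hold the
 * mutex, re-check their predicate in a loop, and wait on the cv while it
 * is false, e.g.:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_count == 0)
 *		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 *	sc->sc_count--;
 *	mtx_unlock(&sc->sc_mtx);
 *
 * The loop matters because cv_broadcast may wake several waiters and
 * another thread may consume the state first.
 */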

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
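
/*
 * Illustrative sketch for the interruptible variant (hypothetical "sc"
 * state as above): the caller checks the return value and propagates
 * EINTR/ERESTART so the system call can be interrupted or restarted:
 *
 *	error = 0;
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_count == 0 && error == 0)
 *		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
 *	if (error == 0)
 *		sc->sc_count--;
 *	mtx_unlock(&sc->sc_mtx);
 *	return (error);
 */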

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
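
/*
 * Illustrative sketch (hypothetical "sc" state as above, including
 * sc_ready): a bounded wait of roughly one second per iteration,
 * distinguishing a timeout from a normal wakeup:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (!sc->sc_ready) {
 *		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 *
 * EWOULDBLOCK means the full timeout elapsed without a matching
 * cv_signal or cv_broadcast; the predicate must be re-checked either way.
 */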

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = 0;
	if (td->td_state == TDS_SLP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		kg = td->td_ksegrp;
		if (kg->kg_slptime > 1) /* XXXKSE */
			updatepri(kg);
		kg->kg_slptime = 0;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_state = TDS_SWAPPED;
			if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) {
				td->td_proc->p_sflag |= PS_SWAPINREQ;
				wakeup(&proc0);
			}
		}
		/* END INLINE EXPANSION */
	}
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
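
/*
 * Producer-side sketch (hypothetical "sc" state as in the wait examples
 * above): update the shared state under the same mutex the waiters use,
 * then signal; use cv_broadcast instead when every waiter must re-check:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_count++;
 *	cv_signal(&sc->sc_cv);
 *	mtx_unlock(&sc->sc_mtx);
 */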

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_lock_spin(&sched_lock);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wchan = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_state = TDS_SWAPPED;
			if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) {
				td->td_proc->p_sflag |= PS_SWAPINREQ;
				wakeup(&proc0);
			}
		}
	} else if (td->td_wchan != NULL) {
		if (td->td_state == TDS_SLP)	/* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid,
	    td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (td->td_wchan != NULL) {
			if (td->td_state == TDS_SLP)
				setrunnable(td);
			else
				cv_waitq_remove(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}