condvar.c revision 3642:9280dc401622
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/thread.h>
29#include <sys/proc.h>
30#include <sys/debug.h>
31#include <sys/cmn_err.h>
32#include <sys/systm.h>
33#include <sys/sobject.h>
34#include <sys/sleepq.h>
35#include <sys/cpuvar.h>
36#include <sys/condvar.h>
37#include <sys/condvar_impl.h>
38#include <sys/schedctl.h>
39#include <sys/procfs.h>
40#include <sys/sdt.h>
41
/*
 * cv_waiters is only tracked up to CV_MAX_WAITERS.  Once the count
 * reaches that ceiling it is no longer exact, and the sleep queue has
 * to be examined to find out whether there are *really* any waiters.
 *
 * CV_WAITERS_MASK isolates the bits of the count below the ceiling,
 * which is why CV_MAX_WAITERS has to be a power of two.
 */
#define	CV_MAX_WAITERS		1024		/* must be power of 2 */
#define	CV_WAITERS_MASK		(CV_MAX_WAITERS - 1)
49
50/*
51 * Threads don't "own" condition variables.
52 */
53/* ARGSUSED */
54static kthread_t *
55cv_owner(void *cvp)
56{
57	return (NULL);
58}
59
60/*
61 * Unsleep a thread that's blocked on a condition variable.
62 */
63static void
64cv_unsleep(kthread_t *t)
65{
66	condvar_impl_t *cvp = (condvar_impl_t *)t->t_wchan;
67	sleepq_head_t *sqh = SQHASH(cvp);
68
69	ASSERT(THREAD_LOCK_HELD(t));
70
71	if (cvp == NULL)
72		panic("cv_unsleep: thread %p not on sleepq %p", t, sqh);
73	DTRACE_SCHED1(wakeup, kthread_t *, t);
74	sleepq_unsleep(t);
75	if (cvp->cv_waiters != CV_MAX_WAITERS)
76		cvp->cv_waiters--;
77	disp_lock_exit_high(&sqh->sq_lock);
78	CL_SETRUN(t);
79}
80
81/*
82 * Change the priority of a thread that's blocked on a condition variable.
83 */
84static void
85cv_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
86{
87	condvar_impl_t *cvp = (condvar_impl_t *)t->t_wchan;
88	sleepq_t *sqp = t->t_sleepq;
89
90	ASSERT(THREAD_LOCK_HELD(t));
91	ASSERT(&SQHASH(cvp)->sq_queue == sqp);
92
93	if (cvp == NULL)
94		panic("cv_change_pri: %p not on sleep queue", t);
95	sleepq_dequeue(t);
96	*t_prip = pri;
97	sleepq_insert(sqp, t);
98}
99
100/*
101 * The sobj_ops vector exports a set of functions needed when a thread
102 * is asleep on a synchronization object of this type.
103 */
104static sobj_ops_t cv_sobj_ops = {
105	SOBJ_CV, cv_owner, cv_unsleep, cv_change_pri
106};
107
108/* ARGSUSED */
109void
110cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
111{
112	((condvar_impl_t *)cvp)->cv_waiters = 0;
113}
114
115/*
116 * cv_destroy is not currently needed, but is part of the DDI.
117 * This is in case cv_init ever needs to allocate something for a cv.
118 */
119/* ARGSUSED */
120void
121cv_destroy(kcondvar_t *cvp)
122{
123	ASSERT((((condvar_impl_t *)cvp)->cv_waiters & CV_WAITERS_MASK) == 0);
124}
125
126/*
127 * The cv_block() function blocks a thread on a condition variable
128 * by putting it in a hashed sleep queue associated with the
129 * synchronization object.
130 *
131 * Threads are taken off the hashed sleep queues via calls to
132 * cv_signal(), cv_broadcast(), or cv_unsleep().
133 */
134static void
135cv_block(condvar_impl_t *cvp)
136{
137	kthread_t *t = curthread;
138	klwp_t *lwp = ttolwp(t);
139	sleepq_head_t *sqh;
140
141	ASSERT(THREAD_LOCK_HELD(t));
142	ASSERT(t != CPU->cpu_idle_thread);
143	ASSERT(CPU_ON_INTR(CPU) == 0);
144	ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
145	ASSERT(t->t_state == TS_ONPROC);
146
147	t->t_schedflag &= ~TS_SIGNALLED;
148	CL_SLEEP(t);			/* assign kernel priority */
149	t->t_wchan = (caddr_t)cvp;
150	t->t_sobj_ops = &cv_sobj_ops;
151	DTRACE_SCHED(sleep);
152
153	/*
154	 * The check for t_intr is to avoid doing the
155	 * account for an interrupt thread on the still-pinned
156	 * lwp's statistics.
157	 */
158	if (lwp != NULL && t->t_intr == NULL) {
159		lwp->lwp_ru.nvcsw++;
160		(void) new_mstate(t, LMS_SLEEP);
161	}
162
163	sqh = SQHASH(cvp);
164	disp_lock_enter_high(&sqh->sq_lock);
165	if (cvp->cv_waiters < CV_MAX_WAITERS)
166		cvp->cv_waiters++;
167	ASSERT(cvp->cv_waiters <= CV_MAX_WAITERS);
168	THREAD_SLEEP(t, &sqh->sq_lock);
169	sleepq_insert(&sqh->sq_queue, t);
170	/*
171	 * THREAD_SLEEP() moves curthread->t_lockp to point to the
172	 * lock sqh->sq_lock. This lock is later released by the caller
173	 * when it calls thread_unlock() on curthread.
174	 */
175}
176
/*
 * Wakeable flavour of cv_block(): set T_WAKEABLE in the t_flag of
 * thread 't', then block on 'cvp' via cv_block().  cv_block() always
 * operates on curthread, so callers are expected to pass curthread
 * as 't' and to hold its thread lock.
 *
 * The body is wrapped in do { } while (0) so that the expansion plus
 * the caller's trailing semicolon forms exactly one statement, which
 * keeps the macro safe inside an unbraced if/else.
 */
#define	cv_block_sig(t, cvp)	\
	do { (t)->t_flag |= T_WAKEABLE; cv_block(cvp); } while (0)
179
180/*
181 * Block on the indicated condition variable and release the
182 * associated kmutex while blocked.
183 */
184void
185cv_wait(kcondvar_t *cvp, kmutex_t *mp)
186{
187	if (panicstr)
188		return;
189
190	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
191	thread_lock(curthread);			/* lock the thread */
192	cv_block((condvar_impl_t *)cvp);
193	thread_unlock_nopreempt(curthread);	/* unlock the waiters field */
194	mutex_exit(mp);
195	swtch();
196	mutex_enter(mp);
197}
198
199/*
200 * Same as cv_wait except the thread will unblock at 'tim'
201 * (an absolute time) if it hasn't already unblocked.
202 *
203 * Returns the amount of time left from the original 'tim' value
204 * when it was unblocked.
205 */
206clock_t
207cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t tim)
208{
209	kthread_t *t = curthread;
210	timeout_id_t id;
211	clock_t timeleft;
212	int signalled;
213
214	if (panicstr)
215		return (-1);
216
217	timeleft = tim - lbolt;
218	if (timeleft <= 0)
219		return (-1);
220	id = realtime_timeout((void (*)(void *))setrun, t, timeleft);
221	thread_lock(t);		/* lock the thread */
222	cv_block((condvar_impl_t *)cvp);
223	thread_unlock_nopreempt(t);
224	mutex_exit(mp);
225	if ((tim - lbolt) <= 0)		/* allow for wrap */
226		setrun(t);
227	swtch();
228	signalled = (t->t_schedflag & TS_SIGNALLED);
229	/*
230	 * Get the time left. untimeout() returns -1 if the timeout has
231	 * occured or the time remaining.  If the time remaining is zero,
232	 * the timeout has occured between when we were awoken and
233	 * we called untimeout.  We will treat this as if the timeout
234	 * has occured and set timeleft to -1.
235	 */
236	timeleft = untimeout(id);
237	mutex_enter(mp);
238	if (timeleft <= 0) {
239		timeleft = -1;
240		if (signalled)	/* avoid consuming the cv_signal() */
241			cv_signal(cvp);
242	}
243	return (timeleft);
244}
245
246int
247cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
248{
249	kthread_t *t = curthread;
250	proc_t *p = ttoproc(t);
251	klwp_t *lwp = ttolwp(t);
252	int rval = 1;
253	int signalled = 0;
254
255	if (panicstr)
256		return (rval);
257
258	/*
259	 * The check for t_intr is to catch an interrupt thread
260	 * that has not yet unpinned the thread underneath.
261	 */
262	if (lwp == NULL || t->t_intr) {
263		cv_wait(cvp, mp);
264		return (rval);
265	}
266
267	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
268	lwp->lwp_asleep = 1;
269	lwp->lwp_sysabort = 0;
270	thread_lock(t);
271	cv_block_sig(t, (condvar_impl_t *)cvp);
272	thread_unlock_nopreempt(t);
273	mutex_exit(mp);
274	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t))
275		setrun(t);
276	/* ASSERT(no locks are held) */
277	swtch();
278	signalled = (t->t_schedflag & TS_SIGNALLED);
279	t->t_flag &= ~T_WAKEABLE;
280	mutex_enter(mp);
281	if (ISSIG_PENDING(t, lwp, p)) {
282		mutex_exit(mp);
283		if (issig(FORREAL))
284			rval = 0;
285		mutex_enter(mp);
286	}
287	if (lwp->lwp_sysabort || MUSTRETURN(p, t))
288		rval = 0;
289	lwp->lwp_asleep = 0;
290	lwp->lwp_sysabort = 0;
291	if (rval == 0 && signalled)	/* avoid consuming the cv_signal() */
292		cv_signal(cvp);
293	return (rval);
294}
295
296/*
297 * Returns:
298 * 	Function result in order of presidence:
299 *		 0 if a signal was received
300 *		-1 if timeout occured
301 *		>0 if awakened via cv_signal() or cv_broadcast().
302 *		   (returns time remaining)
303 *
304 * cv_timedwait_sig() is now part of the DDI.
305 */
306clock_t
307cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t tim)
308{
309	kthread_t *t = curthread;
310	proc_t *p = ttoproc(t);
311	klwp_t *lwp = ttolwp(t);
312	timeout_id_t id;
313	clock_t rval = 1;
314	clock_t timeleft;
315	int signalled = 0;
316
317	if (panicstr)
318		return (rval);
319
320	/*
321	 * If there is no lwp, then we don't need to wait for a signal.
322	 * The check for t_intr is to catch an interrupt thread
323	 * that has not yet unpinned the thread underneath.
324	 */
325	if (lwp == NULL || t->t_intr)
326		return (cv_timedwait(cvp, mp, tim));
327
328	/*
329	 * If tim is less than or equal to lbolt, then the timeout
330	 * has already occured.  So just check to see if there is a signal
331	 * pending.  If so return 0 indicating that there is a signal pending.
332	 * Else return -1 indicating that the timeout occured. No need to
333	 * wait on anything.
334	 */
335	timeleft = tim - lbolt;
336	if (timeleft <= 0) {
337		lwp->lwp_asleep = 1;
338		lwp->lwp_sysabort = 0;
339		rval = -1;
340		goto out;
341	}
342
343	/*
344	 * Set the timeout and wait.
345	 */
346	id = realtime_timeout((void (*)(void *))setrun, t, timeleft);
347	lwp->lwp_asleep = 1;
348	lwp->lwp_sysabort = 0;
349	thread_lock(t);
350	cv_block_sig(t, (condvar_impl_t *)cvp);
351	thread_unlock_nopreempt(t);
352	mutex_exit(mp);
353	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || (tim - lbolt <= 0))
354		setrun(t);
355	/* ASSERT(no locks are held) */
356	swtch();
357	signalled = (t->t_schedflag & TS_SIGNALLED);
358	t->t_flag &= ~T_WAKEABLE;
359	mutex_enter(mp);
360
361	/*
362	 * Untimeout the thread.  untimeout() returns -1 if the timeout has
363	 * occured or the time remaining.  If the time remaining is zero,
364	 * the timeout has occured between when we were awoken and
365	 * we called untimeout.  We will treat this as if the timeout
366	 * has occured and set rval to -1.
367	 */
368	rval = untimeout(id);
369	if (rval <= 0)
370		rval = -1;
371
372	/*
373	 * Check to see if a signal is pending.  If so, regardless of whether
374	 * or not we were awoken due to the signal, the signal is now pending
375	 * and a return of 0 has the highest priority.
376	 */
377out:
378	if (ISSIG_PENDING(t, lwp, p)) {
379		mutex_exit(mp);
380		if (issig(FORREAL))
381			rval = 0;
382		mutex_enter(mp);
383	}
384	if (lwp->lwp_sysabort || MUSTRETURN(p, t))
385		rval = 0;
386	lwp->lwp_asleep = 0;
387	lwp->lwp_sysabort = 0;
388	if (rval <= 0 && signalled)	/* avoid consuming the cv_signal() */
389		cv_signal(cvp);
390	return (rval);
391}
392
393/*
394 * Like cv_wait_sig_swap but allows the caller to indicate (with a
395 * non-NULL sigret) that they will take care of signalling the cv
396 * after wakeup, if necessary.  This is a vile hack that should only
397 * be used when no other option is available; almost all callers
398 * should just use cv_wait_sig_swap (which takes care of the cv_signal
399 * stuff automatically) instead.
400 */
401int
402cv_wait_sig_swap_core(kcondvar_t *cvp, kmutex_t *mp, int *sigret)
403{
404	kthread_t *t = curthread;
405	proc_t *p = ttoproc(t);
406	klwp_t *lwp = ttolwp(t);
407	int rval = 1;
408	int signalled = 0;
409
410	if (panicstr)
411		return (rval);
412
413	/*
414	 * The check for t_intr is to catch an interrupt thread
415	 * that has not yet unpinned the thread underneath.
416	 */
417	if (lwp == NULL || t->t_intr) {
418		cv_wait(cvp, mp);
419		return (rval);
420	}
421
422	lwp->lwp_asleep = 1;
423	lwp->lwp_sysabort = 0;
424	thread_lock(t);
425	t->t_kpri_req = 0;	/* don't need kernel priority */
426	cv_block_sig(t, (condvar_impl_t *)cvp);
427	/* I can be swapped now */
428	curthread->t_schedflag &= ~TS_DONT_SWAP;
429	thread_unlock_nopreempt(t);
430	mutex_exit(mp);
431	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t))
432		setrun(t);
433	/* ASSERT(no locks are held) */
434	swtch();
435	signalled = (t->t_schedflag & TS_SIGNALLED);
436	t->t_flag &= ~T_WAKEABLE;
437	/* TS_DONT_SWAP set by disp() */
438	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
439	mutex_enter(mp);
440	if (ISSIG_PENDING(t, lwp, p)) {
441		mutex_exit(mp);
442		if (issig(FORREAL))
443			rval = 0;
444		mutex_enter(mp);
445	}
446	if (lwp->lwp_sysabort || MUSTRETURN(p, t))
447		rval = 0;
448	lwp->lwp_asleep = 0;
449	lwp->lwp_sysabort = 0;
450	if (rval == 0) {
451		if (sigret != NULL)
452			*sigret = signalled;	/* just tell the caller */
453		else if (signalled)
454			cv_signal(cvp);	/* avoid consuming the cv_signal() */
455	}
456	return (rval);
457}
458
459/*
460 * Same as cv_wait_sig but the thread can be swapped out while waiting.
461 * This should only be used when we know we aren't holding any locks.
462 */
463int
464cv_wait_sig_swap(kcondvar_t *cvp, kmutex_t *mp)
465{
466	return (cv_wait_sig_swap_core(cvp, mp, NULL));
467}
468
469void
470cv_signal(kcondvar_t *cvp)
471{
472	condvar_impl_t *cp = (condvar_impl_t *)cvp;
473
474	/* make sure the cv_waiters field looks sane */
475	ASSERT(cp->cv_waiters <= CV_MAX_WAITERS);
476	if (cp->cv_waiters > 0) {
477		sleepq_head_t *sqh = SQHASH(cp);
478		disp_lock_enter(&sqh->sq_lock);
479		ASSERT(CPU_ON_INTR(CPU) == 0);
480		if (cp->cv_waiters & CV_WAITERS_MASK) {
481			kthread_t *t;
482			cp->cv_waiters--;
483			t = sleepq_wakeone_chan(&sqh->sq_queue, cp);
484			/*
485			 * If cv_waiters is non-zero (and less than
486			 * CV_MAX_WAITERS) there should be a thread
487			 * in the queue.
488			 */
489			ASSERT(t != NULL);
490		} else if (sleepq_wakeone_chan(&sqh->sq_queue, cp) == NULL) {
491			cp->cv_waiters = 0;
492		}
493		disp_lock_exit(&sqh->sq_lock);
494	}
495}
496
497void
498cv_broadcast(kcondvar_t *cvp)
499{
500	condvar_impl_t *cp = (condvar_impl_t *)cvp;
501
502	/* make sure the cv_waiters field looks sane */
503	ASSERT(cp->cv_waiters <= CV_MAX_WAITERS);
504	if (cp->cv_waiters > 0) {
505		sleepq_head_t *sqh = SQHASH(cp);
506		disp_lock_enter(&sqh->sq_lock);
507		ASSERT(CPU_ON_INTR(CPU) == 0);
508		sleepq_wakeall_chan(&sqh->sq_queue, cp);
509		cp->cv_waiters = 0;
510		disp_lock_exit(&sqh->sq_lock);
511	}
512}
513
514/*
515 * Same as cv_wait(), but wakes up (after wakeup_time milliseconds) to check
516 * for requests to stop, like cv_wait_sig() but without dealing with signals.
517 * This is a horrible kludge.  It is evil.  It is vile.  It is swill.
518 * If your code has to call this function then your code is the same.
519 */
520void
521cv_wait_stop(kcondvar_t *cvp, kmutex_t *mp, int wakeup_time)
522{
523	kthread_t *t = curthread;
524	klwp_t *lwp = ttolwp(t);
525	proc_t *p = ttoproc(t);
526	timeout_id_t id;
527	clock_t tim;
528
529	if (panicstr)
530		return;
531
532	/*
533	 * If there is no lwp, then we don't need to eventually stop it
534	 * The check for t_intr is to catch an interrupt thread
535	 * that has not yet unpinned the thread underneath.
536	 */
537	if (lwp == NULL || t->t_intr) {
538		cv_wait(cvp, mp);
539		return;
540	}
541
542	/*
543	 * Wakeup in wakeup_time milliseconds, i.e., human time.
544	 */
545	tim = lbolt + MSEC_TO_TICK(wakeup_time);
546	id = realtime_timeout((void (*)(void *))setrun, t, tim - lbolt);
547	thread_lock(t);			/* lock the thread */
548	cv_block((condvar_impl_t *)cvp);
549	thread_unlock_nopreempt(t);
550	mutex_exit(mp);
551	/* ASSERT(no locks are held); */
552	if ((tim - lbolt) <= 0)		/* allow for wrap */
553		setrun(t);
554	swtch();
555	(void) untimeout(id);
556
557	/*
558	 * Check for reasons to stop, and stop if lwp_nostop is zero.
559	 * See issig_forreal() for explanations of the various stops.
560	 * Like issig_forreal(), we allow a PR_SUSPENDED/SUSPEND_NORMAL
561	 * to occur even if lwp_nostop is set.
562	 */
563	mutex_enter(&p->p_lock);
564	while (!(p->p_flag & SEXITLWPS)) {
565		/*
566		 * Hold the lwp here for watchpoint manipulation.
567		 */
568		if ((t->t_proc_flag & TP_PAUSE) && !lwp->lwp_nostop) {
569			stop(PR_SUSPENDED, SUSPEND_PAUSE);
570			continue;
571		}
572		/*
573		 * System checkpoint.
574		 */
575		if ((t->t_proc_flag & TP_CHKPT) && !lwp->lwp_nostop) {
576			stop(PR_CHECKPOINT, 0);
577			continue;
578		}
579		/*
580		 * Honor fork1(), watchpoint activity (remapping a page),
581		 * and lwp_suspend() regardless of whether lwp_nostop is
582		 * set but not if lwp_nostop_r is set (to avoid a recursive
583		 * call to prstop()).
584		 */
585		if (((p->p_flag & (SHOLDFORK1|SHOLDWATCH)) ||
586		    (t->t_proc_flag & TP_HOLDLWP)) && !lwp->lwp_nostop_r) {
587			stop(PR_SUSPENDED, SUSPEND_NORMAL);
588			continue;
589		}
590		/*
591		 * Honor /proc requested stop.
592		 */
593		if ((t->t_proc_flag & TP_PRSTOP) && !lwp->lwp_nostop) {
594			stop(PR_REQUESTED, 0);
595		}
596		/*
597		 * If some lwp in the process has already stopped
598		 * showing PR_JOBCONTROL, stop in sympathy with it.
599		 */
600		if (p->p_stopsig && !lwp->lwp_nostop && (t != p->p_agenttp)) {
601			stop(PR_JOBCONTROL, p->p_stopsig);
602			continue;
603		}
604		break;
605	}
606	mutex_exit(&p->p_lock);
607	mutex_enter(mp);
608}
609
610/*
611 * Like cv_timedwait_sig(), but takes an absolute hires future time
612 * rather than a future time in clock ticks.  Will not return showing
613 * that a timeout occurred until the future time is passed.
614 * If 'when' is a NULL pointer, no timeout will occur.
615 * Returns:
616 * 	Function result in order of presidence:
617 *		 0 if a signal was received
618 *		-1 if timeout occured
619 *	        >0 if awakened via cv_signal() or cv_broadcast()
620 *		   or by a spurious wakeup.
621 *		   (might return time remaining)
622 */
623int
624cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp, timestruc_t *when)
625{
626	timestruc_t now;
627	timestruc_t delta;
628	clock_t ticks;
629	int rval;
630
631	if (when == NULL)
632		return (cv_wait_sig_swap(cvp, mp));
633
634	gethrestime(&now);
635	delta = *when;
636	timespecsub(&delta, &now);
637	if (delta.tv_sec < 0 || (delta.tv_sec == 0 && delta.tv_nsec == 0)) {
638		/*
639		 * We have already reached the absolute future time.
640		 * Call cv_timedwait_sig() just to check for signals.
641		 * We will return immediately with either 0 or -1.
642		 */
643		rval = cv_timedwait_sig(cvp, mp, lbolt);
644	} else {
645		ticks = lbolt + timespectohz(when, now);
646		rval = cv_timedwait_sig(cvp, mp, ticks);
647
648		gethrestime(&now);
649		delta = *when;
650		timespecsub(&delta, &now);
651
652		/*
653		 * timeout is premature iff
654		 *	ticks >= lbolt  and  when > now
655		 */
656		if (rval == -1 && ticks >= lbolt && (delta.tv_sec > 0 ||
657		    (delta.tv_sec == 0 && delta.tv_nsec > 0)))
658				rval = 1;
659	}
660	return (rval);
661}
662