/*	$OpenBSD: kern_synch.c,v 1.95 2010/06/29 00:28:14 tedu Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

void updatepri(struct proc *);
void endtsleep(void *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
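
/*
 * Worked example of the hash above (illustrative value): for an ident
 * of 0xdeadbe00, LOOKUP drops the low 8 bits and keeps the next 7, so
 * (0xdeadbe00 >> 8) & 127 == 0xdeadbe & 127 == 62, i.e. bucket 62.
 */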

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}


/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If priority includes the PCATCH flag, signals
 * are checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;

	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
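
/*
 * Typical usage is a loop that rechecks the wakeup condition, since a
 * wakeup only means the condition may have changed.  A minimal sketch
 * (sc, sc_flags and SCF_READY are hypothetical, not part of this file):
 *
 *	while ((sc->sc_flags & SCF_READY) == 0) {
 *		error = tsleep(&sc->sc_flags, PRIBIO | PCATCH, "scrdy", 0);
 *		if (error)
 *			return (error);
 *	}
 */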

/*
 * Same as tsleep, but if a mutex is provided it is dropped once we have
 * entered the sleep queue, closing the race between checking a condition
 * and sleeping on it.  After sleeping the mutex is re-locked, unless
 * PNORELOCK was passed in priority.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	if (mtx) {
		/* XXX - We need to make sure that the mutex doesn't
		 * unblock splsched. This can be made a bit more
		 * correct when the sched_lock is a mutex.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = splsched();
		mtx_leave(mtx);
	}

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if (mtx) {
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */
		} else
			splx(spl);
	}
	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
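
/*
 * Sketch of the usual msleep pattern (sc, sc_mtx and sc_count are
 * hypothetical): the mutex protects the condition, and msleep drops
 * and re-acquires it around the sleep.
 *
 *	mtx_enter(&sc->sc_mtx);
 *	while (sc->sc_count == 0)
 *		msleep(&sc->sc_count, &sc->sc_mtx, PRIBIO, "sccnt", 0);
 *	sc->sc_count--;
 *	mtx_leave(&sc->sc_mtx);
 */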

void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_stats->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
}
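
/*
 * The sleep_setup* / sleep_finish* pairs let a caller put itself on
 * the sleep queue under SCHED_LOCK, release other locks (as msleep()
 * does with its mutex), and only then commit to sleeping.  Because the
 * scheduler lock is held from sleep_setup() until sleep_finish()
 * switches away, a wakeup() cannot slip into the gap.  Minimal sketch
 * of the protocol, as used by tsleep() above:
 *
 *	struct sleep_state sls;
 *
 *	sleep_setup(&sls, ident, prio, "wmesg");
 *	... drop other locks here ...
 *	sleep_finish(&sls, 1);
 */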

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else if (timeout_pending(&p->p_sleep_to)) {
		timeout_del(&p->p_sleep_to);
	}

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be NULL upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if ((sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == NULL) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (sls->sls_catch != 0) {
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_sigacts->ps_sigintr & sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement the timeout for tsleep.
 * If the process hasn't been awakened (wchan non-zero),
 * set the timeout flag and undo the sleep.  If the process
 * is stopped, just unsleep it so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make at most n processes sleeping on the specified identifier
 * runnable (n < 0 means wake them all).
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = NULL;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				p->p_cpu = sched_choosecpu(p);
				setrunqueue(p);
				need_resched(p->p_cpu);
				/* END INLINE EXPANSION */
			}
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
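
/*
 * Example of the producer side (illustrative; sc and SCF_READY are the
 * same hypothetical names used in the tsleep() sketch above).  The
 * condition must be made true before waking the sleepers:
 *
 *	sc->sc_flags |= SCF_READY;
 *	wakeup(&sc->sc_flags);
 */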

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	yield();
	return (0);
}

int
sys_thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys_thrsleep_args /* {
		syscallarg(void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	_spinlock_lock_t *lock = SCARG(uap, lock);
	static _spinlock_lock_t unlocked = _SPINLOCK_UNLOCKED;
	long long to_ticks = 0;
	int error;

	if (!rthreads_enabled)
		return (ENOTSUP);
	if (SCARG(uap, tp) != NULL) {
		struct timespec now, ats;

		if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0 ||
		    (error = clock_gettime(p, SCARG(uap, clock_id), &now)) != 0)
			return (error);

		if (timespeccmp(&ats, &now, <)) {
			/* already passed: still do the unlock */
			if (lock)
				copyout(&unlocked, lock, sizeof(unlocked));
			return (EWOULDBLOCK);
		}

		timespecsub(&ats, &now, &ats);
		to_ticks = (long long)hz * ats.tv_sec +
		    ats.tv_nsec / (tick * 1000);
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
		if (to_ticks == 0)
			to_ticks = 1;
	}
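
	/*
	 * Worked example of the tick conversion above (illustrative,
	 * assuming hz = 100, so tick = 10000 microseconds): a remaining
	 * delta of 1.5 seconds yields
	 * 100 * 1 + 500000000 / (10000 * 1000) = 150 ticks.
	 */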

	p->p_thrslpid = ident;

	if (lock)
		copyout(&unlocked, lock, sizeof(unlocked));
	error = tsleep(&p->p_thrslpid, PUSER | PCATCH, "thrsleep",
	    (int)to_ticks);

	if (error == ERESTART)
		error = EINTR;

	return (error);
}
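
/*
 * Userland protocol sketch (illustrative; __thrsleep is the userland
 * stub for this syscall, and the names below are hypothetical): the
 * caller checks its condition while holding a spinlock and passes that
 * lock in; the kernel stores _SPINLOCK_UNLOCKED into it only after
 * p_thrslpid has been set, then sleeps until sys_thrwakeup() below
 * matches the ident.
 *
 *	_spinlock_lock_t lk = _SPINLOCK_LOCKED;
 *	... check the condition while holding lk ...
 *	__thrsleep(&cond, CLOCK_REALTIME, NULL, &lk);
 */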

int
sys_thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys_thrwakeup_args /* {
		syscallarg(void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (!rthreads_enabled)
		return (ENOTSUP);
	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
		if (q->p_thrslpid == ident) {
			wakeup_one(&q->p_thrslpid);
			q->p_thrslpid = 0;
			if (++found == n)
				return (0);
		}
	}
	if (!found)
		return (ESRCH);

	return (0);
}