/*	$OpenBSD: kern_synch.c,v 1.129 2016/03/09 13:38:50 mpi Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *, int);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
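
/*
 * Worked example (hypothetical address): a channel at 0x80411a00
 * hashes to bucket (0x80411a00 >> 8) & 127 == 0x1a, so all channels
 * within the same 256-byte span share one queue and sleepers are
 * told apart by their p_wchan.
 */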

/*
 * Initialize the sleep queue buckets.
 */
void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, otherwise they are not checked.
 * Returns 0 if awakened and EWOULDBLOCK if the timeout expires.  If
 * PCATCH is set and a signal needs to be delivered, ERESTART is returned
 * if the current system call should be restarted if possible, and EINTR
 * is returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then return; don't run any other
		 * procs or panic below, in case this is the idle process
		 * and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
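
/*
 * Illustrative sketch (not part of this file): a typical tsleep()
 * consumer re-tests its condition in a loop, since a wakeup on the
 * channel only means the state may have changed.  The softc, flag and
 * wmesg names below are hypothetical.
 */
#if 0
int
example_wait_ready(struct example_softc *sc)
{
	int error;

	while ((sc->sc_flags & EXF_READY) == 0) {
		/* Sleep for at most one second; PCATCH allows signals. */
		error = tsleep(&sc->sc_flags, PWAIT | PCATCH, "exready", hz);
		if (error != 0 && error != EWOULDBLOCK)
			return (error);		/* EINTR or ERESTART */
	}
	return (0);
}
#endif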

/*
 * Same as tsleep(), but if a mutex is provided, it is dropped once we
 * have entered the sleep queue and re-locked after sleeping, unless
 * PNORELOCK is set.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then return; don't run any other
		 * procs or panic below, in case this is the idle process
		 * and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/*
	 * XXX - We need to make sure that the mutex doesn't
	 * unblock splsched.  This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl;	/* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
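
/*
 * Illustrative sketch (not part of this file): with msleep() the mutex
 * protecting the condition is released only after the process is on
 * the sleep queue, so no wakeup can be lost between the check and the
 * sleep.  The queue and item names below are hypothetical.
 */
#if 0
int
example_dequeue(struct example_queue *q, struct example_item **itemp)
{
	int error;

	mtx_enter(&q->q_mtx);
	while (TAILQ_EMPTY(&q->q_items)) {
		/* q_mtx is dropped while asleep and retaken on return. */
		error = msleep(&q->q_items, &q->q_mtx, PWAIT, "exdeq", 0);
		if (error) {
			mtx_leave(&q->q_mtx);
			return (error);
		}
	}
	*itemp = TAILQ_FIRST(&q->q_items);
	TAILQ_REMOVE(&q->q_items, *itemp, i_entry);
	mtx_leave(&q->q_mtx);
	return (0);
}
#endif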

/*
 * Prepare the current process to sleep on the given channel: record
 * the channel, wait message and priority, and put the process on the
 * matching sleep queue.  Takes the scheduler lock, which is released
 * again in sleep_finish().
 */
void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

/*
 * Finish a sleep started with sleep_setup(): go to sleep if do_sleep
 * is still set and no earlier stage cancelled it, then release the
 * scheduler lock taken in sleep_setup().
 */
void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);
}
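
/*
 * Illustrative sketch (not part of this file): the setup/finish pair
 * lets a caller re-test its condition after it is already on the sleep
 * queue, so a concurrent wakeup cannot be lost; refcnt_finalize() at
 * the bottom of this file uses the same pattern.  "cond" below is
 * hypothetical.
 */
#if 0
void
example_wait_cond(volatile int *cond)
{
	struct sleep_state sls;

	while (*cond == 0) {
		sleep_setup(&sls, cond, PWAIT, "excond");
		/* Sleep only if the condition is still unmet. */
		sleep_finish(&sls, *cond == 0);
	}
}
#endif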

/*
 * Arm the per-process sleep timeout if a tick count was given.
 */
void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

/*
 * Collect the timeout state after sleeping: return EWOULDBLOCK if the
 * timeout fired, otherwise disarm it.
 */
int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

/*
 * If PCATCH was requested, mark the process as interruptible and check
 * for pending signals or a single-threading request before sleeping.
 */
void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

/*
 * After sleeping with PCATCH, check for signals again and translate a
 * pending one into EINTR or ERESTART.
 */
int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement the timeout for tsleep().
 * If the process hasn't been awakened (wchan non-zero), set the
 * timeout flag and undo the sleep.  If the process is stopped, just
 * unsleep it so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
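
/*
 * Illustrative sketch (not part of this file): the producer side of
 * the tsleep()/wakeup() pattern updates the shared state first, then
 * wakes the channel the sleepers used.  Names are hypothetical and
 * match the tsleep() sketch above.
 */
#if 0
void
example_set_ready(struct example_softc *sc)
{
	sc->sc_flags |= EXF_READY;
	wakeup(&sc->sc_flags);
}
#endif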

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	struct proc *q;
	int s;

	SCHED_LOCK(s);
	/*
	 * If one of the threads of a multi-threaded process called
	 * sched_yield(2), drop its priority to ensure its siblings
	 * can make some progress.
	 */
	p->p_priority = p->p_usrpri;
	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
		p->p_priority = max(p->p_priority, q->p_priority);
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);

	return (0);
}

/*
 * Release a userland lock on behalf of __thrsleep(2).  With lockflags
 * set the lock is a ticket lock and we advance its ticket number;
 * otherwise it is a spinlock and we store the unlocked value into it.
 */
int
thrsleep_unlock(void *lock, int lockflags)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;
	uint32_t *ticket = lock;
	uint32_t ticketvalue;
	int error;

	if (!lock)
		return (0);

	if (lockflags) {
		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
			return (error);
		ticketvalue++;
		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
	} else {
		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
	}
	return (error);
}

/*
 * Dummy kernel object whose address serves as the sleep channel when a
 * thread passes ident -1 to __thrsleep(2); all such sleepers share it.
 */
static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	long long to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
	int lockflags = SCARG(uap, clock_id) & 0x8;	/* ticket lock? */

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock, lockflags)))
				return (error);
			return (EWOULDBLOCK);
		}

		/* Convert the remaining time to ticks, rounding up. */
		timespecsub(tsp, &now, tsp);
		to_ticks = (long long)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock, lockflags)))
		goto out;

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;

		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);
}

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}
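
/*
 * Illustrative sketch (not part of this file): userland (librthread)
 * pairs these two syscalls to build sleeping locks.  A blocked thread
 * sleeps on an ident of its choosing and a peer later wakes the same
 * ident, e.g. (hypothetical userland code):
 *
 *	while (!trylock(&l))
 *		__thrsleep(&l, CLOCK_REALTIME, NULL, &l.spinlock, NULL);
 *	...
 *	unlock(&l);
 *	__thrwakeup(&l, 1);
 */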

/*
 * Initialize a reference count with a single reference held by the
 * caller.
 */
void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

/*
 * Take a new reference.
 */
void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

/*
 * Release a reference; returns non-zero if it was the last one.
 */
int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

/*
 * Release a reference and wake a waiter in refcnt_finalize() if it
 * was the last one.
 */
void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

/*
 * Release the caller's own reference and sleep until every other
 * reference has been released.
 */
void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}
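
/*
 * Illustrative sketch (not part of this file): the usual refcnt life
 * cycle.  The softc and its users are hypothetical.
 */
#if 0
void
example_detach(struct example_softc *sc)
{
	/*
	 * refcnt_init(&sc->sc_refs) happened at attach time; each user
	 * brackets its work with refcnt_take()/refcnt_rele_wake().
	 * Drop our own reference and wait for the users to drain.
	 */
	refcnt_finalize(&sc->sc_refs, "exdetach");
	free(sc, M_DEVBUF, sizeof(*sc));
}
#endif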