kern_lock.c revision 1.53
1/*	$NetBSD: kern_lock.c,v 1.53 2001/04/27 00:05:13 marcus Exp $	*/
2
3/*-
4 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * This code is derived from software contributed to The NetBSD Foundation
12 * by Ross Harvey.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 *    must display the following acknowledgement:
24 *	This product includes software developed by the NetBSD
25 *	Foundation, Inc. and its contributors.
26 * 4. Neither the name of The NetBSD Foundation nor the names of its
27 *    contributors may be used to endorse or promote products derived
28 *    from this software without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43/*
44 * Copyright (c) 1995
45 *	The Regents of the University of California.  All rights reserved.
46 *
47 * This code contains ideas from software contributed to Berkeley by
48 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
49 * System project at Carnegie-Mellon University.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions
53 * are met:
54 * 1. Redistributions of source code must retain the above copyright
55 *    notice, this list of conditions and the following disclaimer.
56 * 2. Redistributions in binary form must reproduce the above copyright
57 *    notice, this list of conditions and the following disclaimer in the
58 *    documentation and/or other materials provided with the distribution.
59 * 3. All advertising materials mentioning features or use of this software
60 *    must display the following acknowledgement:
61 *	This product includes software developed by the University of
62 *	California, Berkeley and its contributors.
63 * 4. Neither the name of the University nor the names of its contributors
64 *    may be used to endorse or promote products derived from this software
65 *    without specific prior written permission.
66 *
67 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
68 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
69 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
70 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
71 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
72 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
73 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
74 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
75 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
76 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
77 * SUCH DAMAGE.
78 *
79 *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
80 */
81
82#include "opt_multiprocessor.h"
83#include "opt_lockdebug.h"
84#include "opt_ddb.h"
85
86#include <sys/param.h>
87#include <sys/proc.h>
88#include <sys/lock.h>
89#include <sys/systm.h>
90#include <machine/cpu.h>
91
92#if defined(LOCKDEBUG)
93#include <sys/syslog.h>
94/*
95 * note that stdarg.h and the ansi style va_start macro is used for both
96 * ansi and traditional c compiles.
97 * XXX: this requires that stdarg.h define: va_alist and va_dcl
98 */
99#include <machine/stdarg.h>
100
101void	lock_printf(const char *fmt, ...)
102    __attribute__((__format__(__printf__,1,2)));
103
104int	lock_debug_syslog = 1;	/* defaults to syslog, but can be patched */
105#endif
106
107/*
108 * Locking primitives implementation.
109 * Locks provide shared/exclusive synchronization.
110 */
111
/*
 * Lock accounting, compiled in only for LOCKDEBUG or DIAGNOSTIC kernels;
 * otherwise both macros expand to nothing.
 *
 * COUNT_CPU(cpu_id, x) adds x to the spin-lock tally: the current CPU's
 * ci_spin_locks on MULTIPROCESSOR kernels, the global spin_locks
 * otherwise.  Neither expansion uses the cpu_id argument.
 *
 * COUNT(lkp, p, cpu_id, x) adds x to the spin-lock tally when lkp is a
 * spin lock (LK_SPIN set in lk_flags) and to p->p_locks otherwise.
 * Pass a negative x to account for a release.
 */
112#if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */
113#if defined(MULTIPROCESSOR) /* { */
114#define	COUNT_CPU(cpu_id, x)						\
115	curcpu()->ci_spin_locks += (x)
116#else
117u_long	spin_locks;
118#define	COUNT_CPU(cpu_id, x)	spin_locks += (x)
119#endif /* MULTIPROCESSOR */ /* } */
120
121#define	COUNT(lkp, p, cpu_id, x)					\
122do {									\
123	if ((lkp)->lk_flags & LK_SPIN)					\
124		COUNT_CPU((cpu_id), (x));				\
125	else								\
126		(p)->p_locks += (x);					\
127} while (/*CONSTCOND*/0)
128#else
129#define COUNT(lkp, p, cpu_id, x)
130#define COUNT_CPU(cpu_id, x)
131#endif /* LOCKDEBUG || DIAGNOSTIC */ /* } */
132
/*
 * Hook invoked on every iteration of the spin-wait loop in ACQUIRE().
 * Defaults to nothing unless the platform supplied its own definition.
 */
133#ifndef SPINLOCK_SPIN_HOOK		/* from <machine/lock.h> */
134#define	SPINLOCK_SPIN_HOOK		/* nothing */
135#endif
136
/*
 * Take / drop the lock's internal interlock (lk_interlock).
 *
 * When `flags' has LK_SPIN set, INTERLOCK_ACQUIRE first raises the IPL
 * with splsched() and stores the previous level in `s'; INTERLOCK_RELEASE
 * restores it with splx(s) after dropping the interlock.  `s' must be an
 * lvalue and must be passed unchanged to the matching release; it is
 * neither written nor read when LK_SPIN is clear.
 */
137#define	INTERLOCK_ACQUIRE(lkp, flags, s)				\
138do {									\
139	if ((flags) & LK_SPIN)						\
140		s = splsched();						\
141	simple_lock(&(lkp)->lk_interlock);				\
142} while (/*CONSTCOND*/0)
143
144#define	INTERLOCK_RELEASE(lkp, flags, s)				\
145do {									\
146	simple_unlock(&(lkp)->lk_interlock);				\
147	if ((flags) & LK_SPIN)						\
148		splx(s);						\
149} while (/*CONSTCOND*/0)
150
/*
 * Spin-out detection for the spin path of ACQUIRE(), LOCKDEBUG only.
 *
 * SPINLOCK_SPINCHECK_DECL declares a 32-bit iteration counter and
 * SPINLOCK_SPINCHECK bumps it once per loop iteration.  When the counter
 * wraps back to zero the lock's counts, holder CPU and last lock/unlock
 * locations are printed, and the kernel debugger is entered if DDB is
 * configured.  SPINLOCK_SPINCHECK refers to a variable named `lkp' in the
 * scope where it is expanded.  Without LOCKDEBUG both macros are empty.
 */
151#if defined(LOCKDEBUG)
152#if defined(DDB)
153#define	SPINLOCK_SPINCHECK_DEBUGGER	Debugger()
154#else
155#define	SPINLOCK_SPINCHECK_DEBUGGER	/* nothing */
156#endif
157
158#define	SPINLOCK_SPINCHECK_DECL						\
159	/* 32-bits of count -- wrap constitutes a "spinout" */		\
160	uint32_t __spinc = 0
161
162#define	SPINLOCK_SPINCHECK						\
163do {									\
164	if (++__spinc == 0) {						\
165		printf("LK_SPIN spinout, excl %d, share %d\n",		\
166		    lkp->lk_exclusivecount, lkp->lk_sharecount);	\
167		if (lkp->lk_exclusivecount)				\
168			printf("held by CPU %lu\n",			\
169			    (u_long) lkp->lk_cpu);			\
170		if (lkp->lk_lock_file)					\
171			printf("last locked at %s:%d\n",		\
172			    lkp->lk_lock_file, lkp->lk_lock_line);	\
173		if (lkp->lk_unlock_file)				\
174			printf("last unlocked at %s:%d\n",		\
175			    lkp->lk_unlock_file, lkp->lk_unlock_line);	\
176		SPINLOCK_SPINCHECK_DEBUGGER;				\
177	}								\
178} while (0)
179#else
180#define	SPINLOCK_SPINCHECK_DECL			/* nothing */
181#define	SPINLOCK_SPINCHECK			/* nothing */
182#endif /* LOCKDEBUG */
183
184/*
185 * Acquire a resource.
186 */
/*
 * ACQUIRE(lkp, error, extflags, drain, wanted)
 *
 * Wait, with the interlock held on entry, until the expression `wanted'
 * evaluates to zero.  `wanted' is re-evaluated textually on every pass,
 * so it must be an expression over the lock's state, not a cached value.
 *
 *	lkp		lock being waited on
 *	error		lvalue receiving 0 or an errno
 *	extflags	effective flags; LK_SPIN selects spin vs. sleep,
 *			LK_SLEEPFAIL makes any sleep fail with ENOLCK
 *	drain		non-zero for a drain request: the waiter is not
 *			counted in lk_waitcount and (sleep case) sleeps
 *			on &lk_flags with LK_WAITDRAIN set
 *	wanted		condition that must become false
 *
 * Spin case: while `wanted' is true the interlock is dropped and the CPU
 * spins, calling SPINLOCK_SPIN_HOOK; when `wanted' is seen false without
 * the interlock it is retaken and the condition re-tested, so the loop
 * exits only with the interlock held and `wanted' false.  error is 0.
 *
 * Sleep case: ltsleep() is called with the interlock as its interlock
 * argument (presumably dropping and retaking it around the sleep --
 * confirm against ltsleep(9)).  The loop ends when `wanted' is false, when
 * ltsleep() returns an error, or after the first sleep if LK_SLEEPFAIL
 * is set.
 *
 * NOTE: not wrapped in do/while, and the spin path uses a variable named
 * `s' (saved IPL) from the enclosing scope.
 */
187#define ACQUIRE(lkp, error, extflags, drain, wanted)			\
188	if ((extflags) & LK_SPIN) {					\
189		int interlocked;					\
190		SPINLOCK_SPINCHECK_DECL;				\
191									\
192		if ((drain) == 0)					\
193			(lkp)->lk_waitcount++;				\
194		for (interlocked = 1;;) {				\
195			SPINLOCK_SPINCHECK;				\
196			if (wanted) {					\
197				if (interlocked) {			\
198					INTERLOCK_RELEASE((lkp),	\
199					    LK_SPIN, s);		\
200					interlocked = 0;		\
201				}					\
202				SPINLOCK_SPIN_HOOK;			\
203			} else if (interlocked) {			\
204				break;					\
205			} else {					\
206				INTERLOCK_ACQUIRE((lkp), LK_SPIN, s);	\
207				interlocked = 1;			\
208			}						\
209		}							\
210		if ((drain) == 0)					\
211			(lkp)->lk_waitcount--;				\
212		KASSERT((wanted) == 0);					\
213		error = 0;	/* sanity */				\
214	} else {							\
215		for (error = 0; wanted; ) {				\
216			if ((drain))					\
217				(lkp)->lk_flags |= LK_WAITDRAIN;	\
218			else						\
219				(lkp)->lk_waitcount++;			\
220			/* XXX Cast away volatile. */			\
221			error = ltsleep((drain) ?			\
222			    (void *)&(lkp)->lk_flags :			\
223			    (void *)(lkp), (lkp)->lk_prio,		\
224			    (lkp)->lk_wmesg, (lkp)->lk_timo,		\
225			    &(lkp)->lk_interlock);			\
226			if ((drain) == 0)				\
227				(lkp)->lk_waitcount--;			\
228			if (error)					\
229				break;					\
230			if ((extflags) & LK_SLEEPFAIL) {		\
231				error = ENOLCK;				\
232				break;					\
233			}						\
234		}							\
235	}
236
/*
 * Record the exclusive holder of a lock: the owning CPU for a spin lock,
 * the owning pid for a sleep lock.  Only the field matching the lock's
 * kind is written.
 */
#define	SETHOLDER(lkp, pid, cpu_id)					\
do {									\
	if (((lkp)->lk_flags & LK_SPIN) == 0)				\
		(lkp)->lk_lockholder = pid;				\
	else								\
		(lkp)->lk_cpu = cpu_id;					\
} while (/*CONSTCOND*/0)

/*
 * Non-zero when the recorded holder matches the caller: compares lk_cpu
 * with cpu_id for a spin lock, lk_lockholder with pid otherwise.
 */
#define	WEHOLDIT(lkp, pid, cpu_id)					\
	(((lkp)->lk_flags & LK_SPIN) ?					\
	 ((lkp)->lk_cpu == (cpu_id)) : ((lkp)->lk_lockholder == (pid)))

/*
 * Wake one thread sleeping on the lock.  Does nothing for spin locks or
 * when lk_waitcount is zero.
 */
#define	WAKEUP_WAITER(lkp)						\
do {									\
	if (!((lkp)->lk_flags & LK_SPIN) && (lkp)->lk_waitcount) {	\
		/* XXX Cast away volatile. */				\
		wakeup_one((void *)(lkp));				\
	}								\
} while (/*CONSTCOND*/0)
256
/*
 * LOCKDEBUG bookkeeping of exclusively held spin locks.
 *
 * HAVEIT(lkp) appends a spin lock to spinlock_list and DONTHAVEIT(lkp)
 * removes it; both do nothing for non-LK_SPIN locks.  The list is edited
 * at spllock(), and on MULTIPROCESSOR kernels additionally under
 * spinlock_list_slock, taken with the raw __cpu_simple_lock primitives.
 * Each macro declares its own local `s' inside its block for the saved
 * IPL.  Without LOCKDEBUG both macros are empty.
 */
257#if defined(LOCKDEBUG) /* { */
258#if defined(MULTIPROCESSOR) /* { */
259struct simplelock spinlock_list_slock = SIMPLELOCK_INITIALIZER;
260
261#define	SPINLOCK_LIST_LOCK()						\
262	__cpu_simple_lock(&spinlock_list_slock.lock_data)
263
264#define	SPINLOCK_LIST_UNLOCK()						\
265	__cpu_simple_unlock(&spinlock_list_slock.lock_data)
266#else
267#define	SPINLOCK_LIST_LOCK()	/* nothing */
268
269#define	SPINLOCK_LIST_UNLOCK()	/* nothing */
270#endif /* MULTIPROCESSOR */ /* } */
271
272TAILQ_HEAD(, lock) spinlock_list =
273    TAILQ_HEAD_INITIALIZER(spinlock_list);
274
275#define	HAVEIT(lkp)							\
276do {									\
277	if ((lkp)->lk_flags & LK_SPIN) {				\
278		int s = spllock();					\
279		SPINLOCK_LIST_LOCK();					\
280		/* XXX Cast away volatile. */				\
281		TAILQ_INSERT_TAIL(&spinlock_list, (struct lock *)(lkp),	\
282		    lk_list);						\
283		SPINLOCK_LIST_UNLOCK();					\
284		splx(s);						\
285	}								\
286} while (/*CONSTCOND*/0)
287
288#define	DONTHAVEIT(lkp)							\
289do {									\
290	if ((lkp)->lk_flags & LK_SPIN) {				\
291		int s = spllock();					\
292		SPINLOCK_LIST_LOCK();					\
293		/* XXX Cast away volatile. */				\
294		TAILQ_REMOVE(&spinlock_list, (struct lock *)(lkp),	\
295		    lk_list);						\
296		SPINLOCK_LIST_UNLOCK();					\
297		splx(s);						\
298	}								\
299} while (/*CONSTCOND*/0)
300#else
301#define	HAVEIT(lkp)		/* nothing */
302
303#define	DONTHAVEIT(lkp)		/* nothing */
304#endif /* LOCKDEBUG */ /* } */
305
306#if defined(LOCKDEBUG)
307/*
308 * Lock debug printing routine; can be configured to print to console
309 * or log to syslog.
310 */
311void
312lock_printf(const char *fmt, ...)
313{
314	va_list ap;
315
316	va_start(ap, fmt);
317	if (lock_debug_syslog)
318		vlog(LOG_DEBUG, fmt, ap);
319	else
320		vprintf(fmt, ap);
321	va_end(ap);
322}
323#endif /* LOCKDEBUG */
324
325/*
326 * Initialize a lock; required before use.
327 */
328void
329lockinit(struct lock *lkp, int prio, const char *wmesg, int timo, int flags)
330{
331
332	memset(lkp, 0, sizeof(struct lock));
333	simple_lock_init(&lkp->lk_interlock);
334	lkp->lk_flags = flags & LK_EXTFLG_MASK;
335	if (flags & LK_SPIN)
336		lkp->lk_cpu = LK_NOCPU;
337	else {
338		lkp->lk_lockholder = LK_NOPROC;
339		lkp->lk_prio = prio;
340		lkp->lk_timo = timo;
341	}
342	lkp->lk_wmesg = wmesg;	/* just a name for spin locks */
343#if defined(LOCKDEBUG)
344	lkp->lk_lock_file = NULL;
345	lkp->lk_unlock_file = NULL;
346#endif
347}
348
349/*
350 * Determine the status of a lock.
351 */
352int
353lockstatus(struct lock *lkp)
354{
355	int s, lock_type = 0;
356
357	INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s);
358	if (lkp->lk_exclusivecount != 0)
359		lock_type = LK_EXCLUSIVE;
360	else if (lkp->lk_sharecount != 0)
361		lock_type = LK_SHARED;
362	INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
363	return (lock_type);
364}
365
366#if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
367/*
368 * Make sure no spin locks are held by a CPU that is about
369 * to context switch.
370 */
371void
372spinlock_switchcheck(void)
373{
374	u_long cnt;
375	int s;
376
377	s = spllock();
378#if defined(MULTIPROCESSOR)
379	cnt = curcpu()->ci_spin_locks;
380#else
381	cnt = spin_locks;
382#endif
383	splx(s);
384
385	if (cnt != 0)
386		panic("spinlock_switchcheck: CPU %lu has %lu spin locks",
387		    (u_long) cpu_number(), cnt);
388}
389#endif /* LOCKDEBUG || DIAGNOSTIC */
390
391/*
392 * Locks and IPLs (interrupt priority levels):
393 *
394 * Locks which may be taken from interrupt context must be handled
395 * very carefully; you must spl to the highest IPL where the lock
396 * is needed before acquiring the lock.
397 *
398 * It is also important to avoid deadlock, since certain (very high
399 * priority) interrupts are often needed to keep the system as a whole
400 * from deadlocking, and must not be blocked while you are spinning
401 * waiting for a lower-priority lock.
402 *
403 * In addition, the lock-debugging hooks themselves need to use locks!
404 *
405 * A raw __cpu_simple_lock may be used from interrupts as long as it
406 * is acquired and held at a single IPL.
407 *
408 * A simple_lock (which is a __cpu_simple_lock wrapped with some
409 * debugging hooks) may be used at or below spllock(), which is
410 * typically at or just below splhigh() (i.e. blocks everything
411 * but certain machine-dependent extremely high priority interrupts).
412 *
413 * spinlockmgr spinlocks should be used at or below splsched().
414 *
415 * Some platforms may have interrupts of higher priority than splsched(),
416 * including hard serial interrupts, inter-processor interrupts, and
417 * kernel debugger traps.
418 */
419
420/*
421 * XXX XXX kludge around another kludge..
422 *
423 * vfs_shutdown() may be called from interrupt context, either as a result
424 * of a panic, or from the debugger.   It proceeds to call
425 * sys_sync(&proc0, ...), pretending it's running on behalf of proc0.
426 *
427 * We would like to make an attempt to sync the filesystems in this case, so
428 * if this happens, we treat attempts to acquire locks specially.
429 * All locks are acquired on behalf of proc0.
430 *
431 * If we've already panicked, we don't block waiting for locks, but
432 * just barge right ahead since we're already going down in flames.
433 */
434
435/*
436 * Set, change, or release a lock.
437 *
438 * Shared requests increment the shared count. Exclusive requests set the
439 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
440 * accepted shared locks and shared-to-exclusive upgrades to go away.
441 */
442int
443#if defined(LOCKDEBUG)
444_lockmgr(__volatile struct lock *lkp, u_int flags,
445    struct simplelock *interlkp, const char *file, int line)
446#else
447lockmgr(__volatile struct lock *lkp, u_int flags,
448    struct simplelock *interlkp)
449#endif
450{
451	int error;
452	pid_t pid;
453	int extflags;
454	cpuid_t cpu_id;
455	struct proc *p = curproc;
456	int lock_shutdown_noblock = 0;
457	int s;
458
459	error = 0;
460
461	INTERLOCK_ACQUIRE(lkp, lkp->lk_flags, s);
462	if (flags & LK_INTERLOCK)
463		simple_unlock(interlkp);
464	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
465
466#ifdef DIAGNOSTIC /* { */
467	/*
468	 * Don't allow spins on sleep locks and don't allow sleeps
469	 * on spin locks.
470	 */
471	if ((flags ^ lkp->lk_flags) & LK_SPIN)
472		panic("lockmgr: sleep/spin mismatch\n");
473#endif /* } */
474
475	if (extflags & LK_SPIN)
476		pid = LK_KERNPROC;
477	else {
478		if (p == NULL) {
479			if (!doing_shutdown) {
480#ifdef DIAGNOSTIC
481				panic("lockmgr: no context");
482#endif
483			} else {
484				p = &proc0;
485				if (panicstr && (!(flags & LK_NOWAIT))) {
486					flags |= LK_NOWAIT;
487					lock_shutdown_noblock = 1;
488				}
489			}
490		}
491		pid = p->p_pid;
492	}
493	cpu_id = cpu_number();
494
495	/*
496	 * Once a lock has drained, the LK_DRAINING flag is set and an
497	 * exclusive lock is returned. The only valid operation thereafter
498	 * is a single release of that exclusive lock. This final release
499	 * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
500	 * further requests of any sort will result in a panic. The bits
501	 * selected for these two flags are chosen so that they will be set
502	 * in memory that is freed (freed memory is filled with 0xdeadbeef).
503	 * The final release is permitted to give a new lease on life to
504	 * the lock by specifying LK_REENABLE.
505	 */
506	if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
507#ifdef DIAGNOSTIC /* { */
508		if (lkp->lk_flags & LK_DRAINED)
509			panic("lockmgr: using decommissioned lock");
510		if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
511		    WEHOLDIT(lkp, pid, cpu_id) == 0)
512			panic("lockmgr: non-release on draining lock: %d\n",
513			    flags & LK_TYPE_MASK);
514#endif /* DIAGNOSTIC */ /* } */
515		lkp->lk_flags &= ~LK_DRAINING;
516		if ((flags & LK_REENABLE) == 0)
517			lkp->lk_flags |= LK_DRAINED;
518	}
519
520	switch (flags & LK_TYPE_MASK) {
521
522	case LK_SHARED:
523		if (WEHOLDIT(lkp, pid, cpu_id) == 0) {
524			/*
525			 * If just polling, check to see if we will block.
526			 */
527			if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
528			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) {
529				error = EBUSY;
530				break;
531			}
532			/*
533			 * Wait for exclusive locks and upgrades to clear.
534			 */
535			ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags &
536			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE));
537			if (error)
538				break;
539			lkp->lk_sharecount++;
540			COUNT(lkp, p, cpu_id, 1);
541			break;
542		}
543		/*
544		 * We hold an exclusive lock, so downgrade it to shared.
545		 * An alternative would be to fail with EDEADLK.
546		 */
547		lkp->lk_sharecount++;
548		COUNT(lkp, p, cpu_id, 1);
549		/* fall into downgrade */
550
551	case LK_DOWNGRADE:
552		if (WEHOLDIT(lkp, pid, cpu_id) == 0 ||
553		    lkp->lk_exclusivecount == 0)
554			panic("lockmgr: not holding exclusive lock");
555		lkp->lk_sharecount += lkp->lk_exclusivecount;
556		lkp->lk_exclusivecount = 0;
557		lkp->lk_recurselevel = 0;
558		lkp->lk_flags &= ~LK_HAVE_EXCL;
559		SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
560#if defined(LOCKDEBUG)
561		lkp->lk_unlock_file = file;
562		lkp->lk_unlock_line = line;
563#endif
564		DONTHAVEIT(lkp);
565		WAKEUP_WAITER(lkp);
566		break;
567
568	case LK_EXCLUPGRADE:
569		/*
570		 * If another process is ahead of us to get an upgrade,
571		 * then we want to fail rather than have an intervening
572		 * exclusive access.
573		 */
574		if (lkp->lk_flags & LK_WANT_UPGRADE) {
575			lkp->lk_sharecount--;
576			COUNT(lkp, p, cpu_id, -1);
577			error = EBUSY;
578			break;
579		}
580		/* fall into normal upgrade */
581
582	case LK_UPGRADE:
583		/*
584		 * Upgrade a shared lock to an exclusive one. If another
585		 * shared lock has already requested an upgrade to an
586		 * exclusive lock, our shared lock is released and an
587		 * exclusive lock is requested (which will be granted
588		 * after the upgrade). If we return an error, the file
589		 * will always be unlocked.
590		 */
591		if (WEHOLDIT(lkp, pid, cpu_id) || lkp->lk_sharecount <= 0)
592			panic("lockmgr: upgrade exclusive lock");
593		lkp->lk_sharecount--;
594		COUNT(lkp, p, cpu_id, -1);
595		/*
596		 * If we are just polling, check to see if we will block.
597		 */
598		if ((extflags & LK_NOWAIT) &&
599		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
600		     lkp->lk_sharecount > 1)) {
601			error = EBUSY;
602			break;
603		}
604		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
605			/*
606			 * We are first shared lock to request an upgrade, so
607			 * request upgrade and wait for the shared count to
608			 * drop to zero, then take exclusive lock.
609			 */
610			lkp->lk_flags |= LK_WANT_UPGRADE;
611			ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount);
612			lkp->lk_flags &= ~LK_WANT_UPGRADE;
613			if (error)
614				break;
615			lkp->lk_flags |= LK_HAVE_EXCL;
616			SETHOLDER(lkp, pid, cpu_id);
617#if defined(LOCKDEBUG)
618			lkp->lk_lock_file = file;
619			lkp->lk_lock_line = line;
620#endif
621			HAVEIT(lkp);
622			if (lkp->lk_exclusivecount != 0)
623				panic("lockmgr: non-zero exclusive count");
624			lkp->lk_exclusivecount = 1;
625			if (extflags & LK_SETRECURSE)
626				lkp->lk_recurselevel = 1;
627			COUNT(lkp, p, cpu_id, 1);
628			break;
629		}
630		/*
631		 * Someone else has requested upgrade. Release our shared
632		 * lock, awaken upgrade requestor if we are the last shared
633		 * lock, then request an exclusive lock.
634		 */
635		if (lkp->lk_sharecount == 0)
636			WAKEUP_WAITER(lkp);
637		/* fall into exclusive request */
638
639	case LK_EXCLUSIVE:
640		if (WEHOLDIT(lkp, pid, cpu_id)) {
641			/*
642			 * Recursive lock.
643			 */
644			if ((extflags & LK_CANRECURSE) == 0 &&
645			     lkp->lk_recurselevel == 0) {
646				if (extflags & LK_RECURSEFAIL) {
647					error = EDEADLK;
648					break;
649				} else
650					panic("lockmgr: locking against myself");
651			}
652			lkp->lk_exclusivecount++;
653			if (extflags & LK_SETRECURSE &&
654			    lkp->lk_recurselevel == 0)
655				lkp->lk_recurselevel = lkp->lk_exclusivecount;
656			COUNT(lkp, p, cpu_id, 1);
657			break;
658		}
659		/*
660		 * If we are just polling, check to see if we will sleep.
661		 */
662		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
663		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
664		     lkp->lk_sharecount != 0)) {
665			error = EBUSY;
666			break;
667		}
668		/*
669		 * Try to acquire the want_exclusive flag.
670		 */
671		ACQUIRE(lkp, error, extflags, 0, lkp->lk_flags &
672		    (LK_HAVE_EXCL | LK_WANT_EXCL));
673		if (error)
674			break;
675		lkp->lk_flags |= LK_WANT_EXCL;
676		/*
677		 * Wait for shared locks and upgrades to finish.
678		 */
679		ACQUIRE(lkp, error, extflags, 0, lkp->lk_sharecount != 0 ||
680		       (lkp->lk_flags & LK_WANT_UPGRADE));
681		lkp->lk_flags &= ~LK_WANT_EXCL;
682		if (error)
683			break;
684		lkp->lk_flags |= LK_HAVE_EXCL;
685		SETHOLDER(lkp, pid, cpu_id);
686#if defined(LOCKDEBUG)
687		lkp->lk_lock_file = file;
688		lkp->lk_lock_line = line;
689#endif
690		HAVEIT(lkp);
691		if (lkp->lk_exclusivecount != 0)
692			panic("lockmgr: non-zero exclusive count");
693		lkp->lk_exclusivecount = 1;
694		if (extflags & LK_SETRECURSE)
695			lkp->lk_recurselevel = 1;
696		COUNT(lkp, p, cpu_id, 1);
697		break;
698
699	case LK_RELEASE:
700		if (lkp->lk_exclusivecount != 0) {
701			if (WEHOLDIT(lkp, pid, cpu_id) == 0) {
702				if (lkp->lk_flags & LK_SPIN) {
703					panic("lockmgr: processor %lu, not "
704					    "exclusive lock holder %lu "
705					    "unlocking", cpu_id, lkp->lk_cpu);
706				} else {
707					panic("lockmgr: pid %d, not "
708					    "exclusive lock holder %d "
709					    "unlocking", pid,
710					    lkp->lk_lockholder);
711				}
712			}
713			if (lkp->lk_exclusivecount == lkp->lk_recurselevel)
714				lkp->lk_recurselevel = 0;
715			lkp->lk_exclusivecount--;
716			COUNT(lkp, p, cpu_id, -1);
717			if (lkp->lk_exclusivecount == 0) {
718				lkp->lk_flags &= ~LK_HAVE_EXCL;
719				SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
720#if defined(LOCKDEBUG)
721				lkp->lk_unlock_file = file;
722				lkp->lk_unlock_line = line;
723#endif
724				DONTHAVEIT(lkp);
725			}
726		} else if (lkp->lk_sharecount != 0) {
727			lkp->lk_sharecount--;
728			COUNT(lkp, p, cpu_id, -1);
729		}
730#ifdef DIAGNOSTIC
731		else
732			panic("lockmgr: release of unlocked lock!");
733#endif
734		WAKEUP_WAITER(lkp);
735		break;
736
737	case LK_DRAIN:
738		/*
739		 * Check that we do not already hold the lock, as it can
740		 * never drain if we do. Unfortunately, we have no way to
741		 * check for holding a shared lock, but at least we can
742		 * check for an exclusive one.
743		 */
744		if (WEHOLDIT(lkp, pid, cpu_id))
745			panic("lockmgr: draining against myself");
746		/*
747		 * If we are just polling, check to see if we will sleep.
748		 */
749		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
750		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
751		     lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) {
752			error = EBUSY;
753			break;
754		}
755		ACQUIRE(lkp, error, extflags, 1,
756		    ((lkp->lk_flags &
757		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
758		     lkp->lk_sharecount != 0 ||
759		     lkp->lk_waitcount != 0));
760		if (error)
761			break;
762		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
763		SETHOLDER(lkp, pid, cpu_id);
764#if defined(LOCKDEBUG)
765		lkp->lk_lock_file = file;
766		lkp->lk_lock_line = line;
767#endif
768		HAVEIT(lkp);
769		lkp->lk_exclusivecount = 1;
770		/* XXX unlikely that we'd want this */
771		if (extflags & LK_SETRECURSE)
772			lkp->lk_recurselevel = 1;
773		COUNT(lkp, p, cpu_id, 1);
774		break;
775
776	default:
777		INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
778		panic("lockmgr: unknown locktype request %d",
779		    flags & LK_TYPE_MASK);
780		/* NOTREACHED */
781	}
782	if ((lkp->lk_flags & (LK_WAITDRAIN|LK_SPIN)) == LK_WAITDRAIN &&
783	    ((lkp->lk_flags &
784	      (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
785	     lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
786		lkp->lk_flags &= ~LK_WAITDRAIN;
787		wakeup_one((void *)&lkp->lk_flags);
788	}
789	/*
790	 * Note that this panic will be a recursive panic, since
791	 * we only set lock_shutdown_noblock above if panicstr != NULL.
792	 */
793	if (error && lock_shutdown_noblock)
794		panic("lockmgr: deadlock (see previous panic)");
795
796	INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
797	return (error);
798}
799
800/*
801 * For a recursive spinlock held one or more times by the current CPU,
802 * release all N locks, and return N.
803 * Intended for use in mi_switch() shortly before context switching.
804 */
805
806int
807#if defined(LOCKDEBUG)
808_spinlock_release_all(__volatile struct lock *lkp, const char *file, int line)
809#else
810spinlock_release_all(__volatile struct lock *lkp)
811#endif
812{
813	int s, count;
814	cpuid_t cpu_id;
815
816	KASSERT(lkp->lk_flags & LK_SPIN);
817
818	INTERLOCK_ACQUIRE(lkp, LK_SPIN, s);
819
820	cpu_id = cpu_number();
821	count = lkp->lk_exclusivecount;
822
823	if (count != 0) {
824#ifdef DIAGNOSTIC
825		if (WEHOLDIT(lkp, 0, cpu_id) == 0) {
826			panic("spinlock_release_all: processor %lu, not "
827			    "exclusive lock holder %lu "
828			    "unlocking", (long)cpu_id, lkp->lk_cpu);
829		}
830#endif
831		lkp->lk_recurselevel = 0;
832		lkp->lk_exclusivecount = 0;
833		COUNT_CPU(cpu_id, -count);
834		lkp->lk_flags &= ~LK_HAVE_EXCL;
835		SETHOLDER(lkp, LK_NOPROC, LK_NOCPU);
836#if defined(LOCKDEBUG)
837		lkp->lk_unlock_file = file;
838		lkp->lk_unlock_line = line;
839#endif
840		DONTHAVEIT(lkp);
841	}
842#ifdef DIAGNOSTIC
843	else if (lkp->lk_sharecount != 0)
844		panic("spinlock_release_all: release of shared lock!");
845	else
846		panic("spinlock_release_all: release of unlocked lock!");
847#endif
848	INTERLOCK_RELEASE(lkp, LK_SPIN, s);
849
850	return (count);
851}
852
853/*
854 * For a recursive spinlock held one or more times by the current CPU,
855 * release all N locks, and return N.
856 * Intended for use in mi_switch() right after resuming execution.
857 */
858
859void
860#if defined(LOCKDEBUG)
861_spinlock_acquire_count(__volatile struct lock *lkp, int count,
862    const char *file, int line)
863#else
864spinlock_acquire_count(__volatile struct lock *lkp, int count)
865#endif
866{
867	int s, error;
868	cpuid_t cpu_id;
869
870	KASSERT(lkp->lk_flags & LK_SPIN);
871
872	INTERLOCK_ACQUIRE(lkp, LK_SPIN, s);
873
874	cpu_id = cpu_number();
875
876#ifdef DIAGNOSTIC
877	if (WEHOLDIT(lkp, LK_NOPROC, cpu_id))
878		panic("spinlock_acquire_count: processor %lu already holds lock\n", (long)cpu_id);
879#endif
880	/*
881	 * Try to acquire the want_exclusive flag.
882	 */
883	ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_flags &
884	    (LK_HAVE_EXCL | LK_WANT_EXCL));
885	lkp->lk_flags |= LK_WANT_EXCL;
886	/*
887	 * Wait for shared locks and upgrades to finish.
888	 */
889	ACQUIRE(lkp, error, LK_SPIN, 0, lkp->lk_sharecount != 0 ||
890	    (lkp->lk_flags & LK_WANT_UPGRADE));
891	lkp->lk_flags &= ~LK_WANT_EXCL;
892	lkp->lk_flags |= LK_HAVE_EXCL;
893	SETHOLDER(lkp, LK_NOPROC, cpu_id);
894#if defined(LOCKDEBUG)
895	lkp->lk_lock_file = file;
896	lkp->lk_lock_line = line;
897#endif
898	HAVEIT(lkp);
899	if (lkp->lk_exclusivecount != 0)
900		panic("lockmgr: non-zero exclusive count");
901	lkp->lk_exclusivecount = count;
902	lkp->lk_recurselevel = 1;
903	COUNT_CPU(cpu_id, count);
904
905	INTERLOCK_RELEASE(lkp, lkp->lk_flags, s);
906}
907
908
909
910/*
911 * Print out information about state of a lock. Used by VOP_PRINT
912 * routines to display ststus about contained locks.
913 */
914void
915lockmgr_printinfo(__volatile struct lock *lkp)
916{
917
918	if (lkp->lk_sharecount)
919		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
920		    lkp->lk_sharecount);
921	else if (lkp->lk_flags & LK_HAVE_EXCL) {
922		printf(" lock type %s: EXCL (count %d) by ",
923		    lkp->lk_wmesg, lkp->lk_exclusivecount);
924		if (lkp->lk_flags & LK_SPIN)
925			printf("processor %lu", lkp->lk_cpu);
926		else
927			printf("pid %d", lkp->lk_lockholder);
928	} else
929		printf(" not locked");
930	if ((lkp->lk_flags & LK_SPIN) == 0 && lkp->lk_waitcount > 0)
931		printf(" with %d pending", lkp->lk_waitcount);
932}
933
934#if defined(LOCKDEBUG) /* { */
935TAILQ_HEAD(, simplelock) simplelock_list =
936    TAILQ_HEAD_INITIALIZER(simplelock_list);
937
938#if defined(MULTIPROCESSOR) /* { */
939struct simplelock simplelock_list_slock = SIMPLELOCK_INITIALIZER;
940
941#define	SLOCK_LIST_LOCK()						\
942	__cpu_simple_lock(&simplelock_list_slock.lock_data)
943
944#define	SLOCK_LIST_UNLOCK()						\
945	__cpu_simple_unlock(&simplelock_list_slock.lock_data)
946
947#define	SLOCK_COUNT(x)							\
948	curcpu()->ci_simple_locks += (x)
949#else
950u_long simple_locks;
951
952#define	SLOCK_LIST_LOCK()	/* nothing */
953
954#define	SLOCK_LIST_UNLOCK()	/* nothing */
955
956#define	SLOCK_COUNT(x)		simple_locks += (x)
957#endif /* MULTIPROCESSOR */ /* } */
958
959#ifdef DDB /* { */
960#ifdef MULTIPROCESSOR
961int simple_lock_debugger = 1;	/* more serious on MP */
962#else
963int simple_lock_debugger = 0;
964#endif
965#define	SLOCK_DEBUGGER()	if (simple_lock_debugger) Debugger()
966#else
967#define	SLOCK_DEBUGGER()	/* nothing */
968#endif /* } */
969
970#ifdef MULTIPROCESSOR
971#define SLOCK_MP()		lock_printf("on cpu %ld\n", 		\
972				    (u_long) cpu_number())
973#else
974#define SLOCK_MP()		/* nothing */
975#endif
976
977#define	SLOCK_WHERE(str, alp, id, l)					\
978do {									\
979	lock_printf(str);						\
980	lock_printf("lock: %p, currently at: %s:%d\n", (alp), (id), (l)); \
981	SLOCK_MP();							\
982	if ((alp)->lock_file != NULL)					\
983		lock_printf("last locked: %s:%d\n", (alp)->lock_file,	\
984		    (alp)->lock_line);					\
985	if ((alp)->unlock_file != NULL)					\
986		lock_printf("last unlocked: %s:%d\n", (alp)->unlock_file, \
987		    (alp)->unlock_line);				\
988	SLOCK_DEBUGGER();						\
989} while (/*CONSTCOND*/0)
990
991/*
992 * Simple lock functions so that the debugger can see from whence
993 * they are being called.
994 */
995void
996simple_lock_init(struct simplelock *alp)
997{
998
999#if defined(MULTIPROCESSOR) /* { */
1000	__cpu_simple_lock_init(&alp->lock_data);
1001#else
1002	alp->lock_data = __SIMPLELOCK_UNLOCKED;
1003#endif /* } */
1004	alp->lock_file = NULL;
1005	alp->lock_line = 0;
1006	alp->unlock_file = NULL;
1007	alp->unlock_line = 0;
1008	alp->lock_holder = LK_NOCPU;
1009}
1010
1011void
1012_simple_lock(__volatile struct simplelock *alp, const char *id, int l)
1013{
1014	cpuid_t cpu_id = cpu_number();
1015	int s;
1016
1017	s = spllock();
1018
1019	/*
1020	 * MULTIPROCESSOR case: This is `safe' since if it's not us, we
1021	 * don't take any action, and just fall into the normal spin case.
1022	 */
1023	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
1024#if defined(MULTIPROCESSOR) /* { */
1025		if (alp->lock_holder == cpu_id) {
1026			SLOCK_WHERE("simple_lock: locking against myself\n",
1027			    alp, id, l);
1028			goto out;
1029		}
1030#else
1031		SLOCK_WHERE("simple_lock: lock held\n", alp, id, l);
1032		goto out;
1033#endif /* MULTIPROCESSOR */ /* } */
1034	}
1035
1036#if defined(MULTIPROCESSOR) /* { */
1037	/* Acquire the lock before modifying any fields. */
1038	__cpu_simple_lock(&alp->lock_data);
1039#else
1040	alp->lock_data = __SIMPLELOCK_LOCKED;
1041#endif /* } */
1042
1043	if (alp->lock_holder != LK_NOCPU) {
1044		SLOCK_WHERE("simple_lock: uninitialized lock\n",
1045		    alp, id, l);
1046	}
1047	alp->lock_file = id;
1048	alp->lock_line = l;
1049	alp->lock_holder = cpu_id;
1050
1051	SLOCK_LIST_LOCK();
1052	/* XXX Cast away volatile */
1053	TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list);
1054	SLOCK_LIST_UNLOCK();
1055
1056	SLOCK_COUNT(1);
1057
1058 out:
1059	splx(s);
1060}
1061
1062int
1063_simple_lock_held(__volatile struct simplelock *alp)
1064{
1065	cpuid_t cpu_id = cpu_number();
1066	int s, locked = 0;
1067
1068	s = spllock();
1069
1070#if defined(MULTIPROCESSOR)
1071	if (__cpu_simple_lock_try(&alp->lock_data) == 0)
1072		locked = (alp->lock_holder == cpu_id);
1073	else
1074		__cpu_simple_unlock(&alp->lock_data);
1075#else
1076	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
1077		locked = 1;
1078		KASSERT(alp->lock_holder == cpu_id);
1079	}
1080#endif
1081
1082	splx(s);
1083
1084	return (locked);
1085}
1086
1087int
1088_simple_lock_try(__volatile struct simplelock *alp, const char *id, int l)
1089{
1090	cpuid_t cpu_id = cpu_number();
1091	int s, rv = 0;
1092
1093	s = spllock();
1094
1095	/*
1096	 * MULTIPROCESSOR case: This is `safe' since if it's not us, we
1097	 * don't take any action.
1098	 */
1099#if defined(MULTIPROCESSOR) /* { */
1100	if ((rv = __cpu_simple_lock_try(&alp->lock_data)) == 0) {
1101		if (alp->lock_holder == cpu_id)
1102			SLOCK_WHERE("simple_lock_try: locking against myself\n",
1103			    alp, id, l);
1104		goto out;
1105	}
1106#else
1107	if (alp->lock_data == __SIMPLELOCK_LOCKED) {
1108		SLOCK_WHERE("simple_lock_try: lock held\n", alp, id, l);
1109		goto out;
1110	}
1111	alp->lock_data = __SIMPLELOCK_LOCKED;
1112#endif /* MULTIPROCESSOR */ /* } */
1113
1114	/*
1115	 * At this point, we have acquired the lock.
1116	 */
1117
1118	rv = 1;
1119
1120	alp->lock_file = id;
1121	alp->lock_line = l;
1122	alp->lock_holder = cpu_id;
1123
1124	SLOCK_LIST_LOCK();
1125	/* XXX Cast away volatile. */
1126	TAILQ_INSERT_TAIL(&simplelock_list, (struct simplelock *)alp, list);
1127	SLOCK_LIST_UNLOCK();
1128
1129	SLOCK_COUNT(1);
1130
1131 out:
1132	splx(s);
1133	return (rv);
1134}
1135
1136void
1137_simple_unlock(__volatile struct simplelock *alp, const char *id, int l)
1138{
1139	int s;
1140
1141	s = spllock();
1142
1143	/*
1144	 * MULTIPROCESSOR case: This is `safe' because we think we hold
1145	 * the lock, and if we don't, we don't take any action.
1146	 */
1147	if (alp->lock_data == __SIMPLELOCK_UNLOCKED) {
1148		SLOCK_WHERE("simple_unlock: lock not held\n",
1149		    alp, id, l);
1150		goto out;
1151	}
1152
1153	SLOCK_LIST_LOCK();
1154	TAILQ_REMOVE(&simplelock_list, alp, list);
1155	SLOCK_LIST_UNLOCK();
1156
1157	SLOCK_COUNT(-1);
1158
1159	alp->list.tqe_next = NULL;	/* sanity */
1160	alp->list.tqe_prev = NULL;	/* sanity */
1161
1162	alp->unlock_file = id;
1163	alp->unlock_line = l;
1164
1165#if defined(MULTIPROCESSOR) /* { */
1166	alp->lock_holder = LK_NOCPU;
1167	/* Now that we've modified all fields, release the lock. */
1168	__cpu_simple_unlock(&alp->lock_data);
1169#else
1170	alp->lock_data = __SIMPLELOCK_UNLOCKED;
1171	KASSERT(alp->lock_holder == cpu_number());
1172	alp->lock_holder = LK_NOCPU;
1173#endif /* } */
1174
1175 out:
1176	splx(s);
1177}
1178
1179void
1180simple_lock_dump(void)
1181{
1182	struct simplelock *alp;
1183	int s;
1184
1185	s = spllock();
1186	SLOCK_LIST_LOCK();
1187	lock_printf("all simple locks:\n");
1188	for (alp = TAILQ_FIRST(&simplelock_list); alp != NULL;
1189	     alp = TAILQ_NEXT(alp, list)) {
1190		lock_printf("%p CPU %lu %s:%d\n", alp, alp->lock_holder,
1191		    alp->lock_file, alp->lock_line);
1192	}
1193	SLOCK_LIST_UNLOCK();
1194	splx(s);
1195}
1196
1197void
1198simple_lock_freecheck(void *start, void *end)
1199{
1200	struct simplelock *alp;
1201	int s;
1202
1203	s = spllock();
1204	SLOCK_LIST_LOCK();
1205	for (alp = TAILQ_FIRST(&simplelock_list); alp != NULL;
1206	     alp = TAILQ_NEXT(alp, list)) {
1207		if ((void *)alp >= start && (void *)alp < end) {
1208			lock_printf("freeing simple_lock %p CPU %lu %s:%d\n",
1209			    alp, alp->lock_holder, alp->lock_file,
1210			    alp->lock_line);
1211			SLOCK_DEBUGGER();
1212		}
1213	}
1214	SLOCK_LIST_UNLOCK();
1215	splx(s);
1216}
1217
1218void
1219simple_lock_switchcheck(void)
1220{
1221	struct simplelock *alp;
1222	cpuid_t cpu_id = cpu_number();
1223	int s;
1224
1225	/*
1226	 * We must be holding exactly one lock: the sched_lock.
1227	 */
1228
1229	SCHED_ASSERT_LOCKED();
1230
1231	s = spllock();
1232	SLOCK_LIST_LOCK();
1233	for (alp = TAILQ_FIRST(&simplelock_list); alp != NULL;
1234	     alp = TAILQ_NEXT(alp, list)) {
1235		if (alp == &sched_lock)
1236			continue;
1237		if (alp->lock_holder == cpu_id) {
1238			lock_printf("switching with held simple_lock %p "
1239			    "CPU %lu %s:%d\n",
1240			    alp, alp->lock_holder, alp->lock_file,
1241			    alp->lock_line);
1242			SLOCK_DEBUGGER();
1243		}
1244	}
1245	SLOCK_LIST_UNLOCK();
1246	splx(s);
1247}
1248#endif /* LOCKDEBUG */ /* } */
1249