/*
 * Copyright (c) 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code contains ideas from software contributed to Berkeley by
 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
 * System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
#include <machine/cpu.h>

/*
 * Locking primitives implementation.
 * Locks provide shared/exclusive synchronization.
 */

#ifdef DEBUG
#define COUNT(p, x) if (p) (p)->p_locks += (x)
#else
#define COUNT(p, x)
#endif

#if NCPUS > 1

/*
 * For a multiprocessor system, try the spin lock first.
 *
 * This should be inline expanded below, but we cannot have #if
 * inside a multiline define.
 */
int lock_wait_time = 100;
#define PAUSE(lkp, wanted)						\
		if (lock_wait_time > 0) {				\
			int i;						\
									\
			simple_unlock(&lkp->lk_interlock);		\
			for (i = lock_wait_time; i > 0; i--)		\
				if (!(wanted))				\
					break;				\
			simple_lock(&lkp->lk_interlock);		\
		}							\
		if (!(wanted))						\
			break;

#else /* NCPUS == 1 */

/*
 * It is an error to spin on a uniprocessor as nothing will ever cause
 * the simple lock to clear while we are executing.
 */
#define PAUSE(lkp, wanted)

#endif /* NCPUS == 1 */

/*
 * Acquire a resource.
 */
#define ACQUIRE(lkp, error, extflags, wanted)				\
	PAUSE(lkp, wanted);						\
	for (error = 0; wanted; ) {					\
		(lkp)->lk_waitcount++;					\
		simple_unlock(&(lkp)->lk_interlock);			\
		error = tsleep((void *)lkp, (lkp)->lk_prio,		\
		    (lkp)->lk_wmesg, (lkp)->lk_timo);			\
		simple_lock(&(lkp)->lk_interlock);			\
		(lkp)->lk_waitcount--;					\
		if (error)						\
			break;						\
		if ((extflags) & LK_SLEEPFAIL) {			\
			error = ENOLCK;					\
			break;						\
		}							\
	}
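
/*
 * Illustrative sketch (not part of the original sources): with a
 * concrete predicate such as "lkp->lk_flags & LK_HAVE_EXCL", the
 * ACQUIRE() macro above expands to roughly the compiled-out loop
 * below (the MP spin via PAUSE() is omitted).  The point is the
 * interlock discipline: the interlock is dropped around each
 * tsleep() and retaken before the predicate is re-tested.
 */
#if 0
	for (error = 0; lkp->lk_flags & LK_HAVE_EXCL; ) {
		lkp->lk_waitcount++;			/* announce a sleeper */
		simple_unlock(&lkp->lk_interlock);	/* cannot sleep holding it */
		error = tsleep((void *)lkp, lkp->lk_prio,
		    lkp->lk_wmesg, lkp->lk_timo);
		simple_lock(&lkp->lk_interlock);	/* retake before re-test */
		lkp->lk_waitcount--;
		if (error)				/* signal or timeout */
			break;
		if (extflags & LK_SLEEPFAIL) {		/* fail; caller re-requests */
			error = ENOLCK;
			break;
		}
	}
#endif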

/*
 * Initialize a lock; required before use.
 */
void
lockinit(lkp, prio, wmesg, timo, flags)
	struct lock *lkp;
	int prio;
	char *wmesg;
	int timo;
	int flags;
{

	simple_lock_init(&lkp->lk_interlock);
	lkp->lk_flags = flags & LK_EXTFLG_MASK;
	lkp->lk_sharecount = 0;
	lkp->lk_waitcount = 0;
	lkp->lk_exclusivecount = 0;
	lkp->lk_prio = prio;
	lkp->lk_wmesg = wmesg;
	lkp->lk_timo = timo;
	lkp->lk_lockholder = LK_NOPROC;
}
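
/*
 * Usage sketch (not part of the original sources): a subsystem would
 * embed a struct lock in its own data structure and initialize it once
 * before first use.  "struct examp_node", "examp_init", and the wait
 * message "examplk" are hypothetical names for illustration.
 */
#if 0
struct examp_node {
	struct lock en_lock;			/* protects en_data */
	int en_data;
};

void
examp_init(enp)
	struct examp_node *enp;
{

	/* Sleep at PVFS priority, wait message "examplk", no timeout. */
	lockinit(&enp->en_lock, PVFS, "examplk", 0, 0);
}
#endif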

/*
 * Determine the status of a lock.
 */
int
lockstatus(lkp)
	struct lock *lkp;
{
	int lock_type = 0;

	simple_lock(&lkp->lk_interlock);
	if (lkp->lk_exclusivecount != 0)
		lock_type = LK_EXCLUSIVE;
	else if (lkp->lk_sharecount != 0)
		lock_type = LK_SHARED;
	simple_unlock(&lkp->lk_interlock);
	return (lock_type);
}
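
/*
 * Usage sketch (not part of the original sources): lockstatus() returns
 * LK_EXCLUSIVE, LK_SHARED, or 0 for an unheld lock, so an assertion in
 * a hypothetical caller might look like this.
 */
#if 0
void
examp_assert_locked(enp)
	struct examp_node *enp;
{

	if (lockstatus(&enp->en_lock) != LK_EXCLUSIVE)
		panic("examp_assert_locked: lock not exclusively held");
}
#endif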

/*
 * Set, change, or release a lock.
 *
 * Shared requests increment the shared count. Exclusive requests set the
 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
 * accepted shared locks and shared-to-exclusive upgrades to go away.
 */
int
lockmgr(lkp, flags, interlkp, p)
	__volatile struct lock *lkp;
	u_int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int error;
	pid_t pid;
	int extflags;

	error = 0;
	if (p)
		pid = p->p_pid;
	else
		pid = LK_KERNPROC;
	simple_lock(&lkp->lk_interlock);
	if (flags & LK_INTERLOCK) {
		simple_unlock(interlkp);
	}
	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
#ifdef DIAGNOSTIC
	/*
	 * Once a lock has drained, the LK_DRAINING flag is set and an
	 * exclusive lock is returned. The only valid operation thereafter
	 * is a single release of that exclusive lock. This final release
	 * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any
	 * further requests of any sort will result in a panic. The bits
	 * selected for these two flags are chosen so that they will be set
	 * in memory that is freed (freed memory is filled with 0xdeadbeef).
	 * The final release is permitted to give a new lease on life to
	 * the lock by specifying LK_REENABLE.
	 */
	if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) {
		if (lkp->lk_flags & LK_DRAINED)
			panic("lockmgr: using decommissioned lock");
		if ((flags & LK_TYPE_MASK) != LK_RELEASE ||
		    lkp->lk_lockholder != pid)
			panic("lockmgr: non-release on draining lock: %d\n",
			    flags & LK_TYPE_MASK);
		lkp->lk_flags &= ~LK_DRAINING;
		if ((flags & LK_REENABLE) == 0)
			lkp->lk_flags |= LK_DRAINED;
	}
#endif /* DIAGNOSTIC */

	switch (flags & LK_TYPE_MASK) {

	case LK_SHARED:
		if (lkp->lk_lockholder != pid) {
			/*
			 * If just polling, check to see if we will block.
			 */
			if ((extflags & LK_NOWAIT) && (lkp->lk_flags &
			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) {
				error = EBUSY;
				break;
			}
			/*
			 * Wait for exclusive locks and upgrades to clear.
			 */
			ACQUIRE(lkp, error, extflags, lkp->lk_flags &
			    (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE));
			if (error)
				break;
			lkp->lk_sharecount++;
			COUNT(p, 1);
			break;
		}
		/*
		 * We hold an exclusive lock, so downgrade it to shared.
		 * An alternative would be to fail with EDEADLK.
		 */
		lkp->lk_sharecount++;
		COUNT(p, 1);
		/* fall into downgrade */

	case LK_DOWNGRADE:
		if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0)
			panic("lockmgr: not holding exclusive lock");
		lkp->lk_sharecount += lkp->lk_exclusivecount;
		lkp->lk_exclusivecount = 0;
		lkp->lk_flags &= ~LK_HAVE_EXCL;
		lkp->lk_lockholder = LK_NOPROC;
		if (lkp->lk_waitcount)
			wakeup((void *)lkp);
		break;

	case LK_EXCLUPGRADE:
		/*
		 * If another process is ahead of us to get an upgrade,
		 * then we want to fail rather than have an intervening
		 * exclusive access.
		 */
		if (lkp->lk_flags & LK_WANT_UPGRADE) {
			lkp->lk_sharecount--;
			COUNT(p, -1);
			error = EBUSY;
			break;
		}
		/* fall into normal upgrade */

	case LK_UPGRADE:
		/*
		 * Upgrade a shared lock to an exclusive one. If another
		 * shared lock has already requested an upgrade to an
		 * exclusive lock, our shared lock is released and an
		 * exclusive lock is requested (which will be granted
		 * after the upgrade). If we return an error, the file
		 * will always be unlocked.
		 */
		if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0)
			panic("lockmgr: upgrade exclusive lock");
		lkp->lk_sharecount--;
		COUNT(p, -1);
		/*
		 * If we are just polling, check to see if we will block.
		 */
		if ((extflags & LK_NOWAIT) &&
		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
		     lkp->lk_sharecount > 1)) {
			error = EBUSY;
			break;
		}
		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
			/*
			 * We are the first shared lock to request an upgrade, so
			 * request upgrade and wait for the shared count to
			 * drop to zero, then take exclusive lock.
			 */
			lkp->lk_flags |= LK_WANT_UPGRADE;
			ACQUIRE(lkp, error, extflags, lkp->lk_sharecount);
			lkp->lk_flags &= ~LK_WANT_UPGRADE;
			if (error)
				break;
			lkp->lk_flags |= LK_HAVE_EXCL;
			lkp->lk_lockholder = pid;
			if (lkp->lk_exclusivecount != 0)
				panic("lockmgr: non-zero exclusive count");
			lkp->lk_exclusivecount = 1;
			COUNT(p, 1);
			break;
		}
		/*
		 * Someone else has requested upgrade. Release our shared
		 * lock, awaken upgrade requestor if we are the last shared
		 * lock, then request an exclusive lock.
		 */
		if (lkp->lk_sharecount == 0 && lkp->lk_waitcount)
			wakeup((void *)lkp);
		/* fall into exclusive request */

	case LK_EXCLUSIVE:
		if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
			/*
			 * Recursive lock.
			 */
			if ((extflags & LK_CANRECURSE) == 0)
				panic("lockmgr: locking against myself");
			lkp->lk_exclusivecount++;
			COUNT(p, 1);
			break;
		}
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		     lkp->lk_sharecount != 0)) {
			error = EBUSY;
			break;
		}
		/*
		 * Try to acquire the want_exclusive flag.
		 */
		ACQUIRE(lkp, error, extflags, lkp->lk_flags &
		    (LK_HAVE_EXCL | LK_WANT_EXCL));
		if (error)
			break;
		lkp->lk_flags |= LK_WANT_EXCL;
		/*
		 * Wait for shared locks and upgrades to finish.
		 */
		ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 ||
		       (lkp->lk_flags & LK_WANT_UPGRADE));
		lkp->lk_flags &= ~LK_WANT_EXCL;
		if (error)
			break;
		lkp->lk_flags |= LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		if (lkp->lk_exclusivecount != 0)
			panic("lockmgr: non-zero exclusive count");
		lkp->lk_exclusivecount = 1;
		COUNT(p, 1);
		break;

	case LK_RELEASE:
		if (lkp->lk_exclusivecount != 0) {
			if (pid != lkp->lk_lockholder)
				panic("lockmgr: pid %d, not %s %d unlocking",
				    pid, "exclusive lock holder",
				    lkp->lk_lockholder);
			lkp->lk_exclusivecount--;
			COUNT(p, -1);
			if (lkp->lk_exclusivecount == 0) {
				lkp->lk_flags &= ~LK_HAVE_EXCL;
				lkp->lk_lockholder = LK_NOPROC;
			}
		} else if (lkp->lk_sharecount != 0) {
			lkp->lk_sharecount--;
			COUNT(p, -1);
		}
		if (lkp->lk_waitcount)
			wakeup((void *)lkp);
		break;

	case LK_DRAIN:
		/*
		 * Check that we do not already hold the lock, as it can
		 * never drain if we do. Unfortunately, we have no way to
		 * check for holding a shared lock, but at least we can
		 * check for an exclusive one.
		 */
		if (lkp->lk_lockholder == pid)
			panic("lockmgr: draining against myself");
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) && ((lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		     lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) {
			error = EBUSY;
			break;
		}
		PAUSE(lkp, ((lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		     lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0));
		for (error = 0; ((lkp->lk_flags &
		     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) ||
		     lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) {
			lkp->lk_flags |= LK_WAITDRAIN;
			simple_unlock(&lkp->lk_interlock);
			error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio,
			    lkp->lk_wmesg, lkp->lk_timo);
			if (error)
				return (error);
			if (extflags & LK_SLEEPFAIL)
				return (ENOLCK);
			simple_lock(&lkp->lk_interlock);
		}
		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		lkp->lk_exclusivecount = 1;
		COUNT(p, 1);
		break;

	default:
		simple_unlock(&lkp->lk_interlock);
		panic("lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags &
	     (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 &&
	     lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) {
		lkp->lk_flags &= ~LK_WAITDRAIN;
		wakeup((void *)&lkp->lk_flags);
	}
	simple_unlock(&lkp->lk_interlock);
	return (error);
}
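
/*
 * Usage sketch (not part of the original sources): callers pass one of
 * the LK_* request types, optionally or'd with external flags such as
 * LK_NOWAIT, and pair every successful request with LK_RELEASE.  The
 * "examp" names continue the hypothetical example above.
 */
#if 0
int
examp_update(enp, p)
	struct examp_node *enp;
	struct proc *p;
{
	int error;

	/* Block until an exclusive lock is granted. */
	error = lockmgr(&enp->en_lock, LK_EXCLUSIVE, NULL, p);
	if (error)
		return (error);
	enp->en_data++;
	/* Release pairs with the successful request above. */
	return (lockmgr(&enp->en_lock, LK_RELEASE, NULL, p));
}

int
examp_read(enp, p, valuep)
	struct examp_node *enp;
	struct proc *p;
	int *valuep;
{
	int error;

	/* LK_NOWAIT polls instead of sleeping; EBUSY means contention. */
	error = lockmgr(&enp->en_lock, LK_SHARED | LK_NOWAIT, NULL, p);
	if (error)
		return (error);
	*valuep = enp->en_data;
	return (lockmgr(&enp->en_lock, LK_RELEASE, NULL, p));
}
#endif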

/*
 * Print out information about the state of a lock. Used by VOP_PRINT
 * routines to display the status of contained locks.
 */
void
lockmgr_printinfo(lkp)
	struct lock *lkp;
{

	if (lkp->lk_sharecount)
		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
		    lkp->lk_sharecount);
	else if (lkp->lk_flags & LK_HAVE_EXCL)
		printf(" lock type %s: EXCL (count %d) by pid %d",
		    lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
	if (lkp->lk_waitcount > 0)
		printf(" with %d pending", lkp->lk_waitcount);
}
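
/*
 * Usage sketch (not part of the original sources): a VOP_PRINT routine
 * for the hypothetical examp_node would print its own state and then
 * let lockmgr_printinfo() describe the embedded lock.
 */
#if 0
int
examp_print(enp)
	struct examp_node *enp;
{

	printf("examp_node: data %d", enp->en_data);
	lockmgr_printinfo(&enp->en_lock);
	printf("\n");
	return (0);
}
#endif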

#if defined(SIMPLELOCK_DEBUG) && NCPUS == 1
#include <sys/kernel.h>
#include <vm/vm.h>
#include <sys/sysctl.h>
int lockpausetime = 0;
/* struct ctldebug debug2 = { "lockpausetime", &lockpausetime }; */
int simplelockrecurse;
/*
 * Simple lock functions so that the debugger can see from whence
 * they are being called.
 */
void
simple_lock_init(alp)
	struct simplelock *alp;
{

	alp->lock_data = 0;
}

void
_simple_lock(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (simplelockrecurse)
		return;
	if (alp->lock_data == 1) {
		if (lockpausetime == -1)
			panic("%s:%d: simple_lock: lock held", id, l);
		printf("%s:%d: simple_lock: lock held\n", id, l);
		if (lockpausetime == 1) {
			Debugger("simple_lock");
			/* BACKTRACE(curproc); */
		} else if (lockpausetime > 1) {
			printf("%s:%d: simple_lock: lock held...", id, l);
			tsleep(&lockpausetime, PCATCH | PPAUSE, "slock",
			    lockpausetime * hz);
			printf(" continuing\n");
		}
	}
	alp->lock_data = 1;
	if (curproc)
		curproc->p_simple_locks++;
}

int
_simple_lock_try(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (alp->lock_data)
		return (0);
	if (simplelockrecurse)
		return (1);
	alp->lock_data = 1;
	if (curproc)
		curproc->p_simple_locks++;
	return (1);
}

void
_simple_unlock(alp, id, l)
	__volatile struct simplelock *alp;
	const char *id;
	int l;
{

	if (simplelockrecurse)
		return;
	if (alp->lock_data == 0) {
		if (lockpausetime == -1)
			panic("%s:%d: simple_unlock: lock not held", id, l);
		printf("%s:%d: simple_unlock: lock not held\n", id, l);
		if (lockpausetime == 1) {
			Debugger("simple_unlock");
			/* BACKTRACE(curproc); */
		} else if (lockpausetime > 1) {
			printf("%s:%d: simple_unlock: lock not held...", id, l);
			tsleep(&lockpausetime, PCATCH | PPAUSE, "sunlock",
			    lockpausetime * hz);
			printf(" continuing\n");
		}
	}
	alp->lock_data = 0;
	if (curproc)
		curproc->p_simple_locks--;
}
#endif /* SIMPLELOCK_DEBUG && NCPUS == 1 */