kern_lockf.c revision 46744
11960Sdg/*
21960Sdg * Copyright (c) 1982, 1986, 1989, 1993
31960Sdg *	The Regents of the University of California.  All rights reserved.
41960Sdg *
51960Sdg * This code is derived from software contributed to Berkeley by
61960Sdg * Scooter Morris at Genentech Inc.
71960Sdg *
81960Sdg * Redistribution and use in source and binary forms, with or without
91960Sdg * modification, are permitted provided that the following conditions
101960Sdg * are met:
111960Sdg * 1. Redistributions of source code must retain the above copyright
121960Sdg *    notice, this list of conditions and the following disclaimer.
131960Sdg * 2. Redistributions in binary form must reproduce the above copyright
141960Sdg *    notice, this list of conditions and the following disclaimer in the
151960Sdg *    documentation and/or other materials provided with the distribution.
161960Sdg * 3. All advertising materials mentioning features or use of this software
171960Sdg *    must display the following acknowledgement:
181960Sdg *	This product includes software developed by the University of
191960Sdg *	California, Berkeley and its contributors.
201960Sdg * 4. Neither the name of the University nor the names of its contributors
211960Sdg *    may be used to endorse or promote products derived from this software
221960Sdg *    without specific prior written permission.
231960Sdg *
241960Sdg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251960Sdg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261960Sdg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271960Sdg * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281960Sdg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291960Sdg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301960Sdg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311960Sdg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321960Sdg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331960Sdg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341960Sdg * SUCH DAMAGE.
351960Sdg *
361960Sdg *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
3746744Sdt * $Id: kern_lockf.c,v 1.21 1999/01/27 21:49:56 dillon Exp $
381960Sdg */
391960Sdg
4032929Seivind#include "opt_debug_lockf.h"
4132929Seivind
421960Sdg#include <sys/param.h>
431960Sdg#include <sys/systm.h>
4441059Speter#include <sys/kernel.h>
4531561Sbde#include <sys/lock.h>
461960Sdg#include <sys/proc.h>
4718020Sbde#include <sys/unistd.h>
481960Sdg#include <sys/vnode.h>
491960Sdg#include <sys/malloc.h>
501960Sdg#include <sys/fcntl.h>
511960Sdg
521960Sdg#include <sys/lockf.h>
531960Sdg
541960Sdg/*
551960Sdg * This variable controls the maximum number of processes that will
561960Sdg * be checked in doing deadlock detection.
571960Sdg */
5822521Sdysonstatic int maxlockdepth = MAXDEPTH;
591960Sdg
601960Sdg#ifdef LOCKF_DEBUG
6122880Sbde#include <sys/kernel.h>
6222880Sbde#include <sys/sysctl.h>
6322880Sbde
6422880Sbde#include <ufs/ufs/quota.h>
6522880Sbde#include <ufs/ufs/inode.h>
6622880Sbde
6730309Sphk
6824481Sbdestatic int	lockf_debug = 0;
6924481SbdeSYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
701960Sdg#endif
711960Sdg
7230354Sphkstatic MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
7330309Sphk
741960Sdg#define NOLOCKF (struct lockf *)0
751960Sdg#define SELF	0x1
761960Sdg#define OTHERS	0x2
7712819Sphkstatic int	 lf_clearlock __P((struct lockf *));
7812819Sphkstatic int	 lf_findoverlap __P((struct lockf *,
7912819Sphk	    struct lockf *, int, struct lockf ***, struct lockf **));
8012819Sphkstatic struct lockf *
8112819Sphk	 lf_getblock __P((struct lockf *));
8212819Sphkstatic int	 lf_getlock __P((struct lockf *, struct flock *));
8312819Sphkstatic int	 lf_setlock __P((struct lockf *));
8412819Sphkstatic void	 lf_split __P((struct lockf *, struct lockf *));
8512819Sphkstatic void	 lf_wakelock __P((struct lockf *));
861960Sdg
871960Sdg/*
881960Sdg * Advisory record locking support
891960Sdg */
901960Sdgint
911960Sdglf_advlock(ap, head, size)
921960Sdg	struct vop_advlock_args /* {
931960Sdg		struct vnode *a_vp;
941960Sdg		caddr_t  a_id;
951960Sdg		int  a_op;
961960Sdg		struct flock *a_fl;
971960Sdg		int  a_flags;
981960Sdg	} */ *ap;
991960Sdg	struct lockf **head;
1001960Sdg	u_quad_t size;
1011960Sdg{
1021960Sdg	register struct flock *fl = ap->a_fl;
1031960Sdg	register struct lockf *lock;
1041960Sdg	off_t start, end;
1051960Sdg	int error;
1061960Sdg
1071960Sdg	/*
1081960Sdg	 * Convert the flock structure into a start and end.
1091960Sdg	 */
1101960Sdg	switch (fl->l_whence) {
1111960Sdg
1121960Sdg	case SEEK_SET:
1131960Sdg	case SEEK_CUR:
1141960Sdg		/*
1151960Sdg		 * Caller is responsible for adding any necessary offset
1161960Sdg		 * when SEEK_CUR is used.
1171960Sdg		 */
1181960Sdg		start = fl->l_start;
1191960Sdg		break;
1201960Sdg
1211960Sdg	case SEEK_END:
1221960Sdg		start = size + fl->l_start;
1231960Sdg		break;
1241960Sdg
1251960Sdg	default:
1261960Sdg		return (EINVAL);
1271960Sdg	}
1281960Sdg	if (start < 0)
1291960Sdg		return (EINVAL);
1301960Sdg	if (fl->l_len == 0)
1311960Sdg		end = -1;
13220676Sbde	else {
1331960Sdg		end = start + fl->l_len - 1;
13420676Sbde		if (end < start)
13520676Sbde			return (EINVAL);
13620676Sbde	}
1371960Sdg	/*
13820676Sbde	 * Avoid the common case of unlocking when inode has no locks.
13920676Sbde	 */
14020676Sbde	if (*head == (struct lockf *)0) {
14120676Sbde		if (ap->a_op != F_SETLK) {
14220676Sbde			fl->l_type = F_UNLCK;
14320676Sbde			return (0);
14420676Sbde		}
14520676Sbde	}
14620676Sbde	/*
1471960Sdg	 * Create the lockf structure
1481960Sdg	 */
1491960Sdg	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
1501960Sdg	lock->lf_start = start;
1511960Sdg	lock->lf_end = end;
1521960Sdg	lock->lf_id = ap->a_id;
15322521Sdyson/*	lock->lf_inode = ip; */	/* XXX JH */
15422521Sdyson	lock->lf_type = fl->l_type;
1551960Sdg	lock->lf_head = head;
1561960Sdg	lock->lf_next = (struct lockf *)0;
15722521Sdyson	TAILQ_INIT(&lock->lf_blkhd);
1581960Sdg	lock->lf_flags = ap->a_flags;
1591960Sdg	/*
1601960Sdg	 * Do the requested operation.
1611960Sdg	 */
1621960Sdg	switch(ap->a_op) {
1631960Sdg	case F_SETLK:
1641960Sdg		return (lf_setlock(lock));
1651960Sdg
1661960Sdg	case F_UNLCK:
1671960Sdg		error = lf_clearlock(lock);
1681960Sdg		FREE(lock, M_LOCKF);
1691960Sdg		return (error);
1701960Sdg
1711960Sdg	case F_GETLK:
1721960Sdg		error = lf_getlock(lock, fl);
1731960Sdg		FREE(lock, M_LOCKF);
1741960Sdg		return (error);
1758876Srgrimes
1761960Sdg	default:
1771960Sdg		free(lock, M_LOCKF);
1781960Sdg		return (EINVAL);
1791960Sdg	}
1801960Sdg	/* NOTREACHED */
1811960Sdg}
1821960Sdg
1831960Sdg/*
1841960Sdg * Set a byte-range lock.
1851960Sdg */
18612819Sphkstatic int
1871960Sdglf_setlock(lock)
1881960Sdg	register struct lockf *lock;
1891960Sdg{
1901960Sdg	register struct lockf *block;
1911960Sdg	struct lockf **head = lock->lf_head;
1921960Sdg	struct lockf **prev, *overlap, *ltmp;
1931960Sdg	static char lockstr[] = "lockf";
1941960Sdg	int ovcase, priority, needtolink, error;
1951960Sdg
1961960Sdg#ifdef LOCKF_DEBUG
1971960Sdg	if (lockf_debug & 1)
1981960Sdg		lf_print("lf_setlock", lock);
1991960Sdg#endif /* LOCKF_DEBUG */
2001960Sdg
2011960Sdg	/*
2021960Sdg	 * Set the priority
2031960Sdg	 */
2041960Sdg	priority = PLOCK;
2051960Sdg	if (lock->lf_type == F_WRLCK)
2061960Sdg		priority += 4;
2071960Sdg	priority |= PCATCH;
2081960Sdg	/*
2091960Sdg	 * Scan lock list for this file looking for locks that would block us.
2101960Sdg	 */
2113098Sphk	while ((block = lf_getblock(lock))) {
2121960Sdg		/*
2131960Sdg		 * Free the structure and return if nonblocking.
2141960Sdg		 */
2151960Sdg		if ((lock->lf_flags & F_WAIT) == 0) {
2161960Sdg			FREE(lock, M_LOCKF);
2171960Sdg			return (EAGAIN);
2181960Sdg		}
2191960Sdg		/*
2201960Sdg		 * We are blocked. Since flock style locks cover
2211960Sdg		 * the whole file, there is no chance for deadlock.
2221960Sdg		 * For byte-range locks we must check for deadlock.
2231960Sdg		 *
2241960Sdg		 * Deadlock detection is done by looking through the
2251960Sdg		 * wait channels to see if there are any cycles that
2261960Sdg		 * involve us. MAXDEPTH is set just to make sure we
2271960Sdg		 * do not go off into neverland.
2281960Sdg		 */
2291960Sdg		if ((lock->lf_flags & F_POSIX) &&
2301960Sdg		    (block->lf_flags & F_POSIX)) {
2311960Sdg			register struct proc *wproc;
2321960Sdg			register struct lockf *waitblock;
2331960Sdg			int i = 0;
2341960Sdg
2351960Sdg			/* The block is waiting on something */
2361960Sdg			wproc = (struct proc *)block->lf_id;
2371960Sdg			while (wproc->p_wchan &&
2381960Sdg			       (wproc->p_wmesg == lockstr) &&
2391960Sdg			       (i++ < maxlockdepth)) {
2401960Sdg				waitblock = (struct lockf *)wproc->p_wchan;
2411960Sdg				/* Get the owner of the blocking lock */
2421960Sdg				waitblock = waitblock->lf_next;
2431960Sdg				if ((waitblock->lf_flags & F_POSIX) == 0)
2441960Sdg					break;
2451960Sdg				wproc = (struct proc *)waitblock->lf_id;
2461960Sdg				if (wproc == (struct proc *)lock->lf_id) {
2471960Sdg					free(lock, M_LOCKF);
2481960Sdg					return (EDEADLK);
2491960Sdg				}
2501960Sdg			}
2511960Sdg		}
2521960Sdg		/*
2531960Sdg		 * For flock type locks, we must first remove
2541960Sdg		 * any shared locks that we hold before we sleep
2551960Sdg		 * waiting for an exclusive lock.
2561960Sdg		 */
2571960Sdg		if ((lock->lf_flags & F_FLOCK) &&
2581960Sdg		    lock->lf_type == F_WRLCK) {
2591960Sdg			lock->lf_type = F_UNLCK;
2601960Sdg			(void) lf_clearlock(lock);
2611960Sdg			lock->lf_type = F_WRLCK;
2621960Sdg		}
2631960Sdg		/*
2641960Sdg		 * Add our lock to the blocked list and sleep until we're free.
2651960Sdg		 * Remember who blocked us (for deadlock detection).
2661960Sdg		 */
2671960Sdg		lock->lf_next = block;
26822521Sdyson		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
2691960Sdg#ifdef LOCKF_DEBUG
2701960Sdg		if (lockf_debug & 1) {
2711960Sdg			lf_print("lf_setlock: blocking on", block);
2721960Sdg			lf_printlist("lf_setlock", block);
2731960Sdg		}
2741960Sdg#endif /* LOCKF_DEBUG */
2753098Sphk		if ((error = tsleep((caddr_t)lock, priority, lockstr, 0))) {
27622521Sdyson                        /*
27722521Sdyson			 * We may have been awakened by a signal (in
27822521Sdyson			 * which case we must remove ourselves from the
27922521Sdyson			 * blocked list) and/or by another process
28022521Sdyson			 * releasing a lock (in which case we have already
28122521Sdyson			 * been removed from the blocked list and our
28222521Sdyson			 * lf_next field set to NOLOCKF).
28322521Sdyson                         */
28422521Sdyson			if (lock->lf_next)
28522521Sdyson				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
28622521Sdyson					lf_block);
28722521Sdyson                        free(lock, M_LOCKF);
28822521Sdyson                        return (error);
2891960Sdg		}
2901960Sdg	}
2911960Sdg	/*
2921960Sdg	 * No blocks!!  Add the lock.  Note that we will
2931960Sdg	 * downgrade or upgrade any overlapping locks this
2941960Sdg	 * process already owns.
2951960Sdg	 *
2961960Sdg	 * Skip over locks owned by other processes.
2971960Sdg	 * Handle any locks that overlap and are owned by ourselves.
2981960Sdg	 */
2991960Sdg	prev = head;
3001960Sdg	block = *head;
3011960Sdg	needtolink = 1;
3021960Sdg	for (;;) {
3033098Sphk		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
3043098Sphk		if (ovcase)
3051960Sdg			block = overlap->lf_next;
3061960Sdg		/*
3071960Sdg		 * Six cases:
3081960Sdg		 *	0) no overlap
3091960Sdg		 *	1) overlap == lock
3101960Sdg		 *	2) overlap contains lock
3111960Sdg		 *	3) lock contains overlap
3121960Sdg		 *	4) overlap starts before lock
3131960Sdg		 *	5) overlap ends after lock
3141960Sdg		 */
3151960Sdg		switch (ovcase) {
3161960Sdg		case 0: /* no overlap */
3171960Sdg			if (needtolink) {
3181960Sdg				*prev = lock;
3191960Sdg				lock->lf_next = overlap;
3201960Sdg			}
3211960Sdg			break;
3221960Sdg
3231960Sdg		case 1: /* overlap == lock */
3241960Sdg			/*
3251960Sdg			 * If downgrading lock, others may be
3261960Sdg			 * able to acquire it.
3271960Sdg			 */
3281960Sdg			if (lock->lf_type == F_RDLCK &&
3291960Sdg			    overlap->lf_type == F_WRLCK)
3301960Sdg				lf_wakelock(overlap);
3311960Sdg			overlap->lf_type = lock->lf_type;
3321960Sdg			FREE(lock, M_LOCKF);
3331960Sdg			lock = overlap; /* for debug output below */
3341960Sdg			break;
3351960Sdg
3361960Sdg		case 2: /* overlap contains lock */
3371960Sdg			/*
3381960Sdg			 * Check for common starting point and different types.
3391960Sdg			 */
3401960Sdg			if (overlap->lf_type == lock->lf_type) {
3411960Sdg				free(lock, M_LOCKF);
3421960Sdg				lock = overlap; /* for debug output below */
3431960Sdg				break;
3441960Sdg			}
3451960Sdg			if (overlap->lf_start == lock->lf_start) {
3461960Sdg				*prev = lock;
3471960Sdg				lock->lf_next = overlap;
3481960Sdg				overlap->lf_start = lock->lf_end + 1;
3491960Sdg			} else
3501960Sdg				lf_split(overlap, lock);
3511960Sdg			lf_wakelock(overlap);
3521960Sdg			break;
3531960Sdg
3541960Sdg		case 3: /* lock contains overlap */
3551960Sdg			/*
3561960Sdg			 * If downgrading lock, others may be able to
3571960Sdg			 * acquire it, otherwise take the list.
3581960Sdg			 */
3591960Sdg			if (lock->lf_type == F_RDLCK &&
3601960Sdg			    overlap->lf_type == F_WRLCK) {
3611960Sdg				lf_wakelock(overlap);
3621960Sdg			} else {
36343301Sdillon				while ((ltmp = overlap->lf_blkhd.tqh_first) != NULL) {
36422521Sdyson					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
36522521Sdyson					    lf_block);
36622521Sdyson					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
36722521Sdyson					    ltmp, lf_block);
36846744Sdt					ltmp->lf_next = lock;
36922521Sdyson				}
3701960Sdg			}
3711960Sdg			/*
3721960Sdg			 * Add the new lock if necessary and delete the overlap.
3731960Sdg			 */
3741960Sdg			if (needtolink) {
3751960Sdg				*prev = lock;
3761960Sdg				lock->lf_next = overlap->lf_next;
3771960Sdg				prev = &lock->lf_next;
3781960Sdg				needtolink = 0;
3791960Sdg			} else
3801960Sdg				*prev = overlap->lf_next;
3811960Sdg			free(overlap, M_LOCKF);
3821960Sdg			continue;
3831960Sdg
3841960Sdg		case 4: /* overlap starts before lock */
3851960Sdg			/*
3861960Sdg			 * Add lock after overlap on the list.
3871960Sdg			 */
3881960Sdg			lock->lf_next = overlap->lf_next;
3891960Sdg			overlap->lf_next = lock;
3901960Sdg			overlap->lf_end = lock->lf_start - 1;
3911960Sdg			prev = &lock->lf_next;
3921960Sdg			lf_wakelock(overlap);
3931960Sdg			needtolink = 0;
3941960Sdg			continue;
3951960Sdg
3961960Sdg		case 5: /* overlap ends after lock */
3971960Sdg			/*
3981960Sdg			 * Add the new lock before overlap.
3991960Sdg			 */
4001960Sdg			if (needtolink) {
4011960Sdg				*prev = lock;
4021960Sdg				lock->lf_next = overlap;
4031960Sdg			}
4041960Sdg			overlap->lf_start = lock->lf_end + 1;
4051960Sdg			lf_wakelock(overlap);
4061960Sdg			break;
4071960Sdg		}
4081960Sdg		break;
4091960Sdg	}
4101960Sdg#ifdef LOCKF_DEBUG
4111960Sdg	if (lockf_debug & 1) {
4121960Sdg		lf_print("lf_setlock: got the lock", lock);
4131960Sdg		lf_printlist("lf_setlock", lock);
4141960Sdg	}
4151960Sdg#endif /* LOCKF_DEBUG */
4161960Sdg	return (0);
4171960Sdg}
4181960Sdg
4191960Sdg/*
4201960Sdg * Remove a byte-range lock on an inode.
4211960Sdg *
4221960Sdg * Generally, find the lock (or an overlap to that lock)
4231960Sdg * and remove it (or shrink it), then wakeup anyone we can.
4241960Sdg */
42512819Sphkstatic int
4261960Sdglf_clearlock(unlock)
4271960Sdg	register struct lockf *unlock;
4281960Sdg{
4291960Sdg	struct lockf **head = unlock->lf_head;
4301960Sdg	register struct lockf *lf = *head;
4311960Sdg	struct lockf *overlap, **prev;
4321960Sdg	int ovcase;
4331960Sdg
4341960Sdg	if (lf == NOLOCKF)
4351960Sdg		return (0);
4361960Sdg#ifdef LOCKF_DEBUG
4371960Sdg	if (unlock->lf_type != F_UNLCK)
4381960Sdg		panic("lf_clearlock: bad type");
4391960Sdg	if (lockf_debug & 1)
4401960Sdg		lf_print("lf_clearlock", unlock);
4411960Sdg#endif /* LOCKF_DEBUG */
4421960Sdg	prev = head;
4433098Sphk	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
4441960Sdg		/*
4451960Sdg		 * Wakeup the list of locks to be retried.
4461960Sdg		 */
4471960Sdg		lf_wakelock(overlap);
4481960Sdg
4491960Sdg		switch (ovcase) {
4501960Sdg
4511960Sdg		case 1: /* overlap == lock */
4521960Sdg			*prev = overlap->lf_next;
4531960Sdg			FREE(overlap, M_LOCKF);
4541960Sdg			break;
4551960Sdg
4561960Sdg		case 2: /* overlap contains lock: split it */
4571960Sdg			if (overlap->lf_start == unlock->lf_start) {
4581960Sdg				overlap->lf_start = unlock->lf_end + 1;
4591960Sdg				break;
4601960Sdg			}
4611960Sdg			lf_split(overlap, unlock);
4621960Sdg			overlap->lf_next = unlock->lf_next;
4631960Sdg			break;
4641960Sdg
4651960Sdg		case 3: /* lock contains overlap */
4661960Sdg			*prev = overlap->lf_next;
4671960Sdg			lf = overlap->lf_next;
4681960Sdg			free(overlap, M_LOCKF);
4691960Sdg			continue;
4701960Sdg
4711960Sdg		case 4: /* overlap starts before lock */
4721960Sdg			overlap->lf_end = unlock->lf_start - 1;
4731960Sdg			prev = &overlap->lf_next;
4741960Sdg			lf = overlap->lf_next;
4751960Sdg			continue;
4761960Sdg
4771960Sdg		case 5: /* overlap ends after lock */
4781960Sdg			overlap->lf_start = unlock->lf_end + 1;
4791960Sdg			break;
4801960Sdg		}
4811960Sdg		break;
4821960Sdg	}
4831960Sdg#ifdef LOCKF_DEBUG
4841960Sdg	if (lockf_debug & 1)
4851960Sdg		lf_printlist("lf_clearlock", unlock);
4861960Sdg#endif /* LOCKF_DEBUG */
4871960Sdg	return (0);
4881960Sdg}
4891960Sdg
4901960Sdg/*
4911960Sdg * Check whether there is a blocking lock,
4921960Sdg * and if so return its process identifier.
4931960Sdg */
49412819Sphkstatic int
4951960Sdglf_getlock(lock, fl)
4961960Sdg	register struct lockf *lock;
4971960Sdg	register struct flock *fl;
4981960Sdg{
4991960Sdg	register struct lockf *block;
5001960Sdg
5011960Sdg#ifdef LOCKF_DEBUG
5021960Sdg	if (lockf_debug & 1)
5031960Sdg		lf_print("lf_getlock", lock);
5041960Sdg#endif /* LOCKF_DEBUG */
5051960Sdg
5063098Sphk	if ((block = lf_getblock(lock))) {
5071960Sdg		fl->l_type = block->lf_type;
5081960Sdg		fl->l_whence = SEEK_SET;
5091960Sdg		fl->l_start = block->lf_start;
5101960Sdg		if (block->lf_end == -1)
5111960Sdg			fl->l_len = 0;
5121960Sdg		else
5131960Sdg			fl->l_len = block->lf_end - block->lf_start + 1;
5141960Sdg		if (block->lf_flags & F_POSIX)
5151960Sdg			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
5161960Sdg		else
5171960Sdg			fl->l_pid = -1;
5181960Sdg	} else {
5191960Sdg		fl->l_type = F_UNLCK;
5201960Sdg	}
5211960Sdg	return (0);
5221960Sdg}
5231960Sdg
5241960Sdg/*
5251960Sdg * Walk the list of locks for an inode and
5261960Sdg * return the first blocking lock.
5271960Sdg */
52812819Sphkstatic struct lockf *
5291960Sdglf_getblock(lock)
5301960Sdg	register struct lockf *lock;
5311960Sdg{
5321960Sdg	struct lockf **prev, *overlap, *lf = *(lock->lf_head);
5331960Sdg	int ovcase;
5341960Sdg
5351960Sdg	prev = lock->lf_head;
5363098Sphk	while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
5371960Sdg		/*
5381960Sdg		 * We've found an overlap, see if it blocks us
5391960Sdg		 */
5401960Sdg		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
5411960Sdg			return (overlap);
5421960Sdg		/*
5431960Sdg		 * Nope, point to the next one on the list and
5441960Sdg		 * see if it blocks us
5451960Sdg		 */
5461960Sdg		lf = overlap->lf_next;
5471960Sdg	}
5481960Sdg	return (NOLOCKF);
5491960Sdg}
5501960Sdg
5511960Sdg/*
5521960Sdg * Walk the list of locks for an inode to
5531960Sdg * find an overlapping lock (if any).
5541960Sdg *
5551960Sdg * NOTE: this returns only the FIRST overlapping lock.  There
5561960Sdg *	 may be more than one.
5571960Sdg */
55812819Sphkstatic int
5591960Sdglf_findoverlap(lf, lock, type, prev, overlap)
5601960Sdg	register struct lockf *lf;
5611960Sdg	struct lockf *lock;
5621960Sdg	int type;
5631960Sdg	struct lockf ***prev;
5641960Sdg	struct lockf **overlap;
5651960Sdg{
5661960Sdg	off_t start, end;
5671960Sdg
5681960Sdg	*overlap = lf;
5691960Sdg	if (lf == NOLOCKF)
5701960Sdg		return (0);
5711960Sdg#ifdef LOCKF_DEBUG
5721960Sdg	if (lockf_debug & 2)
5731960Sdg		lf_print("lf_findoverlap: looking for overlap in", lock);
5741960Sdg#endif /* LOCKF_DEBUG */
5751960Sdg	start = lock->lf_start;
5761960Sdg	end = lock->lf_end;
5771960Sdg	while (lf != NOLOCKF) {
5781960Sdg		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
5791960Sdg		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
5801960Sdg			*prev = &lf->lf_next;
5811960Sdg			*overlap = lf = lf->lf_next;
5821960Sdg			continue;
5831960Sdg		}
5841960Sdg#ifdef LOCKF_DEBUG
5851960Sdg		if (lockf_debug & 2)
5861960Sdg			lf_print("\tchecking", lf);
5871960Sdg#endif /* LOCKF_DEBUG */
5881960Sdg		/*
5891960Sdg		 * OK, check for overlap
5901960Sdg		 *
5911960Sdg		 * Six cases:
5921960Sdg		 *	0) no overlap
5931960Sdg		 *	1) overlap == lock
5941960Sdg		 *	2) overlap contains lock
5951960Sdg		 *	3) lock contains overlap
5961960Sdg		 *	4) overlap starts before lock
5971960Sdg		 *	5) overlap ends after lock
5981960Sdg		 */
5991960Sdg		if ((lf->lf_end != -1 && start > lf->lf_end) ||
6001960Sdg		    (end != -1 && lf->lf_start > end)) {
6011960Sdg			/* Case 0 */
6021960Sdg#ifdef LOCKF_DEBUG
6031960Sdg			if (lockf_debug & 2)
6041960Sdg				printf("no overlap\n");
6051960Sdg#endif /* LOCKF_DEBUG */
6061960Sdg			if ((type & SELF) && end != -1 && lf->lf_start > end)
6071960Sdg				return (0);
6081960Sdg			*prev = &lf->lf_next;
6091960Sdg			*overlap = lf = lf->lf_next;
6101960Sdg			continue;
6111960Sdg		}
6121960Sdg		if ((lf->lf_start == start) && (lf->lf_end == end)) {
6131960Sdg			/* Case 1 */
6141960Sdg#ifdef LOCKF_DEBUG
6151960Sdg			if (lockf_debug & 2)
6161960Sdg				printf("overlap == lock\n");
6171960Sdg#endif /* LOCKF_DEBUG */
6181960Sdg			return (1);
6191960Sdg		}
6201960Sdg		if ((lf->lf_start <= start) &&
6211960Sdg		    (end != -1) &&
6221960Sdg		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
6231960Sdg			/* Case 2 */
6241960Sdg#ifdef LOCKF_DEBUG
6251960Sdg			if (lockf_debug & 2)
6261960Sdg				printf("overlap contains lock\n");
6271960Sdg#endif /* LOCKF_DEBUG */
6281960Sdg			return (2);
6291960Sdg		}
6301960Sdg		if (start <= lf->lf_start &&
6311960Sdg		           (end == -1 ||
6321960Sdg			   (lf->lf_end != -1 && end >= lf->lf_end))) {
6331960Sdg			/* Case 3 */
6341960Sdg#ifdef LOCKF_DEBUG
6351960Sdg			if (lockf_debug & 2)
6361960Sdg				printf("lock contains overlap\n");
6371960Sdg#endif /* LOCKF_DEBUG */
6381960Sdg			return (3);
6391960Sdg		}
6401960Sdg		if ((lf->lf_start < start) &&
6411960Sdg			((lf->lf_end >= start) || (lf->lf_end == -1))) {
6421960Sdg			/* Case 4 */
6431960Sdg#ifdef LOCKF_DEBUG
6441960Sdg			if (lockf_debug & 2)
6451960Sdg				printf("overlap starts before lock\n");
6461960Sdg#endif /* LOCKF_DEBUG */
6471960Sdg			return (4);
6481960Sdg		}
6491960Sdg		if ((lf->lf_start > start) &&
6501960Sdg			(end != -1) &&
6511960Sdg			((lf->lf_end > end) || (lf->lf_end == -1))) {
6521960Sdg			/* Case 5 */
6531960Sdg#ifdef LOCKF_DEBUG
6541960Sdg			if (lockf_debug & 2)
6551960Sdg				printf("overlap ends after lock\n");
6561960Sdg#endif /* LOCKF_DEBUG */
6571960Sdg			return (5);
6581960Sdg		}
6591960Sdg		panic("lf_findoverlap: default");
6601960Sdg	}
6611960Sdg	return (0);
6621960Sdg}
6631960Sdg
6641960Sdg/*
6651960Sdg * Split a lock and a contained region into
6661960Sdg * two or three locks as necessary.
6671960Sdg */
66812819Sphkstatic void
6691960Sdglf_split(lock1, lock2)
6701960Sdg	register struct lockf *lock1;
6711960Sdg	register struct lockf *lock2;
6721960Sdg{
6731960Sdg	register struct lockf *splitlock;
6741960Sdg
6751960Sdg#ifdef LOCKF_DEBUG
6761960Sdg	if (lockf_debug & 2) {
6771960Sdg		lf_print("lf_split", lock1);
6781960Sdg		lf_print("splitting from", lock2);
6791960Sdg	}
6801960Sdg#endif /* LOCKF_DEBUG */
6811960Sdg	/*
6821960Sdg	 * Check to see if spliting into only two pieces.
6831960Sdg	 */
6841960Sdg	if (lock1->lf_start == lock2->lf_start) {
6851960Sdg		lock1->lf_start = lock2->lf_end + 1;
6861960Sdg		lock2->lf_next = lock1;
6871960Sdg		return;
6881960Sdg	}
6891960Sdg	if (lock1->lf_end == lock2->lf_end) {
6901960Sdg		lock1->lf_end = lock2->lf_start - 1;
6911960Sdg		lock2->lf_next = lock1->lf_next;
6921960Sdg		lock1->lf_next = lock2;
6931960Sdg		return;
6941960Sdg	}
6951960Sdg	/*
6961960Sdg	 * Make a new lock consisting of the last part of
6971960Sdg	 * the encompassing lock
6981960Sdg	 */
6991960Sdg	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
7001960Sdg	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
7011960Sdg	splitlock->lf_start = lock2->lf_end + 1;
70222521Sdyson	TAILQ_INIT(&splitlock->lf_blkhd);
7031960Sdg	lock1->lf_end = lock2->lf_start - 1;
7041960Sdg	/*
7051960Sdg	 * OK, now link it in
7061960Sdg	 */
7071960Sdg	splitlock->lf_next = lock1->lf_next;
7081960Sdg	lock2->lf_next = splitlock;
7091960Sdg	lock1->lf_next = lock2;
7101960Sdg}
7111960Sdg
7121960Sdg/*
7131960Sdg * Wakeup a blocklist
7141960Sdg */
71512819Sphkstatic void
7161960Sdglf_wakelock(listhead)
7171960Sdg	struct lockf *listhead;
7181960Sdg{
71922521Sdyson	register struct lockf *wakelock;
7201960Sdg
72143301Sdillon	while ((wakelock = listhead->lf_blkhd.tqh_first) != NULL) {
72222521Sdyson		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
7231960Sdg		wakelock->lf_next = NOLOCKF;
7241960Sdg#ifdef LOCKF_DEBUG
7251960Sdg		if (lockf_debug & 2)
7261960Sdg			lf_print("lf_wakelock: awakening", wakelock);
7271960Sdg#endif /* LOCKF_DEBUG */
72822521Sdyson		wakeup((caddr_t)wakelock);
72922521Sdyson	}
7301960Sdg}
7311960Sdg
7321960Sdg#ifdef LOCKF_DEBUG
7331960Sdg/*
7341960Sdg * Print out a lock.
7351960Sdg */
73622592Sbdevoid
7371960Sdglf_print(tag, lock)
7381960Sdg	char *tag;
7391960Sdg	register struct lockf *lock;
7401960Sdg{
7418876Srgrimes
74237951Sbde	printf("%s: lock %p for ", tag, (void *)lock);
7431960Sdg	if (lock->lf_flags & F_POSIX)
74437951Sbde		printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
7451960Sdg	else
74637951Sbde		printf("id %p", (void *)lock->lf_id);
74737951Sbde	/* XXX no %qd in kernel.  Truncate. */
74837951Sbde	printf(" in ino %lu on dev <%d, %d>, %s, start %ld, end %ld",
74937951Sbde	    (u_long)lock->lf_inode->i_number,
75037951Sbde	    major(lock->lf_inode->i_dev),
75137951Sbde	    minor(lock->lf_inode->i_dev),
75237951Sbde	    lock->lf_type == F_RDLCK ? "shared" :
75337951Sbde	    lock->lf_type == F_WRLCK ? "exclusive" :
75437951Sbde	    lock->lf_type == F_UNLCK ? "unlock" :
75537951Sbde	    "unknown", (long)lock->lf_start, (long)lock->lf_end);
75622521Sdyson	if (lock->lf_blkhd.tqh_first)
75737951Sbde		printf(" block %p\n", (void *)lock->lf_blkhd.tqh_first);
7581960Sdg	else
7591960Sdg		printf("\n");
7601960Sdg}
7611960Sdg
76222592Sbdevoid
7631960Sdglf_printlist(tag, lock)
7641960Sdg	char *tag;
7651960Sdg	struct lockf *lock;
7661960Sdg{
76722521Sdyson	register struct lockf *lf, *blk;
7681960Sdg
76937951Sbde	printf("%s: Lock list for ino %lu on dev <%d, %d>:\n",
77037951Sbde	    tag, (u_long)lock->lf_inode->i_number,
77137951Sbde	    major(lock->lf_inode->i_dev),
77237951Sbde	    minor(lock->lf_inode->i_dev));
7731960Sdg	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
77437951Sbde		printf("\tlock %p for ",(void *)lf);
7751960Sdg		if (lf->lf_flags & F_POSIX)
77637951Sbde			printf("proc %ld",
77737951Sbde			    (long)((struct proc *)lf->lf_id)->p_pid);
7781960Sdg		else
77937951Sbde			printf("id %p", (void *)lf->lf_id);
78037951Sbde		/* XXX no %qd in kernel.  Truncate. */
78137951Sbde		printf(", %s, start %ld, end %ld",
78237951Sbde		    lf->lf_type == F_RDLCK ? "shared" :
78337951Sbde		    lf->lf_type == F_WRLCK ? "exclusive" :
78437951Sbde		    lf->lf_type == F_UNLCK ? "unlock" :
78537951Sbde		    "unknown", (long)lf->lf_start, (long)lf->lf_end);
78622521Sdyson		for (blk = lf->lf_blkhd.tqh_first; blk;
78722521Sdyson		     blk = blk->lf_block.tqe_next) {
78837951Sbde			printf("\n\t\tlock request %p for ", (void *)blk);
78922521Sdyson			if (blk->lf_flags & F_POSIX)
79037951Sbde				printf("proc %ld",
79137951Sbde				    (long)((struct proc *)blk->lf_id)->p_pid);
79222521Sdyson			else
79337951Sbde				printf("id %p", (void *)blk->lf_id);
79437951Sbde			/* XXX no %qd in kernel.  Truncate. */
79537951Sbde			printf(", %s, start %ld, end %ld",
79637951Sbde			    blk->lf_type == F_RDLCK ? "shared" :
79737951Sbde			    blk->lf_type == F_WRLCK ? "exclusive" :
79837951Sbde			    blk->lf_type == F_UNLCK ? "unlock" :
79937951Sbde			    "unknown", (long)blk->lf_start,
80037951Sbde			    (long)blk->lf_end);
80122521Sdyson			if (blk->lf_blkhd.tqh_first)
80222521Sdyson				panic("lf_printlist: bad list");
80322521Sdyson		}
80422521Sdyson		printf("\n");
8051960Sdg	}
8061960Sdg}
8071960Sdg#endif /* LOCKF_DEBUG */
808