kern_lockf.c revision 82200
11960Sdg/*
21960Sdg * Copyright (c) 1982, 1986, 1989, 1993
31960Sdg *	The Regents of the University of California.  All rights reserved.
41960Sdg *
51960Sdg * This code is derived from software contributed to Berkeley by
61960Sdg * Scooter Morris at Genentech Inc.
71960Sdg *
81960Sdg * Redistribution and use in source and binary forms, with or without
91960Sdg * modification, are permitted provided that the following conditions
101960Sdg * are met:
111960Sdg * 1. Redistributions of source code must retain the above copyright
121960Sdg *    notice, this list of conditions and the following disclaimer.
131960Sdg * 2. Redistributions in binary form must reproduce the above copyright
141960Sdg *    notice, this list of conditions and the following disclaimer in the
151960Sdg *    documentation and/or other materials provided with the distribution.
161960Sdg * 3. All advertising materials mentioning features or use of this software
171960Sdg *    must display the following acknowledgement:
181960Sdg *	This product includes software developed by the University of
191960Sdg *	California, Berkeley and its contributors.
201960Sdg * 4. Neither the name of the University nor the names of its contributors
211960Sdg *    may be used to endorse or promote products derived from this software
221960Sdg *    without specific prior written permission.
231960Sdg *
241960Sdg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251960Sdg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261960Sdg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271960Sdg * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281960Sdg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291960Sdg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301960Sdg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311960Sdg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321960Sdg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331960Sdg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341960Sdg * SUCH DAMAGE.
351960Sdg *
361960Sdg *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
3750477Speter * $FreeBSD: head/sys/kern/kern_lockf.c 82200 2001-08-23 15:40:30Z ache $
381960Sdg */
391960Sdg
4032929Seivind#include "opt_debug_lockf.h"
4132929Seivind
421960Sdg#include <sys/param.h>
431960Sdg#include <sys/systm.h>
4441059Speter#include <sys/kernel.h>
4531561Sbde#include <sys/lock.h>
4676166Smarkm#include <sys/mutex.h>
471960Sdg#include <sys/proc.h>
4818020Sbde#include <sys/unistd.h>
491960Sdg#include <sys/vnode.h>
501960Sdg#include <sys/malloc.h>
511960Sdg#include <sys/fcntl.h>
521960Sdg#include <sys/lockf.h>
531960Sdg
5482189Sache#include <machine/limits.h>
5582189Sache
561960Sdg/*
571960Sdg * This variable controls the maximum number of processes that will
581960Sdg * be checked in doing deadlock detection.
591960Sdg */
6022521Sdysonstatic int maxlockdepth = MAXDEPTH;
611960Sdg
621960Sdg#ifdef LOCKF_DEBUG
6322880Sbde#include <sys/kernel.h>
6422880Sbde#include <sys/sysctl.h>
6522880Sbde
6622880Sbde#include <ufs/ufs/quota.h>
6722880Sbde#include <ufs/ufs/inode.h>
6822880Sbde
6930309Sphk
7024481Sbdestatic int	lockf_debug = 0;
7124481SbdeSYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
721960Sdg#endif
731960Sdg
7475631SalfredMALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
7530309Sphk
761960Sdg#define NOLOCKF (struct lockf *)0
771960Sdg#define SELF	0x1
781960Sdg#define OTHERS	0x2
7912819Sphkstatic int	 lf_clearlock __P((struct lockf *));
8012819Sphkstatic int	 lf_findoverlap __P((struct lockf *,
8112819Sphk	    struct lockf *, int, struct lockf ***, struct lockf **));
8212819Sphkstatic struct lockf *
8312819Sphk	 lf_getblock __P((struct lockf *));
8412819Sphkstatic int	 lf_getlock __P((struct lockf *, struct flock *));
8512819Sphkstatic int	 lf_setlock __P((struct lockf *));
8612819Sphkstatic void	 lf_split __P((struct lockf *, struct lockf *));
8712819Sphkstatic void	 lf_wakelock __P((struct lockf *));
881960Sdg
891960Sdg/*
901960Sdg * Advisory record locking support
911960Sdg */
921960Sdgint
931960Sdglf_advlock(ap, head, size)
941960Sdg	struct vop_advlock_args /* {
951960Sdg		struct vnode *a_vp;
961960Sdg		caddr_t  a_id;
971960Sdg		int  a_op;
981960Sdg		struct flock *a_fl;
991960Sdg		int  a_flags;
1001960Sdg	} */ *ap;
1011960Sdg	struct lockf **head;
1021960Sdg	u_quad_t size;
1031960Sdg{
1041960Sdg	register struct flock *fl = ap->a_fl;
1051960Sdg	register struct lockf *lock;
1061960Sdg	off_t start, end;
1071960Sdg	int error;
1081960Sdg
1091960Sdg	/*
1101960Sdg	 * Convert the flock structure into a start and end.
1111960Sdg	 */
1121960Sdg	switch (fl->l_whence) {
1131960Sdg
1141960Sdg	case SEEK_SET:
1151960Sdg	case SEEK_CUR:
1161960Sdg		/*
1171960Sdg		 * Caller is responsible for adding any necessary offset
1181960Sdg		 * when SEEK_CUR is used.
1191960Sdg		 */
1201960Sdg		start = fl->l_start;
1211960Sdg		break;
1221960Sdg
1231960Sdg	case SEEK_END:
12482195Sache		/* 'size' is always >= 0 */
12582172Sache		if (fl->l_start > 0 && size > OFF_MAX - fl->l_start)
12682172Sache			return (EOVERFLOW);
1271960Sdg		start = size + fl->l_start;
1281960Sdg		break;
1291960Sdg
1301960Sdg	default:
1311960Sdg		return (EINVAL);
1321960Sdg	}
1331960Sdg	if (start < 0)
1341960Sdg		return (EINVAL);
13582200Sache	if (fl->l_len < 0) {
13682200Sache		start += fl->l_len;
13782200Sache		if (start <= 0)
13882200Sache			return (EINVAL);
13982200Sache		end = start - 1;
14082200Sache	} else if (fl->l_len == 0)
1411960Sdg		end = -1;
14220676Sbde	else {
14382172Sache		off_t oadd = fl->l_len - 1;
14482172Sache
14582195Sache		/* 'oadd' and 'start' are >= 0 */
14682172Sache		if (oadd > OFF_MAX - start)
14782172Sache			return (EOVERFLOW);
14882172Sache		end = start + oadd;
14920676Sbde		if (end < start)
15020676Sbde			return (EINVAL);
15120676Sbde	}
1521960Sdg	/*
15320676Sbde	 * Avoid the common case of unlocking when inode has no locks.
15420676Sbde	 */
15520676Sbde	if (*head == (struct lockf *)0) {
15620676Sbde		if (ap->a_op != F_SETLK) {
15720676Sbde			fl->l_type = F_UNLCK;
15820676Sbde			return (0);
15920676Sbde		}
16020676Sbde	}
16120676Sbde	/*
1621960Sdg	 * Create the lockf structure
1631960Sdg	 */
1641960Sdg	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
1651960Sdg	lock->lf_start = start;
1661960Sdg	lock->lf_end = end;
1671960Sdg	lock->lf_id = ap->a_id;
16822521Sdyson/*	lock->lf_inode = ip; */	/* XXX JH */
16922521Sdyson	lock->lf_type = fl->l_type;
1701960Sdg	lock->lf_head = head;
1711960Sdg	lock->lf_next = (struct lockf *)0;
17222521Sdyson	TAILQ_INIT(&lock->lf_blkhd);
1731960Sdg	lock->lf_flags = ap->a_flags;
1741960Sdg	/*
1751960Sdg	 * Do the requested operation.
1761960Sdg	 */
1771960Sdg	switch(ap->a_op) {
1781960Sdg	case F_SETLK:
1791960Sdg		return (lf_setlock(lock));
1801960Sdg
1811960Sdg	case F_UNLCK:
1821960Sdg		error = lf_clearlock(lock);
1831960Sdg		FREE(lock, M_LOCKF);
1841960Sdg		return (error);
1851960Sdg
1861960Sdg	case F_GETLK:
1871960Sdg		error = lf_getlock(lock, fl);
1881960Sdg		FREE(lock, M_LOCKF);
1891960Sdg		return (error);
1908876Srgrimes
1911960Sdg	default:
1921960Sdg		free(lock, M_LOCKF);
1931960Sdg		return (EINVAL);
1941960Sdg	}
1951960Sdg	/* NOTREACHED */
1961960Sdg}
1971960Sdg
1981960Sdg/*
1991960Sdg * Set a byte-range lock.
2001960Sdg */
20112819Sphkstatic int
2021960Sdglf_setlock(lock)
2031960Sdg	register struct lockf *lock;
2041960Sdg{
2051960Sdg	register struct lockf *block;
2061960Sdg	struct lockf **head = lock->lf_head;
2071960Sdg	struct lockf **prev, *overlap, *ltmp;
2081960Sdg	static char lockstr[] = "lockf";
2091960Sdg	int ovcase, priority, needtolink, error;
2101960Sdg
2111960Sdg#ifdef LOCKF_DEBUG
2121960Sdg	if (lockf_debug & 1)
2131960Sdg		lf_print("lf_setlock", lock);
2141960Sdg#endif /* LOCKF_DEBUG */
2151960Sdg
2161960Sdg	/*
2171960Sdg	 * Set the priority
2181960Sdg	 */
2191960Sdg	priority = PLOCK;
2201960Sdg	if (lock->lf_type == F_WRLCK)
2211960Sdg		priority += 4;
2221960Sdg	priority |= PCATCH;
2231960Sdg	/*
2241960Sdg	 * Scan lock list for this file looking for locks that would block us.
2251960Sdg	 */
2263098Sphk	while ((block = lf_getblock(lock))) {
2271960Sdg		/*
2281960Sdg		 * Free the structure and return if nonblocking.
2291960Sdg		 */
2301960Sdg		if ((lock->lf_flags & F_WAIT) == 0) {
2311960Sdg			FREE(lock, M_LOCKF);
2321960Sdg			return (EAGAIN);
2331960Sdg		}
2341960Sdg		/*
2351960Sdg		 * We are blocked. Since flock style locks cover
2361960Sdg		 * the whole file, there is no chance for deadlock.
2371960Sdg		 * For byte-range locks we must check for deadlock.
2381960Sdg		 *
2391960Sdg		 * Deadlock detection is done by looking through the
2401960Sdg		 * wait channels to see if there are any cycles that
2411960Sdg		 * involve us. MAXDEPTH is set just to make sure we
2421960Sdg		 * do not go off into neverland.
2431960Sdg		 */
2441960Sdg		if ((lock->lf_flags & F_POSIX) &&
2451960Sdg		    (block->lf_flags & F_POSIX)) {
2461960Sdg			register struct proc *wproc;
2471960Sdg			register struct lockf *waitblock;
2481960Sdg			int i = 0;
2491960Sdg
2501960Sdg			/* The block is waiting on something */
2511960Sdg			wproc = (struct proc *)block->lf_id;
25274727Sjhb			mtx_lock_spin(&sched_lock);
2531960Sdg			while (wproc->p_wchan &&
2541960Sdg			       (wproc->p_wmesg == lockstr) &&
2551960Sdg			       (i++ < maxlockdepth)) {
2561960Sdg				waitblock = (struct lockf *)wproc->p_wchan;
2571960Sdg				/* Get the owner of the blocking lock */
2581960Sdg				waitblock = waitblock->lf_next;
2591960Sdg				if ((waitblock->lf_flags & F_POSIX) == 0)
2601960Sdg					break;
2611960Sdg				wproc = (struct proc *)waitblock->lf_id;
2621960Sdg				if (wproc == (struct proc *)lock->lf_id) {
26374727Sjhb					mtx_unlock_spin(&sched_lock);
2641960Sdg					free(lock, M_LOCKF);
2651960Sdg					return (EDEADLK);
2661960Sdg				}
2671960Sdg			}
26874727Sjhb			mtx_unlock_spin(&sched_lock);
2691960Sdg		}
2701960Sdg		/*
2711960Sdg		 * For flock type locks, we must first remove
2721960Sdg		 * any shared locks that we hold before we sleep
2731960Sdg		 * waiting for an exclusive lock.
2741960Sdg		 */
2751960Sdg		if ((lock->lf_flags & F_FLOCK) &&
2761960Sdg		    lock->lf_type == F_WRLCK) {
2771960Sdg			lock->lf_type = F_UNLCK;
2781960Sdg			(void) lf_clearlock(lock);
2791960Sdg			lock->lf_type = F_WRLCK;
2801960Sdg		}
2811960Sdg		/*
2821960Sdg		 * Add our lock to the blocked list and sleep until we're free.
2831960Sdg		 * Remember who blocked us (for deadlock detection).
2841960Sdg		 */
2851960Sdg		lock->lf_next = block;
28622521Sdyson		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
2871960Sdg#ifdef LOCKF_DEBUG
2881960Sdg		if (lockf_debug & 1) {
2891960Sdg			lf_print("lf_setlock: blocking on", block);
2901960Sdg			lf_printlist("lf_setlock", block);
2911960Sdg		}
2921960Sdg#endif /* LOCKF_DEBUG */
29348556Sbde		error = tsleep((caddr_t)lock, priority, lockstr, 0);
29448556Sbde		/*
29548556Sbde		 * We may have been awakened by a signal and/or by a
29648556Sbde		 * debugger continuing us (in which cases we must remove
29748556Sbde		 * ourselves from the blocked list) and/or by another
29848556Sbde		 * process releasing a lock (in which case we have
29948556Sbde		 * already been removed from the blocked list and our
30048556Sbde		 * lf_next field set to NOLOCKF).
30148556Sbde		 */
30248556Sbde		if (lock->lf_next) {
30348556Sbde			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
30448556Sbde			lock->lf_next = NOLOCKF;
3051960Sdg		}
30648556Sbde		if (error) {
30748556Sbde			free(lock, M_LOCKF);
30848556Sbde			return (error);
30948556Sbde		}
3101960Sdg	}
3111960Sdg	/*
3121960Sdg	 * No blocks!!  Add the lock.  Note that we will
3131960Sdg	 * downgrade or upgrade any overlapping locks this
3141960Sdg	 * process already owns.
3151960Sdg	 *
3161960Sdg	 * Skip over locks owned by other processes.
3171960Sdg	 * Handle any locks that overlap and are owned by ourselves.
3181960Sdg	 */
3191960Sdg	prev = head;
3201960Sdg	block = *head;
3211960Sdg	needtolink = 1;
3221960Sdg	for (;;) {
3233098Sphk		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
3243098Sphk		if (ovcase)
3251960Sdg			block = overlap->lf_next;
3261960Sdg		/*
3271960Sdg		 * Six cases:
3281960Sdg		 *	0) no overlap
3291960Sdg		 *	1) overlap == lock
3301960Sdg		 *	2) overlap contains lock
3311960Sdg		 *	3) lock contains overlap
3321960Sdg		 *	4) overlap starts before lock
3331960Sdg		 *	5) overlap ends after lock
3341960Sdg		 */
3351960Sdg		switch (ovcase) {
3361960Sdg		case 0: /* no overlap */
3371960Sdg			if (needtolink) {
3381960Sdg				*prev = lock;
3391960Sdg				lock->lf_next = overlap;
3401960Sdg			}
3411960Sdg			break;
3421960Sdg
3431960Sdg		case 1: /* overlap == lock */
3441960Sdg			/*
3451960Sdg			 * If downgrading lock, others may be
3461960Sdg			 * able to acquire it.
3471960Sdg			 */
3481960Sdg			if (lock->lf_type == F_RDLCK &&
3491960Sdg			    overlap->lf_type == F_WRLCK)
3501960Sdg				lf_wakelock(overlap);
3511960Sdg			overlap->lf_type = lock->lf_type;
3521960Sdg			FREE(lock, M_LOCKF);
3531960Sdg			lock = overlap; /* for debug output below */
3541960Sdg			break;
3551960Sdg
3561960Sdg		case 2: /* overlap contains lock */
3571960Sdg			/*
3581960Sdg			 * Check for common starting point and different types.
3591960Sdg			 */
3601960Sdg			if (overlap->lf_type == lock->lf_type) {
3611960Sdg				free(lock, M_LOCKF);
3621960Sdg				lock = overlap; /* for debug output below */
3631960Sdg				break;
3641960Sdg			}
3651960Sdg			if (overlap->lf_start == lock->lf_start) {
3661960Sdg				*prev = lock;
3671960Sdg				lock->lf_next = overlap;
3681960Sdg				overlap->lf_start = lock->lf_end + 1;
3691960Sdg			} else
3701960Sdg				lf_split(overlap, lock);
3711960Sdg			lf_wakelock(overlap);
3721960Sdg			break;
3731960Sdg
3741960Sdg		case 3: /* lock contains overlap */
3751960Sdg			/*
3761960Sdg			 * If downgrading lock, others may be able to
3771960Sdg			 * acquire it, otherwise take the list.
3781960Sdg			 */
3791960Sdg			if (lock->lf_type == F_RDLCK &&
3801960Sdg			    overlap->lf_type == F_WRLCK) {
3811960Sdg				lf_wakelock(overlap);
3821960Sdg			} else {
38353225Sphk				while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
38453225Sphk					ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
38522521Sdyson					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
38622521Sdyson					    lf_block);
38722521Sdyson					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
38822521Sdyson					    ltmp, lf_block);
38946744Sdt					ltmp->lf_next = lock;
39022521Sdyson				}
3911960Sdg			}
3921960Sdg			/*
3931960Sdg			 * Add the new lock if necessary and delete the overlap.
3941960Sdg			 */
3951960Sdg			if (needtolink) {
3961960Sdg				*prev = lock;
3971960Sdg				lock->lf_next = overlap->lf_next;
3981960Sdg				prev = &lock->lf_next;
3991960Sdg				needtolink = 0;
4001960Sdg			} else
4011960Sdg				*prev = overlap->lf_next;
4021960Sdg			free(overlap, M_LOCKF);
4031960Sdg			continue;
4041960Sdg
4051960Sdg		case 4: /* overlap starts before lock */
4061960Sdg			/*
4071960Sdg			 * Add lock after overlap on the list.
4081960Sdg			 */
4091960Sdg			lock->lf_next = overlap->lf_next;
4101960Sdg			overlap->lf_next = lock;
4111960Sdg			overlap->lf_end = lock->lf_start - 1;
4121960Sdg			prev = &lock->lf_next;
4131960Sdg			lf_wakelock(overlap);
4141960Sdg			needtolink = 0;
4151960Sdg			continue;
4161960Sdg
4171960Sdg		case 5: /* overlap ends after lock */
4181960Sdg			/*
4191960Sdg			 * Add the new lock before overlap.
4201960Sdg			 */
4211960Sdg			if (needtolink) {
4221960Sdg				*prev = lock;
4231960Sdg				lock->lf_next = overlap;
4241960Sdg			}
4251960Sdg			overlap->lf_start = lock->lf_end + 1;
4261960Sdg			lf_wakelock(overlap);
4271960Sdg			break;
4281960Sdg		}
4291960Sdg		break;
4301960Sdg	}
4311960Sdg#ifdef LOCKF_DEBUG
4321960Sdg	if (lockf_debug & 1) {
4331960Sdg		lf_print("lf_setlock: got the lock", lock);
4341960Sdg		lf_printlist("lf_setlock", lock);
4351960Sdg	}
4361960Sdg#endif /* LOCKF_DEBUG */
4371960Sdg	return (0);
4381960Sdg}
4391960Sdg
4401960Sdg/*
4411960Sdg * Remove a byte-range lock on an inode.
4421960Sdg *
4431960Sdg * Generally, find the lock (or an overlap to that lock)
4441960Sdg * and remove it (or shrink it), then wakeup anyone we can.
4451960Sdg */
44612819Sphkstatic int
4471960Sdglf_clearlock(unlock)
4481960Sdg	register struct lockf *unlock;
4491960Sdg{
4501960Sdg	struct lockf **head = unlock->lf_head;
4511960Sdg	register struct lockf *lf = *head;
4521960Sdg	struct lockf *overlap, **prev;
4531960Sdg	int ovcase;
4541960Sdg
4551960Sdg	if (lf == NOLOCKF)
4561960Sdg		return (0);
4571960Sdg#ifdef LOCKF_DEBUG
4581960Sdg	if (unlock->lf_type != F_UNLCK)
4591960Sdg		panic("lf_clearlock: bad type");
4601960Sdg	if (lockf_debug & 1)
4611960Sdg		lf_print("lf_clearlock", unlock);
4621960Sdg#endif /* LOCKF_DEBUG */
4631960Sdg	prev = head;
4643098Sphk	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
4651960Sdg		/*
4661960Sdg		 * Wakeup the list of locks to be retried.
4671960Sdg		 */
4681960Sdg		lf_wakelock(overlap);
4691960Sdg
4701960Sdg		switch (ovcase) {
4711960Sdg
4721960Sdg		case 1: /* overlap == lock */
4731960Sdg			*prev = overlap->lf_next;
4741960Sdg			FREE(overlap, M_LOCKF);
4751960Sdg			break;
4761960Sdg
4771960Sdg		case 2: /* overlap contains lock: split it */
4781960Sdg			if (overlap->lf_start == unlock->lf_start) {
4791960Sdg				overlap->lf_start = unlock->lf_end + 1;
4801960Sdg				break;
4811960Sdg			}
4821960Sdg			lf_split(overlap, unlock);
4831960Sdg			overlap->lf_next = unlock->lf_next;
4841960Sdg			break;
4851960Sdg
4861960Sdg		case 3: /* lock contains overlap */
4871960Sdg			*prev = overlap->lf_next;
4881960Sdg			lf = overlap->lf_next;
4891960Sdg			free(overlap, M_LOCKF);
4901960Sdg			continue;
4911960Sdg
4921960Sdg		case 4: /* overlap starts before lock */
4931960Sdg			overlap->lf_end = unlock->lf_start - 1;
4941960Sdg			prev = &overlap->lf_next;
4951960Sdg			lf = overlap->lf_next;
4961960Sdg			continue;
4971960Sdg
4981960Sdg		case 5: /* overlap ends after lock */
4991960Sdg			overlap->lf_start = unlock->lf_end + 1;
5001960Sdg			break;
5011960Sdg		}
5021960Sdg		break;
5031960Sdg	}
5041960Sdg#ifdef LOCKF_DEBUG
5051960Sdg	if (lockf_debug & 1)
5061960Sdg		lf_printlist("lf_clearlock", unlock);
5071960Sdg#endif /* LOCKF_DEBUG */
5081960Sdg	return (0);
5091960Sdg}
5101960Sdg
5111960Sdg/*
5121960Sdg * Check whether there is a blocking lock,
5131960Sdg * and if so return its process identifier.
5141960Sdg */
51512819Sphkstatic int
5161960Sdglf_getlock(lock, fl)
5171960Sdg	register struct lockf *lock;
5181960Sdg	register struct flock *fl;
5191960Sdg{
5201960Sdg	register struct lockf *block;
5211960Sdg
5221960Sdg#ifdef LOCKF_DEBUG
5231960Sdg	if (lockf_debug & 1)
5241960Sdg		lf_print("lf_getlock", lock);
5251960Sdg#endif /* LOCKF_DEBUG */
5261960Sdg
5273098Sphk	if ((block = lf_getblock(lock))) {
5281960Sdg		fl->l_type = block->lf_type;
5291960Sdg		fl->l_whence = SEEK_SET;
5301960Sdg		fl->l_start = block->lf_start;
5311960Sdg		if (block->lf_end == -1)
5321960Sdg			fl->l_len = 0;
5331960Sdg		else
5341960Sdg			fl->l_len = block->lf_end - block->lf_start + 1;
5351960Sdg		if (block->lf_flags & F_POSIX)
5361960Sdg			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
5371960Sdg		else
5381960Sdg			fl->l_pid = -1;
5391960Sdg	} else {
5401960Sdg		fl->l_type = F_UNLCK;
5411960Sdg	}
5421960Sdg	return (0);
5431960Sdg}
5441960Sdg
5451960Sdg/*
5461960Sdg * Walk the list of locks for an inode and
5471960Sdg * return the first blocking lock.
5481960Sdg */
54912819Sphkstatic struct lockf *
5501960Sdglf_getblock(lock)
5511960Sdg	register struct lockf *lock;
5521960Sdg{
5531960Sdg	struct lockf **prev, *overlap, *lf = *(lock->lf_head);
5541960Sdg	int ovcase;
5551960Sdg
5561960Sdg	prev = lock->lf_head;
5573098Sphk	while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
5581960Sdg		/*
5591960Sdg		 * We've found an overlap, see if it blocks us
5601960Sdg		 */
5611960Sdg		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
5621960Sdg			return (overlap);
5631960Sdg		/*
5641960Sdg		 * Nope, point to the next one on the list and
5651960Sdg		 * see if it blocks us
5661960Sdg		 */
5671960Sdg		lf = overlap->lf_next;
5681960Sdg	}
5691960Sdg	return (NOLOCKF);
5701960Sdg}
5711960Sdg
5721960Sdg/*
5731960Sdg * Walk the list of locks for an inode to
5741960Sdg * find an overlapping lock (if any).
5751960Sdg *
5761960Sdg * NOTE: this returns only the FIRST overlapping lock.  There
5771960Sdg *	 may be more than one.
5781960Sdg */
57912819Sphkstatic int
5801960Sdglf_findoverlap(lf, lock, type, prev, overlap)
5811960Sdg	register struct lockf *lf;
5821960Sdg	struct lockf *lock;
5831960Sdg	int type;
5841960Sdg	struct lockf ***prev;
5851960Sdg	struct lockf **overlap;
5861960Sdg{
5871960Sdg	off_t start, end;
5881960Sdg
5891960Sdg	*overlap = lf;
5901960Sdg	if (lf == NOLOCKF)
5911960Sdg		return (0);
5921960Sdg#ifdef LOCKF_DEBUG
5931960Sdg	if (lockf_debug & 2)
5941960Sdg		lf_print("lf_findoverlap: looking for overlap in", lock);
5951960Sdg#endif /* LOCKF_DEBUG */
5961960Sdg	start = lock->lf_start;
5971960Sdg	end = lock->lf_end;
5981960Sdg	while (lf != NOLOCKF) {
5991960Sdg		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
6001960Sdg		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
6011960Sdg			*prev = &lf->lf_next;
6021960Sdg			*overlap = lf = lf->lf_next;
6031960Sdg			continue;
6041960Sdg		}
6051960Sdg#ifdef LOCKF_DEBUG
6061960Sdg		if (lockf_debug & 2)
6071960Sdg			lf_print("\tchecking", lf);
6081960Sdg#endif /* LOCKF_DEBUG */
6091960Sdg		/*
6101960Sdg		 * OK, check for overlap
6111960Sdg		 *
6121960Sdg		 * Six cases:
6131960Sdg		 *	0) no overlap
6141960Sdg		 *	1) overlap == lock
6151960Sdg		 *	2) overlap contains lock
6161960Sdg		 *	3) lock contains overlap
6171960Sdg		 *	4) overlap starts before lock
6181960Sdg		 *	5) overlap ends after lock
6191960Sdg		 */
6201960Sdg		if ((lf->lf_end != -1 && start > lf->lf_end) ||
6211960Sdg		    (end != -1 && lf->lf_start > end)) {
6221960Sdg			/* Case 0 */
6231960Sdg#ifdef LOCKF_DEBUG
6241960Sdg			if (lockf_debug & 2)
6251960Sdg				printf("no overlap\n");
6261960Sdg#endif /* LOCKF_DEBUG */
6271960Sdg			if ((type & SELF) && end != -1 && lf->lf_start > end)
6281960Sdg				return (0);
6291960Sdg			*prev = &lf->lf_next;
6301960Sdg			*overlap = lf = lf->lf_next;
6311960Sdg			continue;
6321960Sdg		}
6331960Sdg		if ((lf->lf_start == start) && (lf->lf_end == end)) {
6341960Sdg			/* Case 1 */
6351960Sdg#ifdef LOCKF_DEBUG
6361960Sdg			if (lockf_debug & 2)
6371960Sdg				printf("overlap == lock\n");
6381960Sdg#endif /* LOCKF_DEBUG */
6391960Sdg			return (1);
6401960Sdg		}
6411960Sdg		if ((lf->lf_start <= start) &&
6421960Sdg		    (end != -1) &&
6431960Sdg		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
6441960Sdg			/* Case 2 */
6451960Sdg#ifdef LOCKF_DEBUG
6461960Sdg			if (lockf_debug & 2)
6471960Sdg				printf("overlap contains lock\n");
6481960Sdg#endif /* LOCKF_DEBUG */
6491960Sdg			return (2);
6501960Sdg		}
6511960Sdg		if (start <= lf->lf_start &&
6521960Sdg		           (end == -1 ||
6531960Sdg			   (lf->lf_end != -1 && end >= lf->lf_end))) {
6541960Sdg			/* Case 3 */
6551960Sdg#ifdef LOCKF_DEBUG
6561960Sdg			if (lockf_debug & 2)
6571960Sdg				printf("lock contains overlap\n");
6581960Sdg#endif /* LOCKF_DEBUG */
6591960Sdg			return (3);
6601960Sdg		}
6611960Sdg		if ((lf->lf_start < start) &&
6621960Sdg			((lf->lf_end >= start) || (lf->lf_end == -1))) {
6631960Sdg			/* Case 4 */
6641960Sdg#ifdef LOCKF_DEBUG
6651960Sdg			if (lockf_debug & 2)
6661960Sdg				printf("overlap starts before lock\n");
6671960Sdg#endif /* LOCKF_DEBUG */
6681960Sdg			return (4);
6691960Sdg		}
6701960Sdg		if ((lf->lf_start > start) &&
6711960Sdg			(end != -1) &&
6721960Sdg			((lf->lf_end > end) || (lf->lf_end == -1))) {
6731960Sdg			/* Case 5 */
6741960Sdg#ifdef LOCKF_DEBUG
6751960Sdg			if (lockf_debug & 2)
6761960Sdg				printf("overlap ends after lock\n");
6771960Sdg#endif /* LOCKF_DEBUG */
6781960Sdg			return (5);
6791960Sdg		}
6801960Sdg		panic("lf_findoverlap: default");
6811960Sdg	}
6821960Sdg	return (0);
6831960Sdg}
6841960Sdg
6851960Sdg/*
6861960Sdg * Split a lock and a contained region into
6871960Sdg * two or three locks as necessary.
6881960Sdg */
68912819Sphkstatic void
6901960Sdglf_split(lock1, lock2)
6911960Sdg	register struct lockf *lock1;
6921960Sdg	register struct lockf *lock2;
6931960Sdg{
6941960Sdg	register struct lockf *splitlock;
6951960Sdg
6961960Sdg#ifdef LOCKF_DEBUG
6971960Sdg	if (lockf_debug & 2) {
6981960Sdg		lf_print("lf_split", lock1);
6991960Sdg		lf_print("splitting from", lock2);
7001960Sdg	}
7011960Sdg#endif /* LOCKF_DEBUG */
7021960Sdg	/*
7031960Sdg	 * Check to see if spliting into only two pieces.
7041960Sdg	 */
7051960Sdg	if (lock1->lf_start == lock2->lf_start) {
7061960Sdg		lock1->lf_start = lock2->lf_end + 1;
7071960Sdg		lock2->lf_next = lock1;
7081960Sdg		return;
7091960Sdg	}
7101960Sdg	if (lock1->lf_end == lock2->lf_end) {
7111960Sdg		lock1->lf_end = lock2->lf_start - 1;
7121960Sdg		lock2->lf_next = lock1->lf_next;
7131960Sdg		lock1->lf_next = lock2;
7141960Sdg		return;
7151960Sdg	}
7161960Sdg	/*
7171960Sdg	 * Make a new lock consisting of the last part of
7181960Sdg	 * the encompassing lock
7191960Sdg	 */
7201960Sdg	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
7211960Sdg	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
7221960Sdg	splitlock->lf_start = lock2->lf_end + 1;
72322521Sdyson	TAILQ_INIT(&splitlock->lf_blkhd);
7241960Sdg	lock1->lf_end = lock2->lf_start - 1;
7251960Sdg	/*
7261960Sdg	 * OK, now link it in
7271960Sdg	 */
7281960Sdg	splitlock->lf_next = lock1->lf_next;
7291960Sdg	lock2->lf_next = splitlock;
7301960Sdg	lock1->lf_next = lock2;
7311960Sdg}
7321960Sdg
7331960Sdg/*
7341960Sdg * Wakeup a blocklist
7351960Sdg */
73612819Sphkstatic void
7371960Sdglf_wakelock(listhead)
7381960Sdg	struct lockf *listhead;
7391960Sdg{
74022521Sdyson	register struct lockf *wakelock;
7411960Sdg
74253225Sphk	while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
74353225Sphk		wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
74422521Sdyson		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
7451960Sdg		wakelock->lf_next = NOLOCKF;
7461960Sdg#ifdef LOCKF_DEBUG
7471960Sdg		if (lockf_debug & 2)
7481960Sdg			lf_print("lf_wakelock: awakening", wakelock);
7491960Sdg#endif /* LOCKF_DEBUG */
75022521Sdyson		wakeup((caddr_t)wakelock);
75122521Sdyson	}
7521960Sdg}
7531960Sdg
7541960Sdg#ifdef LOCKF_DEBUG
7551960Sdg/*
7561960Sdg * Print out a lock.
7571960Sdg */
75822592Sbdevoid
7591960Sdglf_print(tag, lock)
7601960Sdg	char *tag;
7611960Sdg	register struct lockf *lock;
7621960Sdg{
7638876Srgrimes
76437951Sbde	printf("%s: lock %p for ", tag, (void *)lock);
7651960Sdg	if (lock->lf_flags & F_POSIX)
76637951Sbde		printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
7671960Sdg	else
76837951Sbde		printf("id %p", (void *)lock->lf_id);
76937951Sbde	/* XXX no %qd in kernel.  Truncate. */
77037951Sbde	printf(" in ino %lu on dev <%d, %d>, %s, start %ld, end %ld",
77137951Sbde	    (u_long)lock->lf_inode->i_number,
77237951Sbde	    major(lock->lf_inode->i_dev),
77337951Sbde	    minor(lock->lf_inode->i_dev),
77437951Sbde	    lock->lf_type == F_RDLCK ? "shared" :
77537951Sbde	    lock->lf_type == F_WRLCK ? "exclusive" :
77637951Sbde	    lock->lf_type == F_UNLCK ? "unlock" :
77737951Sbde	    "unknown", (long)lock->lf_start, (long)lock->lf_end);
77853225Sphk	if (!TAILQ_EMPTY(&lock->lf_blkhd))
77953225Sphk		printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
7801960Sdg	else
7811960Sdg		printf("\n");
7821960Sdg}
7831960Sdg
78422592Sbdevoid
7851960Sdglf_printlist(tag, lock)
7861960Sdg	char *tag;
7871960Sdg	struct lockf *lock;
7881960Sdg{
78922521Sdyson	register struct lockf *lf, *blk;
7901960Sdg
79137951Sbde	printf("%s: Lock list for ino %lu on dev <%d, %d>:\n",
79237951Sbde	    tag, (u_long)lock->lf_inode->i_number,
79337951Sbde	    major(lock->lf_inode->i_dev),
79437951Sbde	    minor(lock->lf_inode->i_dev));
7951960Sdg	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
79637951Sbde		printf("\tlock %p for ",(void *)lf);
7971960Sdg		if (lf->lf_flags & F_POSIX)
79837951Sbde			printf("proc %ld",
79937951Sbde			    (long)((struct proc *)lf->lf_id)->p_pid);
8001960Sdg		else
80137951Sbde			printf("id %p", (void *)lf->lf_id);
80237951Sbde		/* XXX no %qd in kernel.  Truncate. */
80337951Sbde		printf(", %s, start %ld, end %ld",
80437951Sbde		    lf->lf_type == F_RDLCK ? "shared" :
80537951Sbde		    lf->lf_type == F_WRLCK ? "exclusive" :
80637951Sbde		    lf->lf_type == F_UNLCK ? "unlock" :
80737951Sbde		    "unknown", (long)lf->lf_start, (long)lf->lf_end);
80853225Sphk		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
80937951Sbde			printf("\n\t\tlock request %p for ", (void *)blk);
81022521Sdyson			if (blk->lf_flags & F_POSIX)
81137951Sbde				printf("proc %ld",
81237951Sbde				    (long)((struct proc *)blk->lf_id)->p_pid);
81322521Sdyson			else
81437951Sbde				printf("id %p", (void *)blk->lf_id);
81537951Sbde			/* XXX no %qd in kernel.  Truncate. */
81637951Sbde			printf(", %s, start %ld, end %ld",
81737951Sbde			    blk->lf_type == F_RDLCK ? "shared" :
81837951Sbde			    blk->lf_type == F_WRLCK ? "exclusive" :
81937951Sbde			    blk->lf_type == F_UNLCK ? "unlock" :
82037951Sbde			    "unknown", (long)blk->lf_start,
82137951Sbde			    (long)blk->lf_end);
82253225Sphk			if (!TAILQ_EMPTY(&blk->lf_blkhd))
82322521Sdyson				panic("lf_printlist: bad list");
82422521Sdyson		}
82522521Sdyson		printf("\n");
8261960Sdg	}
8271960Sdg}
8281960Sdg#endif /* LOCKF_DEBUG */
829