kern_lockf.c revision 139804
1139804Simp/*-
21960Sdg * Copyright (c) 1982, 1986, 1989, 1993
31960Sdg *	The Regents of the University of California.  All rights reserved.
41960Sdg *
51960Sdg * This code is derived from software contributed to Berkeley by
61960Sdg * Scooter Morris at Genentech Inc.
71960Sdg *
81960Sdg * Redistribution and use in source and binary forms, with or without
91960Sdg * modification, are permitted provided that the following conditions
101960Sdg * are met:
111960Sdg * 1. Redistributions of source code must retain the above copyright
121960Sdg *    notice, this list of conditions and the following disclaimer.
131960Sdg * 2. Redistributions in binary form must reproduce the above copyright
141960Sdg *    notice, this list of conditions and the following disclaimer in the
151960Sdg *    documentation and/or other materials provided with the distribution.
161960Sdg * 4. Neither the name of the University nor the names of its contributors
171960Sdg *    may be used to endorse or promote products derived from this software
181960Sdg *    without specific prior written permission.
191960Sdg *
201960Sdg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211960Sdg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221960Sdg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231960Sdg * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241960Sdg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251960Sdg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261960Sdg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271960Sdg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281960Sdg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291960Sdg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301960Sdg * SUCH DAMAGE.
311960Sdg *
321960Sdg *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
331960Sdg */
341960Sdg
35116182Sobrien#include <sys/cdefs.h>
36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_lockf.c 139804 2005-01-06 23:35:40Z imp $");
37116182Sobrien
3832929Seivind#include "opt_debug_lockf.h"
3932929Seivind
401960Sdg#include <sys/param.h>
411960Sdg#include <sys/systm.h>
4241059Speter#include <sys/kernel.h>
43114216Skan#include <sys/limits.h>
4431561Sbde#include <sys/lock.h>
45101778Sphk#include <sys/mount.h>
4676166Smarkm#include <sys/mutex.h>
471960Sdg#include <sys/proc.h>
4818020Sbde#include <sys/unistd.h>
491960Sdg#include <sys/vnode.h>
501960Sdg#include <sys/malloc.h>
511960Sdg#include <sys/fcntl.h>
521960Sdg#include <sys/lockf.h>
531960Sdg
541960Sdg/*
551960Sdg * This variable controls the maximum number of processes that will
561960Sdg * be checked in doing deadlock detection.
571960Sdg */
5822521Sdysonstatic int maxlockdepth = MAXDEPTH;
591960Sdg
601960Sdg#ifdef LOCKF_DEBUG
6122880Sbde#include <sys/sysctl.h>
6222880Sbde
6322880Sbde#include <ufs/ufs/quota.h>
6422880Sbde#include <ufs/ufs/inode.h>
6522880Sbde
6630309Sphk
6724481Sbdestatic int	lockf_debug = 0;
6824481SbdeSYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
691960Sdg#endif
701960Sdg
7175631SalfredMALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
7230309Sphk
731960Sdg#define NOLOCKF (struct lockf *)0
741960Sdg#define SELF	0x1
751960Sdg#define OTHERS	0x2
7692723Salfredstatic int	 lf_clearlock(struct lockf *);
7792723Salfredstatic int	 lf_findoverlap(struct lockf *,
7892723Salfred	    struct lockf *, int, struct lockf ***, struct lockf **);
7912819Sphkstatic struct lockf *
8092723Salfred	 lf_getblock(struct lockf *);
8192723Salfredstatic int	 lf_getlock(struct lockf *, struct flock *);
8292723Salfredstatic int	 lf_setlock(struct lockf *);
8392723Salfredstatic void	 lf_split(struct lockf *, struct lockf *);
8492723Salfredstatic void	 lf_wakelock(struct lockf *);
851960Sdg
861960Sdg/*
871960Sdg * Advisory record locking support
881960Sdg */
891960Sdgint
901960Sdglf_advlock(ap, head, size)
911960Sdg	struct vop_advlock_args /* {
921960Sdg		struct vnode *a_vp;
931960Sdg		caddr_t  a_id;
941960Sdg		int  a_op;
951960Sdg		struct flock *a_fl;
961960Sdg		int  a_flags;
971960Sdg	} */ *ap;
981960Sdg	struct lockf **head;
991960Sdg	u_quad_t size;
1001960Sdg{
1011960Sdg	register struct flock *fl = ap->a_fl;
1021960Sdg	register struct lockf *lock;
10382346Sache	off_t start, end, oadd;
1041960Sdg	int error;
1051960Sdg
1061960Sdg	/*
1071960Sdg	 * Convert the flock structure into a start and end.
1081960Sdg	 */
1091960Sdg	switch (fl->l_whence) {
1101960Sdg
1111960Sdg	case SEEK_SET:
1121960Sdg	case SEEK_CUR:
1131960Sdg		/*
1141960Sdg		 * Caller is responsible for adding any necessary offset
1151960Sdg		 * when SEEK_CUR is used.
1161960Sdg		 */
1171960Sdg		start = fl->l_start;
1181960Sdg		break;
1191960Sdg
1201960Sdg	case SEEK_END:
12182516Sache		if (size > OFF_MAX ||
12282516Sache		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
12382172Sache			return (EOVERFLOW);
1241960Sdg		start = size + fl->l_start;
1251960Sdg		break;
1261960Sdg
1271960Sdg	default:
1281960Sdg		return (EINVAL);
1291960Sdg	}
1301960Sdg	if (start < 0)
1311960Sdg		return (EINVAL);
13282200Sache	if (fl->l_len < 0) {
13382202Sache		if (start == 0)
13482202Sache			return (EINVAL);
13582202Sache		end = start - 1;
13682200Sache		start += fl->l_len;
13782202Sache		if (start < 0)
13882200Sache			return (EINVAL);
13982200Sache	} else if (fl->l_len == 0)
1401960Sdg		end = -1;
14120676Sbde	else {
14282346Sache		oadd = fl->l_len - 1;
14382172Sache		if (oadd > OFF_MAX - start)
14482172Sache			return (EOVERFLOW);
14582172Sache		end = start + oadd;
14620676Sbde	}
1471960Sdg	/*
14820676Sbde	 * Avoid the common case of unlocking when inode has no locks.
14920676Sbde	 */
15020676Sbde	if (*head == (struct lockf *)0) {
15120676Sbde		if (ap->a_op != F_SETLK) {
15220676Sbde			fl->l_type = F_UNLCK;
15320676Sbde			return (0);
15420676Sbde		}
15520676Sbde	}
15620676Sbde	/*
1571960Sdg	 * Create the lockf structure
1581960Sdg	 */
159111119Simp	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
1601960Sdg	lock->lf_start = start;
1611960Sdg	lock->lf_end = end;
1621960Sdg	lock->lf_id = ap->a_id;
16387211Salfred	/*
16487211Salfred	 * XXX The problem is that VTOI is ufs specific, so it will
16587211Salfred	 * break LOCKF_DEBUG for all other FS's other than UFS because
16687211Salfred	 * it casts the vnode->data ptr to struct inode *.
16787211Salfred	 */
16887211Salfred/*	lock->lf_inode = VTOI(ap->a_vp); */
16987211Salfred	lock->lf_inode = (struct inode *)0;
17022521Sdyson	lock->lf_type = fl->l_type;
1711960Sdg	lock->lf_head = head;
1721960Sdg	lock->lf_next = (struct lockf *)0;
17322521Sdyson	TAILQ_INIT(&lock->lf_blkhd);
1741960Sdg	lock->lf_flags = ap->a_flags;
1751960Sdg	/*
1761960Sdg	 * Do the requested operation.
1771960Sdg	 */
1781960Sdg	switch(ap->a_op) {
1791960Sdg	case F_SETLK:
1801960Sdg		return (lf_setlock(lock));
1811960Sdg
1821960Sdg	case F_UNLCK:
1831960Sdg		error = lf_clearlock(lock);
1841960Sdg		FREE(lock, M_LOCKF);
1851960Sdg		return (error);
1861960Sdg
1871960Sdg	case F_GETLK:
1881960Sdg		error = lf_getlock(lock, fl);
1891960Sdg		FREE(lock, M_LOCKF);
1901960Sdg		return (error);
1918876Srgrimes
1921960Sdg	default:
1931960Sdg		free(lock, M_LOCKF);
1941960Sdg		return (EINVAL);
1951960Sdg	}
1961960Sdg	/* NOTREACHED */
1971960Sdg}
1981960Sdg
1991960Sdg/*
2001960Sdg * Set a byte-range lock.
2011960Sdg */
20212819Sphkstatic int
2031960Sdglf_setlock(lock)
2041960Sdg	register struct lockf *lock;
2051960Sdg{
2061960Sdg	register struct lockf *block;
2071960Sdg	struct lockf **head = lock->lf_head;
2081960Sdg	struct lockf **prev, *overlap, *ltmp;
2091960Sdg	static char lockstr[] = "lockf";
2101960Sdg	int ovcase, priority, needtolink, error;
2111960Sdg
2121960Sdg#ifdef LOCKF_DEBUG
2131960Sdg	if (lockf_debug & 1)
2141960Sdg		lf_print("lf_setlock", lock);
2151960Sdg#endif /* LOCKF_DEBUG */
2161960Sdg
2171960Sdg	/*
2181960Sdg	 * Set the priority
2191960Sdg	 */
2201960Sdg	priority = PLOCK;
2211960Sdg	if (lock->lf_type == F_WRLCK)
2221960Sdg		priority += 4;
2231960Sdg	priority |= PCATCH;
2241960Sdg	/*
2251960Sdg	 * Scan lock list for this file looking for locks that would block us.
2261960Sdg	 */
2273098Sphk	while ((block = lf_getblock(lock))) {
2281960Sdg		/*
2291960Sdg		 * Free the structure and return if nonblocking.
2301960Sdg		 */
2311960Sdg		if ((lock->lf_flags & F_WAIT) == 0) {
2321960Sdg			FREE(lock, M_LOCKF);
2331960Sdg			return (EAGAIN);
2341960Sdg		}
2351960Sdg		/*
2361960Sdg		 * We are blocked. Since flock style locks cover
2371960Sdg		 * the whole file, there is no chance for deadlock.
2381960Sdg		 * For byte-range locks we must check for deadlock.
2391960Sdg		 *
2401960Sdg		 * Deadlock detection is done by looking through the
2411960Sdg		 * wait channels to see if there are any cycles that
2421960Sdg		 * involve us. MAXDEPTH is set just to make sure we
2431960Sdg		 * do not go off into neverland.
2441960Sdg		 */
2451960Sdg		if ((lock->lf_flags & F_POSIX) &&
2461960Sdg		    (block->lf_flags & F_POSIX)) {
2471960Sdg			register struct proc *wproc;
24883366Sjulian			struct thread *td;
2491960Sdg			register struct lockf *waitblock;
2501960Sdg			int i = 0;
2511960Sdg
2521960Sdg			/* The block is waiting on something */
25383366Sjulian			/* XXXKSE this is not complete under threads */
2541960Sdg			wproc = (struct proc *)block->lf_id;
25574727Sjhb			mtx_lock_spin(&sched_lock);
25683366Sjulian			FOREACH_THREAD_IN_PROC(wproc, td) {
25783366Sjulian				while (td->td_wchan &&
25883366Sjulian				    (td->td_wmesg == lockstr) &&
25983366Sjulian				    (i++ < maxlockdepth)) {
26083366Sjulian					waitblock = (struct lockf *)td->td_wchan;
26183366Sjulian					/* Get the owner of the blocking lock */
26283366Sjulian					waitblock = waitblock->lf_next;
26383366Sjulian					if ((waitblock->lf_flags & F_POSIX) == 0)
26483366Sjulian						break;
26583366Sjulian					wproc = (struct proc *)waitblock->lf_id;
26683366Sjulian					if (wproc == (struct proc *)lock->lf_id) {
26783366Sjulian						mtx_unlock_spin(&sched_lock);
26883366Sjulian						free(lock, M_LOCKF);
26983366Sjulian						return (EDEADLK);
27083366Sjulian					}
2711960Sdg				}
2721960Sdg			}
27374727Sjhb			mtx_unlock_spin(&sched_lock);
2741960Sdg		}
2751960Sdg		/*
2761960Sdg		 * For flock type locks, we must first remove
2771960Sdg		 * any shared locks that we hold before we sleep
2781960Sdg		 * waiting for an exclusive lock.
2791960Sdg		 */
2801960Sdg		if ((lock->lf_flags & F_FLOCK) &&
2811960Sdg		    lock->lf_type == F_WRLCK) {
2821960Sdg			lock->lf_type = F_UNLCK;
2831960Sdg			(void) lf_clearlock(lock);
2841960Sdg			lock->lf_type = F_WRLCK;
2851960Sdg		}
2861960Sdg		/*
2871960Sdg		 * Add our lock to the blocked list and sleep until we're free.
2881960Sdg		 * Remember who blocked us (for deadlock detection).
2891960Sdg		 */
2901960Sdg		lock->lf_next = block;
29122521Sdyson		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
2921960Sdg#ifdef LOCKF_DEBUG
2931960Sdg		if (lockf_debug & 1) {
2941960Sdg			lf_print("lf_setlock: blocking on", block);
2951960Sdg			lf_printlist("lf_setlock", block);
2961960Sdg		}
2971960Sdg#endif /* LOCKF_DEBUG */
29898998Salfred		error = tsleep(lock, priority, lockstr, 0);
29948556Sbde		/*
30048556Sbde		 * We may have been awakened by a signal and/or by a
30148556Sbde		 * debugger continuing us (in which cases we must remove
30248556Sbde		 * ourselves from the blocked list) and/or by another
30348556Sbde		 * process releasing a lock (in which case we have
30448556Sbde		 * already been removed from the blocked list and our
30548556Sbde		 * lf_next field set to NOLOCKF).
30648556Sbde		 */
30748556Sbde		if (lock->lf_next) {
30848556Sbde			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
30948556Sbde			lock->lf_next = NOLOCKF;
3101960Sdg		}
31148556Sbde		if (error) {
31248556Sbde			free(lock, M_LOCKF);
31348556Sbde			return (error);
31448556Sbde		}
3151960Sdg	}
3161960Sdg	/*
3171960Sdg	 * No blocks!!  Add the lock.  Note that we will
3181960Sdg	 * downgrade or upgrade any overlapping locks this
3191960Sdg	 * process already owns.
3201960Sdg	 *
3211960Sdg	 * Skip over locks owned by other processes.
3221960Sdg	 * Handle any locks that overlap and are owned by ourselves.
3231960Sdg	 */
3241960Sdg	prev = head;
3251960Sdg	block = *head;
3261960Sdg	needtolink = 1;
3271960Sdg	for (;;) {
3283098Sphk		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
3293098Sphk		if (ovcase)
3301960Sdg			block = overlap->lf_next;
3311960Sdg		/*
3321960Sdg		 * Six cases:
3331960Sdg		 *	0) no overlap
3341960Sdg		 *	1) overlap == lock
3351960Sdg		 *	2) overlap contains lock
3361960Sdg		 *	3) lock contains overlap
3371960Sdg		 *	4) overlap starts before lock
3381960Sdg		 *	5) overlap ends after lock
3391960Sdg		 */
3401960Sdg		switch (ovcase) {
3411960Sdg		case 0: /* no overlap */
3421960Sdg			if (needtolink) {
3431960Sdg				*prev = lock;
3441960Sdg				lock->lf_next = overlap;
3451960Sdg			}
3461960Sdg			break;
3471960Sdg
3481960Sdg		case 1: /* overlap == lock */
3491960Sdg			/*
3501960Sdg			 * If downgrading lock, others may be
3511960Sdg			 * able to acquire it.
3521960Sdg			 */
3531960Sdg			if (lock->lf_type == F_RDLCK &&
3541960Sdg			    overlap->lf_type == F_WRLCK)
3551960Sdg				lf_wakelock(overlap);
3561960Sdg			overlap->lf_type = lock->lf_type;
3571960Sdg			FREE(lock, M_LOCKF);
3581960Sdg			lock = overlap; /* for debug output below */
3591960Sdg			break;
3601960Sdg
3611960Sdg		case 2: /* overlap contains lock */
3621960Sdg			/*
3631960Sdg			 * Check for common starting point and different types.
3641960Sdg			 */
3651960Sdg			if (overlap->lf_type == lock->lf_type) {
3661960Sdg				free(lock, M_LOCKF);
3671960Sdg				lock = overlap; /* for debug output below */
3681960Sdg				break;
3691960Sdg			}
3701960Sdg			if (overlap->lf_start == lock->lf_start) {
3711960Sdg				*prev = lock;
3721960Sdg				lock->lf_next = overlap;
3731960Sdg				overlap->lf_start = lock->lf_end + 1;
3741960Sdg			} else
3751960Sdg				lf_split(overlap, lock);
3761960Sdg			lf_wakelock(overlap);
3771960Sdg			break;
3781960Sdg
3791960Sdg		case 3: /* lock contains overlap */
3801960Sdg			/*
3811960Sdg			 * If downgrading lock, others may be able to
3821960Sdg			 * acquire it, otherwise take the list.
3831960Sdg			 */
3841960Sdg			if (lock->lf_type == F_RDLCK &&
3851960Sdg			    overlap->lf_type == F_WRLCK) {
3861960Sdg				lf_wakelock(overlap);
3871960Sdg			} else {
38853225Sphk				while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
38953225Sphk					ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
39022521Sdyson					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
39122521Sdyson					    lf_block);
39222521Sdyson					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
39322521Sdyson					    ltmp, lf_block);
39446744Sdt					ltmp->lf_next = lock;
39522521Sdyson				}
3961960Sdg			}
3971960Sdg			/*
3981960Sdg			 * Add the new lock if necessary and delete the overlap.
3991960Sdg			 */
4001960Sdg			if (needtolink) {
4011960Sdg				*prev = lock;
4021960Sdg				lock->lf_next = overlap->lf_next;
4031960Sdg				prev = &lock->lf_next;
4041960Sdg				needtolink = 0;
4051960Sdg			} else
4061960Sdg				*prev = overlap->lf_next;
4071960Sdg			free(overlap, M_LOCKF);
4081960Sdg			continue;
4091960Sdg
4101960Sdg		case 4: /* overlap starts before lock */
4111960Sdg			/*
4121960Sdg			 * Add lock after overlap on the list.
4131960Sdg			 */
4141960Sdg			lock->lf_next = overlap->lf_next;
4151960Sdg			overlap->lf_next = lock;
4161960Sdg			overlap->lf_end = lock->lf_start - 1;
4171960Sdg			prev = &lock->lf_next;
4181960Sdg			lf_wakelock(overlap);
4191960Sdg			needtolink = 0;
4201960Sdg			continue;
4211960Sdg
4221960Sdg		case 5: /* overlap ends after lock */
4231960Sdg			/*
4241960Sdg			 * Add the new lock before overlap.
4251960Sdg			 */
4261960Sdg			if (needtolink) {
4271960Sdg				*prev = lock;
4281960Sdg				lock->lf_next = overlap;
4291960Sdg			}
4301960Sdg			overlap->lf_start = lock->lf_end + 1;
4311960Sdg			lf_wakelock(overlap);
4321960Sdg			break;
4331960Sdg		}
4341960Sdg		break;
4351960Sdg	}
4361960Sdg#ifdef LOCKF_DEBUG
4371960Sdg	if (lockf_debug & 1) {
4381960Sdg		lf_print("lf_setlock: got the lock", lock);
4391960Sdg		lf_printlist("lf_setlock", lock);
4401960Sdg	}
4411960Sdg#endif /* LOCKF_DEBUG */
4421960Sdg	return (0);
4431960Sdg}
4441960Sdg
4451960Sdg/*
4461960Sdg * Remove a byte-range lock on an inode.
4471960Sdg *
4481960Sdg * Generally, find the lock (or an overlap to that lock)
4491960Sdg * and remove it (or shrink it), then wakeup anyone we can.
4501960Sdg */
45112819Sphkstatic int
4521960Sdglf_clearlock(unlock)
4531960Sdg	register struct lockf *unlock;
4541960Sdg{
4551960Sdg	struct lockf **head = unlock->lf_head;
4561960Sdg	register struct lockf *lf = *head;
4571960Sdg	struct lockf *overlap, **prev;
4581960Sdg	int ovcase;
4591960Sdg
4601960Sdg	if (lf == NOLOCKF)
4611960Sdg		return (0);
4621960Sdg#ifdef LOCKF_DEBUG
4631960Sdg	if (unlock->lf_type != F_UNLCK)
4641960Sdg		panic("lf_clearlock: bad type");
4651960Sdg	if (lockf_debug & 1)
4661960Sdg		lf_print("lf_clearlock", unlock);
4671960Sdg#endif /* LOCKF_DEBUG */
4681960Sdg	prev = head;
4693098Sphk	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
4701960Sdg		/*
4711960Sdg		 * Wakeup the list of locks to be retried.
4721960Sdg		 */
4731960Sdg		lf_wakelock(overlap);
4741960Sdg
4751960Sdg		switch (ovcase) {
4761960Sdg
4771960Sdg		case 1: /* overlap == lock */
4781960Sdg			*prev = overlap->lf_next;
4791960Sdg			FREE(overlap, M_LOCKF);
4801960Sdg			break;
4811960Sdg
4821960Sdg		case 2: /* overlap contains lock: split it */
4831960Sdg			if (overlap->lf_start == unlock->lf_start) {
4841960Sdg				overlap->lf_start = unlock->lf_end + 1;
4851960Sdg				break;
4861960Sdg			}
4871960Sdg			lf_split(overlap, unlock);
4881960Sdg			overlap->lf_next = unlock->lf_next;
4891960Sdg			break;
4901960Sdg
4911960Sdg		case 3: /* lock contains overlap */
4921960Sdg			*prev = overlap->lf_next;
4931960Sdg			lf = overlap->lf_next;
4941960Sdg			free(overlap, M_LOCKF);
4951960Sdg			continue;
4961960Sdg
4971960Sdg		case 4: /* overlap starts before lock */
4981960Sdg			overlap->lf_end = unlock->lf_start - 1;
4991960Sdg			prev = &overlap->lf_next;
5001960Sdg			lf = overlap->lf_next;
5011960Sdg			continue;
5021960Sdg
5031960Sdg		case 5: /* overlap ends after lock */
5041960Sdg			overlap->lf_start = unlock->lf_end + 1;
5051960Sdg			break;
5061960Sdg		}
5071960Sdg		break;
5081960Sdg	}
5091960Sdg#ifdef LOCKF_DEBUG
5101960Sdg	if (lockf_debug & 1)
5111960Sdg		lf_printlist("lf_clearlock", unlock);
5121960Sdg#endif /* LOCKF_DEBUG */
5131960Sdg	return (0);
5141960Sdg}
5151960Sdg
5161960Sdg/*
5171960Sdg * Check whether there is a blocking lock,
5181960Sdg * and if so return its process identifier.
5191960Sdg */
52012819Sphkstatic int
5211960Sdglf_getlock(lock, fl)
5221960Sdg	register struct lockf *lock;
5231960Sdg	register struct flock *fl;
5241960Sdg{
5251960Sdg	register struct lockf *block;
5261960Sdg
5271960Sdg#ifdef LOCKF_DEBUG
5281960Sdg	if (lockf_debug & 1)
5291960Sdg		lf_print("lf_getlock", lock);
5301960Sdg#endif /* LOCKF_DEBUG */
5311960Sdg
5323098Sphk	if ((block = lf_getblock(lock))) {
5331960Sdg		fl->l_type = block->lf_type;
5341960Sdg		fl->l_whence = SEEK_SET;
5351960Sdg		fl->l_start = block->lf_start;
5361960Sdg		if (block->lf_end == -1)
5371960Sdg			fl->l_len = 0;
5381960Sdg		else
5391960Sdg			fl->l_len = block->lf_end - block->lf_start + 1;
5401960Sdg		if (block->lf_flags & F_POSIX)
5411960Sdg			fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
5421960Sdg		else
5431960Sdg			fl->l_pid = -1;
5441960Sdg	} else {
5451960Sdg		fl->l_type = F_UNLCK;
5461960Sdg	}
5471960Sdg	return (0);
5481960Sdg}
5491960Sdg
5501960Sdg/*
5511960Sdg * Walk the list of locks for an inode and
5521960Sdg * return the first blocking lock.
5531960Sdg */
55412819Sphkstatic struct lockf *
5551960Sdglf_getblock(lock)
5561960Sdg	register struct lockf *lock;
5571960Sdg{
5581960Sdg	struct lockf **prev, *overlap, *lf = *(lock->lf_head);
5591960Sdg	int ovcase;
5601960Sdg
5611960Sdg	prev = lock->lf_head;
5623098Sphk	while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
5631960Sdg		/*
5641960Sdg		 * We've found an overlap, see if it blocks us
5651960Sdg		 */
5661960Sdg		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
5671960Sdg			return (overlap);
5681960Sdg		/*
5691960Sdg		 * Nope, point to the next one on the list and
5701960Sdg		 * see if it blocks us
5711960Sdg		 */
5721960Sdg		lf = overlap->lf_next;
5731960Sdg	}
5741960Sdg	return (NOLOCKF);
5751960Sdg}
5761960Sdg
5771960Sdg/*
5781960Sdg * Walk the list of locks for an inode to
5791960Sdg * find an overlapping lock (if any).
5801960Sdg *
5811960Sdg * NOTE: this returns only the FIRST overlapping lock.  There
5821960Sdg *	 may be more than one.
5831960Sdg */
58412819Sphkstatic int
5851960Sdglf_findoverlap(lf, lock, type, prev, overlap)
5861960Sdg	register struct lockf *lf;
5871960Sdg	struct lockf *lock;
5881960Sdg	int type;
5891960Sdg	struct lockf ***prev;
5901960Sdg	struct lockf **overlap;
5911960Sdg{
5921960Sdg	off_t start, end;
5931960Sdg
5941960Sdg	*overlap = lf;
5951960Sdg	if (lf == NOLOCKF)
5961960Sdg		return (0);
5971960Sdg#ifdef LOCKF_DEBUG
5981960Sdg	if (lockf_debug & 2)
5991960Sdg		lf_print("lf_findoverlap: looking for overlap in", lock);
6001960Sdg#endif /* LOCKF_DEBUG */
6011960Sdg	start = lock->lf_start;
6021960Sdg	end = lock->lf_end;
6031960Sdg	while (lf != NOLOCKF) {
6041960Sdg		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
6051960Sdg		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
6061960Sdg			*prev = &lf->lf_next;
6071960Sdg			*overlap = lf = lf->lf_next;
6081960Sdg			continue;
6091960Sdg		}
6101960Sdg#ifdef LOCKF_DEBUG
6111960Sdg		if (lockf_debug & 2)
6121960Sdg			lf_print("\tchecking", lf);
6131960Sdg#endif /* LOCKF_DEBUG */
6141960Sdg		/*
6151960Sdg		 * OK, check for overlap
6161960Sdg		 *
6171960Sdg		 * Six cases:
6181960Sdg		 *	0) no overlap
6191960Sdg		 *	1) overlap == lock
6201960Sdg		 *	2) overlap contains lock
6211960Sdg		 *	3) lock contains overlap
6221960Sdg		 *	4) overlap starts before lock
6231960Sdg		 *	5) overlap ends after lock
6241960Sdg		 */
6251960Sdg		if ((lf->lf_end != -1 && start > lf->lf_end) ||
6261960Sdg		    (end != -1 && lf->lf_start > end)) {
6271960Sdg			/* Case 0 */
6281960Sdg#ifdef LOCKF_DEBUG
6291960Sdg			if (lockf_debug & 2)
6301960Sdg				printf("no overlap\n");
6311960Sdg#endif /* LOCKF_DEBUG */
6321960Sdg			if ((type & SELF) && end != -1 && lf->lf_start > end)
6331960Sdg				return (0);
6341960Sdg			*prev = &lf->lf_next;
6351960Sdg			*overlap = lf = lf->lf_next;
6361960Sdg			continue;
6371960Sdg		}
6381960Sdg		if ((lf->lf_start == start) && (lf->lf_end == end)) {
6391960Sdg			/* Case 1 */
6401960Sdg#ifdef LOCKF_DEBUG
6411960Sdg			if (lockf_debug & 2)
6421960Sdg				printf("overlap == lock\n");
6431960Sdg#endif /* LOCKF_DEBUG */
6441960Sdg			return (1);
6451960Sdg		}
6461960Sdg		if ((lf->lf_start <= start) &&
6471960Sdg		    (end != -1) &&
6481960Sdg		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
6491960Sdg			/* Case 2 */
6501960Sdg#ifdef LOCKF_DEBUG
6511960Sdg			if (lockf_debug & 2)
6521960Sdg				printf("overlap contains lock\n");
6531960Sdg#endif /* LOCKF_DEBUG */
6541960Sdg			return (2);
6551960Sdg		}
6561960Sdg		if (start <= lf->lf_start &&
6571960Sdg		           (end == -1 ||
6581960Sdg			   (lf->lf_end != -1 && end >= lf->lf_end))) {
6591960Sdg			/* Case 3 */
6601960Sdg#ifdef LOCKF_DEBUG
6611960Sdg			if (lockf_debug & 2)
6621960Sdg				printf("lock contains overlap\n");
6631960Sdg#endif /* LOCKF_DEBUG */
6641960Sdg			return (3);
6651960Sdg		}
6661960Sdg		if ((lf->lf_start < start) &&
6671960Sdg			((lf->lf_end >= start) || (lf->lf_end == -1))) {
6681960Sdg			/* Case 4 */
6691960Sdg#ifdef LOCKF_DEBUG
6701960Sdg			if (lockf_debug & 2)
6711960Sdg				printf("overlap starts before lock\n");
6721960Sdg#endif /* LOCKF_DEBUG */
6731960Sdg			return (4);
6741960Sdg		}
6751960Sdg		if ((lf->lf_start > start) &&
6761960Sdg			(end != -1) &&
6771960Sdg			((lf->lf_end > end) || (lf->lf_end == -1))) {
6781960Sdg			/* Case 5 */
6791960Sdg#ifdef LOCKF_DEBUG
6801960Sdg			if (lockf_debug & 2)
6811960Sdg				printf("overlap ends after lock\n");
6821960Sdg#endif /* LOCKF_DEBUG */
6831960Sdg			return (5);
6841960Sdg		}
6851960Sdg		panic("lf_findoverlap: default");
6861960Sdg	}
6871960Sdg	return (0);
6881960Sdg}
6891960Sdg
6901960Sdg/*
6911960Sdg * Split a lock and a contained region into
6921960Sdg * two or three locks as necessary.
6931960Sdg */
69412819Sphkstatic void
6951960Sdglf_split(lock1, lock2)
6961960Sdg	register struct lockf *lock1;
6971960Sdg	register struct lockf *lock2;
6981960Sdg{
6991960Sdg	register struct lockf *splitlock;
7001960Sdg
7011960Sdg#ifdef LOCKF_DEBUG
7021960Sdg	if (lockf_debug & 2) {
7031960Sdg		lf_print("lf_split", lock1);
7041960Sdg		lf_print("splitting from", lock2);
7051960Sdg	}
7061960Sdg#endif /* LOCKF_DEBUG */
7071960Sdg	/*
7081960Sdg	 * Check to see if spliting into only two pieces.
7091960Sdg	 */
7101960Sdg	if (lock1->lf_start == lock2->lf_start) {
7111960Sdg		lock1->lf_start = lock2->lf_end + 1;
7121960Sdg		lock2->lf_next = lock1;
7131960Sdg		return;
7141960Sdg	}
7151960Sdg	if (lock1->lf_end == lock2->lf_end) {
7161960Sdg		lock1->lf_end = lock2->lf_start - 1;
7171960Sdg		lock2->lf_next = lock1->lf_next;
7181960Sdg		lock1->lf_next = lock2;
7191960Sdg		return;
7201960Sdg	}
7211960Sdg	/*
7221960Sdg	 * Make a new lock consisting of the last part of
7231960Sdg	 * the encompassing lock
7241960Sdg	 */
725111119Simp	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
72698998Salfred	bcopy(lock1, splitlock, sizeof *splitlock);
7271960Sdg	splitlock->lf_start = lock2->lf_end + 1;
72822521Sdyson	TAILQ_INIT(&splitlock->lf_blkhd);
7291960Sdg	lock1->lf_end = lock2->lf_start - 1;
7301960Sdg	/*
7311960Sdg	 * OK, now link it in
7321960Sdg	 */
7331960Sdg	splitlock->lf_next = lock1->lf_next;
7341960Sdg	lock2->lf_next = splitlock;
7351960Sdg	lock1->lf_next = lock2;
7361960Sdg}
7371960Sdg
7381960Sdg/*
7391960Sdg * Wakeup a blocklist
7401960Sdg */
74112819Sphkstatic void
7421960Sdglf_wakelock(listhead)
7431960Sdg	struct lockf *listhead;
7441960Sdg{
74522521Sdyson	register struct lockf *wakelock;
7461960Sdg
74753225Sphk	while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
74853225Sphk		wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
74922521Sdyson		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
7501960Sdg		wakelock->lf_next = NOLOCKF;
7511960Sdg#ifdef LOCKF_DEBUG
7521960Sdg		if (lockf_debug & 2)
7531960Sdg			lf_print("lf_wakelock: awakening", wakelock);
7541960Sdg#endif /* LOCKF_DEBUG */
75598998Salfred		wakeup(wakelock);
75622521Sdyson	}
7571960Sdg}
7581960Sdg
7591960Sdg#ifdef LOCKF_DEBUG
7601960Sdg/*
7611960Sdg * Print out a lock.
7621960Sdg */
76322592Sbdevoid
7641960Sdglf_print(tag, lock)
7651960Sdg	char *tag;
7661960Sdg	register struct lockf *lock;
7671960Sdg{
7688876Srgrimes
76937951Sbde	printf("%s: lock %p for ", tag, (void *)lock);
7701960Sdg	if (lock->lf_flags & F_POSIX)
77137951Sbde		printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
7721960Sdg	else
77337951Sbde		printf("id %p", (void *)lock->lf_id);
77487211Salfred	if (lock->lf_inode != (struct inode *)0)
775106584Smux		printf(" in ino %ju on dev <%d, %d>, %s, start %jd, end %jd",
776106584Smux		    (uintmax_t)lock->lf_inode->i_number,
77787211Salfred		    major(lock->lf_inode->i_dev),
77887211Salfred		    minor(lock->lf_inode->i_dev),
77987211Salfred		    lock->lf_type == F_RDLCK ? "shared" :
78087211Salfred		    lock->lf_type == F_WRLCK ? "exclusive" :
781106584Smux		    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
782106584Smux		    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
78387211Salfred	else
784106584Smux		printf(" %s, start %jd, end %jd",
78587211Salfred		    lock->lf_type == F_RDLCK ? "shared" :
78687211Salfred		    lock->lf_type == F_WRLCK ? "exclusive" :
787106584Smux		    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
788106584Smux		    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
78953225Sphk	if (!TAILQ_EMPTY(&lock->lf_blkhd))
79053225Sphk		printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
7911960Sdg	else
7921960Sdg		printf("\n");
7931960Sdg}
7941960Sdg
79522592Sbdevoid
7961960Sdglf_printlist(tag, lock)
7971960Sdg	char *tag;
7981960Sdg	struct lockf *lock;
7991960Sdg{
80022521Sdyson	register struct lockf *lf, *blk;
8011960Sdg
80287211Salfred	if (lock->lf_inode == (struct inode *)0)
80387211Salfred		return;
80487211Salfred
805106584Smux	printf("%s: Lock list for ino %ju on dev <%d, %d>:\n",
806106584Smux	    tag, (uintmax_t)lock->lf_inode->i_number,
80737951Sbde	    major(lock->lf_inode->i_dev),
80837951Sbde	    minor(lock->lf_inode->i_dev));
8091960Sdg	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
81037951Sbde		printf("\tlock %p for ",(void *)lf);
8111960Sdg		if (lf->lf_flags & F_POSIX)
81237951Sbde			printf("proc %ld",
81337951Sbde			    (long)((struct proc *)lf->lf_id)->p_pid);
8141960Sdg		else
81537951Sbde			printf("id %p", (void *)lf->lf_id);
816106584Smux		printf(", %s, start %jd, end %jd",
81737951Sbde		    lf->lf_type == F_RDLCK ? "shared" :
81837951Sbde		    lf->lf_type == F_WRLCK ? "exclusive" :
81937951Sbde		    lf->lf_type == F_UNLCK ? "unlock" :
820106584Smux		    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
82153225Sphk		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
82237951Sbde			printf("\n\t\tlock request %p for ", (void *)blk);
82322521Sdyson			if (blk->lf_flags & F_POSIX)
82437951Sbde				printf("proc %ld",
82537951Sbde				    (long)((struct proc *)blk->lf_id)->p_pid);
82622521Sdyson			else
82737951Sbde				printf("id %p", (void *)blk->lf_id);
828106584Smux			printf(", %s, start %jd, end %jd",
82937951Sbde			    blk->lf_type == F_RDLCK ? "shared" :
83037951Sbde			    blk->lf_type == F_WRLCK ? "exclusive" :
83137951Sbde			    blk->lf_type == F_UNLCK ? "unlock" :
832106584Smux			    "unknown", (intmax_t)blk->lf_start,
833106584Smux			    (intmax_t)blk->lf_end);
83453225Sphk			if (!TAILQ_EMPTY(&blk->lf_blkhd))
83522521Sdyson				panic("lf_printlist: bad list");
83622521Sdyson		}
83722521Sdyson		printf("\n");
8381960Sdg	}
8391960Sdg}
8401960Sdg#endif /* LOCKF_DEBUG */
841