/*	$NetBSD: vfs_lockf.c,v 1.49 2006/05/14 21:15:12 elad Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.49 2006/05/14 21:15:12 elad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/kauth.h>

POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
    &pool_allocator_nointr);

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define NOLOCKF (struct lockf *)0
#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "caddr_t id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %qx, end %qx",
		lock->lf_type == F_RDLCK ? "shared" :
		lock->lf_type == F_WRLCK ? "exclusive" :
		lock->lf_type == F_UNLCK ? "unlock" :
		"unknown", lock->lf_start, lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %qx, end %qx",
			lf->lf_type == F_RDLCK ? "shared" :
			lf->lf_type == F_WRLCK ? "exclusive" :
			lf->lf_type == F_UNLCK ? "unlock" :
			"unknown", lf->lf_start, lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			if (blk->lf_flags & F_POSIX)
				printf("proc %d",
				    ((struct proc *)blk->lf_id)->p_pid);
			else
				printf("file %p", (struct file *)blk->lf_id);
			printf(", %s, start %qx, end %qx",
				blk->lf_type == F_RDLCK ? "shared" :
				blk->lf_type == F_WRLCK ? "exclusive" :
				blk->lf_type == F_UNLCK ? "unlock" :
				"unknown", blk->lf_start, blk->lf_end);
			if (TAILQ_FIRST(&blk->lf_blkhd))
				 panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */

/*
 * 3 options for allowfail.
 * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
 */
static struct lockf *
lf_alloc(uid_t uid, int allowfail)
{
	struct uidinfo *uip;
	struct lockf *lock;
	int s;

	uip = uid_find(uid);
	UILOCK(uip, s);
	if (uid && allowfail && uip->ui_lockcnt >
	    (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
		UIUNLOCK(uip, s);
		return NULL;
	}
	uip->ui_lockcnt++;
	UIUNLOCK(uip, s);
	lock = pool_get(&lockfpool, PR_WAITOK);
	lock->lf_uid = uid;
	return lock;
}
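
/*
 * Illustrative example of the allowfail thresholds above, with the
 * default maxlocksperuid = 1024 and a non-root uid:
 *
 *	lf_alloc(uid, 1) == NULL	once ui_lockcnt exceeds 1024
 *	lf_alloc(uid, 2) == NULL	once ui_lockcnt exceeds 2048
 *	lf_alloc(uid, 0)		never refused (used for splits)
 *
 * uid 0 is never refused, whatever allowfail is.
 */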

static void
lf_free(struct lockf *lock)
{
	struct uidinfo *uip;
	int s;

	uip = uid_find(lock->lf_uid);
	UILOCK(uip, s);
	uip->ui_lockcnt--;
	UIUNLOCK(uip, s);
	pool_put(&lockfpool, lock);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
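/*
 * Illustrative example of the six return values enumerated in the
 * body below, for a new lock covering bytes [20, 40] checked against
 * an existing lock lf:
 *
 *	0: lf = [50, 60]	no overlap
 *	1: lf = [20, 40]	overlap == lock
 *	2: lf = [10, 50]	overlap contains lock
 *	3: lf = [25, 35]	lock contains overlap
 *	4: lf = [10, 30]	overlap starts before lock
 *	5: lf = [30, 50]	overlap ends after lock
 *
 * An lf_end of -1 means "to end of file" and compares as larger than
 * any finite offset.
 */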
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NOLOCKF)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NOLOCKF) {
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		           (end == -1 ||
			   (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
			((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
			(end != -1) &&
			((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	memcpy(splitlock, lock1, sizeof(*splitlock));
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}
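
/*
 * Worked example for lf_split(): lock1 covers [0, 100] and lock2
 * covers [40, 60], strictly inside it, so neither two-piece shortcut
 * applies and the spare lock becomes the tail piece:
 *
 *	before:	lock1 = [0, 100]
 *	after:	lock1 = [0, 39] -> lock2 = [40, 60] -> splitlock = [61, 100]
 *
 * This three-way split is why callers such as lf_advlock() allocate a
 * spare lock up front; by the time we get here the spare must exist.
 */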

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		wakeup(wakelock);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NOLOCKF)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
					&prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NOLOCKF;
}
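
/*
 * Compatibility at a glance (illustrative note): the test above means
 * an overlapping lock held by someone else conflicts only if at least
 * one of the two locks is exclusive.  For example, two processes may
 * both hold F_RDLCK over bytes [0, 10], but an F_WRLCK request over
 * [5, 15] from a third process gets that read lock back as its blocker.
 */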

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    struct simplelock *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
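		/*
		 * Illustrative cycle: process A holds a POSIX lock on
		 * bytes [0, 9] and is sleeping here waiting for B's
		 * lock on [10, 19]; if B now requests [0, 9], walking
		 * B's dependency chain leads back to B itself, so the
		 * request is refused with EDEADLK instead of sleeping
		 * forever.
		 */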
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;

			/*
			 * The block is waiting on something.  lf_lwp will be
			 * 0 once the lock is granted, so we terminate the
			 * loop if we find this.
			 */
			wlwp = block->lf_lwp;
			while (wlwp && (i++ < maxlockdepth)) {
				waitblock = wlwp->l_wchan;
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				wlwp = waitblock->lf_lwp;
				if (wlwp == lock->lf_lwp) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = ltsleep(lock, priority, lockstr, 0, interlock);

		/*
		 * We may have been awakened by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NOLOCKF).
		 */
		if (lock->lf_next != NOLOCKF) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NOLOCKF;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	lock->lf_lwp = 0;
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}
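
/*
 * Upgrade/downgrade example (illustrative): if this process already
 * holds an F_RDLCK on [0, 100] and now requests an F_WRLCK on the same
 * range, case 1 above simply flips the existing lock's type in place.
 * Going the other way, a write lock downgraded to a read lock wakes
 * its block list, since readers that were waiting may now proceed.
 */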

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
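/*
 * For context, an illustrative sketch of how a request typically
 * reaches this function from userland: the fcntl(2) byte-range locking
 * commands hand a struct flock to the kernel, and a file system's
 * VOP_ADVLOCK implementation passes it here.
 *
 *	struct flock fl;
 *
 *	fl.l_type = F_WRLCK;		exclusive lock
 *	fl.l_whence = SEEK_SET;		l_start is an absolute offset
 *	fl.l_start = 0;
 *	fl.l_len = 100;			bytes [0, 99]; 0 would mean to EOF
 *	if (fcntl(fd, F_SETLKW, &fl) == -1)
 *		err(1, "fcntl");	may fail with EDEADLK, etc.
 *
 * F_SETLK is the non-blocking variant (EAGAIN instead of sleeping) and
 * F_GETLK reports the first conflicting lock, as lf_getlock() does.
 */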
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct proc *p = curproc;
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	struct simplelock *interlock = &ap->a_vp->v_interlock;
	off_t start, end;
	int error = 0;

	/*
	 * Convert the flock structure into a start and end.
	 */
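	/*
	 * Worked example (illustrative): with a file size of 1000, a
	 * request of l_whence = SEEK_END, l_start = -10, l_len = 10
	 * becomes start = 990 and, once l_len is applied below,
	 * end = 999.  A zero l_len instead yields end = -1, meaning
	 * "to end of file"; SEEK_CUR requests arrive with the file
	 * offset already folded into l_start by the caller.
	 */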
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the simple lock.
	 * We need two locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX for F_UNLCK case, we can re-use lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(kauth_cred_geteuid(p->p_cred), 0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	lock = lf_alloc(kauth_cred_geteuid(p->p_cred), ap->a_op != F_UNLCK ? 1 : 2);
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	simple_lock(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	if (fl->l_len == 0)
		end = -1;
	else
		end = start + fl->l_len - 1;
	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	/* XXX NJWLWP
	 * I don't want to make the entire VFS universe use LWPs, because
	 * they don't need them, for the most part. This is an exception,
	 * and a kluge.
	 */

	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = (struct proc *)ap->a_id;
	lock->lf_lwp = curlwp;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	simple_unlock(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}