vfs_lockf.c revision 1.61
/*	$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/kauth.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */
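
/*
 * For example (hypothetical ranges, for illustration only): with three
 * locks held on a vnode and one blocked request, the lists look like:
 *
 *	*head -> [0..9] -> [10..19] -> [50..EOF] -> NULL    (lf_next chain)
 *	                      |
 *	                   lf_blkhd
 *	                      |
 *	               pending [15..30], whose own lf_next points
 *	               back at the [10..19] lock it is waiting on
 */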

TAILQ_HEAD(locklist, lockf);

struct lockf {
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block; /* A request waiting for a lock */
	uid_t	lf_uid;		 /* User ID responsible */
	kcondvar_t lf_cv;	 /* Signalling */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
    &pool_allocator_nointr, IPL_NONE);

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;
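
/*
 * For example (illustrative numbers): with maxlocksperuid = 1024, a
 * non-root uid holding 1000 locks may still allocate; once it holds
 * more than 1024, lf_alloc() with allowfail == 1 fails, while
 * lf_alloc() with allowfail == 2 (used for unlocks) keeps succeeding
 * until the count exceeds 2048.
 */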
"unlock" : 165 "unknown", lf->lf_start, lf->lf_end); 166 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 167 if (blk->lf_flags & F_POSIX) 168 printf("proc %d", 169 ((struct proc *)blk->lf_id)->p_pid); 170 else 171 printf("file %p", (struct file *)blk->lf_id); 172 printf(", %s, start %qx, end %qx", 173 blk->lf_type == F_RDLCK ? "shared" : 174 blk->lf_type == F_WRLCK ? "exclusive" : 175 blk->lf_type == F_UNLCK ? "unlock" : 176 "unknown", blk->lf_start, blk->lf_end); 177 if (TAILQ_FIRST(&blk->lf_blkhd)) 178 panic("lf_printlist: bad list"); 179 } 180 printf("\n"); 181 } 182} 183#endif /* LOCKF_DEBUG */ 184 185/* 186 * 3 options for allowfail. 187 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 188 */ 189static struct lockf * 190lf_alloc(uid_t uid, int allowfail) 191{ 192 struct uidinfo *uip; 193 struct lockf *lock; 194 195 uip = uid_find(uid); 196 mutex_enter(&uip->ui_lock); 197 if (uid && allowfail && uip->ui_lockcnt > 198 (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) { 199 mutex_exit(&uip->ui_lock); 200 return NULL; 201 } 202 uip->ui_lockcnt++; 203 mutex_exit(&uip->ui_lock); 204 lock = pool_get(&lockfpool, PR_WAITOK); 205 lock->lf_uid = uid; 206 cv_init(&lock->lf_cv, "lockf"); 207 return lock; 208} 209 210static void 211lf_free(struct lockf *lock) 212{ 213 struct uidinfo *uip; 214 215 uip = uid_find(lock->lf_uid); 216 mutex_enter(&uip->ui_lock); 217 uip->ui_lockcnt--; 218 mutex_exit(&uip->ui_lock); 219 cv_destroy(&lock->lf_cv); 220 pool_put(&lockfpool, lock); 221} 222 223/* 224 * Walk the list of locks for an inode to 225 * find an overlapping lock (if any). 226 * 227 * NOTE: this returns only the FIRST overlapping lock. There 228 * may be more than one. 229 */ 230static int 231lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 232 struct lockf ***prev, struct lockf **overlap) 233{ 234 off_t start, end; 235 236 *overlap = lf; 237 if (lf == NULL) 238 return 0; 239#ifdef LOCKF_DEBUG 240 if (lockf_debug & 2) 241 lf_print("lf_findoverlap: looking for overlap in", lock); 242#endif /* LOCKF_DEBUG */ 243 start = lock->lf_start; 244 end = lock->lf_end; 245 while (lf != NULL) { 246 if (((type == SELF) && lf->lf_id != lock->lf_id) || 247 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 248 *prev = &lf->lf_next; 249 *overlap = lf = lf->lf_next; 250 continue; 251 } 252#ifdef LOCKF_DEBUG 253 if (lockf_debug & 2) 254 lf_print("\tchecking", lf); 255#endif /* LOCKF_DEBUG */ 256 /* 257 * OK, check for overlap 258 * 259 * Six cases: 260 * 0) no overlap 261 * 1) overlap == lock 262 * 2) overlap contains lock 263 * 3) lock contains overlap 264 * 4) overlap starts before lock 265 * 5) overlap ends after lock 266 */ 267 if ((lf->lf_end != -1 && start > lf->lf_end) || 268 (end != -1 && lf->lf_start > end)) { 269 /* Case 0 */ 270#ifdef LOCKF_DEBUG 271 if (lockf_debug & 2) 272 printf("no overlap\n"); 273#endif /* LOCKF_DEBUG */ 274 if ((type & SELF) && end != -1 && lf->lf_start > end) 275 return 0; 276 *prev = &lf->lf_next; 277 *overlap = lf = lf->lf_next; 278 continue; 279 } 280 if ((lf->lf_start == start) && (lf->lf_end == end)) { 281 /* Case 1 */ 282#ifdef LOCKF_DEBUG 283 if (lockf_debug & 2) 284 printf("overlap == lock\n"); 285#endif /* LOCKF_DEBUG */ 286 return 1; 287 } 288 if ((lf->lf_start <= start) && 289 (end != -1) && 290 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 291 /* Case 2 */ 292#ifdef LOCKF_DEBUG 293 if (lockf_debug & 2) 294 printf("overlap contains lock\n"); 295#endif /* LOCKF_DEBUG */ 296 return 2; 297 } 298 if (start <= lf->lf_start && 299 (end == 
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NULL) {
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	memcpy(splitlock, lock1, sizeof(*splitlock));
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}
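
/*
 * For example (hypothetical ranges): splitting held lock1 [0..99]
 * around contained region lock2 [40..59] leaves three pieces:
 * lock1 shrunk to [0..39], lock2 [40..59] linked after it, and a
 * new [60..99] lock built from *sparelock.  If the two regions share
 * a start or an end, only two pieces result and *sparelock is unused.
 */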

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * XXX Here we used to set the sleep priority so that writers
	 * took priority.  That's of dubious use, and is not possible
	 * with condition variables.  Need to find a better way to ensure
	 * fairness.
	 */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked.  Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
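		/*
		 * For example (hypothetical ranges): process A holds
		 * [0..9] and sleeps waiting for B's lock on [10..19].
		 * If B (curproc) now requests [0..9], the walk below
		 * follows B's blocker to A, then A's wait channel back
		 * to B's own held lock; the owner equals curproc, so
		 * we return EDEADLK.
		 */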
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(&p->p_smutex);
				if (p->p_nlwps > 1) {
					mutex_exit(&p->p_smutex);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(&p->p_smutex);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(&p->p_smutex);
				if (waitblock == NULL) {
					/*
					 * This LWP just woke up but has
					 * not yet returned from ltsleep.
					 */
					break;
				}
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awakened by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
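	/*
	 * For example (hypothetical ranges): if we hold a shared lock
	 * on [0..99] and now request an exclusive lock on [40..59],
	 * case 2 below splits the shared lock using *sparelock and
	 * upgrades the middle piece, leaving shared [0..39],
	 * exclusive [40..59] and shared [60..99].
	 */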
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct lwp *l = curlwp;
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = &ap->a_vp->v_interlock;
	off_t start, end;
	int error = 0;

	/*
	 * Convert the flock structure into a start and end.
	 */
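	/*
	 * For example (illustrative values): l_whence == SEEK_SET with
	 * l_start == 100 and l_len == 50 describes bytes [100..149];
	 * l_len == 0 means "through EOF" and becomes end == -1 below.
	 */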
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(kauth_cred_geteuid(l->l_cred), 0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	lock = lf_alloc(kauth_cred_geteuid(l->l_cred),
	    ap->a_op != F_UNLCK ? 1 : 2);
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	if (fl->l_len == 0)
		end = -1;
	else
		end = start + fl->l_len - 1;
	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	/* XXX NJWLWP
	 * I don't want to make the entire VFS universe use LWPs, because
	 * they don't need them, for the most part.  This is an exception,
	 * and a kluge.
	 */

	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}
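
/*
 * Usage sketch (illustrative, not part of this file): a filesystem's
 * VOP_ADVLOCK implementation typically just forwards to lf_advlock(),
 * passing its per-inode lock list head and the current file size,
 * along the lines of:
 *
 *	int
 *	ufs_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct inode *ip = VTOI(ap->a_vp);
 *
 *		return lf_advlock(ap, &ip->i_lockf, ip->i_size);
 *	}
 */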