vfs_lockf.c revision 1.51
1/* $NetBSD: vfs_lockf.c,v 1.51 2006/05/20 12:04:21 yamt Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Scooter Morris at Genentech Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94 35 */ 36 37#include <sys/cdefs.h> 38__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.51 2006/05/20 12:04:21 yamt Exp $"); 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/file.h> 44#include <sys/proc.h> 45#include <sys/vnode.h> 46#include <sys/pool.h> 47#include <sys/fcntl.h> 48#include <sys/lockf.h> 49#include <sys/kauth.h> 50 51/* 52 * The lockf structure is a kernel structure which contains the information 53 * associated with a byte range lock. The lockf structures are linked into 54 * the inode structure. Locks are sorted by the starting byte of the lock for 55 * efficiency. 56 * 57 * lf_next is used for two purposes, depending on whether the lock is 58 * being held, or is in conflict with an existing lock. If this lock 59 * is held, it indicates the next lock on the same vnode. 60 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block 61 * must be queued on the lf_blkhd TAILQ of lock->lf_next. 62 */ 63 64TAILQ_HEAD(locklist, lockf); 65 66struct lockf { 67 short lf_flags; /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */ 68 short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ 69 off_t lf_start; /* The byte # of the start of the lock */ 70 off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/ 71 void *lf_id; /* process or file description holding lock */ 72 struct lwp *lf_lwp; /* LWP waiting for lock */ 73 struct lockf **lf_head; /* Back pointer to the head of lockf list */ 74 struct lockf *lf_next; /* Next lock on this vnode, or blocking lock */ 75 struct locklist lf_blkhd; /* List of requests blocked on this lock */ 76 TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ 77 uid_t lf_uid; /* User ID responsible */ 78}; 79 80/* Maximum length of sleep chains to traverse to try and detect deadlock. */ 81#define MAXDEPTH 50 82 83static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl", 84 &pool_allocator_nointr); 85 86/* 87 * This variable controls the maximum number of processes that will 88 * be checked in doing deadlock detection. 89 */ 90int maxlockdepth = MAXDEPTH; 91 92#ifdef LOCKF_DEBUG 93int lockf_debug = 0; 94#endif 95 96#define NOLOCKF (struct lockf *)0 97#define SELF 0x1 98#define OTHERS 0x2 99 100/* 101 * XXX TODO 102 * Misc cleanups: "caddr_t id" should be visible in the API as a 103 * "struct proc *". 104 * (This requires rototilling all VFS's which support advisory locking). 105 */ 106 107/* 108 * If there's a lot of lock contention on a single vnode, locking 109 * schemes which allow for more paralleism would be needed. Given how 110 * infrequently byte-range locks are actually used in typical BSD 111 * code, a more complex approach probably isn't worth it. 112 */ 113 114/* 115 * We enforce a limit on locks by uid, so that a single user cannot 116 * run the kernel out of memory. For now, the limit is pretty coarse. 117 * There is no limit on root. 118 * 119 * Splitting a lock will always succeed, regardless of current allocations. 120 * If you're slightly above the limit, we still have to permit an allocation 121 * so that the unlock can succeed. If the unlocking causes too many splits, 122 * however, you're totally cutoff. 123 */ 124int maxlocksperuid = 1024; 125 126#ifdef LOCKF_DEBUG 127/* 128 * Print out a lock. 129 */ 130static void 131lf_print(char *tag, struct lockf *lock) 132{ 133 134 printf("%s: lock %p for ", tag, lock); 135 if (lock->lf_flags & F_POSIX) 136 printf("proc %d", ((struct proc *)lock->lf_id)->p_pid); 137 else 138 printf("file %p", (struct file *)lock->lf_id); 139 printf(" %s, start %qx, end %qx", 140 lock->lf_type == F_RDLCK ? "shared" : 141 lock->lf_type == F_WRLCK ? "exclusive" : 142 lock->lf_type == F_UNLCK ? "unlock" : 143 "unknown", lock->lf_start, lock->lf_end); 144 if (TAILQ_FIRST(&lock->lf_blkhd)) 145 printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd)); 146 else 147 printf("\n"); 148} 149 150static void 151lf_printlist(char *tag, struct lockf *lock) 152{ 153 struct lockf *lf, *blk; 154 155 printf("%s: Lock list:\n", tag); 156 for (lf = *lock->lf_head; lf; lf = lf->lf_next) { 157 printf("\tlock %p for ", lf); 158 if (lf->lf_flags & F_POSIX) 159 printf("proc %d", ((struct proc *)lf->lf_id)->p_pid); 160 else 161 printf("file %p", (struct file *)lf->lf_id); 162 printf(", %s, start %qx, end %qx", 163 lf->lf_type == F_RDLCK ? "shared" : 164 lf->lf_type == F_WRLCK ? "exclusive" : 165 lf->lf_type == F_UNLCK ? "unlock" : 166 "unknown", lf->lf_start, lf->lf_end); 167 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 168 if (blk->lf_flags & F_POSIX) 169 printf("proc %d", 170 ((struct proc *)blk->lf_id)->p_pid); 171 else 172 printf("file %p", (struct file *)blk->lf_id); 173 printf(", %s, start %qx, end %qx", 174 blk->lf_type == F_RDLCK ? "shared" : 175 blk->lf_type == F_WRLCK ? "exclusive" : 176 blk->lf_type == F_UNLCK ? "unlock" : 177 "unknown", blk->lf_start, blk->lf_end); 178 if (TAILQ_FIRST(&blk->lf_blkhd)) 179 panic("lf_printlist: bad list"); 180 } 181 printf("\n"); 182 } 183} 184#endif /* LOCKF_DEBUG */ 185 186/* 187 * 3 options for allowfail. 188 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 189 */ 190static struct lockf * 191lf_alloc(uid_t uid, int allowfail) 192{ 193 struct uidinfo *uip; 194 struct lockf *lock; 195 int s; 196 197 uip = uid_find(uid); 198 UILOCK(uip, s); 199 if (uid && allowfail && uip->ui_lockcnt > 200 (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) { 201 UIUNLOCK(uip, s); 202 return NULL; 203 } 204 uip->ui_lockcnt++; 205 UIUNLOCK(uip, s); 206 lock = pool_get(&lockfpool, PR_WAITOK); 207 lock->lf_uid = uid; 208 return lock; 209} 210 211static void 212lf_free(struct lockf *lock) 213{ 214 struct uidinfo *uip; 215 int s; 216 217 uip = uid_find(lock->lf_uid); 218 UILOCK(uip, s); 219 uip->ui_lockcnt--; 220 UIUNLOCK(uip, s); 221 pool_put(&lockfpool, lock); 222} 223 224/* 225 * Walk the list of locks for an inode to 226 * find an overlapping lock (if any). 227 * 228 * NOTE: this returns only the FIRST overlapping lock. There 229 * may be more than one. 230 */ 231static int 232lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 233 struct lockf ***prev, struct lockf **overlap) 234{ 235 off_t start, end; 236 237 *overlap = lf; 238 if (lf == NOLOCKF) 239 return 0; 240#ifdef LOCKF_DEBUG 241 if (lockf_debug & 2) 242 lf_print("lf_findoverlap: looking for overlap in", lock); 243#endif /* LOCKF_DEBUG */ 244 start = lock->lf_start; 245 end = lock->lf_end; 246 while (lf != NOLOCKF) { 247 if (((type == SELF) && lf->lf_id != lock->lf_id) || 248 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 249 *prev = &lf->lf_next; 250 *overlap = lf = lf->lf_next; 251 continue; 252 } 253#ifdef LOCKF_DEBUG 254 if (lockf_debug & 2) 255 lf_print("\tchecking", lf); 256#endif /* LOCKF_DEBUG */ 257 /* 258 * OK, check for overlap 259 * 260 * Six cases: 261 * 0) no overlap 262 * 1) overlap == lock 263 * 2) overlap contains lock 264 * 3) lock contains overlap 265 * 4) overlap starts before lock 266 * 5) overlap ends after lock 267 */ 268 if ((lf->lf_end != -1 && start > lf->lf_end) || 269 (end != -1 && lf->lf_start > end)) { 270 /* Case 0 */ 271#ifdef LOCKF_DEBUG 272 if (lockf_debug & 2) 273 printf("no overlap\n"); 274#endif /* LOCKF_DEBUG */ 275 if ((type & SELF) && end != -1 && lf->lf_start > end) 276 return 0; 277 *prev = &lf->lf_next; 278 *overlap = lf = lf->lf_next; 279 continue; 280 } 281 if ((lf->lf_start == start) && (lf->lf_end == end)) { 282 /* Case 1 */ 283#ifdef LOCKF_DEBUG 284 if (lockf_debug & 2) 285 printf("overlap == lock\n"); 286#endif /* LOCKF_DEBUG */ 287 return 1; 288 } 289 if ((lf->lf_start <= start) && 290 (end != -1) && 291 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 292 /* Case 2 */ 293#ifdef LOCKF_DEBUG 294 if (lockf_debug & 2) 295 printf("overlap contains lock\n"); 296#endif /* LOCKF_DEBUG */ 297 return 2; 298 } 299 if (start <= lf->lf_start && 300 (end == -1 || 301 (lf->lf_end != -1 && end >= lf->lf_end))) { 302 /* Case 3 */ 303#ifdef LOCKF_DEBUG 304 if (lockf_debug & 2) 305 printf("lock contains overlap\n"); 306#endif /* LOCKF_DEBUG */ 307 return 3; 308 } 309 if ((lf->lf_start < start) && 310 ((lf->lf_end >= start) || (lf->lf_end == -1))) { 311 /* Case 4 */ 312#ifdef LOCKF_DEBUG 313 if (lockf_debug & 2) 314 printf("overlap starts before lock\n"); 315#endif /* LOCKF_DEBUG */ 316 return 4; 317 } 318 if ((lf->lf_start > start) && 319 (end != -1) && 320 ((lf->lf_end > end) || (lf->lf_end == -1))) { 321 /* Case 5 */ 322#ifdef LOCKF_DEBUG 323 if (lockf_debug & 2) 324 printf("overlap ends after lock\n"); 325#endif /* LOCKF_DEBUG */ 326 return 5; 327 } 328 panic("lf_findoverlap: default"); 329 } 330 return 0; 331} 332 333/* 334 * Split a lock and a contained region into 335 * two or three locks as necessary. 336 */ 337static void 338lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock) 339{ 340 struct lockf *splitlock; 341 342#ifdef LOCKF_DEBUG 343 if (lockf_debug & 2) { 344 lf_print("lf_split", lock1); 345 lf_print("splitting from", lock2); 346 } 347#endif /* LOCKF_DEBUG */ 348 /* 349 * Check to see if spliting into only two pieces. 350 */ 351 if (lock1->lf_start == lock2->lf_start) { 352 lock1->lf_start = lock2->lf_end + 1; 353 lock2->lf_next = lock1; 354 return; 355 } 356 if (lock1->lf_end == lock2->lf_end) { 357 lock1->lf_end = lock2->lf_start - 1; 358 lock2->lf_next = lock1->lf_next; 359 lock1->lf_next = lock2; 360 return; 361 } 362 /* 363 * Make a new lock consisting of the last part of 364 * the encompassing lock 365 */ 366 splitlock = *sparelock; 367 *sparelock = NULL; 368 memcpy(splitlock, lock1, sizeof(*splitlock)); 369 splitlock->lf_start = lock2->lf_end + 1; 370 TAILQ_INIT(&splitlock->lf_blkhd); 371 lock1->lf_end = lock2->lf_start - 1; 372 /* 373 * OK, now link it in 374 */ 375 splitlock->lf_next = lock1->lf_next; 376 lock2->lf_next = splitlock; 377 lock1->lf_next = lock2; 378} 379 380/* 381 * Wakeup a blocklist 382 */ 383static void 384lf_wakelock(struct lockf *listhead) 385{ 386 struct lockf *wakelock; 387 388 while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) { 389 KASSERT(wakelock->lf_next == listhead); 390 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); 391 wakelock->lf_next = NOLOCKF; 392#ifdef LOCKF_DEBUG 393 if (lockf_debug & 2) 394 lf_print("lf_wakelock: awakening", wakelock); 395#endif 396 wakeup(wakelock); 397 } 398} 399 400/* 401 * Remove a byte-range lock on an inode. 402 * 403 * Generally, find the lock (or an overlap to that lock) 404 * and remove it (or shrink it), then wakeup anyone we can. 405 */ 406static int 407lf_clearlock(struct lockf *unlock, struct lockf **sparelock) 408{ 409 struct lockf **head = unlock->lf_head; 410 struct lockf *lf = *head; 411 struct lockf *overlap, **prev; 412 int ovcase; 413 414 if (lf == NOLOCKF) 415 return 0; 416#ifdef LOCKF_DEBUG 417 if (unlock->lf_type != F_UNLCK) 418 panic("lf_clearlock: bad type"); 419 if (lockf_debug & 1) 420 lf_print("lf_clearlock", unlock); 421#endif /* LOCKF_DEBUG */ 422 prev = head; 423 while ((ovcase = lf_findoverlap(lf, unlock, SELF, 424 &prev, &overlap)) != 0) { 425 /* 426 * Wakeup the list of locks to be retried. 427 */ 428 lf_wakelock(overlap); 429 430 switch (ovcase) { 431 432 case 1: /* overlap == lock */ 433 *prev = overlap->lf_next; 434 lf_free(overlap); 435 break; 436 437 case 2: /* overlap contains lock: split it */ 438 if (overlap->lf_start == unlock->lf_start) { 439 overlap->lf_start = unlock->lf_end + 1; 440 break; 441 } 442 lf_split(overlap, unlock, sparelock); 443 overlap->lf_next = unlock->lf_next; 444 break; 445 446 case 3: /* lock contains overlap */ 447 *prev = overlap->lf_next; 448 lf = overlap->lf_next; 449 lf_free(overlap); 450 continue; 451 452 case 4: /* overlap starts before lock */ 453 overlap->lf_end = unlock->lf_start - 1; 454 prev = &overlap->lf_next; 455 lf = overlap->lf_next; 456 continue; 457 458 case 5: /* overlap ends after lock */ 459 overlap->lf_start = unlock->lf_end + 1; 460 break; 461 } 462 break; 463 } 464#ifdef LOCKF_DEBUG 465 if (lockf_debug & 1) 466 lf_printlist("lf_clearlock", unlock); 467#endif /* LOCKF_DEBUG */ 468 return 0; 469} 470 471/* 472 * Walk the list of locks for an inode and 473 * return the first blocking lock. 474 */ 475static struct lockf * 476lf_getblock(struct lockf *lock) 477{ 478 struct lockf **prev, *overlap, *lf = *(lock->lf_head); 479 480 prev = lock->lf_head; 481 while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) { 482 /* 483 * We've found an overlap, see if it blocks us 484 */ 485 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) 486 return overlap; 487 /* 488 * Nope, point to the next one on the list and 489 * see if it blocks us 490 */ 491 lf = overlap->lf_next; 492 } 493 return NOLOCKF; 494} 495 496/* 497 * Set a byte-range lock. 498 */ 499static int 500lf_setlock(struct lockf *lock, struct lockf **sparelock, 501 struct simplelock *interlock) 502{ 503 struct lockf *block; 504 struct lockf **head = lock->lf_head; 505 struct lockf **prev, *overlap, *ltmp; 506 static char lockstr[] = "lockf"; 507 int ovcase, priority, needtolink, error; 508 509#ifdef LOCKF_DEBUG 510 if (lockf_debug & 1) 511 lf_print("lf_setlock", lock); 512#endif /* LOCKF_DEBUG */ 513 514 /* 515 * Set the priority 516 */ 517 priority = PLOCK; 518 if (lock->lf_type == F_WRLCK) 519 priority += 4; 520 priority |= PCATCH; 521 /* 522 * Scan lock list for this file looking for locks that would block us. 523 */ 524 while ((block = lf_getblock(lock)) != NULL) { 525 /* 526 * Free the structure and return if nonblocking. 527 */ 528 if ((lock->lf_flags & F_WAIT) == 0) { 529 lf_free(lock); 530 return EAGAIN; 531 } 532 /* 533 * We are blocked. Since flock style locks cover 534 * the whole file, there is no chance for deadlock. 535 * For byte-range locks we must check for deadlock. 536 * 537 * Deadlock detection is done by looking through the 538 * wait channels to see if there are any cycles that 539 * involve us. MAXDEPTH is set just to make sure we 540 * do not go off into neverneverland. 541 */ 542 if ((lock->lf_flags & F_POSIX) && 543 (block->lf_flags & F_POSIX)) { 544 struct lwp *wlwp; 545 volatile const struct lockf *waitblock; 546 int i = 0; 547 548 /* 549 * The block is waiting on something. if_lwp will be 550 * 0 once the lock is granted, so we terminate the 551 * loop if we find this. 552 */ 553 wlwp = block->lf_lwp; 554 while (wlwp && (i++ < maxlockdepth)) { 555 waitblock = wlwp->l_wchan; 556 /* Get the owner of the blocking lock */ 557 waitblock = waitblock->lf_next; 558 if ((waitblock->lf_flags & F_POSIX) == 0) 559 break; 560 wlwp = waitblock->lf_lwp; 561 if (wlwp == lock->lf_lwp) { 562 lf_free(lock); 563 return EDEADLK; 564 } 565 } 566 /* 567 * If we're still following a dependency chain 568 * after maxlockdepth iterations, assume we're in 569 * a cycle to be safe. 570 */ 571 if (i >= maxlockdepth) { 572 lf_free(lock); 573 return EDEADLK; 574 } 575 } 576 /* 577 * For flock type locks, we must first remove 578 * any shared locks that we hold before we sleep 579 * waiting for an exclusive lock. 580 */ 581 if ((lock->lf_flags & F_FLOCK) && 582 lock->lf_type == F_WRLCK) { 583 lock->lf_type = F_UNLCK; 584 (void) lf_clearlock(lock, NULL); 585 lock->lf_type = F_WRLCK; 586 } 587 /* 588 * Add our lock to the blocked list and sleep until we're free. 589 * Remember who blocked us (for deadlock detection). 590 */ 591 lock->lf_next = block; 592 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); 593#ifdef LOCKF_DEBUG 594 if (lockf_debug & 1) { 595 lf_print("lf_setlock: blocking on", block); 596 lf_printlist("lf_setlock", block); 597 } 598#endif /* LOCKF_DEBUG */ 599 error = ltsleep(lock, priority, lockstr, 0, interlock); 600 601 /* 602 * We may have been awakened by a signal (in 603 * which case we must remove ourselves from the 604 * blocked list) and/or by another process 605 * releasing a lock (in which case we have already 606 * been removed from the blocked list and our 607 * lf_next field set to NOLOCKF). 608 */ 609 if (lock->lf_next != NOLOCKF) { 610 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); 611 lock->lf_next = NOLOCKF; 612 } 613 if (error) { 614 lf_free(lock); 615 return error; 616 } 617 } 618 /* 619 * No blocks!! Add the lock. Note that we will 620 * downgrade or upgrade any overlapping locks this 621 * process already owns. 622 * 623 * Skip over locks owned by other processes. 624 * Handle any locks that overlap and are owned by ourselves. 625 */ 626 lock->lf_lwp = 0; 627 prev = head; 628 block = *head; 629 needtolink = 1; 630 for (;;) { 631 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap); 632 if (ovcase) 633 block = overlap->lf_next; 634 /* 635 * Six cases: 636 * 0) no overlap 637 * 1) overlap == lock 638 * 2) overlap contains lock 639 * 3) lock contains overlap 640 * 4) overlap starts before lock 641 * 5) overlap ends after lock 642 */ 643 switch (ovcase) { 644 case 0: /* no overlap */ 645 if (needtolink) { 646 *prev = lock; 647 lock->lf_next = overlap; 648 } 649 break; 650 651 case 1: /* overlap == lock */ 652 /* 653 * If downgrading lock, others may be 654 * able to acquire it. 655 */ 656 if (lock->lf_type == F_RDLCK && 657 overlap->lf_type == F_WRLCK) 658 lf_wakelock(overlap); 659 overlap->lf_type = lock->lf_type; 660 lf_free(lock); 661 lock = overlap; /* for debug output below */ 662 break; 663 664 case 2: /* overlap contains lock */ 665 /* 666 * Check for common starting point and different types. 667 */ 668 if (overlap->lf_type == lock->lf_type) { 669 lf_free(lock); 670 lock = overlap; /* for debug output below */ 671 break; 672 } 673 if (overlap->lf_start == lock->lf_start) { 674 *prev = lock; 675 lock->lf_next = overlap; 676 overlap->lf_start = lock->lf_end + 1; 677 } else 678 lf_split(overlap, lock, sparelock); 679 lf_wakelock(overlap); 680 break; 681 682 case 3: /* lock contains overlap */ 683 /* 684 * If downgrading lock, others may be able to 685 * acquire it, otherwise take the list. 686 */ 687 if (lock->lf_type == F_RDLCK && 688 overlap->lf_type == F_WRLCK) { 689 lf_wakelock(overlap); 690 } else { 691 while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) { 692 KASSERT(ltmp->lf_next == overlap); 693 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, 694 lf_block); 695 ltmp->lf_next = lock; 696 TAILQ_INSERT_TAIL(&lock->lf_blkhd, 697 ltmp, lf_block); 698 } 699 } 700 /* 701 * Add the new lock if necessary and delete the overlap. 702 */ 703 if (needtolink) { 704 *prev = lock; 705 lock->lf_next = overlap->lf_next; 706 prev = &lock->lf_next; 707 needtolink = 0; 708 } else 709 *prev = overlap->lf_next; 710 lf_free(overlap); 711 continue; 712 713 case 4: /* overlap starts before lock */ 714 /* 715 * Add lock after overlap on the list. 716 */ 717 lock->lf_next = overlap->lf_next; 718 overlap->lf_next = lock; 719 overlap->lf_end = lock->lf_start - 1; 720 prev = &lock->lf_next; 721 lf_wakelock(overlap); 722 needtolink = 0; 723 continue; 724 725 case 5: /* overlap ends after lock */ 726 /* 727 * Add the new lock before overlap. 728 */ 729 if (needtolink) { 730 *prev = lock; 731 lock->lf_next = overlap; 732 } 733 overlap->lf_start = lock->lf_end + 1; 734 lf_wakelock(overlap); 735 break; 736 } 737 break; 738 } 739#ifdef LOCKF_DEBUG 740 if (lockf_debug & 1) { 741 lf_print("lf_setlock: got the lock", lock); 742 lf_printlist("lf_setlock", lock); 743 } 744#endif /* LOCKF_DEBUG */ 745 return 0; 746} 747 748/* 749 * Check whether there is a blocking lock, 750 * and if so return its process identifier. 751 */ 752static int 753lf_getlock(struct lockf *lock, struct flock *fl) 754{ 755 struct lockf *block; 756 757#ifdef LOCKF_DEBUG 758 if (lockf_debug & 1) 759 lf_print("lf_getlock", lock); 760#endif /* LOCKF_DEBUG */ 761 762 if ((block = lf_getblock(lock)) != NULL) { 763 fl->l_type = block->lf_type; 764 fl->l_whence = SEEK_SET; 765 fl->l_start = block->lf_start; 766 if (block->lf_end == -1) 767 fl->l_len = 0; 768 else 769 fl->l_len = block->lf_end - block->lf_start + 1; 770 if (block->lf_flags & F_POSIX) 771 fl->l_pid = ((struct proc *)block->lf_id)->p_pid; 772 else 773 fl->l_pid = -1; 774 } else { 775 fl->l_type = F_UNLCK; 776 } 777 return 0; 778} 779 780/* 781 * Do an advisory lock operation. 782 */ 783int 784lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) 785{ 786 struct proc *p = curproc; 787 struct flock *fl = ap->a_fl; 788 struct lockf *lock = NULL; 789 struct lockf *sparelock; 790 struct simplelock *interlock = &ap->a_vp->v_interlock; 791 off_t start, end; 792 int error = 0; 793 794 /* 795 * Convert the flock structure into a start and end. 796 */ 797 switch (fl->l_whence) { 798 case SEEK_SET: 799 case SEEK_CUR: 800 /* 801 * Caller is responsible for adding any necessary offset 802 * when SEEK_CUR is used. 803 */ 804 start = fl->l_start; 805 break; 806 807 case SEEK_END: 808 start = size + fl->l_start; 809 break; 810 811 default: 812 return EINVAL; 813 } 814 if (start < 0) 815 return EINVAL; 816 817 /* 818 * allocate locks before acquire simple lock. 819 * we need two locks in the worst case. 820 */ 821 switch (ap->a_op) { 822 case F_SETLK: 823 case F_UNLCK: 824 /* 825 * XXX for F_UNLCK case, we can re-use lock. 826 */ 827 if ((ap->a_flags & F_FLOCK) == 0) { 828 /* 829 * byte-range lock might need one more lock. 830 */ 831 sparelock = lf_alloc(kauth_cred_geteuid(p->p_cred), 0); 832 if (sparelock == NULL) { 833 error = ENOMEM; 834 goto quit; 835 } 836 break; 837 } 838 /* FALLTHROUGH */ 839 840 case F_GETLK: 841 sparelock = NULL; 842 break; 843 844 default: 845 return EINVAL; 846 } 847 848 lock = lf_alloc(kauth_cred_geteuid(p->p_cred), ap->a_op != F_UNLCK ? 1 : 2); 849 if (lock == NULL) { 850 error = ENOMEM; 851 goto quit; 852 } 853 854 simple_lock(interlock); 855 856 /* 857 * Avoid the common case of unlocking when inode has no locks. 858 */ 859 if (*head == (struct lockf *)0) { 860 if (ap->a_op != F_SETLK) { 861 fl->l_type = F_UNLCK; 862 error = 0; 863 goto quit_unlock; 864 } 865 } 866 867 if (fl->l_len == 0) 868 end = -1; 869 else 870 end = start + fl->l_len - 1; 871 /* 872 * Create the lockf structure. 873 */ 874 lock->lf_start = start; 875 lock->lf_end = end; 876 /* XXX NJWLWP 877 * I don't want to make the entire VFS universe use LWPs, because 878 * they don't need them, for the most part. This is an exception, 879 * and a kluge. 880 */ 881 882 lock->lf_head = head; 883 lock->lf_type = fl->l_type; 884 lock->lf_next = (struct lockf *)0; 885 TAILQ_INIT(&lock->lf_blkhd); 886 lock->lf_flags = ap->a_flags; 887 if (lock->lf_flags & F_POSIX) { 888 KASSERT(curproc == (struct proc *)ap->a_id); 889 } 890 lock->lf_id = (struct proc *)ap->a_id; 891 lock->lf_lwp = curlwp; 892 893 /* 894 * Do the requested operation. 895 */ 896 switch (ap->a_op) { 897 898 case F_SETLK: 899 error = lf_setlock(lock, &sparelock, interlock); 900 lock = NULL; /* lf_setlock freed it */ 901 break; 902 903 case F_UNLCK: 904 error = lf_clearlock(lock, &sparelock); 905 break; 906 907 case F_GETLK: 908 error = lf_getlock(lock, fl); 909 break; 910 911 default: 912 break; 913 /* NOTREACHED */ 914 } 915 916quit_unlock: 917 simple_unlock(interlock); 918quit: 919 if (lock) 920 lf_free(lock); 921 if (sparelock) 922 lf_free(sparelock); 923 924 return error; 925} 926