vfs_lockf.c revision 1.54
1/* $NetBSD: vfs_lockf.c,v 1.54 2006/05/20 12:20:55 yamt Exp $ */ 2 3/* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Scooter Morris at Genentech Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94 35 */ 36 37#include <sys/cdefs.h> 38__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.54 2006/05/20 12:20:55 yamt Exp $"); 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/file.h> 44#include <sys/proc.h> 45#include <sys/vnode.h> 46#include <sys/pool.h> 47#include <sys/fcntl.h> 48#include <sys/lockf.h> 49#include <sys/kauth.h> 50 51/* 52 * The lockf structure is a kernel structure which contains the information 53 * associated with a byte range lock. The lockf structures are linked into 54 * the inode structure. Locks are sorted by the starting byte of the lock for 55 * efficiency. 56 * 57 * lf_next is used for two purposes, depending on whether the lock is 58 * being held, or is in conflict with an existing lock. If this lock 59 * is held, it indicates the next lock on the same vnode. 60 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block 61 * must be queued on the lf_blkhd TAILQ of lock->lf_next. 62 */ 63 64TAILQ_HEAD(locklist, lockf); 65 66struct lockf { 67 short lf_flags; /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */ 68 short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ 69 off_t lf_start; /* The byte # of the start of the lock */ 70 off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/ 71 void *lf_id; /* process or file description holding lock */ 72 struct lockf **lf_head; /* Back pointer to the head of lockf list */ 73 struct lockf *lf_next; /* Next lock on this vnode, or blocking lock */ 74 struct locklist lf_blkhd; /* List of requests blocked on this lock */ 75 TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ 76 uid_t lf_uid; /* User ID responsible */ 77}; 78 79/* Maximum length of sleep chains to traverse to try and detect deadlock. */ 80#define MAXDEPTH 50 81 82static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl", 83 &pool_allocator_nointr); 84 85/* 86 * This variable controls the maximum number of processes that will 87 * be checked in doing deadlock detection. 88 */ 89int maxlockdepth = MAXDEPTH; 90 91#ifdef LOCKF_DEBUG 92int lockf_debug = 0; 93#endif 94 95#define SELF 0x1 96#define OTHERS 0x2 97 98/* 99 * XXX TODO 100 * Misc cleanups: "caddr_t id" should be visible in the API as a 101 * "struct proc *". 102 * (This requires rototilling all VFS's which support advisory locking). 103 */ 104 105/* 106 * If there's a lot of lock contention on a single vnode, locking 107 * schemes which allow for more paralleism would be needed. Given how 108 * infrequently byte-range locks are actually used in typical BSD 109 * code, a more complex approach probably isn't worth it. 110 */ 111 112/* 113 * We enforce a limit on locks by uid, so that a single user cannot 114 * run the kernel out of memory. For now, the limit is pretty coarse. 115 * There is no limit on root. 116 * 117 * Splitting a lock will always succeed, regardless of current allocations. 118 * If you're slightly above the limit, we still have to permit an allocation 119 * so that the unlock can succeed. If the unlocking causes too many splits, 120 * however, you're totally cutoff. 121 */ 122int maxlocksperuid = 1024; 123 124#ifdef LOCKF_DEBUG 125/* 126 * Print out a lock. 127 */ 128static void 129lf_print(char *tag, struct lockf *lock) 130{ 131 132 printf("%s: lock %p for ", tag, lock); 133 if (lock->lf_flags & F_POSIX) 134 printf("proc %d", ((struct proc *)lock->lf_id)->p_pid); 135 else 136 printf("file %p", (struct file *)lock->lf_id); 137 printf(" %s, start %qx, end %qx", 138 lock->lf_type == F_RDLCK ? "shared" : 139 lock->lf_type == F_WRLCK ? "exclusive" : 140 lock->lf_type == F_UNLCK ? "unlock" : 141 "unknown", lock->lf_start, lock->lf_end); 142 if (TAILQ_FIRST(&lock->lf_blkhd)) 143 printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd)); 144 else 145 printf("\n"); 146} 147 148static void 149lf_printlist(char *tag, struct lockf *lock) 150{ 151 struct lockf *lf, *blk; 152 153 printf("%s: Lock list:\n", tag); 154 for (lf = *lock->lf_head; lf; lf = lf->lf_next) { 155 printf("\tlock %p for ", lf); 156 if (lf->lf_flags & F_POSIX) 157 printf("proc %d", ((struct proc *)lf->lf_id)->p_pid); 158 else 159 printf("file %p", (struct file *)lf->lf_id); 160 printf(", %s, start %qx, end %qx", 161 lf->lf_type == F_RDLCK ? "shared" : 162 lf->lf_type == F_WRLCK ? "exclusive" : 163 lf->lf_type == F_UNLCK ? "unlock" : 164 "unknown", lf->lf_start, lf->lf_end); 165 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 166 if (blk->lf_flags & F_POSIX) 167 printf("proc %d", 168 ((struct proc *)blk->lf_id)->p_pid); 169 else 170 printf("file %p", (struct file *)blk->lf_id); 171 printf(", %s, start %qx, end %qx", 172 blk->lf_type == F_RDLCK ? "shared" : 173 blk->lf_type == F_WRLCK ? "exclusive" : 174 blk->lf_type == F_UNLCK ? "unlock" : 175 "unknown", blk->lf_start, blk->lf_end); 176 if (TAILQ_FIRST(&blk->lf_blkhd)) 177 panic("lf_printlist: bad list"); 178 } 179 printf("\n"); 180 } 181} 182#endif /* LOCKF_DEBUG */ 183 184/* 185 * 3 options for allowfail. 186 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 187 */ 188static struct lockf * 189lf_alloc(uid_t uid, int allowfail) 190{ 191 struct uidinfo *uip; 192 struct lockf *lock; 193 int s; 194 195 uip = uid_find(uid); 196 UILOCK(uip, s); 197 if (uid && allowfail && uip->ui_lockcnt > 198 (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) { 199 UIUNLOCK(uip, s); 200 return NULL; 201 } 202 uip->ui_lockcnt++; 203 UIUNLOCK(uip, s); 204 lock = pool_get(&lockfpool, PR_WAITOK); 205 lock->lf_uid = uid; 206 return lock; 207} 208 209static void 210lf_free(struct lockf *lock) 211{ 212 struct uidinfo *uip; 213 int s; 214 215 uip = uid_find(lock->lf_uid); 216 UILOCK(uip, s); 217 uip->ui_lockcnt--; 218 UIUNLOCK(uip, s); 219 pool_put(&lockfpool, lock); 220} 221 222/* 223 * Walk the list of locks for an inode to 224 * find an overlapping lock (if any). 225 * 226 * NOTE: this returns only the FIRST overlapping lock. There 227 * may be more than one. 228 */ 229static int 230lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 231 struct lockf ***prev, struct lockf **overlap) 232{ 233 off_t start, end; 234 235 *overlap = lf; 236 if (lf == NULL) 237 return 0; 238#ifdef LOCKF_DEBUG 239 if (lockf_debug & 2) 240 lf_print("lf_findoverlap: looking for overlap in", lock); 241#endif /* LOCKF_DEBUG */ 242 start = lock->lf_start; 243 end = lock->lf_end; 244 while (lf != NULL) { 245 if (((type == SELF) && lf->lf_id != lock->lf_id) || 246 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 247 *prev = &lf->lf_next; 248 *overlap = lf = lf->lf_next; 249 continue; 250 } 251#ifdef LOCKF_DEBUG 252 if (lockf_debug & 2) 253 lf_print("\tchecking", lf); 254#endif /* LOCKF_DEBUG */ 255 /* 256 * OK, check for overlap 257 * 258 * Six cases: 259 * 0) no overlap 260 * 1) overlap == lock 261 * 2) overlap contains lock 262 * 3) lock contains overlap 263 * 4) overlap starts before lock 264 * 5) overlap ends after lock 265 */ 266 if ((lf->lf_end != -1 && start > lf->lf_end) || 267 (end != -1 && lf->lf_start > end)) { 268 /* Case 0 */ 269#ifdef LOCKF_DEBUG 270 if (lockf_debug & 2) 271 printf("no overlap\n"); 272#endif /* LOCKF_DEBUG */ 273 if ((type & SELF) && end != -1 && lf->lf_start > end) 274 return 0; 275 *prev = &lf->lf_next; 276 *overlap = lf = lf->lf_next; 277 continue; 278 } 279 if ((lf->lf_start == start) && (lf->lf_end == end)) { 280 /* Case 1 */ 281#ifdef LOCKF_DEBUG 282 if (lockf_debug & 2) 283 printf("overlap == lock\n"); 284#endif /* LOCKF_DEBUG */ 285 return 1; 286 } 287 if ((lf->lf_start <= start) && 288 (end != -1) && 289 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 290 /* Case 2 */ 291#ifdef LOCKF_DEBUG 292 if (lockf_debug & 2) 293 printf("overlap contains lock\n"); 294#endif /* LOCKF_DEBUG */ 295 return 2; 296 } 297 if (start <= lf->lf_start && 298 (end == -1 || 299 (lf->lf_end != -1 && end >= lf->lf_end))) { 300 /* Case 3 */ 301#ifdef LOCKF_DEBUG 302 if (lockf_debug & 2) 303 printf("lock contains overlap\n"); 304#endif /* LOCKF_DEBUG */ 305 return 3; 306 } 307 if ((lf->lf_start < start) && 308 ((lf->lf_end >= start) || (lf->lf_end == -1))) { 309 /* Case 4 */ 310#ifdef LOCKF_DEBUG 311 if (lockf_debug & 2) 312 printf("overlap starts before lock\n"); 313#endif /* LOCKF_DEBUG */ 314 return 4; 315 } 316 if ((lf->lf_start > start) && 317 (end != -1) && 318 ((lf->lf_end > end) || (lf->lf_end == -1))) { 319 /* Case 5 */ 320#ifdef LOCKF_DEBUG 321 if (lockf_debug & 2) 322 printf("overlap ends after lock\n"); 323#endif /* LOCKF_DEBUG */ 324 return 5; 325 } 326 panic("lf_findoverlap: default"); 327 } 328 return 0; 329} 330 331/* 332 * Split a lock and a contained region into 333 * two or three locks as necessary. 334 */ 335static void 336lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock) 337{ 338 struct lockf *splitlock; 339 340#ifdef LOCKF_DEBUG 341 if (lockf_debug & 2) { 342 lf_print("lf_split", lock1); 343 lf_print("splitting from", lock2); 344 } 345#endif /* LOCKF_DEBUG */ 346 /* 347 * Check to see if spliting into only two pieces. 348 */ 349 if (lock1->lf_start == lock2->lf_start) { 350 lock1->lf_start = lock2->lf_end + 1; 351 lock2->lf_next = lock1; 352 return; 353 } 354 if (lock1->lf_end == lock2->lf_end) { 355 lock1->lf_end = lock2->lf_start - 1; 356 lock2->lf_next = lock1->lf_next; 357 lock1->lf_next = lock2; 358 return; 359 } 360 /* 361 * Make a new lock consisting of the last part of 362 * the encompassing lock 363 */ 364 splitlock = *sparelock; 365 *sparelock = NULL; 366 memcpy(splitlock, lock1, sizeof(*splitlock)); 367 splitlock->lf_start = lock2->lf_end + 1; 368 TAILQ_INIT(&splitlock->lf_blkhd); 369 lock1->lf_end = lock2->lf_start - 1; 370 /* 371 * OK, now link it in 372 */ 373 splitlock->lf_next = lock1->lf_next; 374 lock2->lf_next = splitlock; 375 lock1->lf_next = lock2; 376} 377 378/* 379 * Wakeup a blocklist 380 */ 381static void 382lf_wakelock(struct lockf *listhead) 383{ 384 struct lockf *wakelock; 385 386 while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) { 387 KASSERT(wakelock->lf_next == listhead); 388 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); 389 wakelock->lf_next = NULL; 390#ifdef LOCKF_DEBUG 391 if (lockf_debug & 2) 392 lf_print("lf_wakelock: awakening", wakelock); 393#endif 394 wakeup(wakelock); 395 } 396} 397 398/* 399 * Remove a byte-range lock on an inode. 400 * 401 * Generally, find the lock (or an overlap to that lock) 402 * and remove it (or shrink it), then wakeup anyone we can. 403 */ 404static int 405lf_clearlock(struct lockf *unlock, struct lockf **sparelock) 406{ 407 struct lockf **head = unlock->lf_head; 408 struct lockf *lf = *head; 409 struct lockf *overlap, **prev; 410 int ovcase; 411 412 if (lf == NULL) 413 return 0; 414#ifdef LOCKF_DEBUG 415 if (unlock->lf_type != F_UNLCK) 416 panic("lf_clearlock: bad type"); 417 if (lockf_debug & 1) 418 lf_print("lf_clearlock", unlock); 419#endif /* LOCKF_DEBUG */ 420 prev = head; 421 while ((ovcase = lf_findoverlap(lf, unlock, SELF, 422 &prev, &overlap)) != 0) { 423 /* 424 * Wakeup the list of locks to be retried. 425 */ 426 lf_wakelock(overlap); 427 428 switch (ovcase) { 429 430 case 1: /* overlap == lock */ 431 *prev = overlap->lf_next; 432 lf_free(overlap); 433 break; 434 435 case 2: /* overlap contains lock: split it */ 436 if (overlap->lf_start == unlock->lf_start) { 437 overlap->lf_start = unlock->lf_end + 1; 438 break; 439 } 440 lf_split(overlap, unlock, sparelock); 441 overlap->lf_next = unlock->lf_next; 442 break; 443 444 case 3: /* lock contains overlap */ 445 *prev = overlap->lf_next; 446 lf = overlap->lf_next; 447 lf_free(overlap); 448 continue; 449 450 case 4: /* overlap starts before lock */ 451 overlap->lf_end = unlock->lf_start - 1; 452 prev = &overlap->lf_next; 453 lf = overlap->lf_next; 454 continue; 455 456 case 5: /* overlap ends after lock */ 457 overlap->lf_start = unlock->lf_end + 1; 458 break; 459 } 460 break; 461 } 462#ifdef LOCKF_DEBUG 463 if (lockf_debug & 1) 464 lf_printlist("lf_clearlock", unlock); 465#endif /* LOCKF_DEBUG */ 466 return 0; 467} 468 469/* 470 * Walk the list of locks for an inode and 471 * return the first blocking lock. 472 */ 473static struct lockf * 474lf_getblock(struct lockf *lock) 475{ 476 struct lockf **prev, *overlap, *lf = *(lock->lf_head); 477 478 prev = lock->lf_head; 479 while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) { 480 /* 481 * We've found an overlap, see if it blocks us 482 */ 483 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) 484 return overlap; 485 /* 486 * Nope, point to the next one on the list and 487 * see if it blocks us 488 */ 489 lf = overlap->lf_next; 490 } 491 return NULL; 492} 493 494/* 495 * Set a byte-range lock. 496 */ 497static int 498lf_setlock(struct lockf *lock, struct lockf **sparelock, 499 struct simplelock *interlock) 500{ 501 struct lockf *block; 502 struct lockf **head = lock->lf_head; 503 struct lockf **prev, *overlap, *ltmp; 504 static char lockstr[] = "lockf"; 505 int ovcase, priority, needtolink, error; 506 507#ifdef LOCKF_DEBUG 508 if (lockf_debug & 1) 509 lf_print("lf_setlock", lock); 510#endif /* LOCKF_DEBUG */ 511 512 /* 513 * Set the priority 514 */ 515 priority = PLOCK; 516 if (lock->lf_type == F_WRLCK) 517 priority += 4; 518 priority |= PCATCH; 519 /* 520 * Scan lock list for this file looking for locks that would block us. 521 */ 522 while ((block = lf_getblock(lock)) != NULL) { 523 /* 524 * Free the structure and return if nonblocking. 525 */ 526 if ((lock->lf_flags & F_WAIT) == 0) { 527 lf_free(lock); 528 return EAGAIN; 529 } 530 /* 531 * We are blocked. Since flock style locks cover 532 * the whole file, there is no chance for deadlock. 533 * For byte-range locks we must check for deadlock. 534 * 535 * Deadlock detection is done by looking through the 536 * wait channels to see if there are any cycles that 537 * involve us. MAXDEPTH is set just to make sure we 538 * do not go off into neverneverland. 539 */ 540 if ((lock->lf_flags & F_POSIX) && 541 (block->lf_flags & F_POSIX)) { 542 struct lwp *wlwp; 543 volatile const struct lockf *waitblock; 544 int i = 0; 545 struct proc *p; 546 547 p = (struct proc *)block->lf_id; 548 KASSERT(p != NULL); 549 while (i++ < maxlockdepth) { 550 simple_lock(&p->p_lock); 551 if (p->p_nlwps > 1) { 552 simple_unlock(&p->p_lock); 553 break; 554 } 555 wlwp = LIST_FIRST(&p->p_lwps); 556 if (wlwp->l_wmesg != lockstr) { 557 simple_unlock(&p->p_lock); 558 break; 559 } 560 simple_unlock(&p->p_lock); 561 waitblock = wlwp->l_wchan; 562 if (waitblock == NULL) { 563 /* 564 * this lwp just got up but 565 * not returned from ltsleep yet. 566 */ 567 break; 568 } 569 /* Get the owner of the blocking lock */ 570 waitblock = waitblock->lf_next; 571 if ((waitblock->lf_flags & F_POSIX) == 0) 572 break; 573 p = (struct proc *)waitblock->lf_id; 574 if (p == curproc) { 575 lf_free(lock); 576 return EDEADLK; 577 } 578 } 579 /* 580 * If we're still following a dependency chain 581 * after maxlockdepth iterations, assume we're in 582 * a cycle to be safe. 583 */ 584 if (i >= maxlockdepth) { 585 lf_free(lock); 586 return EDEADLK; 587 } 588 } 589 /* 590 * For flock type locks, we must first remove 591 * any shared locks that we hold before we sleep 592 * waiting for an exclusive lock. 593 */ 594 if ((lock->lf_flags & F_FLOCK) && 595 lock->lf_type == F_WRLCK) { 596 lock->lf_type = F_UNLCK; 597 (void) lf_clearlock(lock, NULL); 598 lock->lf_type = F_WRLCK; 599 } 600 /* 601 * Add our lock to the blocked list and sleep until we're free. 602 * Remember who blocked us (for deadlock detection). 603 */ 604 lock->lf_next = block; 605 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); 606#ifdef LOCKF_DEBUG 607 if (lockf_debug & 1) { 608 lf_print("lf_setlock: blocking on", block); 609 lf_printlist("lf_setlock", block); 610 } 611#endif /* LOCKF_DEBUG */ 612 error = ltsleep(lock, priority, lockstr, 0, interlock); 613 614 /* 615 * We may have been awakened by a signal (in 616 * which case we must remove ourselves from the 617 * blocked list) and/or by another process 618 * releasing a lock (in which case we have already 619 * been removed from the blocked list and our 620 * lf_next field set to NULL). 621 */ 622 if (lock->lf_next != NULL) { 623 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); 624 lock->lf_next = NULL; 625 } 626 if (error) { 627 lf_free(lock); 628 return error; 629 } 630 } 631 /* 632 * No blocks!! Add the lock. Note that we will 633 * downgrade or upgrade any overlapping locks this 634 * process already owns. 635 * 636 * Skip over locks owned by other processes. 637 * Handle any locks that overlap and are owned by ourselves. 638 */ 639 prev = head; 640 block = *head; 641 needtolink = 1; 642 for (;;) { 643 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap); 644 if (ovcase) 645 block = overlap->lf_next; 646 /* 647 * Six cases: 648 * 0) no overlap 649 * 1) overlap == lock 650 * 2) overlap contains lock 651 * 3) lock contains overlap 652 * 4) overlap starts before lock 653 * 5) overlap ends after lock 654 */ 655 switch (ovcase) { 656 case 0: /* no overlap */ 657 if (needtolink) { 658 *prev = lock; 659 lock->lf_next = overlap; 660 } 661 break; 662 663 case 1: /* overlap == lock */ 664 /* 665 * If downgrading lock, others may be 666 * able to acquire it. 667 */ 668 if (lock->lf_type == F_RDLCK && 669 overlap->lf_type == F_WRLCK) 670 lf_wakelock(overlap); 671 overlap->lf_type = lock->lf_type; 672 lf_free(lock); 673 lock = overlap; /* for debug output below */ 674 break; 675 676 case 2: /* overlap contains lock */ 677 /* 678 * Check for common starting point and different types. 679 */ 680 if (overlap->lf_type == lock->lf_type) { 681 lf_free(lock); 682 lock = overlap; /* for debug output below */ 683 break; 684 } 685 if (overlap->lf_start == lock->lf_start) { 686 *prev = lock; 687 lock->lf_next = overlap; 688 overlap->lf_start = lock->lf_end + 1; 689 } else 690 lf_split(overlap, lock, sparelock); 691 lf_wakelock(overlap); 692 break; 693 694 case 3: /* lock contains overlap */ 695 /* 696 * If downgrading lock, others may be able to 697 * acquire it, otherwise take the list. 698 */ 699 if (lock->lf_type == F_RDLCK && 700 overlap->lf_type == F_WRLCK) { 701 lf_wakelock(overlap); 702 } else { 703 while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) { 704 KASSERT(ltmp->lf_next == overlap); 705 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, 706 lf_block); 707 ltmp->lf_next = lock; 708 TAILQ_INSERT_TAIL(&lock->lf_blkhd, 709 ltmp, lf_block); 710 } 711 } 712 /* 713 * Add the new lock if necessary and delete the overlap. 714 */ 715 if (needtolink) { 716 *prev = lock; 717 lock->lf_next = overlap->lf_next; 718 prev = &lock->lf_next; 719 needtolink = 0; 720 } else 721 *prev = overlap->lf_next; 722 lf_free(overlap); 723 continue; 724 725 case 4: /* overlap starts before lock */ 726 /* 727 * Add lock after overlap on the list. 728 */ 729 lock->lf_next = overlap->lf_next; 730 overlap->lf_next = lock; 731 overlap->lf_end = lock->lf_start - 1; 732 prev = &lock->lf_next; 733 lf_wakelock(overlap); 734 needtolink = 0; 735 continue; 736 737 case 5: /* overlap ends after lock */ 738 /* 739 * Add the new lock before overlap. 740 */ 741 if (needtolink) { 742 *prev = lock; 743 lock->lf_next = overlap; 744 } 745 overlap->lf_start = lock->lf_end + 1; 746 lf_wakelock(overlap); 747 break; 748 } 749 break; 750 } 751#ifdef LOCKF_DEBUG 752 if (lockf_debug & 1) { 753 lf_print("lf_setlock: got the lock", lock); 754 lf_printlist("lf_setlock", lock); 755 } 756#endif /* LOCKF_DEBUG */ 757 return 0; 758} 759 760/* 761 * Check whether there is a blocking lock, 762 * and if so return its process identifier. 763 */ 764static int 765lf_getlock(struct lockf *lock, struct flock *fl) 766{ 767 struct lockf *block; 768 769#ifdef LOCKF_DEBUG 770 if (lockf_debug & 1) 771 lf_print("lf_getlock", lock); 772#endif /* LOCKF_DEBUG */ 773 774 if ((block = lf_getblock(lock)) != NULL) { 775 fl->l_type = block->lf_type; 776 fl->l_whence = SEEK_SET; 777 fl->l_start = block->lf_start; 778 if (block->lf_end == -1) 779 fl->l_len = 0; 780 else 781 fl->l_len = block->lf_end - block->lf_start + 1; 782 if (block->lf_flags & F_POSIX) 783 fl->l_pid = ((struct proc *)block->lf_id)->p_pid; 784 else 785 fl->l_pid = -1; 786 } else { 787 fl->l_type = F_UNLCK; 788 } 789 return 0; 790} 791 792/* 793 * Do an advisory lock operation. 794 */ 795int 796lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) 797{ 798 struct proc *p = curproc; 799 struct flock *fl = ap->a_fl; 800 struct lockf *lock = NULL; 801 struct lockf *sparelock; 802 struct simplelock *interlock = &ap->a_vp->v_interlock; 803 off_t start, end; 804 int error = 0; 805 806 /* 807 * Convert the flock structure into a start and end. 808 */ 809 switch (fl->l_whence) { 810 case SEEK_SET: 811 case SEEK_CUR: 812 /* 813 * Caller is responsible for adding any necessary offset 814 * when SEEK_CUR is used. 815 */ 816 start = fl->l_start; 817 break; 818 819 case SEEK_END: 820 start = size + fl->l_start; 821 break; 822 823 default: 824 return EINVAL; 825 } 826 if (start < 0) 827 return EINVAL; 828 829 /* 830 * allocate locks before acquire simple lock. 831 * we need two locks in the worst case. 832 */ 833 switch (ap->a_op) { 834 case F_SETLK: 835 case F_UNLCK: 836 /* 837 * XXX for F_UNLCK case, we can re-use lock. 838 */ 839 if ((ap->a_flags & F_FLOCK) == 0) { 840 /* 841 * byte-range lock might need one more lock. 842 */ 843 sparelock = lf_alloc(kauth_cred_geteuid(p->p_cred), 0); 844 if (sparelock == NULL) { 845 error = ENOMEM; 846 goto quit; 847 } 848 break; 849 } 850 /* FALLTHROUGH */ 851 852 case F_GETLK: 853 sparelock = NULL; 854 break; 855 856 default: 857 return EINVAL; 858 } 859 860 lock = lf_alloc(kauth_cred_geteuid(p->p_cred), ap->a_op != F_UNLCK ? 1 : 2); 861 if (lock == NULL) { 862 error = ENOMEM; 863 goto quit; 864 } 865 866 simple_lock(interlock); 867 868 /* 869 * Avoid the common case of unlocking when inode has no locks. 870 */ 871 if (*head == (struct lockf *)0) { 872 if (ap->a_op != F_SETLK) { 873 fl->l_type = F_UNLCK; 874 error = 0; 875 goto quit_unlock; 876 } 877 } 878 879 if (fl->l_len == 0) 880 end = -1; 881 else 882 end = start + fl->l_len - 1; 883 /* 884 * Create the lockf structure. 885 */ 886 lock->lf_start = start; 887 lock->lf_end = end; 888 /* XXX NJWLWP 889 * I don't want to make the entire VFS universe use LWPs, because 890 * they don't need them, for the most part. This is an exception, 891 * and a kluge. 892 */ 893 894 lock->lf_head = head; 895 lock->lf_type = fl->l_type; 896 lock->lf_next = (struct lockf *)0; 897 TAILQ_INIT(&lock->lf_blkhd); 898 lock->lf_flags = ap->a_flags; 899 if (lock->lf_flags & F_POSIX) { 900 KASSERT(curproc == (struct proc *)ap->a_id); 901 } 902 lock->lf_id = (struct proc *)ap->a_id; 903 904 /* 905 * Do the requested operation. 906 */ 907 switch (ap->a_op) { 908 909 case F_SETLK: 910 error = lf_setlock(lock, &sparelock, interlock); 911 lock = NULL; /* lf_setlock freed it */ 912 break; 913 914 case F_UNLCK: 915 error = lf_clearlock(lock, &sparelock); 916 break; 917 918 case F_GETLK: 919 error = lf_getlock(lock, fl); 920 break; 921 922 default: 923 break; 924 /* NOTREACHED */ 925 } 926 927quit_unlock: 928 simple_unlock(interlock); 929quit: 930 if (lock) 931 lf_free(lock); 932 if (sparelock) 933 lf_free(sparelock); 934 935 return error; 936} 937