/*	$NetBSD: vfs_lockf.c,v 1.81 2023/09/23 18:21:11 ad Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.81 2023/09/23 18:21:11 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/kmem.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/atomic.h>
#include <sys/kauth.h>
#include <sys/uidinfo.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */
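
/*
 * For example (illustrative addition, not from the original comment):
 * with held locks A and B on one vnode and a pending request R that
 * conflicts with B, the lists look like
 *
 *	*head -> A -> B -> NULL		(held locks, chained via lf_next)
 *	B->lf_blkhd contains R		(R queued through its lf_block entry)
 *	R->lf_next == B			(the lock R is waiting on)
 */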

TAILQ_HEAD(locklist, lockf);

struct lockf {
	kcondvar_t lf_cv;	 /* Signalling */
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block; /* A request waiting for a lock */
	struct	uidinfo *lf_uip; /* Cached pointer to uidinfo */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static kmutex_t lockf_lock __cacheline_aligned;
static char lockstr[] = "lockf";

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
#define MAXLOCKSPERUID (2 * maxfiles)

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(const char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %jd, end %jd",
	    lock->lf_type == F_RDLCK ? "shared" :
	    lock->lf_type == F_WRLCK ? "exclusive" :
	    lock->lf_type == F_UNLCK ? "unlock" :
	    "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %jd, end %jd",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
		    lf->lf_type == F_UNLCK ? "unlock" :
		    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			if (blk->lf_flags & F_POSIX)
				printf("; proc %d",
				    ((struct proc *)blk->lf_id)->p_pid);
			else
				printf("; file %p", (struct file *)blk->lf_id);
			printf(", %s, start %jd, end %jd",
			    blk->lf_type == F_RDLCK ? "shared" :
			    blk->lf_type == F_WRLCK ? "exclusive" :
			    blk->lf_type == F_UNLCK ? "unlock" :
			    "unknown", (intmax_t)blk->lf_start,
			    (intmax_t)blk->lf_end);
			if (TAILQ_FIRST(&blk->lf_blkhd))
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */

/*
 * 3 options for allowfail.
 * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
 */
static struct lockf *
lf_alloc(int allowfail)
{
	struct uidinfo *uip;
	struct lockf *lock;
	u_long lcnt;
	const uid_t uid = kauth_cred_geteuid(kauth_cred_get());

	uip = uid_find(uid);
	lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
	if (uid && allowfail && lcnt >
	    (allowfail == 1 ? MAXLOCKSPERUID : (MAXLOCKSPERUID * 2))) {
		atomic_dec_ulong(&uip->ui_lockcnt);
		return NULL;
	}

	lock = kmem_alloc(sizeof(*lock), KM_SLEEP);
	lock->lf_uip = uip;
	cv_init(&lock->lf_cv, lockstr);
	return lock;
}
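
/*
 * Worked example of the limit (illustrative addition): with
 * maxfiles = 1000, MAXLOCKSPERUID is 2000.  For a non-root uid,
 * lf_alloc(1) fails once the per-uid count would exceed 2000,
 * lf_alloc(2) fails only beyond 4000 (so an unlock that must split an
 * existing lock can still allocate), and lf_alloc(0) never fails.
 */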
"unlock" : 167 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); 168 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 169 if (blk->lf_flags & F_POSIX) 170 printf("; proc %d", 171 ((struct proc *)blk->lf_id)->p_pid); 172 else 173 printf("; file %p", (struct file *)blk->lf_id); 174 printf(", %s, start %jd, end %jd", 175 blk->lf_type == F_RDLCK ? "shared" : 176 blk->lf_type == F_WRLCK ? "exclusive" : 177 blk->lf_type == F_UNLCK ? "unlock" : 178 "unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end); 179 if (TAILQ_FIRST(&blk->lf_blkhd)) 180 panic("lf_printlist: bad list"); 181 } 182 printf("\n"); 183 } 184} 185#endif /* LOCKF_DEBUG */ 186 187/* 188 * 3 options for allowfail. 189 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 190 */ 191static struct lockf * 192lf_alloc(int allowfail) 193{ 194 struct uidinfo *uip; 195 struct lockf *lock; 196 u_long lcnt; 197 const uid_t uid = kauth_cred_geteuid(kauth_cred_get()); 198 199 uip = uid_find(uid); 200 lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt); 201 if (uid && allowfail && lcnt > 202 (allowfail == 1 ? MAXLOCKSPERUID : (MAXLOCKSPERUID * 2))) { 203 atomic_dec_ulong(&uip->ui_lockcnt); 204 return NULL; 205 } 206 207 lock = kmem_alloc(sizeof(*lock), KM_SLEEP); 208 lock->lf_uip = uip; 209 cv_init(&lock->lf_cv, lockstr); 210 return lock; 211} 212 213static void 214lf_free(struct lockf *lock) 215{ 216 217 atomic_dec_ulong(&lock->lf_uip->ui_lockcnt); 218 cv_destroy(&lock->lf_cv); 219 kmem_free(lock, sizeof(*lock)); 220} 221 222/* 223 * Walk the list of locks for an inode to 224 * find an overlapping lock (if any). 225 * 226 * NOTE: this returns only the FIRST overlapping lock. There 227 * may be more than one. 228 */ 229static int 230lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 231 struct lockf ***prev, struct lockf **overlap) 232{ 233 off_t start, end; 234 235 *overlap = lf; 236 if (lf == NULL) 237 return 0; 238#ifdef LOCKF_DEBUG 239 if (lockf_debug & 2) 240 lf_print("lf_findoverlap: looking for overlap in", lock); 241#endif /* LOCKF_DEBUG */ 242 start = lock->lf_start; 243 end = lock->lf_end; 244 while (lf != NULL) { 245 if (((type == SELF) && lf->lf_id != lock->lf_id) || 246 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 247 *prev = &lf->lf_next; 248 *overlap = lf = lf->lf_next; 249 continue; 250 } 251#ifdef LOCKF_DEBUG 252 if (lockf_debug & 2) 253 lf_print("\tchecking", lf); 254#endif /* LOCKF_DEBUG */ 255 /* 256 * OK, check for overlap 257 * 258 * Six cases: 259 * 0) no overlap 260 * 1) overlap == lock 261 * 2) overlap contains lock 262 * 3) lock contains overlap 263 * 4) overlap starts before lock 264 * 5) overlap ends after lock 265 */ 266 if ((lf->lf_end != -1 && start > lf->lf_end) || 267 (end != -1 && lf->lf_start > end)) { 268 /* Case 0 */ 269#ifdef LOCKF_DEBUG 270 if (lockf_debug & 2) 271 printf("no overlap\n"); 272#endif /* LOCKF_DEBUG */ 273 if ((type & SELF) && end != -1 && lf->lf_start > end) 274 return 0; 275 *prev = &lf->lf_next; 276 *overlap = lf = lf->lf_next; 277 continue; 278 } 279 if ((lf->lf_start == start) && (lf->lf_end == end)) { 280 /* Case 1 */ 281#ifdef LOCKF_DEBUG 282 if (lockf_debug & 2) 283 printf("overlap == lock\n"); 284#endif /* LOCKF_DEBUG */ 285 return 1; 286 } 287 if ((lf->lf_start <= start) && 288 (end != -1) && 289 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 290 /* Case 2 */ 291#ifdef LOCKF_DEBUG 292 if (lockf_debug & 2) 293 printf("overlap contains lock\n"); 294#endif /* LOCKF_DEBUG */ 295 return 2; 296 } 297 if (start <= lf->lf_start && 298 (end == -1 || 299 
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	cv_destroy(&splitlock->lf_cv);
	memcpy(splitlock, lock1, sizeof(*splitlock));
	cv_init(&splitlock->lf_cv, lockstr);

	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}
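
/*
 * Example (illustrative addition): carving lock2 = [40,59] out of
 * lock1 = [0,99] takes the three-piece path above: lock1 becomes
 * [0,39], the spare becomes splitlock = [60,99], and the chain ends
 * up lock1 -> lock2 -> splitlock, leaving lock2 linked so the caller
 * (e.g. lf_clearlock()) can adjust or unlink it.
 */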

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
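		/*
		 * Illustrative scenario (not from the original comment):
		 * curproc requests a range held by process B while B is
		 * itself asleep waiting on a lock owned by curproc.  The
		 * walk below starts at B, follows B's sleeping LWP's wait
		 * channel to the lock blocking B, finds that its owner is
		 * curproc, and returns EDEADLK instead of sleeping forever.
		 */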
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(p->p_lock);
				if (p->p_nlwps > 1) {
					mutex_exit(p->p_lock);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wchan == NULL ||
				    wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(p->p_lock);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(p->p_lock);
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awoken by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
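	/*
	 * Note (illustrative addition): if this process already holds a
	 * shared lock on exactly the requested range and now asks for an
	 * exclusive one, case 1 below upgrades it by rewriting lf_type in
	 * place; the reverse downgrade additionally wakes any waiters,
	 * since a read lock may be compatible with them.
	 */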
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = &lockf_lock;
	off_t start, end;
	int error = 0;

	KASSERTMSG(size >= 0, "size=%jd", (intmax_t)size);

	/*
	 * Convert the flock structure into a start and end.
	 */
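	/*
	 * Worked examples (illustrative addition), with l_whence SEEK_SET:
	 *	l_start = 100, l_len = 10   ->  start 100, end 109
	 *	l_start = 100, l_len = 0    ->  start 100, end -1 (to EOF)
	 *	l_start = 100, l_len = -10  ->  start 90,  end 99
	 */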
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		if (fl->l_start > __type_max(off_t) - size)
			return EINVAL;
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}

	if (fl->l_len == 0)
		end = -1;
	else {
		if (fl->l_len >= 0) {
			if (start >= 0 &&
			    fl->l_len - 1 > __type_max(off_t) - start)
				return EINVAL;
			end = start + (fl->l_len - 1);
		} else {
			/* lockf() allows -ve lengths */
			if (start < 0)
				return EINVAL;
			end = start - 1;
			start += fl->l_len;
		}
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	switch (ap->a_op) {
	case F_SETLK:
		lock = lf_alloc(1);
		break;
	case F_UNLCK:
		if (start == 0 || end == -1) {
			/* never split */
			lock = lf_alloc(0);
		} else {
			/* might split */
			lock = lf_alloc(2);
		}
		break;
	case F_GETLK:
		lock = lf_alloc(0);
		break;
	}
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}

/*
 * Initialize subsystem.   XXX We use a global lock.  This could be the
 * vnode interlock, but the deadlock detection code may need to inspect
 * locks belonging to other files.
 */
void
lf_init(void)
{

	mutex_init(&lockf_lock, MUTEX_DEFAULT, IPL_NONE);
}
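
/*
 * Usage sketch (illustrative addition, not part of this file): a
 * filesystem typically implements VOP_ADVLOCK by forwarding to
 * lf_advlock() with its per-inode lock list head and current size,
 * along these lines (modelled on a UFS-style implementation; names
 * are examples):
 *
 *	int
 *	xxx_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct inode *ip = VTOI(ap->a_vp);
 *
 *		return lf_advlock(ap, &ip->i_lockf, ip->i_size);
 *	}
 */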