vfs_lockf.c revision 1.77
/*	$NetBSD: vfs_lockf.c,v 1.77 2022/08/03 11:09:13 riastradh Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.77 2022/08/03 11:09:13 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/atomic.h>
#include <sys/kauth.h>
#include <sys/uidinfo.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */
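/*
 * Illustration of the invariant above, with hypothetical held locks A
 * and B and a pending request P (the names exist only for this sketch):
 *
 *	*head -> A [0..9] -> B [20..29] -> NULL	(lf_next links, sorted
 *						 by lf_start)
 *
 *	P [25..40]: P->lf_next == B, and P sits on B's lf_blkhd queue
 *	through its lf_block entry until lf_wakelock() clears P->lf_next
 *	and wakes it.
 */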
TAILQ_HEAD(locklist, lockf);

struct lockf {
	kcondvar_t lf_cv;	 /* Signalling */
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF) */
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block; /* A request waiting for a lock */
	uid_t	lf_uid;		 /* User ID responsible */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static pool_cache_t lockf_cache;
static kmutex_t *lockf_lock;
static char lockstr[] = "lockf";

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFSes which support advisory locking.)
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
#define MAXLOCKSPERUID (2 * maxfiles)
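/*
 * How the limit is applied in practice: lf_alloc() below takes an
 * "allowfail" argument, and lf_advlock() picks it per operation.
 * Summarized from the code that follows:
 *
 *	allowfail 0 (F_GETLK, whole-range F_UNLCK, the spare lock):
 *	    always allocate, never fail;
 *	allowfail 1 (F_SETLK): fail once the uid holds more than
 *	    MAXLOCKSPERUID locks;
 *	allowfail 2 (F_UNLCK that may split): fail only beyond twice
 *	    MAXLOCKSPERUID, so an unlock near the limit still works.
 */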
"unlock" : 168 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); 169 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 170 if (blk->lf_flags & F_POSIX) 171 printf("; proc %d", 172 ((struct proc *)blk->lf_id)->p_pid); 173 else 174 printf("; file %p", (struct file *)blk->lf_id); 175 printf(", %s, start %jd, end %jd", 176 blk->lf_type == F_RDLCK ? "shared" : 177 blk->lf_type == F_WRLCK ? "exclusive" : 178 blk->lf_type == F_UNLCK ? "unlock" : 179 "unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end); 180 if (TAILQ_FIRST(&blk->lf_blkhd)) 181 panic("lf_printlist: bad list"); 182 } 183 printf("\n"); 184 } 185} 186#endif /* LOCKF_DEBUG */ 187 188/* 189 * 3 options for allowfail. 190 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 191 */ 192static struct lockf * 193lf_alloc(int allowfail) 194{ 195 struct uidinfo *uip; 196 struct lockf *lock; 197 u_long lcnt; 198 const uid_t uid = kauth_cred_geteuid(kauth_cred_get()); 199 200 uip = uid_find(uid); 201 lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt); 202 if (uid && allowfail && lcnt > 203 (allowfail == 1 ? MAXLOCKSPERUID : (MAXLOCKSPERUID * 2))) { 204 atomic_dec_ulong(&uip->ui_lockcnt); 205 return NULL; 206 } 207 208 lock = pool_cache_get(lockf_cache, PR_WAITOK); 209 lock->lf_uid = uid; 210 return lock; 211} 212 213static void 214lf_free(struct lockf *lock) 215{ 216 struct uidinfo *uip; 217 218 uip = uid_find(lock->lf_uid); 219 atomic_dec_ulong(&uip->ui_lockcnt); 220 pool_cache_put(lockf_cache, lock); 221} 222 223static int 224lf_ctor(void *arg, void *obj, int flag) 225{ 226 struct lockf *lock; 227 228 lock = obj; 229 cv_init(&lock->lf_cv, lockstr); 230 231 return 0; 232} 233 234static void 235lf_dtor(void *arg, void *obj) 236{ 237 struct lockf *lock; 238 239 lock = obj; 240 cv_destroy(&lock->lf_cv); 241} 242 243/* 244 * Walk the list of locks for an inode to 245 * find an overlapping lock (if any). 246 * 247 * NOTE: this returns only the FIRST overlapping lock. There 248 * may be more than one. 
/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NULL) {
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		    (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}
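/*
 * A worked example of the split performed below (hypothetical ranges):
 * if lock1 covers bytes [0..99] and lock2 covers [40..59], lock1 is
 * trimmed to [0..39], the spare lock becomes [60..99], and the list is
 * relinked as lock1 -> lock2 -> spare.  When lock2 abuts either end of
 * lock1, only two pieces result and the spare is left untouched, which
 * is why callers may pass a spare they still intend to free.
 */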
/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	cv_destroy(&splitlock->lf_cv);
	memcpy(splitlock, lock1, sizeof(*splitlock));
	cv_init(&splitlock->lf_cv, lockstr);

	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}
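/*
 * Lock compatibility, for reference: this is what the F_WRLCK test in
 * lf_getblock() below encodes.  Two overlapping locks from different
 * owners conflict unless both are shared.
 *
 *			held F_RDLCK	held F_WRLCK
 *	want F_RDLCK	ok		blocks
 *	want F_WRLCK	blocks		blocks
 */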
/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked.  Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(p->p_lock);
				if (p->p_nlwps > 1) {
					mutex_exit(p->p_lock);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wchan == NULL ||
				    wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(p->p_lock);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(p->p_lock);
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
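		/*
		 * To make the deadlock walk above concrete: setting
		 * lf_next = block is what encodes the wait graph.  A
		 * sleeping lock's lf_next names its blocker, the
		 * blocker's lf_id names the owning process, and that
		 * process's single LWP (if sleeping on lockstr) yields
		 * the next blocked lock via its wait channel.  So if
		 * process A sleeps on a lock of B's while B sleeps on a
		 * lock of A's, the walk runs A -> B -> A and returns
		 * EDEADLK above rather than sleeping here forever.
		 */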
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awoken by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;
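		/*
		 * Example of the upgrade path through the cases that
		 * follow: if this process holds a shared lock on bytes
		 * [0..99] and now requests an exclusive lock on
		 * [40..59], lf_findoverlap() reports case 2, the held
		 * lock is split around the new range, and any readers
		 * queued on it are woken to re-evaluate.
		 */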
		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp =
				    TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = lockf_lock;
	off_t start, end;
	int error = 0;

	KASSERTMSG(size >= 0, "size=%jd", (intmax_t)size);

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		if (fl->l_start > __type_max(off_t) - size)
			return EINVAL;
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}

	if (fl->l_len == 0)
		end = -1;
	else {
		if (fl->l_len >= 0) {
			if (start >= 0 &&
			    fl->l_len - 1 > __type_max(off_t) - start)
				return EINVAL;
			end = start + fl->l_len - 1;
		} else {
			/* lockf() allows -ve lengths */
			if (start < 0)
				return EINVAL;
			end = start - 1;
			start += fl->l_len;
		}
	}
	if (start < 0)
		return EINVAL;
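	/*
	 * Worked examples of the conversion above (hypothetical values):
	 * l_whence = SEEK_SET, l_start = 100, l_len = 10 covers bytes
	 * [100..109]; l_len = 0 covers [100..EOF] (end = -1); and the
	 * lockf()-style negative length l_start = 100, l_len = -10
	 * covers the bytes *before* the offset, i.e. [90..99].
	 */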
	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	switch (ap->a_op) {
	case F_SETLK:
		lock = lf_alloc(1);
		break;
	case F_UNLCK:
		if (start == 0 || end == -1) {
			/* never split */
			lock = lf_alloc(0);
		} else {
			/* might split */
			lock = lf_alloc(2);
		}
		break;
	case F_GETLK:
		lock = lf_alloc(0);
		break;
	}
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}

/*
 * Initialize subsystem.   XXX We use a global lock.  This could be the
 * vnode interlock, but the deadlock detection code may need to inspect
 * locks belonging to other files.
 */
void
lf_init(void)
{

	lockf_cache = pool_cache_init(sizeof(struct lockf), 0, 0, 0, "lockf",
	    NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
	lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}
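How a filesystem plugs into this subsystem, as a sketch: lf_advlock() is
meant to be called from a VOP_ADVLOCK implementation, passing the
per-file lock list head and the current file size. The sketch below is
modeled on how UFS wires this up; the inode fields i_lockf and i_size
are illustrative assumptions here, not declarations taken from this
revision.

/*
 * Sketch of a VOP_ADVLOCK implementation calling lf_advlock().
 * "i_lockf" and "i_size" are assumed field names for the example.
 */
int
example_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap = v;
	struct inode *ip = VTOI(ap->a_vp);

	/* Hand the byte-range request to the common lockf code. */
	return lf_advlock(ap, &ip->i_lockf, ip->i_size);
}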