/*
 * Copyright (c) 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Copyright (C) 1997
 *	John S. Dyson. All rights reserved.
 *
 * This code contains ideas from software contributed to Berkeley by
 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
 * System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
 * $FreeBSD: head/sys/kern/kern_lock.c 84812 2001-10-11 17:53:43Z jhb $
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>

/*
 * Locking primitives implementation.
 * Locks provide shared/exclusive synchronization.
 */

#define LOCK_WAIT_TIME 100
#define LOCK_SAMPLE_WAIT 7

#if defined(DIAGNOSTIC)
#define LOCK_INLINE
#else
#define LOCK_INLINE __inline
#endif

#define LK_ALL (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | \
        LK_SHARE_NONZERO | LK_WAIT_NONZERO)

/*
 * Mutex array variables. Rather than each lockmgr lock having its own mutex,
 * share a fixed (at boot time) number of mutexes across all lockmgr locks in
 * order to keep sizeof(struct lock) down.
 */
extern int lock_nmtx;
int lock_mtx_selector;
struct mtx *lock_mtx_array;
static struct mtx lock_mtx;

static int acquire(struct lock *lkp, int extflags, int wanted);
static int apause(struct lock *lkp, int flags);
static int acquiredrain(struct lock *lkp, int extflags);

static void
lockmgr_init(void *dummy __unused)
{
        int i;

        /*
         * Initialize the lockmgr protection mutex if it hasn't already been
         * done. Unless something changes about kernel startup order, VM
         * initialization will always cause this mutex to already be
         * initialized in a call to lockinit().
         */
        if (lock_mtx_selector == 0)
                mtx_init(&lock_mtx, "lockmgr", MTX_DEF);
        else {
                /*
                 * This is necessary if (lock_nmtx == 1) and doesn't hurt
                 * otherwise.
                 */
                lock_mtx_selector = 0;
        }

        lock_mtx_array = (struct mtx *)malloc(sizeof(struct mtx) * lock_nmtx,
            M_CACHE, M_WAITOK | M_ZERO);
        for (i = 0; i < lock_nmtx; i++)
                mtx_init(&lock_mtx_array[i], "lockmgr interlock", MTX_DEF);
}
SYSINIT(lmgrinit, SI_SUB_LOCK, SI_ORDER_FIRST, lockmgr_init, NULL)
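
/*
 * Hedged sketch of the round-robin interlock assignment that lockinit()
 * performs (the lock names, the PVFS priority argument, and the value of
 * lock_nmtx are illustrative, not taken from this file): with
 * lock_nmtx == 2, the first and third locks initialized after boot share
 * the same interlock, trading some false contention for a smaller
 * struct lock:
 *
 *	struct lock a, b, c;
 *
 *	lockinit(&a, PVFS, "lk_a", 0, 0);	// gets lock_mtx_array[0]
 *	lockinit(&b, PVFS, "lk_b", 0, 0);	// gets lock_mtx_array[1]
 *	lockinit(&c, PVFS, "lk_c", 0, 0);	// gets lock_mtx_array[0] again
 */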

static LOCK_INLINE void
sharelock(struct lock *lkp, int incr) {
        lkp->lk_flags |= LK_SHARE_NONZERO;
        lkp->lk_sharecount += incr;
}

static LOCK_INLINE void
shareunlock(struct lock *lkp, int decr) {

        KASSERT(lkp->lk_sharecount >= decr, ("shareunlock: count < decr"));

        if (lkp->lk_sharecount == decr) {
                lkp->lk_flags &= ~LK_SHARE_NONZERO;
                if (lkp->lk_flags & (LK_WANT_UPGRADE | LK_WANT_EXCL)) {
                        wakeup(lkp);
                }
                lkp->lk_sharecount = 0;
        } else {
                lkp->lk_sharecount -= decr;
        }
}

/*
 * This is the waitloop optimization.
 */
static int
apause(struct lock *lkp, int flags)
{
#ifdef SMP
        int i, lock_wait;
#endif

        if ((lkp->lk_flags & flags) == 0)
                return 0;
#ifdef SMP
        for (lock_wait = LOCK_WAIT_TIME; lock_wait > 0; lock_wait--) {
                mtx_unlock(lkp->lk_interlock);
                for (i = LOCK_SAMPLE_WAIT; i > 0; i--)
                        if ((lkp->lk_flags & flags) == 0)
                                break;
                mtx_lock(lkp->lk_interlock);
                if ((lkp->lk_flags & flags) == 0)
                        return 0;
        }
#endif
        return 1;
}

static int
acquire(struct lock *lkp, int extflags, int wanted) {
        int s, error;

        CTR3(KTR_LOCKMGR,
            "acquire(): lkp == %p, extflags == 0x%x, wanted == 0x%x\n",
            lkp, extflags, wanted);

        if ((extflags & LK_NOWAIT) && (lkp->lk_flags & wanted)) {
                return EBUSY;
        }

        if (((lkp->lk_flags | extflags) & LK_NOPAUSE) == 0) {
                error = apause(lkp, wanted);
                if (error == 0)
                        return 0;
        }

        s = splhigh();
        while ((lkp->lk_flags & wanted) != 0) {
                lkp->lk_flags |= LK_WAIT_NONZERO;
                lkp->lk_waitcount++;
                error = msleep(lkp, lkp->lk_interlock, lkp->lk_prio,
                    lkp->lk_wmesg, lkp->lk_timo);
                if (lkp->lk_waitcount == 1) {
                        lkp->lk_flags &= ~LK_WAIT_NONZERO;
                        lkp->lk_waitcount = 0;
                } else {
                        lkp->lk_waitcount--;
                }
                if (error) {
                        splx(s);
                        return error;
                }
                if (extflags & LK_SLEEPFAIL) {
                        splx(s);
                        return ENOLCK;
                }
        }
        splx(s);
        return 0;
}

/*
 * Set, change, or release a lock.
 *
 * Shared requests increment the shared count. Exclusive requests set the
 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
 * accepted shared locks and shared-to-exclusive upgrades to go away.
 */
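
/*
 * Minimal usage sketch (purely illustrative; "obj" and "o_lock" are
 * hypothetical names, not part of this file). Each successful
 * acquisition must be paired with an LK_RELEASE:
 *
 *	if (lockmgr(&obj->o_lock, LK_EXCLUSIVE, NULL, curthread) == 0) {
 *		... modify the protected object ...
 *		lockmgr(&obj->o_lock, LK_RELEASE, NULL, curthread);
 *	}
 */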
int
#ifndef DEBUG_LOCKS
lockmgr(lkp, flags, interlkp, td)
#else
debuglockmgr(lkp, flags, interlkp, td, name, file, line)
#endif
        struct lock *lkp;
        u_int flags;
        struct mtx *interlkp;
        struct thread *td;
#ifdef DEBUG_LOCKS
        const char *name;       /* Name of lock function */
        const char *file;       /* Name of file call is from */
        int line;               /* Line number in file */
#endif
{
        int error;
        pid_t pid;
        int extflags, lockflags;

        CTR5(KTR_LOCKMGR,
            "lockmgr(): lkp == %p (lk_wmesg == \"%s\"), flags == 0x%x, "
            "interlkp == %p, td == %p", lkp, lkp->lk_wmesg, flags, interlkp, td);

        error = 0;
        if (td == NULL)
                pid = LK_KERNPROC;
        else
                pid = td->td_proc->p_pid;

        mtx_lock(lkp->lk_interlock);
        if (flags & LK_INTERLOCK) {
                mtx_assert(interlkp, MA_OWNED | MA_NOTRECURSED);
                mtx_unlock(interlkp);
        }

        if (panicstr != NULL) {
                mtx_unlock(lkp->lk_interlock);
                return (0);
        }

        extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;

        switch (flags & LK_TYPE_MASK) {

        case LK_SHARED:
                /*
                 * If we are not the exclusive lock holder, we have to block
                 * while there is an exclusive lock holder or while an
                 * exclusive lock request or upgrade request is in progress.
                 *
                 * However, if TDF_DEADLKTREAT is set, we override exclusive
                 * lock requests or upgrade requests (but not the exclusive
                 * lock itself).
                 */
                if (lkp->lk_lockholder != pid) {
                        lockflags = LK_HAVE_EXCL;
                        mtx_lock_spin(&sched_lock);
                        if (td != NULL && !(td->td_flags & TDF_DEADLKTREAT))
                                lockflags |= LK_WANT_EXCL | LK_WANT_UPGRADE;
                        mtx_unlock_spin(&sched_lock);
                        error = acquire(lkp, extflags, lockflags);
                        if (error)
                                break;
                        sharelock(lkp, 1);
                        break;
                }
                /*
                 * We hold an exclusive lock, so downgrade it to shared.
                 * An alternative would be to fail with EDEADLK.
                 */
                sharelock(lkp, 1);
                /* fall into downgrade */

        case LK_DOWNGRADE:
                KASSERT(lkp->lk_lockholder == pid &&
                    lkp->lk_exclusivecount != 0,
                    ("lockmgr: not holding exclusive lock "
                    "(owner pid (%d) != pid (%d), exclcnt (%d) != 0)",
                    lkp->lk_lockholder, pid, lkp->lk_exclusivecount));
                sharelock(lkp, lkp->lk_exclusivecount);
                lkp->lk_exclusivecount = 0;
                lkp->lk_flags &= ~LK_HAVE_EXCL;
                lkp->lk_lockholder = LK_NOPROC;
                if (lkp->lk_waitcount)
                        wakeup((void *)lkp);
                break;

        case LK_EXCLUPGRADE:
                /*
                 * If another process is ahead of us to get an upgrade,
                 * then we want to fail rather than have an intervening
                 * exclusive access.
                 */
                if (lkp->lk_flags & LK_WANT_UPGRADE) {
                        shareunlock(lkp, 1);
                        error = EBUSY;
                        break;
                }
                /* fall into normal upgrade */

        case LK_UPGRADE:
                /*
                 * Upgrade a shared lock to an exclusive one. If another
                 * shared lock has already requested an upgrade to an
                 * exclusive lock, our shared lock is released and an
                 * exclusive lock is requested (which will be granted
                 * after the upgrade). If we return an error, the lock
                 * will always be unlocked.
                 */
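                /*
                 * Hedged illustration of the EXCLUPGRADE/UPGRADE contrast
                 * (the caller code is hypothetical): when two shared
                 * holders race to upgrade, LK_EXCLUPGRADE fails fast for
                 * the loser, releasing its shared lock and returning
                 * EBUSY, while LK_UPGRADE queues the loser behind the
                 * winner as a plain exclusive request:
                 *
                 *	if (lockmgr(lkp, LK_EXCLUPGRADE, NULL, td) == EBUSY) {
                 *		... shared lock is already gone; retry
                 *		    from scratch ...
                 *	}
                 */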
                if ((lkp->lk_lockholder == pid) || (lkp->lk_sharecount <= 0))
                        panic("lockmgr: upgrade exclusive lock");
                shareunlock(lkp, 1);
                /*
                 * If we are just polling, check to see if we will block.
                 */
                if ((extflags & LK_NOWAIT) &&
                    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
                    lkp->lk_sharecount > 1)) {
                        error = EBUSY;
                        break;
                }
                if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
                        /*
                         * We are the first shared lock to request an
                         * upgrade, so request upgrade and wait for the
                         * shared count to drop to zero, then take the
                         * exclusive lock.
                         */
                        lkp->lk_flags |= LK_WANT_UPGRADE;
                        error = acquire(lkp, extflags, LK_SHARE_NONZERO);
                        lkp->lk_flags &= ~LK_WANT_UPGRADE;

                        if (error)
                                break;
                        lkp->lk_flags |= LK_HAVE_EXCL;
                        lkp->lk_lockholder = pid;
                        if (lkp->lk_exclusivecount != 0)
                                panic("lockmgr: non-zero exclusive count");
                        lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
                        lkp->lk_filename = file;
                        lkp->lk_lineno = line;
                        lkp->lk_lockername = name;
#endif
                        break;
                }
                /*
                 * Someone else has requested upgrade. Release our shared
                 * lock, awaken upgrade requestor if we are the last shared
                 * lock, then request an exclusive lock.
                 */
                if ((lkp->lk_flags & (LK_SHARE_NONZERO | LK_WAIT_NONZERO)) ==
                    LK_WAIT_NONZERO)
                        wakeup((void *)lkp);
                /* fall into exclusive request */

        case LK_EXCLUSIVE:
                if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
                        /*
                         * Recursive lock.
                         */
                        if ((extflags & (LK_NOWAIT | LK_CANRECURSE)) == 0)
                                panic("lockmgr: locking against myself");
                        if ((extflags & LK_CANRECURSE) != 0) {
                                lkp->lk_exclusivecount++;
                                break;
                        }
                }
                /*
                 * If we are just polling, check to see if we will sleep.
                 */
                if ((extflags & LK_NOWAIT) &&
                    (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL |
                    LK_WANT_UPGRADE | LK_SHARE_NONZERO))) {
                        error = EBUSY;
                        break;
                }
                /*
                 * Try to acquire the want_exclusive flag.
                 */
                error = acquire(lkp, extflags, (LK_HAVE_EXCL | LK_WANT_EXCL));
                if (error)
                        break;
                lkp->lk_flags |= LK_WANT_EXCL;
                /*
                 * Wait for shared locks and upgrades to finish.
                 */
                error = acquire(lkp, extflags,
                    LK_WANT_UPGRADE | LK_SHARE_NONZERO);
                lkp->lk_flags &= ~LK_WANT_EXCL;
                if (error)
                        break;
                lkp->lk_flags |= LK_HAVE_EXCL;
                lkp->lk_lockholder = pid;
                if (lkp->lk_exclusivecount != 0)
                        panic("lockmgr: non-zero exclusive count");
                lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
                lkp->lk_filename = file;
                lkp->lk_lineno = line;
                lkp->lk_lockername = name;
#endif
                break;

        case LK_RELEASE:
                if (lkp->lk_exclusivecount != 0) {
                        if (lkp->lk_lockholder != pid &&
                            lkp->lk_lockholder != LK_KERNPROC) {
                                panic("lockmgr: pid %d, not %s %d unlocking",
                                    pid, "exclusive lock holder",
                                    lkp->lk_lockholder);
                        }
                        if (lkp->lk_exclusivecount == 1) {
                                lkp->lk_flags &= ~LK_HAVE_EXCL;
                                lkp->lk_lockholder = LK_NOPROC;
                                lkp->lk_exclusivecount = 0;
                        } else {
                                lkp->lk_exclusivecount--;
                        }
                } else if (lkp->lk_flags & LK_SHARE_NONZERO)
                        shareunlock(lkp, 1);
                if (lkp->lk_flags & LK_WAIT_NONZERO)
                        wakeup((void *)lkp);
                break;
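
        /*
         * Hedged sketch of the usual LK_DRAIN pattern (the "obj" structure
         * and M_TEMP malloc type are hypothetical): drain is typically the
         * last lock operation before the lock and its containing object
         * are torn down, guaranteeing no other thread is still inside:
         *
         *	lockmgr(&obj->o_lock, LK_DRAIN, NULL, curthread);
         *	lockdestroy(&obj->o_lock);
         *	free(obj, M_TEMP);
         */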
        case LK_DRAIN:
                /*
                 * Check that we do not already hold the lock, as it can
                 * never drain if we do. Unfortunately, we have no way to
                 * check for holding a shared lock, but at least we can
                 * check for an exclusive one.
                 */
                if (lkp->lk_lockholder == pid)
                        panic("lockmgr: draining against myself");

                error = acquiredrain(lkp, extflags);
                if (error)
                        break;
                lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
                lkp->lk_lockholder = pid;
                lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
                lkp->lk_filename = file;
                lkp->lk_lineno = line;
                lkp->lk_lockername = name;
#endif
                break;

        default:
                mtx_unlock(lkp->lk_interlock);
                panic("lockmgr: unknown locktype request %d",
                    flags & LK_TYPE_MASK);
                /* NOTREACHED */
        }
        if ((lkp->lk_flags & LK_WAITDRAIN) &&
            (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
            LK_SHARE_NONZERO | LK_WAIT_NONZERO)) == 0) {
                lkp->lk_flags &= ~LK_WAITDRAIN;
                wakeup((void *)&lkp->lk_flags);
        }
        mtx_unlock(lkp->lk_interlock);
        return (error);
}

static int
acquiredrain(struct lock *lkp, int extflags) {
        int error;

        if ((extflags & LK_NOWAIT) && (lkp->lk_flags & LK_ALL)) {
                return EBUSY;
        }

        error = apause(lkp, LK_ALL);
        if (error == 0)
                return 0;

        while (lkp->lk_flags & LK_ALL) {
                lkp->lk_flags |= LK_WAITDRAIN;
                error = msleep(&lkp->lk_flags, lkp->lk_interlock, lkp->lk_prio,
                    lkp->lk_wmesg, lkp->lk_timo);
                if (error)
                        return error;
                if (extflags & LK_SLEEPFAIL) {
                        return ENOLCK;
                }
        }
        return 0;
}

/*
 * Initialize a lock; required before use.
 */
void
lockinit(lkp, prio, wmesg, timo, flags)
        struct lock *lkp;
        int prio;
        char *wmesg;
        int timo;
        int flags;
{
        CTR5(KTR_LOCKMGR, "lockinit(): lkp == %p, prio == %d, wmesg == \"%s\", "
            "timo == %d, flags = 0x%x\n", lkp, prio, wmesg, timo, flags);

        if (lock_mtx_array != NULL) {
                mtx_lock(&lock_mtx);
                lkp->lk_interlock = &lock_mtx_array[lock_mtx_selector];
                lock_mtx_selector++;
                if (lock_mtx_selector == lock_nmtx)
                        lock_mtx_selector = 0;
                mtx_unlock(&lock_mtx);
        } else {
                /*
                 * Giving lockmgr locks that are initialized during boot a
                 * pointer to the internal lockmgr mutex is safe, since the
                 * lockmgr code itself doesn't call lockinit() (which could
                 * cause mutex recursion).
                 */
                if (lock_mtx_selector == 0) {
                        /*
                         * This case only happens during kernel bootstrapping,
                         * so there's no reason to protect modification of
                         * lock_mtx_selector or lock_mtx.
                         */
                        mtx_init(&lock_mtx, "lockmgr", MTX_DEF);
                        lock_mtx_selector = 1;
                }
                lkp->lk_interlock = &lock_mtx;
        }
        lkp->lk_flags = (flags & LK_EXTFLG_MASK);
        lkp->lk_sharecount = 0;
        lkp->lk_waitcount = 0;
        lkp->lk_exclusivecount = 0;
        lkp->lk_prio = prio;
        lkp->lk_wmesg = wmesg;
        lkp->lk_timo = timo;
        lkp->lk_lockholder = LK_NOPROC;
}

/*
 * Destroy a lock.
 */
void
lockdestroy(lkp)
        struct lock *lkp;
{
        CTR2(KTR_LOCKMGR, "lockdestroy(): lkp == %p (lk_wmesg == \"%s\")",
            lkp, lkp->lk_wmesg);
}

/*
 * Determine the status of a lock.
 */
int
lockstatus(lkp, td)
        struct lock *lkp;
        struct thread *td;
{
        int lock_type = 0;

        mtx_lock(lkp->lk_interlock);
        if (lkp->lk_exclusivecount != 0) {
                if (td == NULL || lkp->lk_lockholder == td->td_proc->p_pid)
                        lock_type = LK_EXCLUSIVE;
                else
                        lock_type = LK_EXCLOTHER;
        } else if (lkp->lk_sharecount != 0)
                lock_type = LK_SHARED;
        mtx_unlock(lkp->lk_interlock);
        return (lock_type);
}
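
/*
 * A minimal sketch of using lockstatus() as a holder assertion (the
 * "obj"/"o_lock" names are hypothetical):
 *
 *	KASSERT(lockstatus(&obj->o_lock, curthread) == LK_EXCLUSIVE,
 *	    ("caller must hold o_lock exclusively"));
 */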

/*
 * Determine the number of holders of a lock.
 */
int
lockcount(lkp)
        struct lock *lkp;
{
        int count;

        mtx_lock(lkp->lk_interlock);
        count = lkp->lk_exclusivecount + lkp->lk_sharecount;
        mtx_unlock(lkp->lk_interlock);
        return (count);
}

/*
 * Print out information about the state of a lock. Used by VOP_PRINT
 * routines to display status about contained locks.
 */
void
lockmgr_printinfo(lkp)
        struct lock *lkp;
{

        if (lkp->lk_sharecount)
                printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
                    lkp->lk_sharecount);
        else if (lkp->lk_flags & LK_HAVE_EXCL)
                printf(" lock type %s: EXCL (count %d) by pid %d",
                    lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
        if (lkp->lk_waitcount > 0)
                printf(" with %d pending", lkp->lk_waitcount);
}
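
/*
 * Given the formats above, lockmgr_printinfo() output looks like this
 * (values hypothetical, "inode" standing in for lk_wmesg):
 *
 *	 lock type inode: SHARED (count 2) with 1 pending
 */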