/*
 * Copyright (c) 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Copyright (C) 1997
 *	John S. Dyson.  All rights reserved.
 *
 * This code contains ideas from software contributed to Berkeley by
 * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
 * System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
 * $FreeBSD: head/sys/kern/kern_lock.c 83366 2001-09-12 08:38:13Z julian $
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>

/*
 * Locking primitives implementation.
 * Locks provide shared/exclusive synchronization.
 */

#define	LOCK_WAIT_TIME 100
#define	LOCK_SAMPLE_WAIT 7

#if defined(DIAGNOSTIC)
#define	LOCK_INLINE
#else
#define	LOCK_INLINE __inline
#endif

#define	LK_ALL (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | \
	LK_SHARE_NONZERO | LK_WAIT_NONZERO)

/*
 * Mutex array variables.  Rather than each lockmgr lock having its own mutex,
 * share a fixed (at boot time) number of mutexes across all lockmgr locks in
 * order to keep sizeof(struct lock) down.
 */
extern int lock_nmtx;
int lock_mtx_selector;
struct mtx *lock_mtx_array;
static struct mtx lock_mtx;

static int acquire(struct lock *lkp, int extflags, int wanted);
static int apause(struct lock *lkp, int flags);
static int acquiredrain(struct lock *lkp, int extflags);

static void
lockmgr_init(void *dummy __unused)
{
	int i;

	/*
	 * Initialize the lockmgr protection mutex if it hasn't already been
	 * done.  Unless something changes about kernel startup order, VM
	 * initialization will always cause this mutex to already be
	 * initialized in a call to lockinit().
	 */
	if (lock_mtx_selector == 0)
		mtx_init(&lock_mtx, "lockmgr", MTX_DEF);
	else {
		/*
		 * This is necessary if (lock_nmtx == 1) and doesn't hurt
		 * otherwise.
		 */
		lock_mtx_selector = 0;
	}

	lock_mtx_array = (struct mtx *)malloc(sizeof(struct mtx) * lock_nmtx,
	    M_CACHE, M_WAITOK);
	for (i = 0; i < lock_nmtx; i++)
		mtx_init(&lock_mtx_array[i], "lockmgr interlock", MTX_DEF);
}
SYSINIT(lmgrinit, SI_SUB_LOCK, SI_ORDER_FIRST, lockmgr_init, NULL)
static LOCK_INLINE void
sharelock(struct lock *lkp, int incr) {
	lkp->lk_flags |= LK_SHARE_NONZERO;
	lkp->lk_sharecount += incr;
}

static LOCK_INLINE void
shareunlock(struct lock *lkp, int decr) {

	KASSERT(lkp->lk_sharecount >= decr, ("shareunlock: count < decr"));

	if (lkp->lk_sharecount == decr) {
		lkp->lk_flags &= ~LK_SHARE_NONZERO;
		if (lkp->lk_flags & (LK_WANT_UPGRADE | LK_WANT_EXCL)) {
			wakeup(lkp);
		}
		lkp->lk_sharecount = 0;
	} else {
		lkp->lk_sharecount -= decr;
	}
}

/*
 * The waitloop optimization: on SMP, briefly drop the interlock and poll
 * for the wanted flags to clear before committing to a (more expensive)
 * sleep.  Returns 0 if the flags cleared, 1 if the caller must sleep.
 */
static int
apause(struct lock *lkp, int flags)
{
#ifdef SMP
	int i, lock_wait;
#endif

	if ((lkp->lk_flags & flags) == 0)
		return 0;
#ifdef SMP
	for (lock_wait = LOCK_WAIT_TIME; lock_wait > 0; lock_wait--) {
		mtx_unlock(lkp->lk_interlock);
		for (i = LOCK_SAMPLE_WAIT; i > 0; i--)
			if ((lkp->lk_flags & flags) == 0)
				break;
		mtx_lock(lkp->lk_interlock);
		if ((lkp->lk_flags & flags) == 0)
			return 0;
	}
#endif
	return 1;
}

static int
acquire(struct lock *lkp, int extflags, int wanted) {
	int s, error;

	CTR3(KTR_LOCKMGR,
	    "acquire(): lkp == %p, extflags == 0x%x, wanted == 0x%x\n",
	    lkp, extflags, wanted);

	if ((extflags & LK_NOWAIT) && (lkp->lk_flags & wanted)) {
		return EBUSY;
	}

	if (((lkp->lk_flags | extflags) & LK_NOPAUSE) == 0) {
		error = apause(lkp, wanted);
		if (error == 0)
			return 0;
	}

	s = splhigh();
	while ((lkp->lk_flags & wanted) != 0) {
		lkp->lk_flags |= LK_WAIT_NONZERO;
		lkp->lk_waitcount++;
		error = msleep(lkp, lkp->lk_interlock, lkp->lk_prio,
		    lkp->lk_wmesg, lkp->lk_timo);
		if (lkp->lk_waitcount == 1) {
			lkp->lk_flags &= ~LK_WAIT_NONZERO;
			lkp->lk_waitcount = 0;
		} else {
			lkp->lk_waitcount--;
		}
		if (error) {
			splx(s);
			return error;
		}
		if (extflags & LK_SLEEPFAIL) {
			splx(s);
			return ENOLCK;
		}
	}
	splx(s);
	return 0;
}
/*
 * Set, change, or release a lock.
 *
 * Shared requests increment the shared count.  Exclusive requests set the
 * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
 * accepted shared locks and shared-to-exclusive upgrades to go away.
 */
int
#ifndef	DEBUG_LOCKS
lockmgr(lkp, flags, interlkp, td)
#else
debuglockmgr(lkp, flags, interlkp, td, name, file, line)
#endif
	struct lock *lkp;
	u_int flags;
	struct mtx *interlkp;
	struct thread *td;
#ifdef	DEBUG_LOCKS
	const char *name;	/* Name of lock function */
	const char *file;	/* Name of file call is from */
	int line;		/* Line number in file */
#endif
{
	int error;
	pid_t pid;
	int extflags, lockflags;

	CTR5(KTR_LOCKMGR,
	    "lockmgr(): lkp == %p (lk_wmesg == \"%s\"), flags == 0x%x, "
	    "interlkp == %p, td == %p", lkp, lkp->lk_wmesg, flags, interlkp, td);

	error = 0;
	if (td == NULL)
		pid = LK_KERNPROC;
	else
		pid = td->td_proc->p_pid;

	mtx_lock(lkp->lk_interlock);
	if (flags & LK_INTERLOCK) {
		mtx_assert(interlkp, MA_OWNED | MA_NOTRECURSED);
		mtx_unlock(interlkp);
	}

	if (panicstr != NULL) {
		mtx_unlock(lkp->lk_interlock);
		return (0);
	}

	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;

	switch (flags & LK_TYPE_MASK) {

	case LK_SHARED:
		/*
		 * If we are not the exclusive lock holder, we have to block
		 * while there is an exclusive lock holder or while an
		 * exclusive lock request or upgrade request is in progress.
		 *
		 * However, if TDF_DEADLKTREAT is set, we override exclusive
		 * lock requests or upgrade requests (but not the exclusive
		 * lock itself).
		 */
		if (lkp->lk_lockholder != pid) {
			lockflags = LK_HAVE_EXCL;
			if (td) {
				PROC_LOCK(td->td_proc);
				if (!(td->td_flags & TDF_DEADLKTREAT)) {
					lockflags |= LK_WANT_EXCL |
					    LK_WANT_UPGRADE;
				}
				PROC_UNLOCK(td->td_proc);
			}
			error = acquire(lkp, extflags, lockflags);
			if (error)
				break;
			sharelock(lkp, 1);
			break;
		}
		/*
		 * We hold an exclusive lock, so downgrade it to shared.
		 * An alternative would be to fail with EDEADLK.
		 */
		sharelock(lkp, 1);
		/* fall into downgrade */

	case LK_DOWNGRADE:
		KASSERT(lkp->lk_lockholder == pid && lkp->lk_exclusivecount != 0,
		    ("lockmgr: not holding exclusive lock "
		    "(owner pid (%d) != pid (%d), exclcnt (%d) != 0)",
		    lkp->lk_lockholder, pid, lkp->lk_exclusivecount));
		sharelock(lkp, lkp->lk_exclusivecount);
		lkp->lk_exclusivecount = 0;
		lkp->lk_flags &= ~LK_HAVE_EXCL;
		lkp->lk_lockholder = LK_NOPROC;
		if (lkp->lk_waitcount)
			wakeup((void *)lkp);
		break;

	case LK_EXCLUPGRADE:
		/*
		 * If another process is ahead of us to get an upgrade,
		 * then we want to fail rather than have an intervening
		 * exclusive access.
		 */
		if (lkp->lk_flags & LK_WANT_UPGRADE) {
			shareunlock(lkp, 1);
			error = EBUSY;
			break;
		}
		/* fall into normal upgrade */

	case LK_UPGRADE:
		/*
		 * Upgrade a shared lock to an exclusive one.  If another
		 * shared lock has already requested an upgrade to an
		 * exclusive lock, our shared lock is released and an
		 * exclusive lock is requested (which will be granted
		 * after the upgrade).  If we return an error, the file
		 * will always be unlocked.
		 */
		if ((lkp->lk_lockholder == pid) || (lkp->lk_sharecount <= 0))
			panic("lockmgr: upgrade exclusive lock");
		shareunlock(lkp, 1);
		/*
		 * If we are just polling, check to see if we will block.
		 */
		if ((extflags & LK_NOWAIT) &&
		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
		    lkp->lk_sharecount > 1)) {
			error = EBUSY;
			break;
		}
		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
			/*
			 * We are the first shared lock to request an upgrade,
			 * so request upgrade and wait for the shared count to
			 * drop to zero, then take exclusive lock.
			 */
			lkp->lk_flags |= LK_WANT_UPGRADE;
			error = acquire(lkp, extflags, LK_SHARE_NONZERO);
			lkp->lk_flags &= ~LK_WANT_UPGRADE;

			if (error)
				break;
			lkp->lk_flags |= LK_HAVE_EXCL;
			lkp->lk_lockholder = pid;
			if (lkp->lk_exclusivecount != 0)
				panic("lockmgr: non-zero exclusive count");
			lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
			lkp->lk_filename = file;
			lkp->lk_lineno = line;
			lkp->lk_lockername = name;
#endif
			break;
		}
		/*
		 * Someone else has requested upgrade.  Release our shared
		 * lock, awaken upgrade requestor if we are the last shared
		 * lock, then request an exclusive lock.
		 */
		if ((lkp->lk_flags & (LK_SHARE_NONZERO | LK_WAIT_NONZERO)) ==
		    LK_WAIT_NONZERO)
			wakeup((void *)lkp);
		/* fall into exclusive request */

	case LK_EXCLUSIVE:
		if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
			/*
			 * Recursive lock.
			 */
			if ((extflags & (LK_NOWAIT | LK_CANRECURSE)) == 0)
				panic("lockmgr: locking against myself");
			if ((extflags & LK_CANRECURSE) != 0) {
				lkp->lk_exclusivecount++;
				break;
			}
		}
		/*
		 * If we are just polling, check to see if we will sleep.
		 */
		if ((extflags & LK_NOWAIT) &&
		    (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL |
		    LK_WANT_UPGRADE | LK_SHARE_NONZERO))) {
			error = EBUSY;
			break;
		}
		/*
		 * Try to acquire the want_exclusive flag.
		 */
		error = acquire(lkp, extflags, (LK_HAVE_EXCL | LK_WANT_EXCL));
		if (error)
			break;
		lkp->lk_flags |= LK_WANT_EXCL;
		/*
		 * Wait for shared locks and upgrades to finish.
		 */
		error = acquire(lkp, extflags,
		    LK_WANT_UPGRADE | LK_SHARE_NONZERO);
		lkp->lk_flags &= ~LK_WANT_EXCL;
		if (error)
			break;
		lkp->lk_flags |= LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		if (lkp->lk_exclusivecount != 0)
			panic("lockmgr: non-zero exclusive count");
		lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
		lkp->lk_filename = file;
		lkp->lk_lineno = line;
		lkp->lk_lockername = name;
#endif
		break;

	case LK_RELEASE:
		if (lkp->lk_exclusivecount != 0) {
			if (lkp->lk_lockholder != pid &&
			    lkp->lk_lockholder != LK_KERNPROC) {
				panic("lockmgr: pid %d, not %s %d unlocking",
				    pid, "exclusive lock holder",
				    lkp->lk_lockholder);
			}
			if (lkp->lk_exclusivecount == 1) {
				lkp->lk_flags &= ~LK_HAVE_EXCL;
				lkp->lk_lockholder = LK_NOPROC;
				lkp->lk_exclusivecount = 0;
			} else {
				lkp->lk_exclusivecount--;
			}
		} else if (lkp->lk_flags & LK_SHARE_NONZERO)
			shareunlock(lkp, 1);
		if (lkp->lk_flags & LK_WAIT_NONZERO)
			wakeup((void *)lkp);
		break;

	case LK_DRAIN:
		/*
		 * Check that we do not already hold the lock, as it can
		 * never drain if we do.  Unfortunately, we have no way to
		 * check for holding a shared lock, but at least we can
		 * check for an exclusive one.
		 */
		if (lkp->lk_lockholder == pid)
			panic("lockmgr: draining against myself");

		error = acquiredrain(lkp, extflags);
		if (error)
			break;
		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
		lkp->lk_lockholder = pid;
		lkp->lk_exclusivecount = 1;
#if defined(DEBUG_LOCKS)
		lkp->lk_filename = file;
		lkp->lk_lineno = line;
		lkp->lk_lockername = name;
#endif
		break;

	default:
		mtx_unlock(lkp->lk_interlock);
		panic("lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	if ((lkp->lk_flags & LK_WAITDRAIN) &&
	    (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
	    LK_SHARE_NONZERO | LK_WAIT_NONZERO)) == 0) {
		lkp->lk_flags &= ~LK_WAITDRAIN;
		wakeup((void *)&lkp->lk_flags);
	}
	mtx_unlock(lkp->lk_interlock);
	return (error);
}
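/*
 * Illustrative sketch only (not part of this file, intentionally left
 * uncompiled): how a hypothetical caller might exercise the request types
 * above.  The lock and thread arguments are assumptions made up for the
 * example; real callers are subsystems such as the VFS layer.
 */
#if 0
static void
lockmgr_example(struct lock *lkp, struct thread *td)
{
	int error;

	/* Take a shared lock; this may sleep unless LK_NOWAIT is passed. */
	error = lockmgr(lkp, LK_SHARED, NULL, td);
	if (error)
		return;

	/*
	 * Try to upgrade shared -> exclusive without letting another
	 * upgrader in ahead of us.  Per the LK_UPGRADE comment above, any
	 * error means our shared hold has already been released.
	 */
	error = lockmgr(lkp, LK_EXCLUPGRADE, NULL, td);
	if (error)
		return;

	/* The lock is now held exclusively; drop it. */
	lockmgr(lkp, LK_RELEASE, NULL, td);
}
#endif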
static int
acquiredrain(struct lock *lkp, int extflags) {
	int error;

	if ((extflags & LK_NOWAIT) && (lkp->lk_flags & LK_ALL)) {
		return EBUSY;
	}

	error = apause(lkp, LK_ALL);
	if (error == 0)
		return 0;

	while (lkp->lk_flags & LK_ALL) {
		lkp->lk_flags |= LK_WAITDRAIN;
		error = msleep(&lkp->lk_flags, lkp->lk_interlock, lkp->lk_prio,
		    lkp->lk_wmesg, lkp->lk_timo);
		if (error)
			return error;
		if (extflags & LK_SLEEPFAIL) {
			return ENOLCK;
		}
	}
	return 0;
}
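/*
 * Illustrative sketch only (an assumption, not taken from this file):
 * LK_DRAIN is meant for teardown.  It waits until no thread holds or is
 * waiting for the lock and returns with it held exclusively (LK_DRAINING |
 * LK_HAVE_EXCL set), so the containing structure can then be reclaimed
 * safely.
 */
#if 0
static void
example_teardown(struct lock *lkp, struct thread *td)
{
	if (lockmgr(lkp, LK_DRAIN, NULL, td) == 0) {
		lockdestroy(lkp);
		/* The structure containing *lkp may now be freed. */
	}
}
#endif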
/*
 * Initialize a lock; required before use.
 */
void
lockinit(lkp, prio, wmesg, timo, flags)
	struct lock *lkp;
	int prio;
	char *wmesg;
	int timo;
	int flags;
{
	CTR5(KTR_LOCKMGR, "lockinit(): lkp == %p, prio == %d, wmesg == \"%s\", "
	    "timo == %d, flags = 0x%x\n", lkp, prio, wmesg, timo, flags);

	if (lock_mtx_array != NULL) {
		mtx_lock(&lock_mtx);
		lkp->lk_interlock = &lock_mtx_array[lock_mtx_selector];
		lock_mtx_selector++;
		if (lock_mtx_selector == lock_nmtx)
			lock_mtx_selector = 0;
		mtx_unlock(&lock_mtx);
	} else {
		/*
		 * Giving lockmgr locks that are initialized during boot a
		 * pointer to the internal lockmgr mutex is safe, since the
		 * lockmgr code itself doesn't call lockinit() (which could
		 * cause mutex recursion).
		 */
		if (lock_mtx_selector == 0) {
			/*
			 * This case only happens during kernel bootstrapping,
			 * so there's no reason to protect modification of
			 * lock_mtx_selector or lock_mtx.
			 */
			mtx_init(&lock_mtx, "lockmgr", MTX_DEF);
			lock_mtx_selector = 1;
		}
		lkp->lk_interlock = &lock_mtx;
	}
	lkp->lk_flags = (flags & LK_EXTFLG_MASK);
	lkp->lk_sharecount = 0;
	lkp->lk_waitcount = 0;
	lkp->lk_exclusivecount = 0;
	lkp->lk_prio = prio;
	lkp->lk_wmesg = wmesg;
	lkp->lk_timo = timo;
	lkp->lk_lockholder = LK_NOPROC;
}

/*
 * Destroy a lock.
 */
void
lockdestroy(lkp)
	struct lock *lkp;
{
	CTR2(KTR_LOCKMGR, "lockdestroy(): lkp == %p (lk_wmesg == \"%s\")",
	    lkp, lkp->lk_wmesg);
}

/*
 * Determine the status of a lock.
 */
int
lockstatus(lkp, td)
	struct lock *lkp;
	struct thread *td;
{
	int lock_type = 0;

	mtx_lock(lkp->lk_interlock);
	if (lkp->lk_exclusivecount != 0) {
		if (td == NULL || lkp->lk_lockholder == td->td_proc->p_pid)
			lock_type = LK_EXCLUSIVE;
		else
			lock_type = LK_EXCLOTHER;
	} else if (lkp->lk_sharecount != 0)
		lock_type = LK_SHARED;
	mtx_unlock(lkp->lk_interlock);
	return (lock_type);
}
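/*
 * Illustrative sketch only (assumption): the usual lifecycle of a lockmgr
 * lock embedded in a larger structure.  The structure, the PVFS priority,
 * and the "exmplck" wait message are made up for the example.
 */
#if 0
struct example_obj {
	struct lock	eo_lock;	/* embedded lockmgr lock */
};

static void
example_obj_init(struct example_obj *eo)
{
	/* prio, wmesg, timo, flags; the wmesg shows up in printinfo below. */
	lockinit(&eo->eo_lock, PVFS, "exmplck", 0, 0);
}

static void
example_obj_fini(struct example_obj *eo, struct thread *td)
{
	/* lockstatus() returns 0 when the lock is not held at all. */
	KASSERT(lockstatus(&eo->eo_lock, td) == 0,
	    ("example_obj_fini: lock still held"));
	lockdestroy(&eo->eo_lock);
}
#endif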
/*
 * Determine the number of holders of a lock.
 */
int
lockcount(lkp)
	struct lock *lkp;
{
	int count;

	mtx_lock(lkp->lk_interlock);
	count = lkp->lk_exclusivecount + lkp->lk_sharecount;
	mtx_unlock(lkp->lk_interlock);
	return (count);
}

/*
 * Print out information about state of a lock.  Used by VOP_PRINT
 * routines to display status about contained locks.
 */
void
lockmgr_printinfo(lkp)
	struct lock *lkp;
{

	if (lkp->lk_sharecount)
		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
		    lkp->lk_sharecount);
	else if (lkp->lk_flags & LK_HAVE_EXCL)
		printf(" lock type %s: EXCL (count %d) by pid %d",
		    lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
	if (lkp->lk_waitcount > 0)
		printf(" with %d pending", lkp->lk_waitcount);
}
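/*
 * Illustrative sketch only (assumption): how a VOP_PRINT-style routine
 * might report an embedded lock via lockmgr_printinfo(), per the comment
 * above.
 */
#if 0
static void
example_print(struct lock *lkp)
{
	printf("\tlock state:");
	lockmgr_printinfo(lkp);
	printf("\n");
}
#endif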