/* vfs_trans.c revision 1.62 */
1/* $NetBSD: vfs_trans.c,v 1.62 2020/05/13 09:21:30 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.62 2020/05/13 09:21:30 hannken Exp $"); 34 35/* 36 * File system transaction operations. 
37 */ 38 39#ifdef _KERNEL_OPT 40#include "opt_ddb.h" 41#endif 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/atomic.h> 46#include <sys/buf.h> 47#include <sys/kmem.h> 48#include <sys/mount.h> 49#include <sys/pserialize.h> 50#include <sys/vnode.h> 51#include <sys/fstrans.h> 52#include <sys/proc.h> 53 54#include <miscfs/specfs/specdev.h> 55 56enum fstrans_lock_type { 57 FSTRANS_LAZY, /* Granted while not suspended */ 58 FSTRANS_SHARED /* Granted while not suspending */ 59}; 60 61struct fscow_handler { 62 LIST_ENTRY(fscow_handler) ch_list; 63 int (*ch_func)(void *, struct buf *, bool); 64 void *ch_arg; 65}; 66struct fstrans_lwp_info { 67 struct fstrans_lwp_info *fli_succ; 68 struct lwp *fli_self; 69 struct mount *fli_mount; 70 struct fstrans_lwp_info *fli_alias; 71 struct fstrans_mount_info *fli_mountinfo; 72 int fli_trans_cnt; 73 int fli_alias_cnt; 74 int fli_cow_cnt; 75 enum fstrans_lock_type fli_lock_type; 76 LIST_ENTRY(fstrans_lwp_info) fli_list; 77}; 78struct fstrans_mount_info { 79 enum fstrans_state fmi_state; 80 unsigned int fmi_ref_cnt; 81 bool fmi_gone; 82 bool fmi_cow_change; 83 LIST_HEAD(, fscow_handler) fmi_cow_handler; 84 struct mount *fmi_mount; 85 struct lwp *fmi_owner; 86}; 87 88static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 89static kmutex_t fstrans_lock; /* Fstrans big lock. */ 90static kmutex_t fstrans_mount_lock; /* Fstrans mount big lock. */ 91static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 92static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 93static pserialize_t fstrans_psz; /* Pserialize state. */ 94static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 95 /* List of all fstrans_lwp_info. */ 96static int fstrans_gone_count; /* Number of fstrans_mount_info gone. 
*/ 97 98static void fstrans_mount_dtor(struct fstrans_mount_info *); 99static void fstrans_clear_lwp_info(void); 100static inline struct fstrans_lwp_info * 101 fstrans_get_lwp_info(struct mount *, bool); 102static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 103static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 104static bool grant_lock(const struct fstrans_mount_info *, 105 const enum fstrans_lock_type); 106static bool state_change_done(const struct fstrans_mount_info *); 107static bool cow_state_change_done(const struct fstrans_mount_info *); 108static void cow_change_enter(struct fstrans_mount_info *); 109static void cow_change_done(struct fstrans_mount_info *); 110 111extern struct mount *dead_rootmount; 112 113#if defined(DIAGNOSTIC) 114 115struct fstrans_debug_mount { 116 struct mount *fdm_mount; 117 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 118}; 119 120static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 121 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 122 123static void 124fstrans_debug_mount(struct mount *mp) 125{ 126 struct fstrans_debug_mount *fdm, *new; 127 128 KASSERT(mutex_owned(&fstrans_mount_lock)); 129 130 mutex_exit(&fstrans_mount_lock); 131 new = kmem_alloc(sizeof(*new), KM_SLEEP); 132 new->fdm_mount = mp; 133 mutex_enter(&fstrans_mount_lock); 134 135 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 136 KASSERT(fdm->fdm_mount != mp); 137 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 138} 139 140static void 141fstrans_debug_unmount(struct mount *mp) 142{ 143 struct fstrans_debug_mount *fdm; 144 145 KASSERT(mutex_owned(&fstrans_mount_lock)); 146 147 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 148 if (fdm->fdm_mount == mp) 149 break; 150 KASSERT(fdm != NULL); 151 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 152 fstrans_debug_mount, fdm_list); 153 kmem_free(fdm, sizeof(*fdm)); 154} 155 156static void 157fstrans_debug_validate_mount(struct mount 
*mp) 158{ 159 struct fstrans_debug_mount *fdm; 160 161 KASSERT(mutex_owned(&fstrans_mount_lock)); 162 163 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 164 if (fdm->fdm_mount == mp) 165 break; 166 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 167} 168 169#else /* defined(DIAGNOSTIC) */ 170 171#define fstrans_debug_mount(mp) 172#define fstrans_debug_unmount(mp) 173#define fstrans_debug_validate_mount(mp) 174 175#endif /* defined(DIAGNOSTIC) */ 176 177/* 178 * Initialize. 179 */ 180void 181fstrans_init(void) 182{ 183 184 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 185 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 186 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE); 187 cv_init(&fstrans_state_cv, "fstchg"); 188 cv_init(&fstrans_count_cv, "fstcnt"); 189 fstrans_psz = pserialize_create(); 190 LIST_INIT(&fstrans_fli_head); 191} 192 193/* 194 * Deallocate lwp state. 195 */ 196void 197fstrans_lwp_dtor(lwp_t *l) 198{ 199 struct fstrans_lwp_info *fli, *fli_next; 200 201 for (fli = l->l_fstrans; fli; fli = fli_next) { 202 KASSERT(fli->fli_trans_cnt == 0); 203 KASSERT(fli->fli_cow_cnt == 0); 204 KASSERT(fli->fli_self == l); 205 if (fli->fli_mount != NULL) 206 fstrans_mount_dtor(fli->fli_mountinfo); 207 fli_next = fli->fli_succ; 208 fli->fli_alias_cnt = 0; 209 fli->fli_mount = NULL; 210 fli->fli_alias = NULL; 211 fli->fli_mountinfo = NULL; 212 membar_sync(); 213 fli->fli_self = NULL; 214 } 215 216 l->l_fstrans = NULL; 217} 218 219/* 220 * Dereference mount state. 
221 */ 222static void 223fstrans_mount_dtor(struct fstrans_mount_info *fmi) 224{ 225 226 mutex_enter(&fstrans_mount_lock); 227 228 KASSERT(fmi != NULL); 229 fmi->fmi_ref_cnt -= 1; 230 if (fmi->fmi_ref_cnt > 0) { 231 mutex_exit(&fstrans_mount_lock); 232 return; 233 } 234 235 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 236 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 237 KASSERT(fmi->fmi_owner == NULL); 238 239 KASSERT(fstrans_gone_count > 0); 240 fstrans_gone_count -= 1; 241 242 mutex_exit(&fstrans_mount_lock); 243 244 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 245 kmem_free(fmi, sizeof(*fmi)); 246} 247 248/* 249 * Allocate mount state. 250 */ 251int 252fstrans_mount(struct mount *mp) 253{ 254 struct fstrans_mount_info *newfmi; 255 256 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 257 newfmi->fmi_state = FSTRANS_NORMAL; 258 newfmi->fmi_ref_cnt = 1; 259 newfmi->fmi_gone = false; 260 LIST_INIT(&newfmi->fmi_cow_handler); 261 newfmi->fmi_cow_change = false; 262 newfmi->fmi_mount = mp; 263 newfmi->fmi_owner = NULL; 264 265 mutex_enter(&fstrans_mount_lock); 266 mp->mnt_transinfo = newfmi; 267 fstrans_debug_mount(mp); 268 mutex_exit(&fstrans_mount_lock); 269 270 return 0; 271} 272 273/* 274 * Deallocate mount state. 275 */ 276void 277fstrans_unmount(struct mount *mp) 278{ 279 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 280 281 KASSERT(fmi != NULL); 282 283 mutex_enter(&fstrans_mount_lock); 284 fstrans_debug_unmount(mp); 285 fmi->fmi_gone = true; 286 mp->mnt_transinfo = NULL; 287 fstrans_gone_count += 1; 288 mutex_exit(&fstrans_mount_lock); 289 290 fstrans_mount_dtor(fmi); 291} 292 293/* 294 * Clear mount entries whose mount is gone. 295 */ 296static void 297fstrans_clear_lwp_info(void) 298{ 299 struct fstrans_lwp_info **p, *fli; 300 301 /* 302 * Scan our list clearing entries whose mount is gone. 
303 */ 304 for (p = &curlwp->l_fstrans; *p; ) { 305 fli = *p; 306 if (fli->fli_mount != NULL && 307 fli->fli_mountinfo->fmi_gone && 308 fli->fli_trans_cnt == 0 && 309 fli->fli_cow_cnt == 0 && 310 fli->fli_alias_cnt == 0) { 311 *p = (*p)->fli_succ; 312 fstrans_mount_dtor(fli->fli_mountinfo); 313 if (fli->fli_alias) { 314 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 315 fli->fli_alias->fli_alias_cnt--; 316 } 317 fli->fli_mount = NULL; 318 fli->fli_alias = NULL; 319 fli->fli_mountinfo = NULL; 320 membar_sync(); 321 fli->fli_self = NULL; 322 p = &curlwp->l_fstrans; 323 } else { 324 p = &(*p)->fli_succ; 325 } 326 } 327#ifdef DIAGNOSTIC 328 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 329 if (fli->fli_alias != NULL) 330 KASSERT(fli->fli_alias->fli_self == curlwp); 331#endif /* DIAGNOSTIC */ 332} 333 334/* 335 * Allocate and return per lwp info for this mount. 336 */ 337static struct fstrans_lwp_info * 338fstrans_alloc_lwp_info(struct mount *mp) 339{ 340 struct fstrans_lwp_info *fli; 341 struct fstrans_mount_info *fmi; 342 343 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 344 if (fli->fli_mount == mp) 345 return fli; 346 } 347 348 /* 349 * Try to reuse a cleared entry or allocate a new one. 
350 */ 351 mutex_enter(&fstrans_lock); 352 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 353 membar_sync(); 354 if (fli->fli_self == NULL) { 355 KASSERT(fli->fli_mount == NULL); 356 KASSERT(fli->fli_trans_cnt == 0); 357 KASSERT(fli->fli_cow_cnt == 0); 358 KASSERT(fli->fli_alias_cnt == 0); 359 fli->fli_self = curlwp; 360 fli->fli_succ = curlwp->l_fstrans; 361 curlwp->l_fstrans = fli; 362 break; 363 } 364 } 365 mutex_exit(&fstrans_lock); 366 367 if (fli == NULL) { 368 fli = kmem_alloc(sizeof(*fli), KM_SLEEP); 369 mutex_enter(&fstrans_lock); 370 memset(fli, 0, sizeof(*fli)); 371 fli->fli_self = curlwp; 372 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 373 mutex_exit(&fstrans_lock); 374 fli->fli_succ = curlwp->l_fstrans; 375 curlwp->l_fstrans = fli; 376 } 377 378 /* 379 * Attach the entry to the mount if its mnt_transinfo is valid. 380 */ 381 382 mutex_enter(&fstrans_mount_lock); 383 fstrans_debug_validate_mount(mp); 384 fmi = mp->mnt_transinfo; 385 KASSERT(fmi != NULL); 386 fli->fli_mount = mp; 387 fli->fli_mountinfo = fmi; 388 fmi->fmi_ref_cnt += 1; 389 do { 390 mp = mp->mnt_lower; 391 } while (mp && mp->mnt_lower); 392 mutex_exit(&fstrans_mount_lock); 393 394 if (mp) { 395 fli->fli_alias = fstrans_alloc_lwp_info(mp); 396 fli->fli_alias->fli_alias_cnt++; 397 fli = fli->fli_alias; 398 } 399 400 return fli; 401} 402 403/* 404 * Retrieve the per lwp info for this mount allocating if necessary. 405 */ 406static inline struct fstrans_lwp_info * 407fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 408{ 409 struct fstrans_lwp_info *fli; 410 411 /* 412 * Scan our list for a match. 
413 */ 414 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 415 if (fli->fli_mount == mp) { 416 KASSERT((mp->mnt_lower == NULL) == 417 (fli->fli_alias == NULL)); 418 if (fli->fli_alias != NULL) 419 fli = fli->fli_alias; 420 break; 421 } 422 } 423 424 if (do_alloc) { 425 if (__predict_false(fli == NULL)) 426 fli = fstrans_alloc_lwp_info(mp); 427 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone); 428 } else { 429 KASSERT(fli != NULL); 430 } 431 432 return fli; 433} 434 435/* 436 * Check if this lock type is granted at this state. 437 */ 438static bool 439grant_lock(const struct fstrans_mount_info *fmi, 440 const enum fstrans_lock_type type) 441{ 442 443 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 444 return true; 445 if (fmi->fmi_owner == curlwp) 446 return true; 447 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 448 return true; 449 450 return false; 451} 452 453/* 454 * Start a transaction. If this thread already has a transaction on this 455 * file system increment the reference counter. 456 */ 457static inline int 458_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 459{ 460 int s; 461 struct fstrans_lwp_info *fli; 462 struct fstrans_mount_info *fmi; 463 464#ifndef FSTRANS_DEAD_ENABLED 465 if (mp == dead_rootmount) 466 return 0; 467#endif 468 469 ASSERT_SLEEPABLE(); 470 471 fli = fstrans_get_lwp_info(mp, true); 472 fmi = fli->fli_mountinfo; 473 474 if (fli->fli_trans_cnt > 0) { 475 fli->fli_trans_cnt += 1; 476 477 return 0; 478 } 479 480 s = pserialize_read_enter(); 481 if (__predict_true(grant_lock(fmi, lock_type))) { 482 fli->fli_trans_cnt = 1; 483 fli->fli_lock_type = lock_type; 484 pserialize_read_exit(s); 485 486 return 0; 487 } 488 pserialize_read_exit(s); 489 490 if (! wait) 491 return EBUSY; 492 493 mutex_enter(&fstrans_lock); 494 while (! 
grant_lock(fmi, lock_type)) 495 cv_wait(&fstrans_state_cv, &fstrans_lock); 496 fli->fli_trans_cnt = 1; 497 fli->fli_lock_type = lock_type; 498 mutex_exit(&fstrans_lock); 499 500 return 0; 501} 502 503void 504fstrans_start(struct mount *mp) 505{ 506 int error __diagused; 507 508 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 509 KASSERT(error == 0); 510} 511 512int 513fstrans_start_nowait(struct mount *mp) 514{ 515 516 return _fstrans_start(mp, FSTRANS_SHARED, 0); 517} 518 519void 520fstrans_start_lazy(struct mount *mp) 521{ 522 int error __diagused; 523 524 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 525 KASSERT(error == 0); 526} 527 528/* 529 * Finish a transaction. 530 */ 531void 532fstrans_done(struct mount *mp) 533{ 534 int s; 535 struct fstrans_lwp_info *fli; 536 struct fstrans_mount_info *fmi; 537 538#ifndef FSTRANS_DEAD_ENABLED 539 if (mp == dead_rootmount) 540 return; 541#endif 542 543 fli = fstrans_get_lwp_info(mp, false); 544 fmi = fli->fli_mountinfo; 545 KASSERT(fli->fli_trans_cnt > 0); 546 547 if (fli->fli_trans_cnt > 1) { 548 fli->fli_trans_cnt -= 1; 549 550 return; 551 } 552 553 if (__predict_false(fstrans_gone_count > 0)) 554 fstrans_clear_lwp_info(); 555 556 s = pserialize_read_enter(); 557 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 558 fli->fli_trans_cnt = 0; 559 pserialize_read_exit(s); 560 561 return; 562 } 563 pserialize_read_exit(s); 564 565 mutex_enter(&fstrans_lock); 566 fli->fli_trans_cnt = 0; 567 cv_signal(&fstrans_count_cv); 568 mutex_exit(&fstrans_lock); 569} 570 571/* 572 * Check if we hold an lock. 573 */ 574int 575fstrans_held(struct mount *mp) 576{ 577 struct fstrans_lwp_info *fli; 578 struct fstrans_mount_info *fmi; 579 580 KASSERT(mp != dead_rootmount); 581 582 fli = fstrans_get_lwp_info(mp, true); 583 fmi = fli->fli_mountinfo; 584 585 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); 586} 587 588/* 589 * Check if this thread has an exclusive lock. 
590 */ 591int 592fstrans_is_owner(struct mount *mp) 593{ 594 struct fstrans_lwp_info *fli; 595 struct fstrans_mount_info *fmi; 596 597 KASSERT(mp != dead_rootmount); 598 599 fli = fstrans_get_lwp_info(mp, true); 600 fmi = fli->fli_mountinfo; 601 602 return (fmi->fmi_owner == curlwp); 603} 604 605/* 606 * True, if no thread is in a transaction not granted at the current state. 607 */ 608static bool 609state_change_done(const struct fstrans_mount_info *fmi) 610{ 611 struct fstrans_lwp_info *fli; 612 613 KASSERT(mutex_owned(&fstrans_lock)); 614 615 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 616 if (fli->fli_mountinfo != fmi) 617 continue; 618 if (fli->fli_trans_cnt == 0) 619 continue; 620 if (fli->fli_self == curlwp) 621 continue; 622 if (grant_lock(fmi, fli->fli_lock_type)) 623 continue; 624 625 return false; 626 } 627 628 return true; 629} 630 631/* 632 * Set new file system state. 633 */ 634int 635fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 636{ 637 int error; 638 enum fstrans_state old_state; 639 struct fstrans_lwp_info *fli; 640 struct fstrans_mount_info *fmi; 641 642 KASSERT(mp != dead_rootmount); 643 644 fli = fstrans_get_lwp_info(mp, true); 645 fmi = fli->fli_mountinfo; 646 old_state = fmi->fmi_state; 647 if (old_state == new_state) 648 return 0; 649 650 mutex_enter(&fstrans_lock); 651 fmi->fmi_state = new_state; 652 pserialize_perform(fstrans_psz); 653 654 /* 655 * All threads see the new state now. 656 * Wait for transactions invalid at this state to leave. 657 */ 658 error = 0; 659 while (! 
state_change_done(fmi)) { 660 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 661 if (error) { 662 new_state = fmi->fmi_state = FSTRANS_NORMAL; 663 break; 664 } 665 } 666 if (old_state != new_state) { 667 if (old_state == FSTRANS_NORMAL) { 668 KASSERT(fmi->fmi_owner == NULL); 669 fmi->fmi_owner = curlwp; 670 } 671 if (new_state == FSTRANS_NORMAL) { 672 KASSERT(fmi->fmi_owner == curlwp); 673 fmi->fmi_owner = NULL; 674 } 675 } 676 cv_broadcast(&fstrans_state_cv); 677 mutex_exit(&fstrans_lock); 678 679 return error; 680} 681 682/* 683 * Get current file system state. 684 */ 685enum fstrans_state 686fstrans_getstate(struct mount *mp) 687{ 688 struct fstrans_lwp_info *fli; 689 struct fstrans_mount_info *fmi; 690 691 KASSERT(mp != dead_rootmount); 692 693 fli = fstrans_get_lwp_info(mp, true); 694 fmi = fli->fli_mountinfo; 695 696 return fmi->fmi_state; 697} 698 699/* 700 * Request a filesystem to suspend all operations. 701 */ 702int 703vfs_suspend(struct mount *mp, int nowait) 704{ 705 struct fstrans_lwp_info *fli; 706 int error; 707 708 if (mp == dead_rootmount) 709 return EOPNOTSUPP; 710 711 fli = fstrans_get_lwp_info(mp, true); 712 mp = fli->fli_mount; 713 714 if (nowait) { 715 if (!mutex_tryenter(&vfs_suspend_lock)) 716 return EWOULDBLOCK; 717 } else 718 mutex_enter(&vfs_suspend_lock); 719 720 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 721 mutex_exit(&vfs_suspend_lock); 722 723 return error; 724} 725 726/* 727 * Request a filesystem to resume all operations. 728 */ 729void 730vfs_resume(struct mount *mp) 731{ 732 struct fstrans_lwp_info *fli; 733 734 KASSERT(mp != dead_rootmount); 735 736 fli = fstrans_get_lwp_info(mp, false); 737 mp = fli->fli_mount; 738 739 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 740 mutex_exit(&vfs_suspend_lock); 741} 742 743 744/* 745 * True, if no thread is running a cow handler. 
746 */ 747static bool 748cow_state_change_done(const struct fstrans_mount_info *fmi) 749{ 750 struct fstrans_lwp_info *fli; 751 752 KASSERT(mutex_owned(&fstrans_lock)); 753 KASSERT(fmi->fmi_cow_change); 754 755 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 756 if (fli->fli_mount != fmi->fmi_mount) 757 continue; 758 if (fli->fli_cow_cnt == 0) 759 continue; 760 761 return false; 762 } 763 764 return true; 765} 766 767/* 768 * Prepare for changing this mounts cow list. 769 * Returns with fstrans_lock locked. 770 */ 771static void 772cow_change_enter(struct fstrans_mount_info *fmi) 773{ 774 775 mutex_enter(&fstrans_lock); 776 777 /* 778 * Wait for other threads changing the list. 779 */ 780 while (fmi->fmi_cow_change) 781 cv_wait(&fstrans_state_cv, &fstrans_lock); 782 783 /* 784 * Wait until all threads are aware of a state change. 785 */ 786 fmi->fmi_cow_change = true; 787 pserialize_perform(fstrans_psz); 788 789 while (! cow_state_change_done(fmi)) 790 cv_wait(&fstrans_count_cv, &fstrans_lock); 791} 792 793/* 794 * Done changing this mounts cow list. 795 */ 796static void 797cow_change_done(struct fstrans_mount_info *fmi) 798{ 799 800 KASSERT(mutex_owned(&fstrans_lock)); 801 802 fmi->fmi_cow_change = false; 803 pserialize_perform(fstrans_psz); 804 805 cv_broadcast(&fstrans_state_cv); 806 807 mutex_exit(&fstrans_lock); 808} 809 810/* 811 * Add a handler to this mount. 
812 */ 813int 814fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 815 void *arg) 816{ 817 struct fstrans_mount_info *fmi; 818 struct fscow_handler *newch; 819 820 KASSERT(mp != dead_rootmount); 821 822 mutex_enter(&fstrans_mount_lock); 823 fmi = mp->mnt_transinfo; 824 KASSERT(fmi != NULL); 825 fmi->fmi_ref_cnt += 1; 826 mutex_exit(&fstrans_mount_lock); 827 828 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 829 newch->ch_func = func; 830 newch->ch_arg = arg; 831 832 cow_change_enter(fmi); 833 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 834 cow_change_done(fmi); 835 836 return 0; 837} 838 839/* 840 * Remove a handler from this mount. 841 */ 842int 843fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 844 void *arg) 845{ 846 struct fstrans_mount_info *fmi; 847 struct fscow_handler *hp = NULL; 848 849 KASSERT(mp != dead_rootmount); 850 851 fmi = mp->mnt_transinfo; 852 KASSERT(fmi != NULL); 853 854 cow_change_enter(fmi); 855 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 856 if (hp->ch_func == func && hp->ch_arg == arg) 857 break; 858 if (hp != NULL) { 859 LIST_REMOVE(hp, ch_list); 860 kmem_free(hp, sizeof(*hp)); 861 } 862 cow_change_done(fmi); 863 864 fstrans_mount_dtor(fmi); 865 866 return hp ? 0 : EINVAL; 867} 868 869/* 870 * Check for need to copy block that is about to be written. 871 */ 872int 873fscow_run(struct buf *bp, bool data_valid) 874{ 875 int error, s; 876 struct mount *mp; 877 struct fstrans_lwp_info *fli; 878 struct fstrans_mount_info *fmi; 879 struct fscow_handler *hp; 880 881 /* 882 * First check if we need run the copy-on-write handler. 
883 */ 884 if ((bp->b_flags & B_COWDONE)) 885 return 0; 886 if (bp->b_vp == NULL) { 887 bp->b_flags |= B_COWDONE; 888 return 0; 889 } 890 if (bp->b_vp->v_type == VBLK) 891 mp = spec_node_getmountedfs(bp->b_vp); 892 else 893 mp = bp->b_vp->v_mount; 894 if (mp == NULL || mp == dead_rootmount) { 895 bp->b_flags |= B_COWDONE; 896 return 0; 897 } 898 899 fli = fstrans_get_lwp_info(mp, true); 900 fmi = fli->fli_mountinfo; 901 902 /* 903 * On non-recursed run check if other threads 904 * want to change the list. 905 */ 906 if (fli->fli_cow_cnt == 0) { 907 s = pserialize_read_enter(); 908 if (__predict_false(fmi->fmi_cow_change)) { 909 pserialize_read_exit(s); 910 mutex_enter(&fstrans_lock); 911 while (fmi->fmi_cow_change) 912 cv_wait(&fstrans_state_cv, &fstrans_lock); 913 fli->fli_cow_cnt = 1; 914 mutex_exit(&fstrans_lock); 915 } else { 916 fli->fli_cow_cnt = 1; 917 pserialize_read_exit(s); 918 } 919 } else 920 fli->fli_cow_cnt += 1; 921 922 /* 923 * Run all copy-on-write handlers, stop on error. 924 */ 925 error = 0; 926 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 927 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 928 break; 929 if (error == 0) 930 bp->b_flags |= B_COWDONE; 931 932 /* 933 * Check if other threads want to change the list. 
934 */ 935 if (fli->fli_cow_cnt > 1) { 936 fli->fli_cow_cnt -= 1; 937 } else { 938 s = pserialize_read_enter(); 939 if (__predict_false(fmi->fmi_cow_change)) { 940 pserialize_read_exit(s); 941 mutex_enter(&fstrans_lock); 942 fli->fli_cow_cnt = 0; 943 cv_signal(&fstrans_count_cv); 944 mutex_exit(&fstrans_lock); 945 } else { 946 fli->fli_cow_cnt = 0; 947 pserialize_read_exit(s); 948 } 949 } 950 951 return error; 952} 953 954#if defined(DDB) 955void fstrans_dump(int); 956 957static void 958fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 959{ 960 char prefix[9]; 961 struct fstrans_lwp_info *fli; 962 963 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 964 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 965 if (fli->fli_self != l) 966 continue; 967 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 968 if (! verbose) 969 continue; 970 } 971 printf("%-8s", prefix); 972 if (verbose) 973 printf(" @%p", fli); 974 if (fli->fli_mount == dead_rootmount) 975 printf(" <dead>"); 976 else if (fli->fli_mount != NULL) 977 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 978 else 979 printf(" NULL"); 980 if (fli->fli_alias != NULL) { 981 struct mount *amp = fli->fli_alias->fli_mount; 982 983 printf(" alias"); 984 if (verbose) 985 printf(" @%p", fli->fli_alias); 986 if (amp == NULL) 987 printf(" NULL"); 988 else 989 printf(" (%s)", amp->mnt_stat.f_mntonname); 990 } 991 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 992 printf(" gone"); 993 if (fli->fli_trans_cnt == 0) { 994 printf(" -"); 995 } else { 996 switch (fli->fli_lock_type) { 997 case FSTRANS_LAZY: 998 printf(" lazy"); 999 break; 1000 case FSTRANS_SHARED: 1001 printf(" shared"); 1002 break; 1003 default: 1004 printf(" %#x", fli->fli_lock_type); 1005 break; 1006 } 1007 } 1008 printf(" %d cow %d alias %d\n", 1009 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 1010 prefix[0] = '\0'; 1011 } 1012} 1013 1014static void 1015fstrans_print_mount(struct mount *mp, int verbose) 
1016{ 1017 struct fstrans_mount_info *fmi; 1018 1019 fmi = mp->mnt_transinfo; 1020 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1021 return; 1022 1023 printf("%-16s ", mp->mnt_stat.f_mntonname); 1024 if (fmi == NULL) { 1025 printf("(null)\n"); 1026 return; 1027 } 1028 printf("owner %p ", fmi->fmi_owner); 1029 switch (fmi->fmi_state) { 1030 case FSTRANS_NORMAL: 1031 printf("state normal\n"); 1032 break; 1033 case FSTRANS_SUSPENDING: 1034 printf("state suspending\n"); 1035 break; 1036 case FSTRANS_SUSPENDED: 1037 printf("state suspended\n"); 1038 break; 1039 default: 1040 printf("state %#x\n", fmi->fmi_state); 1041 break; 1042 } 1043} 1044 1045void 1046fstrans_dump(int full) 1047{ 1048 const struct proclist_desc *pd; 1049 struct proc *p; 1050 struct lwp *l; 1051 struct mount *mp; 1052 1053 printf("Fstrans locks by lwp:\n"); 1054 for (pd = proclists; pd->pd_list != NULL; pd++) 1055 PROCLIST_FOREACH(p, pd->pd_list) 1056 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1057 fstrans_print_lwp(p, l, full == 1); 1058 1059 printf("Fstrans state by mount:\n"); 1060 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1061 fstrans_print_mount(mp, full == 1); 1062} 1063#endif /* defined(DDB) */ 1064