vfs_trans.c revision 1.57
1/* $NetBSD: vfs_trans.c,v 1.57 2019/03/01 09:02:03 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.57 2019/03/01 09:02:03 hannken Exp $"); 34 35/* 36 * File system transaction operations. 37 */ 38 39#ifdef _KERNEL_OPT 40#include "opt_ddb.h" 41#endif 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/atomic.h> 46#include <sys/buf.h> 47#include <sys/kmem.h> 48#include <sys/mount.h> 49#include <sys/pserialize.h> 50#include <sys/vnode.h> 51#include <sys/fstrans.h> 52#include <sys/proc.h> 53 54#include <miscfs/specfs/specdev.h> 55 56enum fstrans_lock_type { 57 FSTRANS_LAZY, /* Granted while not suspended */ 58 FSTRANS_SHARED, /* Granted while not suspending */ 59 FSTRANS_EXCL /* Internal: exclusive lock */ 60}; 61 62struct fscow_handler { 63 LIST_ENTRY(fscow_handler) ch_list; 64 int (*ch_func)(void *, struct buf *, bool); 65 void *ch_arg; 66}; 67struct fstrans_lwp_info { 68 struct fstrans_lwp_info *fli_succ; 69 struct lwp *fli_self; 70 struct mount *fli_mount; 71 struct mount *fli_alias; 72 struct fstrans_mount_info *fli_mountinfo; 73 int fli_trans_cnt; 74 int fli_cow_cnt; 75 enum fstrans_lock_type fli_lock_type; 76 LIST_ENTRY(fstrans_lwp_info) fli_list; 77}; 78struct fstrans_mount_info { 79 enum fstrans_state fmi_state; 80 unsigned int fmi_ref_cnt; 81 bool fmi_gone; 82 bool fmi_cow_change; 83 LIST_HEAD(, fscow_handler) fmi_cow_handler; 84 struct mount *fmi_mount; 85}; 86 87static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 88static kmutex_t fstrans_lock; /* Fstrans big lock. */ 89static kmutex_t fstrans_mount_lock; /* Fstrans mount big lock. */ 90static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 91static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 92static pserialize_t fstrans_psz; /* Pserialize state. */ 93static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 94 /* List of all fstrans_lwp_info. */ 95static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 96 97static void fstrans_mount_dtor(struct fstrans_mount_info *); 98static void fstrans_clear_lwp_info(void); 99static inline struct fstrans_lwp_info * 100 fstrans_get_lwp_info(struct mount *, bool); 101static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 102static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 103static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type); 104static bool state_change_done(const struct fstrans_mount_info *); 105static bool cow_state_change_done(const struct fstrans_mount_info *); 106static void cow_change_enter(struct fstrans_mount_info *); 107static void cow_change_done(struct fstrans_mount_info *); 108 109extern struct mount *dead_rootmount; 110 111#if defined(DIAGNOSTIC) 112 113struct fstrans_debug_mount { 114 struct mount *fdm_mount; 115 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 116}; 117 118static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 119 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 120 121static void 122fstrans_debug_mount(struct mount *mp) 123{ 124 struct fstrans_debug_mount *fdm, *new; 125 126 KASSERT(mutex_owned(&fstrans_mount_lock)); 127 128 mutex_exit(&fstrans_mount_lock); 129 new = kmem_alloc(sizeof(*new), KM_SLEEP); 130 new->fdm_mount = mp; 131 mutex_enter(&fstrans_mount_lock); 132 133 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 134 KASSERT(fdm->fdm_mount != mp); 135 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 136} 137 138static void 139fstrans_debug_unmount(struct mount *mp) 140{ 141 struct fstrans_debug_mount *fdm; 142 143 KASSERT(mutex_owned(&fstrans_mount_lock)); 144 145 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 146 if (fdm->fdm_mount == mp) 147 break; 148 KASSERT(fdm != NULL); 149 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 150 fstrans_debug_mount, fdm_list); 151 kmem_free(fdm, sizeof(*fdm)); 152} 153 154static void 155fstrans_debug_validate_mount(struct mount *mp) 156{ 157 struct fstrans_debug_mount *fdm; 158 159 KASSERT(mutex_owned(&fstrans_mount_lock)); 160 161 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 162 if (fdm->fdm_mount == mp) 163 break; 164 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 165} 166 167#else /* defined(DIAGNOSTIC) */ 168 169#define fstrans_debug_mount(mp) 170#define fstrans_debug_unmount(mp) 171#define fstrans_debug_validate_mount(mp) 172 173#endif /* defined(DIAGNOSTIC) */ 174 175/* 176 * Initialize. 177 */ 178void 179fstrans_init(void) 180{ 181 182 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 183 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 184 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE); 185 cv_init(&fstrans_state_cv, "fstchg"); 186 cv_init(&fstrans_count_cv, "fstcnt"); 187 fstrans_psz = pserialize_create(); 188 LIST_INIT(&fstrans_fli_head); 189} 190 191/* 192 * Deallocate lwp state. 193 */ 194void 195fstrans_lwp_dtor(lwp_t *l) 196{ 197 struct fstrans_lwp_info *fli, *fli_next; 198 199 for (fli = l->l_fstrans; fli; fli = fli_next) { 200 KASSERT(fli->fli_trans_cnt == 0); 201 KASSERT(fli->fli_cow_cnt == 0); 202 KASSERT(fli->fli_self == l); 203 if (fli->fli_mount != NULL) 204 fstrans_mount_dtor(fli->fli_mountinfo); 205 fli_next = fli->fli_succ; 206 fli->fli_mount = NULL; 207 fli->fli_alias = NULL; 208 fli->fli_mountinfo = NULL; 209 membar_sync(); 210 fli->fli_self = NULL; 211 } 212 213 l->l_fstrans = NULL; 214} 215 216/* 217 * Dereference mount state. 218 */ 219static void 220fstrans_mount_dtor(struct fstrans_mount_info *fmi) 221{ 222 223 mutex_enter(&fstrans_mount_lock); 224 225 KASSERT(fmi != NULL); 226 fmi->fmi_ref_cnt -= 1; 227 if (fmi->fmi_ref_cnt > 0) { 228 mutex_exit(&fstrans_mount_lock); 229 return; 230 } 231 232 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 233 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 234 235 KASSERT(fstrans_gone_count > 0); 236 fstrans_gone_count -= 1; 237 238 mutex_exit(&fstrans_mount_lock); 239 240 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 241 kmem_free(fmi, sizeof(*fmi)); 242} 243 244/* 245 * Allocate mount state. 246 */ 247int 248fstrans_mount(struct mount *mp) 249{ 250 struct fstrans_mount_info *newfmi; 251 252 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 253 newfmi->fmi_state = FSTRANS_NORMAL; 254 newfmi->fmi_ref_cnt = 1; 255 newfmi->fmi_gone = false; 256 LIST_INIT(&newfmi->fmi_cow_handler); 257 newfmi->fmi_cow_change = false; 258 newfmi->fmi_mount = mp; 259 260 mutex_enter(&fstrans_mount_lock); 261 mp->mnt_transinfo = newfmi; 262 fstrans_debug_mount(mp); 263 mutex_exit(&fstrans_mount_lock); 264 265 return 0; 266} 267 268/* 269 * Deallocate mount state. 270 */ 271void 272fstrans_unmount(struct mount *mp) 273{ 274 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 275 276 KASSERT(fmi != NULL); 277 278 mutex_enter(&fstrans_mount_lock); 279 fstrans_debug_unmount(mp); 280 fmi->fmi_gone = true; 281 mp->mnt_transinfo = NULL; 282 fstrans_gone_count += 1; 283 mutex_exit(&fstrans_mount_lock); 284 285 fstrans_mount_dtor(fmi); 286} 287 288/* 289 * Clear mount entries whose mount is gone. 290 */ 291static void 292fstrans_clear_lwp_info(void) 293{ 294 struct fstrans_lwp_info **p, *fli; 295 296 /* 297 * Scan our list clearing entries whose mount is gone. 298 */ 299 for (p = &curlwp->l_fstrans; *p; p = &(*p)->fli_succ) { 300 fli = *p; 301 if (fli->fli_mount != NULL && 302 fli->fli_mountinfo->fmi_gone && 303 fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 304 *p = (*p)->fli_succ; 305 fstrans_mount_dtor(fli->fli_mountinfo); 306 fli->fli_mount = NULL; 307 fli->fli_alias = NULL; 308 fli->fli_mountinfo = NULL; 309 membar_sync(); 310 fli->fli_self = NULL; 311 312 if (*p == NULL) 313 break; 314 } 315 } 316} 317 318/* 319 * Allocate and return per lwp info for this mount. 320 */ 321static struct fstrans_lwp_info * 322fstrans_alloc_lwp_info(struct mount *mp) 323{ 324 struct fstrans_lwp_info *fli, *fli2; 325 struct fstrans_mount_info *fmi; 326 327 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 328 if (fli->fli_mount == mp) 329 return fli; 330 } 331 332 /* 333 * Try to reuse a cleared entry or allocate a new one. 334 */ 335 mutex_enter(&fstrans_lock); 336 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 337 membar_sync(); 338 if (fli->fli_self == NULL) { 339 KASSERT(fli->fli_mount == NULL); 340 KASSERT(fli->fli_trans_cnt == 0); 341 KASSERT(fli->fli_cow_cnt == 0); 342 fli->fli_self = curlwp; 343 fli->fli_succ = curlwp->l_fstrans; 344 curlwp->l_fstrans = fli; 345 break; 346 } 347 } 348 mutex_exit(&fstrans_lock); 349 350 if (fli == NULL) { 351 fli = kmem_alloc(sizeof(*fli), KM_SLEEP); 352 mutex_enter(&fstrans_lock); 353 memset(fli, 0, sizeof(*fli)); 354 fli->fli_self = curlwp; 355 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 356 mutex_exit(&fstrans_lock); 357 fli->fli_succ = curlwp->l_fstrans; 358 curlwp->l_fstrans = fli; 359 } 360 361 /* 362 * Attach the entry to the mount if its mnt_transinfo is valid. 363 */ 364 365 mutex_enter(&fstrans_mount_lock); 366 fstrans_debug_validate_mount(mp); 367 fmi = mp->mnt_transinfo; 368 KASSERT(fmi != NULL); 369 fli->fli_mount = mp; 370 fli->fli_mountinfo = fmi; 371 fmi->fmi_ref_cnt += 1; 372 mp = mp->mnt_lower; 373 mutex_exit(&fstrans_mount_lock); 374 375 if (mp) { 376 fli2 = fstrans_alloc_lwp_info(mp); 377 fli->fli_alias = fli2->fli_mount; 378 379 fli = fli2; 380 } 381 382 return fli; 383} 384 385/* 386 * Retrieve the per lwp info for this mount allocating if necessary. 387 */ 388static inline struct fstrans_lwp_info * 389fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 390{ 391 struct fstrans_lwp_info *fli, *fli2; 392 393 /* 394 * Scan our list for a match. 395 */ 396 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 397 if (fli->fli_mount == mp) { 398 if (fli->fli_alias != NULL) { 399 for (fli2 = curlwp->l_fstrans; fli2; 400 fli2 = fli2->fli_succ) { 401 if (fli2->fli_mount == fli->fli_alias) 402 break; 403 } 404 KASSERT(fli2 != NULL); 405 fli = fli2; 406 } 407 break; 408 } 409 } 410 411 if (do_alloc) { 412 if (__predict_false(fli == NULL)) 413 fli = fstrans_alloc_lwp_info(mp); 414 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone); 415 } else { 416 KASSERT(fli != NULL); 417 } 418 419 return fli; 420} 421 422/* 423 * Check if this lock type is granted at this state. 424 */ 425static bool 426grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type) 427{ 428 429 if (__predict_true(state == FSTRANS_NORMAL)) 430 return true; 431 if (type == FSTRANS_EXCL) 432 return true; 433 if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 434 return true; 435 436 return false; 437} 438 439/* 440 * Start a transaction. If this thread already has a transaction on this 441 * file system increment the reference counter. 442 */ 443static inline int 444_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 445{ 446 int s; 447 struct fstrans_lwp_info *fli; 448 struct fstrans_mount_info *fmi; 449 450#ifndef FSTRANS_DEAD_ENABLED 451 if (mp == dead_rootmount) 452 return 0; 453#endif 454 455 ASSERT_SLEEPABLE(); 456 457 fli = fstrans_get_lwp_info(mp, true); 458 fmi = fli->fli_mountinfo; 459 460 if (fli->fli_trans_cnt > 0) { 461 KASSERT(lock_type != FSTRANS_EXCL); 462 fli->fli_trans_cnt += 1; 463 464 return 0; 465 } 466 467 s = pserialize_read_enter(); 468 if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) { 469 fli->fli_trans_cnt = 1; 470 fli->fli_lock_type = lock_type; 471 pserialize_read_exit(s); 472 473 return 0; 474 } 475 pserialize_read_exit(s); 476 477 if (! wait) 478 return EBUSY; 479 480 mutex_enter(&fstrans_lock); 481 while (! grant_lock(fmi->fmi_state, lock_type)) 482 cv_wait(&fstrans_state_cv, &fstrans_lock); 483 fli->fli_trans_cnt = 1; 484 fli->fli_lock_type = lock_type; 485 mutex_exit(&fstrans_lock); 486 487 return 0; 488} 489 490void 491fstrans_start(struct mount *mp) 492{ 493 int error __diagused; 494 495 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 496 KASSERT(error == 0); 497} 498 499int 500fstrans_start_nowait(struct mount *mp) 501{ 502 503 return _fstrans_start(mp, FSTRANS_SHARED, 0); 504} 505 506void 507fstrans_start_lazy(struct mount *mp) 508{ 509 int error __diagused; 510 511 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 512 KASSERT(error == 0); 513} 514 515/* 516 * Finish a transaction. 517 */ 518void 519fstrans_done(struct mount *mp) 520{ 521 int s; 522 struct fstrans_lwp_info *fli; 523 struct fstrans_mount_info *fmi; 524 525#ifndef FSTRANS_DEAD_ENABLED 526 if (mp == dead_rootmount) 527 return; 528#endif 529 530 fli = fstrans_get_lwp_info(mp, false); 531 fmi = fli->fli_mountinfo; 532 KASSERT(fli->fli_trans_cnt > 0); 533 534 if (fli->fli_trans_cnt > 1) { 535 fli->fli_trans_cnt -= 1; 536 537 return; 538 } 539 540 if (__predict_false(fstrans_gone_count > 0)) 541 fstrans_clear_lwp_info(); 542 543 s = pserialize_read_enter(); 544 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 545 fli->fli_trans_cnt = 0; 546 pserialize_read_exit(s); 547 548 return; 549 } 550 pserialize_read_exit(s); 551 552 mutex_enter(&fstrans_lock); 553 fli->fli_trans_cnt = 0; 554 cv_signal(&fstrans_count_cv); 555 mutex_exit(&fstrans_lock); 556} 557 558/* 559 * Check if this thread has an exclusive lock. 560 */ 561int 562fstrans_is_owner(struct mount *mp) 563{ 564 struct fstrans_lwp_info *fli; 565 566 KASSERT(mp != dead_rootmount); 567 568 fli = fstrans_get_lwp_info(mp, true); 569 570 if (fli->fli_trans_cnt == 0) 571 return 0; 572 573 return (fli->fli_lock_type == FSTRANS_EXCL); 574} 575 576/* 577 * True, if no thread is in a transaction not granted at the current state. 578 */ 579static bool 580state_change_done(const struct fstrans_mount_info *fmi) 581{ 582 struct fstrans_lwp_info *fli; 583 584 KASSERT(mutex_owned(&fstrans_lock)); 585 586 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 587 if (fli->fli_mountinfo != fmi) 588 continue; 589 if (fli->fli_trans_cnt == 0) 590 continue; 591 if (grant_lock(fmi->fmi_state, fli->fli_lock_type)) 592 continue; 593 594 return false; 595 } 596 597 return true; 598} 599 600/* 601 * Set new file system state. 602 */ 603int 604fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 605{ 606 int error; 607 enum fstrans_state old_state; 608 struct fstrans_lwp_info *fli; 609 struct fstrans_mount_info *fmi; 610 611 KASSERT(mp != dead_rootmount); 612 613 fli = fstrans_get_lwp_info(mp, true); 614 fmi = fli->fli_mountinfo; 615 old_state = fmi->fmi_state; 616 if (old_state == new_state) 617 return 0; 618 619 mutex_enter(&fstrans_lock); 620 fmi->fmi_state = new_state; 621 pserialize_perform(fstrans_psz); 622 623 /* 624 * All threads see the new state now. 625 * Wait for transactions invalid at this state to leave. 626 */ 627 error = 0; 628 while (! state_change_done(fmi)) { 629 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 630 if (error) { 631 new_state = fmi->fmi_state = FSTRANS_NORMAL; 632 break; 633 } 634 } 635 cv_broadcast(&fstrans_state_cv); 636 mutex_exit(&fstrans_lock); 637 638 if (old_state != new_state) { 639 if (old_state == FSTRANS_NORMAL) 640 _fstrans_start(mp, FSTRANS_EXCL, 1); 641 if (new_state == FSTRANS_NORMAL) 642 fstrans_done(mp); 643 } 644 645 return error; 646} 647 648/* 649 * Get current file system state. 650 */ 651enum fstrans_state 652fstrans_getstate(struct mount *mp) 653{ 654 struct fstrans_lwp_info *fli; 655 struct fstrans_mount_info *fmi; 656 657 KASSERT(mp != dead_rootmount); 658 659 fli = fstrans_get_lwp_info(mp, true); 660 fmi = fli->fli_mountinfo; 661 662 return fmi->fmi_state; 663} 664 665/* 666 * Request a filesystem to suspend all operations. 667 */ 668int 669vfs_suspend(struct mount *mp, int nowait) 670{ 671 struct fstrans_lwp_info *fli; 672 int error; 673 674 if (mp == dead_rootmount) 675 return EOPNOTSUPP; 676 677 fli = fstrans_get_lwp_info(mp, true); 678 mp = fli->fli_mount; 679 680 if (nowait) { 681 if (!mutex_tryenter(&vfs_suspend_lock)) 682 return EWOULDBLOCK; 683 } else 684 mutex_enter(&vfs_suspend_lock); 685 686 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 687 mutex_exit(&vfs_suspend_lock); 688 689 return error; 690} 691 692/* 693 * Request a filesystem to resume all operations. 694 */ 695void 696vfs_resume(struct mount *mp) 697{ 698 struct fstrans_lwp_info *fli; 699 700 KASSERT(mp != dead_rootmount); 701 702 fli = fstrans_get_lwp_info(mp, false); 703 mp = fli->fli_mount; 704 705 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 706 mutex_exit(&vfs_suspend_lock); 707} 708 709 710/* 711 * True, if no thread is running a cow handler. 712 */ 713static bool 714cow_state_change_done(const struct fstrans_mount_info *fmi) 715{ 716 struct fstrans_lwp_info *fli; 717 718 KASSERT(mutex_owned(&fstrans_lock)); 719 KASSERT(fmi->fmi_cow_change); 720 721 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 722 if (fli->fli_mount != fmi->fmi_mount) 723 continue; 724 if (fli->fli_cow_cnt == 0) 725 continue; 726 727 return false; 728 } 729 730 return true; 731} 732 733/* 734 * Prepare for changing this mounts cow list. 735 * Returns with fstrans_lock locked. 736 */ 737static void 738cow_change_enter(struct fstrans_mount_info *fmi) 739{ 740 741 mutex_enter(&fstrans_lock); 742 743 /* 744 * Wait for other threads changing the list. 745 */ 746 while (fmi->fmi_cow_change) 747 cv_wait(&fstrans_state_cv, &fstrans_lock); 748 749 /* 750 * Wait until all threads are aware of a state change. 751 */ 752 fmi->fmi_cow_change = true; 753 pserialize_perform(fstrans_psz); 754 755 while (! cow_state_change_done(fmi)) 756 cv_wait(&fstrans_count_cv, &fstrans_lock); 757} 758 759/* 760 * Done changing this mounts cow list. 761 */ 762static void 763cow_change_done(struct fstrans_mount_info *fmi) 764{ 765 766 KASSERT(mutex_owned(&fstrans_lock)); 767 768 fmi->fmi_cow_change = false; 769 pserialize_perform(fstrans_psz); 770 771 cv_broadcast(&fstrans_state_cv); 772 773 mutex_exit(&fstrans_lock); 774} 775 776/* 777 * Add a handler to this mount. 778 */ 779int 780fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 781 void *arg) 782{ 783 struct fstrans_mount_info *fmi; 784 struct fscow_handler *newch; 785 786 KASSERT(mp != dead_rootmount); 787 788 mutex_enter(&fstrans_mount_lock); 789 fmi = mp->mnt_transinfo; 790 KASSERT(fmi != NULL); 791 fmi->fmi_ref_cnt += 1; 792 mutex_exit(&fstrans_mount_lock); 793 794 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 795 newch->ch_func = func; 796 newch->ch_arg = arg; 797 798 cow_change_enter(fmi); 799 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 800 cow_change_done(fmi); 801 802 return 0; 803} 804 805/* 806 * Remove a handler from this mount. 807 */ 808int 809fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 810 void *arg) 811{ 812 struct fstrans_mount_info *fmi; 813 struct fscow_handler *hp = NULL; 814 815 KASSERT(mp != dead_rootmount); 816 817 fmi = mp->mnt_transinfo; 818 KASSERT(fmi != NULL); 819 820 cow_change_enter(fmi); 821 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 822 if (hp->ch_func == func && hp->ch_arg == arg) 823 break; 824 if (hp != NULL) { 825 LIST_REMOVE(hp, ch_list); 826 kmem_free(hp, sizeof(*hp)); 827 } 828 cow_change_done(fmi); 829 830 fstrans_mount_dtor(fmi); 831 832 return hp ? 0 : EINVAL; 833} 834 835/* 836 * Check for need to copy block that is about to be written. 837 */ 838int 839fscow_run(struct buf *bp, bool data_valid) 840{ 841 int error, s; 842 struct mount *mp; 843 struct fstrans_lwp_info *fli; 844 struct fstrans_mount_info *fmi; 845 struct fscow_handler *hp; 846 847 /* 848 * First check if we need run the copy-on-write handler. 849 */ 850 if ((bp->b_flags & B_COWDONE)) 851 return 0; 852 if (bp->b_vp == NULL) { 853 bp->b_flags |= B_COWDONE; 854 return 0; 855 } 856 if (bp->b_vp->v_type == VBLK) 857 mp = spec_node_getmountedfs(bp->b_vp); 858 else 859 mp = bp->b_vp->v_mount; 860 if (mp == NULL || mp == dead_rootmount) { 861 bp->b_flags |= B_COWDONE; 862 return 0; 863 } 864 865 fli = fstrans_get_lwp_info(mp, true); 866 fmi = fli->fli_mountinfo; 867 868 /* 869 * On non-recursed run check if other threads 870 * want to change the list. 871 */ 872 if (fli->fli_cow_cnt == 0) { 873 s = pserialize_read_enter(); 874 if (__predict_false(fmi->fmi_cow_change)) { 875 pserialize_read_exit(s); 876 mutex_enter(&fstrans_lock); 877 while (fmi->fmi_cow_change) 878 cv_wait(&fstrans_state_cv, &fstrans_lock); 879 fli->fli_cow_cnt = 1; 880 mutex_exit(&fstrans_lock); 881 } else { 882 fli->fli_cow_cnt = 1; 883 pserialize_read_exit(s); 884 } 885 } else 886 fli->fli_cow_cnt += 1; 887 888 /* 889 * Run all copy-on-write handlers, stop on error. 890 */ 891 error = 0; 892 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 893 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 894 break; 895 if (error == 0) 896 bp->b_flags |= B_COWDONE; 897 898 /* 899 * Check if other threads want to change the list. 900 */ 901 if (fli->fli_cow_cnt > 1) { 902 fli->fli_cow_cnt -= 1; 903 } else { 904 s = pserialize_read_enter(); 905 if (__predict_false(fmi->fmi_cow_change)) { 906 pserialize_read_exit(s); 907 mutex_enter(&fstrans_lock); 908 fli->fli_cow_cnt = 0; 909 cv_signal(&fstrans_count_cv); 910 mutex_exit(&fstrans_lock); 911 } else { 912 fli->fli_cow_cnt = 0; 913 pserialize_read_exit(s); 914 } 915 } 916 917 return error; 918} 919 920#if defined(DDB) 921void fstrans_dump(int); 922 923static void 924fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 925{ 926 char prefix[9]; 927 struct fstrans_lwp_info *fli; 928 929 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 930 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 931 if (fli->fli_self != l) 932 continue; 933 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 934 if (! verbose) 935 continue; 936 } 937 printf("%-8s", prefix); 938 if (verbose) 939 printf(" @%p", fli); 940 if (fli->fli_mount == dead_rootmount) 941 printf(" <dead>"); 942 else if (fli->fli_mount != NULL) 943 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 944 else 945 printf(" NULL"); 946 if (fli->fli_alias != NULL) 947 printf(" alias (%s)", 948 fli->fli_alias->mnt_stat.f_mntonname); 949 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 950 printf(" gone"); 951 if (fli->fli_trans_cnt == 0) { 952 printf(" -"); 953 } else { 954 switch (fli->fli_lock_type) { 955 case FSTRANS_LAZY: 956 printf(" lazy"); 957 break; 958 case FSTRANS_SHARED: 959 printf(" shared"); 960 break; 961 case FSTRANS_EXCL: 962 printf(" excl"); 963 break; 964 default: 965 printf(" %#x", fli->fli_lock_type); 966 break; 967 } 968 } 969 printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt); 970 prefix[0] = '\0'; 971 } 972} 973 974static void 975fstrans_print_mount(struct mount *mp, int verbose) 976{ 977 struct fstrans_mount_info *fmi; 978 979 fmi = mp->mnt_transinfo; 980 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 981 return; 982 983 printf("%-16s ", mp->mnt_stat.f_mntonname); 984 if (fmi == NULL) { 985 printf("(null)\n"); 986 return; 987 } 988 switch (fmi->fmi_state) { 989 case FSTRANS_NORMAL: 990 printf("state normal\n"); 991 break; 992 case FSTRANS_SUSPENDING: 993 printf("state suspending\n"); 994 break; 995 case FSTRANS_SUSPENDED: 996 printf("state suspended\n"); 997 break; 998 default: 999 printf("state %#x\n", fmi->fmi_state); 1000 break; 1001 } 1002} 1003 1004void 1005fstrans_dump(int full) 1006{ 1007 const struct proclist_desc *pd; 1008 struct proc *p; 1009 struct lwp *l; 1010 struct mount *mp; 1011 1012 printf("Fstrans locks by lwp:\n"); 1013 for (pd = proclists; pd->pd_list != NULL; pd++) 1014 PROCLIST_FOREACH(p, pd->pd_list) 1015 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1016 fstrans_print_lwp(p, l, full == 1); 1017 1018 printf("Fstrans state by mount:\n"); 1019 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1020 fstrans_print_mount(mp, full == 1); 1021} 1022#endif /* defined(DDB) */ 1023