vfs_trans.c revision 1.60
1/* $NetBSD: vfs_trans.c,v 1.60 2019/05/13 08:16:56 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.60 2019/05/13 08:16:56 hannken Exp $"); 34 35/* 36 * File system transaction operations. 
*/

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>

#include <miscfs/specfs/specdev.h>

/*
 * Kind of transaction lock a thread holds on a mount.  grant_lock()
 * decides which types are admitted in which fstrans_state:
 * every type in FSTRANS_NORMAL, FSTRANS_LAZY additionally in
 * FSTRANS_SUSPENDING, and FSTRANS_EXCL in every state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED,			/* Granted while not suspending */
	FSTRANS_EXCL			/* Internal: exclusive lock */
};

/*
 * One registered copy-on-write handler; linked on
 * fstrans_mount_info::fmi_cow_handler.
 */
struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;	/* Linkage on the mount's list */
	int (*ch_func)(void *, struct buf *, bool); /* Handler callback */
	void *ch_arg;				/* First argument to ch_func */
};
/*
 * Per lwp and per mount transaction state.  Entries live on the global
 * list fstrans_fli_head and, while owned, on the owning lwp's l_fstrans
 * chain.  An entry with fli_self == NULL is free for reuse.
 */
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;	/* Next on the lwp's chain */
	struct lwp *fli_self;			/* Owning lwp or NULL if free */
	struct mount *fli_mount;		/* Attached mount or NULL */
	struct fstrans_lwp_info *fli_alias;	/* Entry of lowest layer mount */
	struct fstrans_mount_info *fli_mountinfo; /* Referenced mount state */
	int fli_trans_cnt;			/* Transaction nesting depth */
	int fli_alias_cnt;			/* Entries aliasing this one */
	int fli_cow_cnt;			/* fscow_run() nesting depth */
	enum fstrans_lock_type fli_lock_type;	/* Type of held transaction */
	LIST_ENTRY(fstrans_lwp_info) fli_list;	/* Linkage on fstrans_fli_head */
};
/*
 * Per mount state, reference counted.  fstrans_mount_dtor() frees it,
 * together with fmi_mount, when the last reference is dropped.
 */
struct fstrans_mount_info {
	enum fstrans_state fmi_state;		/* Current transaction state */
	unsigned int fmi_ref_cnt;		/* Number of references held */
	bool fmi_gone;				/* Set by fstrans_unmount() */
	bool fmi_cow_change;			/* Cow list is being changed */
	LIST_HEAD(, fscow_handler) fmi_cow_handler; /* Registered handlers */
	struct mount *fmi_mount;		/* Mount this state belongs to */
};

static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
static kmutex_t fstrans_lock;		/* Fstrans big lock. */
static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone.
*/ 97 98static void fstrans_mount_dtor(struct fstrans_mount_info *); 99static void fstrans_clear_lwp_info(void); 100static inline struct fstrans_lwp_info * 101 fstrans_get_lwp_info(struct mount *, bool); 102static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 103static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 104static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type); 105static bool state_change_done(const struct fstrans_mount_info *); 106static bool cow_state_change_done(const struct fstrans_mount_info *); 107static void cow_change_enter(struct fstrans_mount_info *); 108static void cow_change_done(struct fstrans_mount_info *); 109 110extern struct mount *dead_rootmount; 111 112#if defined(DIAGNOSTIC) 113 114struct fstrans_debug_mount { 115 struct mount *fdm_mount; 116 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 117}; 118 119static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 120 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 121 122static void 123fstrans_debug_mount(struct mount *mp) 124{ 125 struct fstrans_debug_mount *fdm, *new; 126 127 KASSERT(mutex_owned(&fstrans_mount_lock)); 128 129 mutex_exit(&fstrans_mount_lock); 130 new = kmem_alloc(sizeof(*new), KM_SLEEP); 131 new->fdm_mount = mp; 132 mutex_enter(&fstrans_mount_lock); 133 134 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 135 KASSERT(fdm->fdm_mount != mp); 136 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 137} 138 139static void 140fstrans_debug_unmount(struct mount *mp) 141{ 142 struct fstrans_debug_mount *fdm; 143 144 KASSERT(mutex_owned(&fstrans_mount_lock)); 145 146 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 147 if (fdm->fdm_mount == mp) 148 break; 149 KASSERT(fdm != NULL); 150 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 151 fstrans_debug_mount, fdm_list); 152 kmem_free(fdm, sizeof(*fdm)); 153} 154 155static void 156fstrans_debug_validate_mount(struct mount *mp) 157{ 158 
struct fstrans_debug_mount *fdm; 159 160 KASSERT(mutex_owned(&fstrans_mount_lock)); 161 162 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 163 if (fdm->fdm_mount == mp) 164 break; 165 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 166} 167 168#else /* defined(DIAGNOSTIC) */ 169 170#define fstrans_debug_mount(mp) 171#define fstrans_debug_unmount(mp) 172#define fstrans_debug_validate_mount(mp) 173 174#endif /* defined(DIAGNOSTIC) */ 175 176/* 177 * Initialize. 178 */ 179void 180fstrans_init(void) 181{ 182 183 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 184 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 185 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE); 186 cv_init(&fstrans_state_cv, "fstchg"); 187 cv_init(&fstrans_count_cv, "fstcnt"); 188 fstrans_psz = pserialize_create(); 189 LIST_INIT(&fstrans_fli_head); 190} 191 192/* 193 * Deallocate lwp state. 194 */ 195void 196fstrans_lwp_dtor(lwp_t *l) 197{ 198 struct fstrans_lwp_info *fli, *fli_next; 199 200 for (fli = l->l_fstrans; fli; fli = fli_next) { 201 KASSERT(fli->fli_trans_cnt == 0); 202 KASSERT(fli->fli_cow_cnt == 0); 203 KASSERT(fli->fli_self == l); 204 if (fli->fli_mount != NULL) 205 fstrans_mount_dtor(fli->fli_mountinfo); 206 fli_next = fli->fli_succ; 207 fli->fli_alias_cnt = 0; 208 fli->fli_mount = NULL; 209 fli->fli_alias = NULL; 210 fli->fli_mountinfo = NULL; 211 membar_sync(); 212 fli->fli_self = NULL; 213 } 214 215 l->l_fstrans = NULL; 216} 217 218/* 219 * Dereference mount state. 
*/
/*
 * Drop one reference on fmi under fstrans_mount_lock.  If it was the
 * last one, decrement fstrans_gone_count, release the lock and free
 * both fmi->fmi_mount and fmi.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_mount_lock);

	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt -= 1;
	if (fmi->fmi_ref_cnt > 0) {
		/* Still referenced, nothing to free yet. */
		mutex_exit(&fstrans_mount_lock);
		return;
	}

	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);

	/* fstrans_unmount() counted this mount as gone before. */
	KASSERT(fstrans_gone_count > 0);
	fstrans_gone_count -= 1;

	mutex_exit(&fstrans_mount_lock);

	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
	kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 *
 * Creates a fstrans_mount_info in state FSTRANS_NORMAL holding one
 * reference and publishes it as mp->mnt_transinfo.  Always returns 0.
 */
int
fstrans_mount(struct mount *mp)
{
	struct fstrans_mount_info *newfmi;

	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
	newfmi->fmi_state = FSTRANS_NORMAL;
	newfmi->fmi_ref_cnt = 1;
	newfmi->fmi_gone = false;
	LIST_INIT(&newfmi->fmi_cow_handler);
	newfmi->fmi_cow_change = false;
	newfmi->fmi_mount = mp;

	mutex_enter(&fstrans_mount_lock);
	mp->mnt_transinfo = newfmi;
	fstrans_debug_mount(mp);
	mutex_exit(&fstrans_mount_lock);

	return 0;
}

/*
 * Deallocate mount state.
 *
 * Marks the state as gone, detaches it from mp, bumps
 * fstrans_gone_count and drops the reference taken by fstrans_mount().
 */
void
fstrans_unmount(struct mount *mp)
{
	struct fstrans_mount_info *fmi = mp->mnt_transinfo;

	KASSERT(fmi != NULL);

	mutex_enter(&fstrans_mount_lock);
	fstrans_debug_unmount(mp);
	fmi->fmi_gone = true;
	mp->mnt_transinfo = NULL;
	fstrans_gone_count += 1;
	mutex_exit(&fstrans_mount_lock);

	fstrans_mount_dtor(fmi);
}

/*
 * Clear mount entries whose mount is gone.
 *
 * Operates on the chain of curlwp only.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			/* Unlink from the lwp chain, then drop the mount ref. */
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			/*
			 * NOTE(review): the barrier presumably orders the
			 * clears above before the store that marks the entry
			 * free for fstrans_alloc_lwp_info() -- confirm.
			 */
			membar_sync();
			fli->fli_self = NULL;
			/*
			 * Restart from the head: dropping the alias count
			 * may have made an earlier entry eligible.
			 */
			p = &curlwp->l_fstrans;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
}

/*
 * Allocate and return per lwp info for this mount.
 *
 * Returns an existing entry of curlwp for mp if there is one.  Otherwise
 * takes a free entry from the global list or allocates a new one,
 * attaches it to mp taking a reference on its mount state, and, if mp
 * has lower mounts, recursively sets up an alias entry for the lowest
 * one and returns that alias instead.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Try to reuse a cleared entry or allocate a new one.
	 */
	mutex_enter(&fstrans_lock);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		membar_sync();
		if (fli->fli_self == NULL) {
			KASSERT(fli->fli_mount == NULL);
			KASSERT(fli->fli_trans_cnt == 0);
			KASSERT(fli->fli_cow_cnt == 0);
			KASSERT(fli->fli_alias_cnt == 0);
			fli->fli_self = curlwp;
			fli->fli_succ = curlwp->l_fstrans;
			curlwp->l_fstrans = fli;
			break;
		}
	}
	mutex_exit(&fstrans_lock);

	/* LIST_FOREACH left fli NULL if no free entry was found. */
	if (fli == NULL) {
		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
		mutex_enter(&fstrans_lock);
		memset(fli, 0, sizeof(*fli));
		fli->fli_self = curlwp;
		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
		mutex_exit(&fstrans_lock);
		fli->fli_succ = curlwp->l_fstrans;
		curlwp->l_fstrans = fli;
	}

	/*
	 * Attach the entry to the mount if its mnt_transinfo is valid.
	 */

	mutex_enter(&fstrans_mount_lock);
	fstrans_debug_validate_mount(mp);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;
	fmi->fmi_ref_cnt += 1;
	/* Walk mnt_lower to the bottom; mp ends up NULL if there is none. */
	do {
		mp = mp->mnt_lower;
	} while (mp && mp->mnt_lower);
	mutex_exit(&fstrans_mount_lock);

	if (mp) {
		fli->fli_alias = fstrans_alloc_lwp_info(mp);
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}

/*
 * Retrieve the per lwp info for this mount allocating if necessary.
 *
 * If the matching entry has an alias the alias is returned.  With
 * do_alloc false the entry must already exist.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Scan our list for a match.
	 */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp) {
			KASSERT((mp->mnt_lower == NULL) ==
			    (fli->fli_alias == NULL));
			if (fli->fli_alias != NULL)
				fli = fli->fli_alias;
			break;
		}
	}

	if (do_alloc) {
		if (__predict_false(fli == NULL))
			fli = fstrans_alloc_lwp_info(mp);
		KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
	} else {
		KASSERT(fli != NULL);
	}

	return fli;
}

/*
 * Check if this lock type is granted at this state.
 *
 * Everything is granted in FSTRANS_NORMAL, FSTRANS_EXCL is granted in
 * every state and FSTRANS_LAZY is also granted in FSTRANS_SUSPENDING.
 */
static bool
grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
{

	if (__predict_true(state == FSTRANS_NORMAL))
		return true;
	if (type == FSTRANS_EXCL)
		return true;
	if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
		return true;

	return false;
}

/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 *
 * Returns 0 on success.  If the lock is not granted at the current
 * state, returns EBUSY when wait is zero, otherwise sleeps on
 * fstrans_state_cv until it is granted.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	if (mp == dead_rootmount)
		return 0;
#endif

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/* Recursive start: just count it. */
	if (fli->fli_trans_cnt > 0) {
		KASSERT(lock_type != FSTRANS_EXCL);
		fli->fli_trans_cnt += 1;

		return 0;
	}

	/* Fast path: check the state inside a pserialize read section. */
	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	/* Slow path: wait under fstrans_lock for a state granting the lock. */
	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi->fmi_state, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * Start a shared transaction, waiting until it is granted.
 */
void
fstrans_start(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
	KASSERT(error == 0);
}

/*
 * Start a shared transaction without waiting.
 * Returns 0 on success or EBUSY if it is not granted right now.
 */
int
fstrans_start_nowait(struct mount *mp)
{

	return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

/*
 * Start a lazy transaction, waiting until it is granted.
 */
void
fstrans_start_lazy(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
	KASSERT(error == 0);
}

/*
 * Finish a transaction.
 *
 * Only the outermost call ends the transaction; nested calls merely
 * decrement the counter.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	if (mp == dead_rootmount)
		return;
#endif

	fli = fstrans_get_lwp_info(mp, false);
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	/* Some mount is gone: reclaim our stale entries first. */
	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	/* Fast path: in state normal nobody waits for the count. */
	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	/* Slow path: clear the count under the lock and wake a waiter. */
	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}

/*
 * Check if this thread has an exclusive lock.
 *
 * Returns nonzero if curlwp is inside a transaction of type
 * FSTRANS_EXCL on mp, zero otherwise.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);

	if (fli->fli_trans_cnt == 0)
		return 0;

	return (fli->fli_lock_type == FSTRANS_EXCL);
}

/*
 * True, if no thread is in a transaction not granted at the current state.
588 */ 589static bool 590state_change_done(const struct fstrans_mount_info *fmi) 591{ 592 struct fstrans_lwp_info *fli; 593 594 KASSERT(mutex_owned(&fstrans_lock)); 595 596 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 597 if (fli->fli_mountinfo != fmi) 598 continue; 599 if (fli->fli_trans_cnt == 0) 600 continue; 601 if (grant_lock(fmi->fmi_state, fli->fli_lock_type)) 602 continue; 603 604 return false; 605 } 606 607 return true; 608} 609 610/* 611 * Set new file system state. 612 */ 613int 614fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 615{ 616 int error; 617 enum fstrans_state old_state; 618 struct fstrans_lwp_info *fli; 619 struct fstrans_mount_info *fmi; 620 621 KASSERT(mp != dead_rootmount); 622 623 fli = fstrans_get_lwp_info(mp, true); 624 fmi = fli->fli_mountinfo; 625 old_state = fmi->fmi_state; 626 if (old_state == new_state) 627 return 0; 628 629 mutex_enter(&fstrans_lock); 630 fmi->fmi_state = new_state; 631 pserialize_perform(fstrans_psz); 632 633 /* 634 * All threads see the new state now. 635 * Wait for transactions invalid at this state to leave. 636 */ 637 error = 0; 638 while (! state_change_done(fmi)) { 639 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 640 if (error) { 641 new_state = fmi->fmi_state = FSTRANS_NORMAL; 642 break; 643 } 644 } 645 cv_broadcast(&fstrans_state_cv); 646 mutex_exit(&fstrans_lock); 647 648 if (old_state != new_state) { 649 if (old_state == FSTRANS_NORMAL) 650 _fstrans_start(mp, FSTRANS_EXCL, 1); 651 if (new_state == FSTRANS_NORMAL) 652 fstrans_done(mp); 653 } 654 655 return error; 656} 657 658/* 659 * Get current file system state. 660 */ 661enum fstrans_state 662fstrans_getstate(struct mount *mp) 663{ 664 struct fstrans_lwp_info *fli; 665 struct fstrans_mount_info *fmi; 666 667 KASSERT(mp != dead_rootmount); 668 669 fli = fstrans_get_lwp_info(mp, true); 670 fmi = fli->fli_mountinfo; 671 672 return fmi->fmi_state; 673} 674 675/* 676 * Request a filesystem to suspend all operations. 
677 */ 678int 679vfs_suspend(struct mount *mp, int nowait) 680{ 681 struct fstrans_lwp_info *fli; 682 int error; 683 684 if (mp == dead_rootmount) 685 return EOPNOTSUPP; 686 687 fli = fstrans_get_lwp_info(mp, true); 688 mp = fli->fli_mount; 689 690 if (nowait) { 691 if (!mutex_tryenter(&vfs_suspend_lock)) 692 return EWOULDBLOCK; 693 } else 694 mutex_enter(&vfs_suspend_lock); 695 696 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 697 mutex_exit(&vfs_suspend_lock); 698 699 return error; 700} 701 702/* 703 * Request a filesystem to resume all operations. 704 */ 705void 706vfs_resume(struct mount *mp) 707{ 708 struct fstrans_lwp_info *fli; 709 710 KASSERT(mp != dead_rootmount); 711 712 fli = fstrans_get_lwp_info(mp, false); 713 mp = fli->fli_mount; 714 715 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 716 mutex_exit(&vfs_suspend_lock); 717} 718 719 720/* 721 * True, if no thread is running a cow handler. 722 */ 723static bool 724cow_state_change_done(const struct fstrans_mount_info *fmi) 725{ 726 struct fstrans_lwp_info *fli; 727 728 KASSERT(mutex_owned(&fstrans_lock)); 729 KASSERT(fmi->fmi_cow_change); 730 731 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 732 if (fli->fli_mount != fmi->fmi_mount) 733 continue; 734 if (fli->fli_cow_cnt == 0) 735 continue; 736 737 return false; 738 } 739 740 return true; 741} 742 743/* 744 * Prepare for changing this mounts cow list. 745 * Returns with fstrans_lock locked. 746 */ 747static void 748cow_change_enter(struct fstrans_mount_info *fmi) 749{ 750 751 mutex_enter(&fstrans_lock); 752 753 /* 754 * Wait for other threads changing the list. 755 */ 756 while (fmi->fmi_cow_change) 757 cv_wait(&fstrans_state_cv, &fstrans_lock); 758 759 /* 760 * Wait until all threads are aware of a state change. 761 */ 762 fmi->fmi_cow_change = true; 763 pserialize_perform(fstrans_psz); 764 765 while (! cow_state_change_done(fmi)) 766 cv_wait(&fstrans_count_cv, &fstrans_lock); 767} 768 769/* 770 * Done changing this mounts cow list. 
*/
/*
 * Counterpart of cow_change_enter(): clears fmi_cow_change, wakes the
 * threads waiting on fstrans_state_cv and releases fstrans_lock, which
 * the caller must hold.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}

/*
 * Add a handler to this mount.
 *
 * Takes a reference on the mount state and inserts a new handler
 * (func, arg) at the head of the cow handler list.  Always returns 0.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_mount_lock);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_mount_lock);

	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
	newch->ch_func = func;
	newch->ch_arg = arg;

	cow_change_enter(fmi);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
	cow_change_done(fmi);

	return 0;
}

/*
 * Remove a handler from this mount.
 *
 * Removes and frees the first handler matching both func and arg.
 * Returns 0 if one was found, EINVAL otherwise.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp = NULL;

	KASSERT(mp != dead_rootmount);

	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);

	cow_change_enter(fmi);
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if (hp->ch_func == func && hp->ch_arg == arg)
			break;
	if (hp != NULL) {
		LIST_REMOVE(hp, ch_list);
		kmem_free(hp, sizeof(*hp));
	}
	cow_change_done(fmi);

	/*
	 * NOTE(review): the reference is dropped even if no matching
	 * handler was found -- confirm callers only pass handlers that
	 * were established before.
	 */
	fstrans_mount_dtor(fmi);

	/* Only the pointer value is tested here, hp is not dereferenced. */
	return hp ? 0 : EINVAL;
}

/*
 * Check for need to copy block that is about to be written.
 *
 * Runs the cow handlers of the mount the buffer belongs to, passing
 * data_valid through to each.  Returns 0 or the error of the first
 * failing handler.  B_COWDONE is set on the buffer when all handlers
 * succeeded or when there is nothing to run.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	/* For a block device use the file system mounted on it. */
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* Wait under the lock until the change is done. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A changer may wait for us: signal under the lock. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}

#if defined(DDB)
void fstrans_dump(int);

/*
 * Print the entries owned by one lwp, one line per entry.  Idle entries
 * (no transaction, no cow run) are printed only if verbose is set.  The
 * "pid.lid" prefix is shown on the first printed line only.
 */
static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
	char prefix[9];
	struct fstrans_lwp_info *fli;

	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_self != l)
			continue;
		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
			if (! verbose)
				continue;
		}
		printf("%-8s", prefix);
		if (verbose)
			printf(" @%p", fli);
		if (fli->fli_mount == dead_rootmount)
			printf(" <dead>");
		else if (fli->fli_mount != NULL)
			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
		else
			printf(" NULL");
		if (fli->fli_alias != NULL) {
			struct mount *amp = fli->fli_alias->fli_mount;

			printf(" alias");
			if (verbose)
				printf(" @%p", fli->fli_alias);
			if (amp == NULL)
				printf(" NULL");
			else
				printf(" (%s)", amp->mnt_stat.f_mntonname);
		}
		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
			printf(" gone");
		if (fli->fli_trans_cnt == 0) {
			printf(" -");
		} else {
			switch (fli->fli_lock_type) {
			case FSTRANS_LAZY:
				printf(" lazy");
				break;
			case FSTRANS_SHARED:
				printf(" shared");
				break;
			case FSTRANS_EXCL:
				printf(" excl");
				break;
			default:
				printf(" %#x", fli->fli_lock_type);
				break;
			}
		}
		printf(" %d cow %d alias %d\n",
		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
		/* Blank the prefix for the following lines of this lwp. */
		prefix[0] = '\0';
	}
}

/*
 * Print the transaction state of one mount.  Unless verbose is set,
 * mounts without state or in state normal are skipped.
 */
static void
fstrans_print_mount(struct mount *mp, int verbose)
{
	struct fstrans_mount_info *fmi;

	fmi = mp->mnt_transinfo;
	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
		return;

	printf("%-16s ", mp->mnt_stat.f_mntonname);
	if (fmi == NULL) {
		printf("(null)\n");
		return;
	}
	switch (fmi->fmi_state) {
	case FSTRANS_NORMAL:
		printf("state normal\n");
		break;
	case FSTRANS_SUSPENDING:
		printf("state suspending\n");
		break;
	case FSTRANS_SUSPENDED:
		printf("state suspended\n");
		break;
	default:
		printf("state %#x\n", fmi->fmi_state);
		break;
	}
}

/*
 * Debugger entry point: print the fstrans entries of every lwp of every
 * process, then the state of every mount.  Output is verbose only if
 * full is exactly 1.
 */
void
fstrans_dump(int full)
{
	const struct proclist_desc *pd;
	struct proc *p;
	struct lwp *l;
	struct mount *mp;

	printf("Fstrans locks by lwp:\n");
	for (pd = proclists; pd->pd_list != NULL; pd++)
		PROCLIST_FOREACH(p, pd->pd_list)
			LIST_FOREACH(l, &p->p_lwps, l_sibling)
				fstrans_print_lwp(p, l, full == 1);

	printf("Fstrans state by mount:\n");
	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
		fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */